Home > Blockchain >  change date in column 2 where numbers do not match column 3
change date in column 2 where numbers do not match column 3

Time:06-23

I have tried this before but need to change direction. I was changing the numbers but I found I need to change the date after the numbers differ. My text has 3 columns and the 2nd and 3rd are similar but occasionally there is a 1 - 4 digit number that do not match. I have added a sample so you can see what I am talking about. I have added my substr command but that was to change the numbers and reality I need to make the date after those numbers 220102 or a user entered date. When the number in bold in column 2 do not match column 3 then the date needs to change to the user input or 220102 in column 2 after the numbers in bold whichever is easiest. I have a script for user input date change I could possibly point it to. Thanks in advance.

126731E             0000000033220422    000003312634460-0003-1
134180D             0000000052220419    000006012622399-0003-1
 134307-004K         0000000016220420    000001612635621-0003-0
 141014-001B         0000000040220419    000004012632585-0003-0
134886C             0000000034220419    000007612620821-0003-1
 123899B             0000000050220412    000005012635007-0003-1
121543C             0000000059220419    000007512621925-0003-1
 118238C             0000000070220419    000007012632584-0003-0
121852A             0000000122220419    000013512622569-0003-1
 123124A             0000000141220419    000014112631954-0003-0
 123157C             0000000344220422    000034412634707-0003-1

sample text columns

BEGIN{
}
{
    part1= substr($0,1,40)
    part2= substr($0,49)
    qty= substr($2,4,7)
    print part1""qty""part2
}
END{
}

Expected outcome is;

126731E             0000000033220422    000003312634460-0003-1
134180D             0000000052220102    000006012622399-0003-1
 134307-004K         0000000016220420    000001612635621-0003-0
 141014-001B         0000000040220419    000004012632585-0003-0
134886C             0000000034220102    000007612620821-0003-1
 123899B             0000000050220412    000005012635007-0003-1
121543C             0000000059220102    000007512621925-0003-1
 118238C             0000000070220419    000007012632584-0003-0
121852A             0000000122220102    000013512622569-0003-1
 123124A             0000000141220419    000014112631954-0003-0
 123157C             0000000344220422    000034412634707-0003-1

CodePudding user response:

Using sed

$ cat script.sed
/[^ ]* \ 0\ \([1-9]\)\([0-9]\{1,3\}\)\([0-9]\{6\}\).* 0\ \1\?\2/!{ 
    s/[0-9]\{6\} /220102 /
}
$ sed -f script.sed input_file
126731E             0000000033220422    000003312634460-0003-1
134180D             0000000052220102    000006012622399-0003-1
 134307-004K         0000000016220420    000001612635621-0003-0
 141014-001B         0000000040220419    000004012632585-0003-0
134886C             0000000034220102    000007612620821-0003-1
 123899B             0000000050220412    000005012635007-0003-1
121543C             0000000059220102    000007512621925-0003-1
 118238C             0000000070220419    000007012632584-0003-0
121852A             0000000122220102    000013512622569-0003-1
 123124A             0000000141220419    000014112631954-0003-0
 123157C             0000000344220422    000034412634707-0003-1

CodePudding user response:

Using GNU awk for the 3rd arg to match():

$ cat tst.awk
BEGIN { rep = (rep == "" ? "220102" : rep) }
match($0,/((\s*\S \s \S{3})(\S{7}))(\S )((\s )(\S{7})(.*))/,a) {
    if ( a[3] != a[7] ) {
        $0 = a[1] rep a[5]
    }
}
{ print }

$ awk -f tst.awk file
126731E             0000000033220422    000003312634460-0003-1
134180D             0000000052220102    000006012622399-0003-1
 134307-004K         0000000016220420    000001612635621-0003-0
 141014-001B         0000000040220419    000004012632585-0003-0
134886C             0000000034220102    000007612620821-0003-1
 123899B             0000000050220412    000005012635007-0003-1
121543C             0000000059220102    000007512621925-0003-1
 118238C             0000000070220419    000007012632584-0003-0
121852A             0000000122220102    000013512622569-0003-1
 123124A             0000000141220419    000014112631954-0003-0
 123157C             0000000344220422    000034412634707-0003-1

$ awk -v rep='111111' -f tst.awk file
126731E             0000000033220422    000003312634460-0003-1
134180D             0000000052111111    000006012622399-0003-1
 134307-004K         0000000016220420    000001612635621-0003-0
 141014-001B         0000000040220419    000004012632585-0003-0
134886C             0000000034111111    000007612620821-0003-1
 123899B             0000000050220412    000005012635007-0003-1
121543C             0000000059111111    000007512621925-0003-1
 118238C             0000000070220419    000007012632584-0003-0
121852A             0000000122111111    000013512622569-0003-1
 123124A             0000000141220419    000014112631954-0003-0
 123157C             0000000344220422    000034412634707-0003-1

CodePudding user response:

  • Assuming the initial zeroes in second column stays at 7 or 8 consecutively and doesn't increase or decrease

cat script

{
    c2=$2
    c3=$3
    sub("0 ","",c2)
    sub("0 ","",c3)
    sub("-.*","",c3)
    if (length(c2) == 8) {
        c2_value=substr(c2,1,2)
    } else if (length(c2) == 9) {
        c2_value=substr(c2,1,3)
    }

    if (length(c3) == 10) {
        c3_value=substr(c3,1,2)
    } else if (length(c3) == 11) {
        c3_value=substr(c3,1,3)
    }

    if(c2_value != c3_value) {
        sub("[1-9].*$","",$2)
        date="220102"
        print $1"             "$2 c2_value date"   "$3
    } else {
        print $0
    }
}

cat data

126731E             0000000033220422    000003312634460-0003-1
134180D             0000000052220419    000006012622399-0003-1
 134307-004K         0000000016220420    000001612635621-0003-0
 141014-001B         0000000040220419    000004012632585-0003-0
134886C             0000000034220419    000007612620821-0003-1
 123899B             0000000050220412    000005012635007-0003-1
121543C             0000000059220419    000007512621925-0003-1
 118238C             0000000070220419    000007012632584-0003-0
121852A             0000000122220419    000013512622569-0003-1
 123124A             0000000141220419    000014112631954-0003-0
 123157C             0000000344220422    000034412634707-0003-1

awk -f script data

126731E             0000000033220422    000003312634460-0003-1
134180D             0000000052220102   000006012622399-0003-1
 134307-004K         0000000016220420    000001612635621-0003-0
 141014-001B         0000000040220419    000004012632585-0003-0
134886C             0000000034220102   000007612620821-0003-1
 123899B             0000000050220412    000005012635007-0003-1
121543C             0000000059220102   000007512621925-0003-1
 118238C             0000000070220419    000007012632584-0003-0
121852A             0000000122220102   000013512622569-0003-1
 123124A             0000000141220419    000014112631954-0003-0
 123157C             0000000344220422    000034412634707-0003-1

  • Related