Home > Blockchain >  SAS Question to remove row based on Group
SAS Question to remove row based on Group

Time:07-08

I have 2 datasets

I need help with SAS query

Table 1
ID      Prin    Int     Tot     TranType    TranDt
101     100     15      115     P           1/1/2021
101     100     15      115     P           1/1/2021
102     500     15      515     P           5/1/2021
103     300     15      315     P           6/1/2021
104     250     10      260     P           8/1/2021
105     350     10      360     P           9/1/2021
105     300     10      310     P           10/1/2021
Table 2
ID      Prin    Int     Tot     TranType    TranDt
101     100     15      115     R           1/15/2021
103     300     15      315     R           6/2/2021
105     350     10      360     R           9/5/2021
105     300     10      310     R           10/5/2021

Final Output

Table 3
ID      Prin    Int     Tot     TranType    TranDt     Cat
101     100     15      115     P           1/1/2021   Rev
101     100     15      115     P           1/1/2021   Good
102     500     15      515     P           5/1/2021   Good
103     300     15      315     P           6/1/2021   Rev
104     250     10      260     P           8/1/2021   Good
105     350     10      360     P           9/1/2021   Rev
105     300     10      310     P           10/1/2021  Rev

I need to combine table 1 and table 2 and if combination of (ID Prin Int Tot) and TranType= P matches Table 2 (ID Prin Int Tot) and TranType= R then mark that row as Rev but 2nd row if duplicate should be marked as Good. Output is shown in Table 3. TranDt of Table 2 is good as long as it is >= Table1.TranDt.

I would appreciate any help on this query.

I tried grouping but I can't make it work.

CodePudding user response:

Lengthy but produce the desired output.

proc sql;
    create table stage1 as
        select t1.id,
            t1.prin,
            t1.int,
            t1.tot,
            t1.trantype,
            t1.trandt,
            t2.trantype as trtype
        from table1 t1
            left join
                table2 t2
                on t1.id = t2.id
                and t1.prin = t2.prin
                and t1.int = t2.int
                and t1.tot = t2.tot
    ;
quit;

data stage2;
    set stage1;
    by id prin int tot notsorted;
    if first.tot then group 1;
run;

proc sort data=stage2 out=stage3; by id prin int tot group; quit;

data want;
    length cat $4.;

    do until (last.tot);
        set stage3;
        by id prin int tot group;

        if first.group then
            do;
                if trantype = 'P' and trtype = 'R' then
                    Cat = 'Rev';
                else Cat = 'Good';
            end;

        if last.group and not first.group then
            do;
                if trantype = 'P' and trtype = 'R' then
                    Cat = 'Good';
            end;

        output;
    end;
    drop trtype group;
run;

                    enter image description here

CodePudding user response:

proc sort data=a1; by ID Prin Int Tot; run;
proc sort data=a2(rename=(TranType=TranType2 TranDt=TranDt2)); by ID Prin Int 
 Tot; run;

data a12(drop=TranType2 TranDt2 t); merge a1 a2; by ID Prin Int Tot;
 if TranDt<=TranDt2 or TranDt2='';
 if TranType='P' and TranType2='R' then cat='Rev ';
 if first.Tot then t=0; t 1;
 if t>1 or TranDt2='' then cat='Good';
run;
  • Related