Home > Back-end >  Using duplicate and if_else to mark all involved rows in third column
Using duplicate and if_else to mark all involved rows in third column

Time:10-05

I have a dataset like this:

structure(list(var = c("round", "childid_public", "form.region.cen", 
"form.district.cen", "form.district.cen", "form.district.cen", 
"form.district.cen", "form.district.cen", "form.district.cen", 
"form.district.cen", "form.district.cen", "form.hhconsent", "form.hhreasonrefused", 
"form.hhreasonrefusedother", "form.noofchildren", "form.noofchildren", 
"form.noofchildren", "form.noofchildren", "form.noofchildren", 
"form.noofchildren", "form.noofchildren", "form.noofchildren", 
"form.noofchildren", "form.noofchildren", "form.noofchildren", 
"form.childrenrepeat.childrengroup.childsex", "form.childrenrepeat.childrengroup.childsex", 
"form.childrenrepeat.childrengroup.ageinmonths", "form.childrenrepeat.childrengroup.dobsource", 
"form.childrenrepeat.childrengroup.dobsource", "form.childrenrepeat.childrengroup.dobsource", 
"form.childrenrepeat.childrengroup.dobsource", "form.childrenrepeat.childrengroup.dobsource", 
"form.childrenrepeat.childrengroup.dobsourceotherdoc", "form.childrenrepeat.childrengroup.dobsourcewho", 
"form.childrenrepeat.childrengroup.dobsourcewho", "form.childrenrepeat.childrengroup.dobsourcewho", 
"form.childrenrepeat.childrengroup.dobsourcewho", "form.childrenrepeat.childrengroup.dobsourcewho", 
"form.childrenrepeat.childrengroup.dobsourcewhoother", "form.treatment_letter_1_11.cen", 
"form.treatment_letter_1_11.cen", "form.treatment_letter_1_11.cen", 
"form.treatment_letter_1_11.cen", "form.treatment_letter_1_11.cen", 
"form.treatment_letter_1_11.cen", "form.treatment_letter_1_11.cen", 
"form.treatment_letter_1_11.cen", "form.treatment_letter_1_11.cen", 
"form.treatment_letter_1_11.cen", "form.treatment_letter_1_11.cen", 
"form.treatment_letter_1_11.cen", "form.treatment_letter_1_11.cen", 
"form.treatment_letter_12_59.cen", "form.treatment_letter_12_59.cen", 
"form.treatment_letter_12_59.cen", "form.treatment_letter_12_59.cen", 
"form.treatment_letter_12_59.cen", "form.treatment_letter_12_59.cen", 
"form.treatment_letter_12_59.cen", "form.treatment_letter_12_59.cen", 
"form.treatment_letter_12_59.cen", "form.treatment_letter_12_59.cen", 
"form.treatment_letter_12_59.cen", "form.treatment_letter_12_59.cen", 
"form.treatment_letter_12_59.cen", "form.hhnumber", "form.hohsex", 
"form.hohsex", "form.childrenrepeat.childrengroup.treatmentconsent", 
"form.childrenrepeat.childrengroup.treatmentconsent", "form.childrenrepeat.childrengroup.treatmentconsent", 
"form.childrenrepeat.childrengroup.treatmentconsent", "form.childrenrepeat.childrengroup.treatmentconsent", 
"form.childrenrepeat.childrengroup.notreatreason", "form.childrenrepeat.childrengroup.notreatreason", 
"form.childrenrepeat.childrengroup.notreatreason", "form.childrenrepeat.childrengroup.notreatreason", 
"form.childrenrepeat.childrengroup.notreatreasonother", "form.childpresent", 
"form.childpresent", "form.childpresent", "form.childreturnhome", 
"form.childreturnhome", "form.childreturnhome", "form.weight", 
"form.doseheight", "form.doseheight", "form.doseheight", "form.doseheight", 
"form.doseheight", "form.doseheight", "form.doseheight", "form.doseheight", 
"form.doseheight", "form.doseheight", "form.dose_ml", "form.isadministered", 
"form.isadministered", "form.isadministered", "form.notreatreason", 
"form.notreatreason", "form.notreatreason", "form.notreatreason", 
"round", "childid_public", "form.region.cen", "form.district", 
"form.district", "form.district", "form.district", "form.district", 
"form.district", "form.district", "form.district", "form.hhconsent", 
"form.hhreasonrefused", "form.hhreasonrefusedother", "num_child", 
"num_child", "num_child", "num_child", "num_child", "num_child", 
"num_child", "num_child", "num_child", "num_child", "num_child", 
"childsex", "childsex", "ageinmonths", "dobsource", "dobsource", 
"dobsource", "dobsource", "dobsource", "dobsourceotherdoc", "dobsourceotherdoc", 
"dobsourceotherdoc", "dobsourceotherdoc", "dobsourceotherdoc", 
"dobsourceotherdoc", "dobsourceotherdoc", "dobsourceotherdoc", 
"dobsourcewho", "dobsourcewho", "dobsourcewho", "dobsourcewho", 
"dobsourcewho", "dobsourcewhoother", "dobsourcewhoother", "dobsourcewhoother", 
"dobsourcewhoother", "dobsourcewhoother", "dobsourcewhoother", 
"dobsourcewhoother", "dobsourcewhoother", "dobsourcewhoother", 
"dobsourcewhoother", "dobsourcewhoother", "dobsourcewhoother", 
"dobsourcewhoother", "dobsourcewhoother", "dobsourcewhoother", 
"dobsourcewhoother", "dobsourcewhoother", "dobsourcewhoother", 
"dobsourcewhoother", "dobsourcewhoother", "dobsourcewhoother", 
"dobsourcewhoother", "form.treatment_letter_1_11.cen", "form.treatment_letter_1_11.cen", 
"form.treatment_letter_1_11.cen", "form.treatment_letter_1_11.cen", 
"form.treatment_letter_1_11.cen", "form.treatment_letter_1_11.cen", 
"form.treatment_letter_1_11.cen", "form.treatment_letter_1_11.cen", 
"form.treatment_letter_1_11.cen", "form.treatment_letter_1_11.cen", 
"form.treatment_letter_1_11.cen", "form.treatment_letter_1_11.cen", 
"form.treatment_letter_12_59.cen", "form.treatment_letter_12_59.cen", 
"form.treatment_letter_12_59.cen", "form.treatment_letter_12_59.cen", 
"form.treatment_letter_12_59.cen", "form.treatment_letter_12_59.cen", 
"form.treatment_letter_12_59.cen", "form.treatment_letter_12_59.cen", 
"form.treatment_letter_12_59.cen", "form.treatment_letter_12_59.cen", 
"form.treatment_letter_12_59.cen", "form.treatment_letter_12_59.cen", 
"form.hohsex", "form.hohsex", "form.hohsex", "form.hohage"), 
    mappedTerm = c("Round 1", ":::UNDEF:::", "Dosso", "Boboye", 
    "Dioundiou", "Dogondoutchi", "Dosso", "Falmey", "Gaya", "Loga", 
    "Tibiri (Doutchi)", "Yes", ":::UNDEF:::", ":::UNDEF:::", 
    "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "14", 
    "Male", "Female", ":::UNDEF:::", "Health record", "Birth certificate", 
    "Other document", ":::UNDEF:::", "Told by someone", ":::UNDEF:::", 
    "Mother", "Father", "Neighbor", "Others", ":::UNDEF:::", 
    ":::UNDEF:::", ":::UNDEF:::", "b", "c", "f", "g", "k", "m", 
    "q", "s", "t", "v", "x", "y", "A", "D", "E", "H", "J", "L", 
    "N", ":::UNDEF:::", "P", "R", "U", "W", "Z", ":::UNDEF:::", 
    "Male", "Female", "No", "Yes", "AMR grappe", "M54 grappe", 
    ":::UNDEF:::", "Allergy", "Parent refusal", "Other", ":::UNDEF:::", 
    ":::UNDEF:::", "No", "Yes", ":::UNDEF:::", "Less than a week", 
    "More than a week", ":::UNDEF:::", ":::UNDEF:::", "10", "12", 
    "14", "16", "2", "25", "4", "6", "8", ":::UNDEF:::", ":::UNDEF:::", 
    "No", "Yes", ":::UNDEF:::", "Allergy", "Parent refusal", 
    "Other", ":::UNDEF:::", "Round 2", ":::UNDEF:::", "Dosso", 
    "Boboye", "Dioundiou", "Dogondoutchi", "Dosso", "Falmey", 
    "Gaya", "Loga", "Tibiri (Doutchi)", "Yes", ":::UNDEF:::", 
    ":::UNDEF:::", "1", "2", "3", "4", "5", "6", "7", "8", "9", 
    "10", "14", "Male", "Female", ":::UNDEF:::", "Father", "Mother", 
    "Neighbor", "Others", ":::UNDEF:::", ":::UNDEF:::", "Birth certificate", 
    "Notebook", "Distribution book", "Mass distribution book", 
    "Vaccination record", "Distribution card", "Vaccination card", 
    "Father", "Mother", "Neighbor", "Others", ":::UNDEF:::", 
    "Guardian", "Grand mother", "Grand mother", ":::UNDEF:::", 
    "Brother", "Aunt", "Sister in law", "Co-wife", "grand mere", 
    "Grand father", "Grand mother", "grande mere", "Grand mother", 
    "Older sister", "Grand mother", "Uncle", "Main relay", "Sister in law", 
    "Brother", "Uncle", "tante", "Uncle", "b", "c", "f", "g", 
    "k", "m", "q", "s", "t", "v", "x", "y", "A", "D", "E", "H", 
    "J", "L", "N", "P", "R", "U", "W", "Z", "Male", "Female", 
    ":::UNDEF:::", ":::UNDEF:::"), dataset = c("avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 1", 
    "avenir census and treatment data clinepi round 1", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2", "avenir census and treatment data clinepi round 2", 
    "avenir census and treatment data clinepi round 2")), class = "data.frame", row.names = c(NA, 
-200L))

Now I want to see in all duplicates values in col of "mappedTerm" given the group by each var across the different datasets. If there are duplicates value, then mark as "yes" in new col.

My code is like this:

vm_check<-vm %>%
  filter(mappedTerm!=":::UNDEF:::")%>%
  group_by(var)%>%
  mutate(rep=if_else(duplicated(mappedTerm), "yes", ""))  

But the problem is that it only marks "yes" for rest of cols not the first col. Like:

var1.  var2.  var3. rep
xxx.   abc.   df1.
xxx.   abc.   df2.   yes
xxx.   abc.   df3.   yes

instead:

var1.  var2.  var3. rep
xxx.   abc.   df1.   yes
xxx.   abc.   df2.   yes
xxx.   abc.   df3.   yes

So my questions are:

  1. How can I get the rep=yes for all duplicates rows instead of the rows with "yes" but for first row?

  2. Any better solution to get the desired output as I mentioned at begin?(Output for all duplicates values in col of "mappedTerm" given the group_by each var across the different datasets)

Thanks a lot for your help~~!

CodePudding user response:

There are no duplicates of mappedTerm within each var with the dataset you provided, so maybe I did not understand your question entirely. If there are duplicates, and I understood your question correctly, then this should work:

library(dplyr)
vm_check <-vm %>%
  filter(mappedTerm!=":::UNDEF:::")%>%
  group_by(var, mappedTerm) %>%
  mutate(rep=if_else(n()>1, "yes", "")) 

It checks for number of rows for each group of var and mappedTerm. If there are duplicates, this number is >1

edit: or, alternatively, this will return a logical vector, which returns TRUE if there is a duplicate:

vm_check <-vm %>%
  filter(mappedTerm!=":::UNDEF:::")%>%
  group_by(var, mappedTerm) %>%
  mutate(rep=n()>1) 

CodePudding user response:

I don't think that your sample data has any rows where mappedTerm is duplicated in a var group. But the issue you are facing is almost certainly because of how duplicated() works.

Try this:

vec  <- c(1,1,1,2,2,2)
duplicated(vec)
# FALSE  TRUE  TRUE FALSE  TRUE  TRUE

Note that the first time each value it appears, R has not seen it before so it is not considered duplicated. We can address this by running duplicated() backwards:

duplicated(vec, fromLast = TRUE)
# TRUE  TRUE FALSE  TRUE  TRUE FALSE

Of course, the issue now is that the last item is not considered duplicated.

Putting it all together, we can do:

duplicated(vec) | duplicated(vec, fromLast = TRUE)
# TRUE TRUE TRUE TRUE TRUE TRUE

In your case, assuming your data is dat, we can do:

dat  |>
    filter(mappedTerm!=":::UNDEF:::")  |>
    group_by(var)  |>
    mutate(
        rep = ifelse(
            (duplicated(mappedTerm) | duplicated(mappedTerm, fromLast = TRUE)), 
            "yes", 
            ""
        )
    )

However, I would prefer to test whether the number of distinct mappedTerm values are greater than the total rows per group:

dat  |>
    filter(mappedTerm!=":::UNDEF:::")  |>
    group_by(var)  |>
    mutate(
        rep = n_distinct(mappedTerm) < n()  
    )

This will give you a TRUE value every time there is more than one mappedTerm per group, otherwise it will be FALSE.

  •  Tags:  
  • r
  • Related