Home > Software design >  comparing the names and options of two similar meta da
comparing the names and options of two similar meta da

Time:09-29

I have two similar meta data of approx. 1000 records and more than 500 columns . and I want to check the consistency between two data frame. now i want to create a new data frame for which it would show all column names of df1 in first row and same with columns names of df2 in second row and also their option in column 3 and 4 respectively. and then mutate new columns to show TRUE or FALSE if column names and their options are matching.

basically i have to check if column names in df1 is excact with df 2 and if options in all columns of df1 is exactly matching with df2

df1 <- data.frame(ID =c("DEV2962","KTN2252","ANA2719","ITI2624","DEV2698","HRT2921",NA,"KTN2624","ANA2548","ITI2535","DEV2732","HRT2837","ERV2951","KTN2542","ANA2813","ITI2210"),
                  city=c("del","mum","nav","pun","bang","chen","triv","vish","del","mum","bang","vish","bhop","kol","noi","gurg"),
                  Name= c("dev,akash","singh,rahul","abbas,salman","lal,ram","singh,nkunj","garg,prabal","ali,sanu","singh,kunal","tomar,lakhan","thakur,praveen","ali,sarman","khan,zuber","singh,giriraj","sharma,lokesh","pawar,pooja","sharma,nikita"),
                  gender =c("m","f","m","f","m","m","m","m","m","m","m","f","m","f","m","m"))

df2 <- data.frame(ID =c("DEV2962","KTN2152","ANA2719","ITs2624","DEV2698","HRT2921",NA,"KTN2624","ANA2548","ITI2535","DEV2732","HRT2837","ERV2951","KTN2542","ANA2813","ITI2210"),
                  city=c("del","MUm"," nav","pun","bang","chen"," ddgy ","vish","del","mum","bang","vish","bhol","nhus","huay","gurg"),
                  Name= c("dev","singh,rahul","abbas,salman","lal,ram","singh,nkunj","huna,ghalak","khan,fhalt","singh,kunal","tomar,lakhan","thakur,praveen","ali,sarman","khan,zuber","singh,giriraj","sharma,lokesh","pawar,pooja","sharma,nikita"),
                  gender =c("m","f","m","f","m","m","m","male","m","male","m","f","m","f","m","m"))


df <- select(df1,matches("^[A-Z]"))
comp <- do.call(rbind, lapply(df[, 4:ncol(df)], function(option) as.data.frame(table(option)))) 
comp$variable <- gsub("[.](.*)","", rownames(comp)) 
rownames(comp) <- NULL
comp <- comp[, c(3,1,2)] 
comp <- comp[order(-comp$Freq), ]

I am unable to attach the file but i have showed the pic below the required output should look like.

The output should be look like . enter image description here

enter image description here

CodePudding user response:

  1. Bring both dataframes in long format with pivot_longer
  2. cbind them to df_result
  3. mutate with an ifelse statement if the columns match
library(dplyr)
library(tidyr)

df1_long <- df1 %>% 
  pivot_longer(
    cols = everything(),
    names_to = "Names_df1",
    values_to = "options_df1"
  ) %>%   
  arrange(Names_df1)

df2_long <- df2 %>% 
  pivot_longer(
    cols = everything(),
    names_to = "Names_df2",
    values_to = "options_df2"
  ) %>%   
  arrange(Names_df2)

df_result <- cbind(df1_long, df2_long) %>% 
  mutate(Names_matching = ifelse(Names_df1==Names_df2, TRUE, FALSE),
         Options_matching = ifelse(options_df1==options_df2, TRUE, FALSE)) %>% 
  arrange(factor(Names_df1, levels = c("ID", "city", "Name", "gender")))

output:

Names_df1    options_df1 Names_df2    options_df2 Names_matching Options_matching
1         ID        DEV2962        ID        DEV2962           TRUE             TRUE
2         ID        KTN2252        ID        KTN2152           TRUE            FALSE
3         ID        ANA2719        ID        ANA2719           TRUE             TRUE
4         ID        ITI2624        ID        ITs2624           TRUE            FALSE
5         ID        DEV2698        ID        DEV2698           TRUE             TRUE
6         ID        HRT2921        ID        HRT2921           TRUE             TRUE
7         ID           <NA>        ID           <NA>           TRUE               NA
8         ID        KTN2624        ID        KTN2624           TRUE             TRUE
9         ID        ANA2548        ID        ANA2548           TRUE             TRUE
10        ID        ITI2535        ID        ITI2535           TRUE             TRUE
11        ID        DEV2732        ID        DEV2732           TRUE             TRUE
12        ID        HRT2837        ID        HRT2837           TRUE             TRUE
13        ID        ERV2951        ID        ERV2951           TRUE             TRUE
14        ID        KTN2542        ID        KTN2542           TRUE             TRUE
15        ID        ANA2813        ID        ANA2813           TRUE             TRUE
16        ID        ITI2210        ID        ITI2210           TRUE             TRUE
17      city            del      city            del           TRUE             TRUE
18      city            mum      city            MUm           TRUE            FALSE
19      city            nav      city            nav           TRUE            FALSE
20      city            pun      city            pun           TRUE             TRUE
21      city           bang      city           bang           TRUE             TRUE
22      city           chen      city           chen           TRUE             TRUE
23      city           triv      city          ddgy            TRUE            FALSE
24      city           vish      city           vish           TRUE             TRUE
25      city            del      city            del           TRUE             TRUE
26      city            mum      city            mum           TRUE             TRUE
27      city           bang      city           bang           TRUE             TRUE
28      city           vish      city           vish           TRUE             TRUE
29      city           bhop      city           bhol           TRUE            FALSE
30      city            kol      city           nhus           TRUE            FALSE
31      city            noi      city           huay           TRUE            FALSE
32      city           gurg      city           gurg           TRUE             TRUE
33      Name      dev,akash      Name            dev           TRUE            FALSE
34      Name    singh,rahul      Name    singh,rahul           TRUE             TRUE
35      Name   abbas,salman      Name   abbas,salman           TRUE             TRUE
36      Name        lal,ram      Name        lal,ram           TRUE             TRUE
37      Name    singh,nkunj      Name    singh,nkunj           TRUE             TRUE
38      Name    garg,prabal      Name    huna,ghalak           TRUE            FALSE
39      Name       ali,sanu      Name     khan,fhalt           TRUE            FALSE
40      Name    singh,kunal      Name    singh,kunal           TRUE             TRUE
41      Name   tomar,lakhan      Name   tomar,lakhan           TRUE             TRUE
42      Name thakur,praveen      Name thakur,praveen           TRUE             TRUE
43      Name     ali,sarman      Name     ali,sarman           TRUE             TRUE
44      Name     khan,zuber      Name     khan,zuber           TRUE             TRUE
45      Name  singh,giriraj      Name  singh,giriraj           TRUE             TRUE
46      Name  sharma,lokesh      Name  sharma,lokesh           TRUE             TRUE
47      Name    pawar,pooja      Name    pawar,pooja           TRUE             TRUE
48      Name  sharma,nikita      Name  sharma,nikita           TRUE             TRUE
49    gender              m    gender              m           TRUE             TRUE
50    gender              f    gender              f           TRUE             TRUE
51    gender              m    gender              m           TRUE             TRUE
52    gender              f    gender              f           TRUE             TRUE
53    gender              m    gender              m           TRUE             TRUE
54    gender              m    gender              m           TRUE             TRUE
55    gender              m    gender              m           TRUE             TRUE
56    gender              m    gender           male           TRUE            FALSE
57    gender              m    gender              m           TRUE             TRUE
58    gender              m    gender           male           TRUE            FALSE
59    gender              m    gender              m           TRUE             TRUE
60    gender              f    gender              f           TRUE             TRUE
61    gender              m    gender              m           TRUE             TRUE
62    gender              f    gender              f           TRUE             TRUE
63    gender              m    gender              m           TRUE             TRUE
64    gender              m    gender              m           TRUE             TRUE

CodePudding user response:

A tidyverse option similar to that of Tarjae.

library(dplyr)
library(tidyr)

one <-  pivot_longer(df1, everything()) %>%
  rename(names_df1 = name, options_df1 = value)

two <-  pivot_longer(df2, everything()) %>%
  rename(names_df2 = name, options_df2 = value)

one %>%
  bind_cols(two) %>%
  mutate(names_matching = names_df1 == names_df2,
         options_matching = options_df1 == options_df2) %>%
  arrange(names_df1, names_df2)

# # A tibble: 64 x 6
# names_df1 options_df1 names_df2 options_df2 names_matching options_matching
#   <chr>     <chr>       <chr>      <chr>      <lgl>          <lgl>           
# 1 city      del         city      "del"       TRUE           TRUE            
# 2 city      mum         city      "MUm"       TRUE           FALSE           
# 3 city      nav         city      " nav"      TRUE           FALSE           
# 4 city      pun         city      "pun"       TRUE           TRUE            
# 5 city      bang        city      "bang"      TRUE           TRUE            
# 6 city      chen        city      "chen"      TRUE           TRUE            
# 7 city      triv        city      " ddgy "    TRUE           FALSE           
# 8 city      vish        city      "vish"      TRUE           TRUE            
# 9 city      del         city      "del"       TRUE           TRUE
  • Related