I have two similar meta data of approx. 1000 records and more than 500 columns . and I want to check the consistency between two data frame. now i want to create a new data frame for which it would show all column names of df1 in first row and same with columns names of df2 in second row and also their option in column 3 and 4 respectively. and then mutate new columns to show TRUE
or FALSE
if column names and their options are matching.
basically i have to check if column names in df1 is excact with df 2 and if options in all columns of df1 is exactly matching with df2
df1 <- data.frame(ID =c("DEV2962","KTN2252","ANA2719","ITI2624","DEV2698","HRT2921",NA,"KTN2624","ANA2548","ITI2535","DEV2732","HRT2837","ERV2951","KTN2542","ANA2813","ITI2210"),
city=c("del","mum","nav","pun","bang","chen","triv","vish","del","mum","bang","vish","bhop","kol","noi","gurg"),
Name= c("dev,akash","singh,rahul","abbas,salman","lal,ram","singh,nkunj","garg,prabal","ali,sanu","singh,kunal","tomar,lakhan","thakur,praveen","ali,sarman","khan,zuber","singh,giriraj","sharma,lokesh","pawar,pooja","sharma,nikita"),
gender =c("m","f","m","f","m","m","m","m","m","m","m","f","m","f","m","m"))
df2 <- data.frame(ID =c("DEV2962","KTN2152","ANA2719","ITs2624","DEV2698","HRT2921",NA,"KTN2624","ANA2548","ITI2535","DEV2732","HRT2837","ERV2951","KTN2542","ANA2813","ITI2210"),
city=c("del","MUm"," nav","pun","bang","chen"," ddgy ","vish","del","mum","bang","vish","bhol","nhus","huay","gurg"),
Name= c("dev","singh,rahul","abbas,salman","lal,ram","singh,nkunj","huna,ghalak","khan,fhalt","singh,kunal","tomar,lakhan","thakur,praveen","ali,sarman","khan,zuber","singh,giriraj","sharma,lokesh","pawar,pooja","sharma,nikita"),
gender =c("m","f","m","f","m","m","m","male","m","male","m","f","m","f","m","m"))
df <- select(df1,matches("^[A-Z]"))
comp <- do.call(rbind, lapply(df[, 4:ncol(df)], function(option) as.data.frame(table(option))))
comp$variable <- gsub("[.](.*)","", rownames(comp))
rownames(comp) <- NULL
comp <- comp[, c(3,1,2)]
comp <- comp[order(-comp$Freq), ]
I am unable to attach the file but i have showed the pic below the required output should look like.
The output should be look like .
CodePudding user response:
- Bring both dataframes in long format with
pivot_longer
cbind
them todf_result
mutate
with anifelse
statement if the columns match
library(dplyr)
library(tidyr)
df1_long <- df1 %>%
pivot_longer(
cols = everything(),
names_to = "Names_df1",
values_to = "options_df1"
) %>%
arrange(Names_df1)
df2_long <- df2 %>%
pivot_longer(
cols = everything(),
names_to = "Names_df2",
values_to = "options_df2"
) %>%
arrange(Names_df2)
df_result <- cbind(df1_long, df2_long) %>%
mutate(Names_matching = ifelse(Names_df1==Names_df2, TRUE, FALSE),
Options_matching = ifelse(options_df1==options_df2, TRUE, FALSE)) %>%
arrange(factor(Names_df1, levels = c("ID", "city", "Name", "gender")))
output:
Names_df1 options_df1 Names_df2 options_df2 Names_matching Options_matching
1 ID DEV2962 ID DEV2962 TRUE TRUE
2 ID KTN2252 ID KTN2152 TRUE FALSE
3 ID ANA2719 ID ANA2719 TRUE TRUE
4 ID ITI2624 ID ITs2624 TRUE FALSE
5 ID DEV2698 ID DEV2698 TRUE TRUE
6 ID HRT2921 ID HRT2921 TRUE TRUE
7 ID <NA> ID <NA> TRUE NA
8 ID KTN2624 ID KTN2624 TRUE TRUE
9 ID ANA2548 ID ANA2548 TRUE TRUE
10 ID ITI2535 ID ITI2535 TRUE TRUE
11 ID DEV2732 ID DEV2732 TRUE TRUE
12 ID HRT2837 ID HRT2837 TRUE TRUE
13 ID ERV2951 ID ERV2951 TRUE TRUE
14 ID KTN2542 ID KTN2542 TRUE TRUE
15 ID ANA2813 ID ANA2813 TRUE TRUE
16 ID ITI2210 ID ITI2210 TRUE TRUE
17 city del city del TRUE TRUE
18 city mum city MUm TRUE FALSE
19 city nav city nav TRUE FALSE
20 city pun city pun TRUE TRUE
21 city bang city bang TRUE TRUE
22 city chen city chen TRUE TRUE
23 city triv city ddgy TRUE FALSE
24 city vish city vish TRUE TRUE
25 city del city del TRUE TRUE
26 city mum city mum TRUE TRUE
27 city bang city bang TRUE TRUE
28 city vish city vish TRUE TRUE
29 city bhop city bhol TRUE FALSE
30 city kol city nhus TRUE FALSE
31 city noi city huay TRUE FALSE
32 city gurg city gurg TRUE TRUE
33 Name dev,akash Name dev TRUE FALSE
34 Name singh,rahul Name singh,rahul TRUE TRUE
35 Name abbas,salman Name abbas,salman TRUE TRUE
36 Name lal,ram Name lal,ram TRUE TRUE
37 Name singh,nkunj Name singh,nkunj TRUE TRUE
38 Name garg,prabal Name huna,ghalak TRUE FALSE
39 Name ali,sanu Name khan,fhalt TRUE FALSE
40 Name singh,kunal Name singh,kunal TRUE TRUE
41 Name tomar,lakhan Name tomar,lakhan TRUE TRUE
42 Name thakur,praveen Name thakur,praveen TRUE TRUE
43 Name ali,sarman Name ali,sarman TRUE TRUE
44 Name khan,zuber Name khan,zuber TRUE TRUE
45 Name singh,giriraj Name singh,giriraj TRUE TRUE
46 Name sharma,lokesh Name sharma,lokesh TRUE TRUE
47 Name pawar,pooja Name pawar,pooja TRUE TRUE
48 Name sharma,nikita Name sharma,nikita TRUE TRUE
49 gender m gender m TRUE TRUE
50 gender f gender f TRUE TRUE
51 gender m gender m TRUE TRUE
52 gender f gender f TRUE TRUE
53 gender m gender m TRUE TRUE
54 gender m gender m TRUE TRUE
55 gender m gender m TRUE TRUE
56 gender m gender male TRUE FALSE
57 gender m gender m TRUE TRUE
58 gender m gender male TRUE FALSE
59 gender m gender m TRUE TRUE
60 gender f gender f TRUE TRUE
61 gender m gender m TRUE TRUE
62 gender f gender f TRUE TRUE
63 gender m gender m TRUE TRUE
64 gender m gender m TRUE TRUE
CodePudding user response:
A tidyverse option similar to that of Tarjae.
library(dplyr)
library(tidyr)
one <- pivot_longer(df1, everything()) %>%
rename(names_df1 = name, options_df1 = value)
two <- pivot_longer(df2, everything()) %>%
rename(names_df2 = name, options_df2 = value)
one %>%
bind_cols(two) %>%
mutate(names_matching = names_df1 == names_df2,
options_matching = options_df1 == options_df2) %>%
arrange(names_df1, names_df2)
# # A tibble: 64 x 6
# names_df1 options_df1 names_df2 options_df2 names_matching options_matching
# <chr> <chr> <chr> <chr> <lgl> <lgl>
# 1 city del city "del" TRUE TRUE
# 2 city mum city "MUm" TRUE FALSE
# 3 city nav city " nav" TRUE FALSE
# 4 city pun city "pun" TRUE TRUE
# 5 city bang city "bang" TRUE TRUE
# 6 city chen city "chen" TRUE TRUE
# 7 city triv city " ddgy " TRUE FALSE
# 8 city vish city "vish" TRUE TRUE
# 9 city del city "del" TRUE TRUE