Home > front end >  Collapse two dataframes and make an array strcuture
Collapse two dataframes and make an array strcuture

Time:10-23

data_1 <- data.frame(V1 = c("123","345","546","890"), V2 = c("J10","K12","R34","J17"),V3=c("N12","M34","W57","Q90"))

data_1 | V1 | V2 | V3 | |:---- |:------:| -----:| | 123 | J10 | N12 | | 345 | K12 | M34 | | 546 | N12 | R34 | | 890 | J17 | J10 |

data_2 <-  data.frame(V1 = c("123","345","546","890"), V2 = c("01/02/90","10/04/21","09/03/95","29/03/90"),V3=c("28/07/86","16/02/87","17/10/56","14/01/60"))

data_2 | V1 | V2 | V3 | |:---- |:------:| -----:| | 123 | 01/02/90 | 28/07/86 | | 345 | 10/04/21 | 16/02/87 | | 546 | 09/03/95 | 17/10/56 | | 890 | 29/03/90 | 14/01/60 |

I would like to have a common first column and collapse the data into a array structure

Result: | V1 | J10 | N12 | K12 | M34 | R34 | J17 | |:---- |:----:| :----:| :----: | :----: | :----: | ----:| | 123 | 01/02/90 | 28/07/86 || | | | | 345 | | |10/04/21|16/02/87 | | | | 546 | | 09/03/95 || |17/10/56 | | | 890 |14/01/60 | || | | 29/03/90 |

CodePudding user response:

We may reshape to 'long' format, bind the datasets and then reshape back to 'wide'

library(dplyr)
library(tidyr)
bind_cols(data_1 %>% 
       pivot_longer(cols = -V1), 
          data_2 %>% 
              pivot_longer(cols = -V1) %>% 
           select(-V1)) %>%
      select(-starts_with('name')) %>% 
   pivot_wider(names_from = value...3, values_from = value...5)

-output

# A tibble: 4 × 9
  V1    J10      N12      K12      M34      R34      W57      J17      Q90     
  <chr> <chr>    <chr>    <chr>    <chr>    <chr>    <chr>    <chr>    <chr>   
1 123   01/02/90 28/07/86 <NA>     <NA>     <NA>     <NA>     <NA>     <NA>    
2 345   <NA>     <NA>     10/04/21 16/02/87 <NA>     <NA>     <NA>     <NA>    
3 546   <NA>     <NA>     <NA>     <NA>     09/03/95 17/10/56 <NA>     <NA>    
4 890   <NA>     <NA>     <NA>     <NA>     <NA>     <NA>     29/03/90 14/01/60

CodePudding user response:

data_1 <- data.frame(V1 = c("123","345","546","890"), V2 = c("J10","K12","R34","J17"),V3=c("N12","M34","W57","Q90"))
data_2 <- data.frame(V1 = c("123","345","546","890"), V2 = c("01/02/90","10/04/21","09/03/95","29/03/90"),V3=c("28/07/86","16/02/87","17/10/56","14/01/60"))

var_1 <- data.frame( V1= data_1$V1, VAR = data_1$V2, stringsAsFactors = F)
var_2 <- data.frame( V1= data_1$V1, VAR = data_1$V3, stringsAsFactors = F)
var <- bind_rows(var_1,var_2)

date_1 <- data.frame( V1= data_2$V1, DATE = data_2$V2, stringsAsFactors = F)
date_2 <- data.frame( V1= data_2$V1, DATE = data_2$V2, stringsAsFactors = F)
date <- bind_rows(date_1,date_2)

result <- left_join(var, date) %>% mutate_all(as.character) %>% distinct()

result <- result %>% pivot_wider(names_from = VAR, values_from = DATE)
result

V1    J10      K12      R34      J17      N12      M34      W57      Q90     
  <chr> <chr>    <chr>    <chr>    <chr>    <chr>    <chr>    <chr>    <chr>   
1 123   01/02/90 NA       NA       NA       01/02/90 NA       NA       NA      
2 345   NA       10/04/21 NA       NA       NA       10/04/21 NA       NA      
3 546   NA       NA       09/03/95 NA       NA       NA       09/03/95 NA      
4 890   NA       NA       NA       29/03/90 NA       NA       NA       29/03/90
  • Related