Home > Net >  Concatenate every two columns in dataframe
Concatenate every two columns in dataframe

Time:07-28

I have the following data frame:

X1   X2   X3   X4   X5   X6   X7
p1   H    I    K    J    K    H
p2   H    K    J    K    I    J
p3   J    K    H    I    J    K
p4   K    I    H    J    I    J

I want to create a new data frame with the column X1 and concatenate every two columns starting from X2 so the final table looks like:

X1   X2    X3    X4   
p1   HI    KJ    KH
p2   HK    JK    IJ
p3   JK    HI    JK
p4   KI    HJ    IJ

CodePudding user response:

Using mapply:

cbind(df[ 1 ],
      mapply(paste0, df[, seq(2, 7, 2)], df[, seq(3, 7, 2)]))
#   X1 X2 X4 X6
# 1 p1 HI KJ KH
# 2 p2 HK JK IJ
# 3 p3 JK HI JK
# 4 p4 KI HJ IJ

CodePudding user response:

Base R way

df=structure(list(X1 = c("p1", "p2", "p3", "p4"), X2 = c("H", "H", 
"J", "K"), X3 = c("I", "K", "K", "I"), X4 = c("K", "J", "H", 
"H"), X5 = c("J", "K", "I", "J"), X6 = c("K", "I", "J", "I"), 
    X7 = c("H", "J", "K", "J")), class = "data.frame", row.names = c(NA, 
-4L))

df1=data.frame(t(df))
df1$G=c(0,rep(1:((nrow(df1)-1)/2),each=2))

data.frame(
  t(
    aggregate(
      .~G,
      data=df1,
      paste0,
      collapse=""
    )[,-1]
  )
)

resulting in

   X1 X2 X3 X4
X1 p1 HI KJ KH
X2 p2 HK JK IJ
X3 p3 JK HI JK
X4 p4 KI HJ IJ

CodePudding user response:

While tidyr's unite is good for small datasets:

library(tidyr)

df |>
  unite("X2", X2:X3, sep = "") |>
  unite("X4", X4:X5, sep = "") |>
  unite("X6", X6:X7, sep = "")

.. we might want to explore another way for general approach. One such is to pivot to a longer format, change all odd numbered columns to the preceeding even number (using the modulo operator) and then pivot longer collapsing the strings with paste0.

library(tidyr)
library(dplyr)

df |>
    pivot_longer(-X1,
                 names_prefix = "X",
                 names_transform = as.numeric) |>
    mutate(name = if_else(name %% 2 == 1, name - 1, name)) |>
    pivot_wider(names_from = name,
                names_prefix = "X",
                values_fn = ~ paste0(., collapse = ""))

Output:

# A tibble: 4 × 4
  X1    X2    X4    X6   
  <chr> <chr> <chr> <chr>
1 p1    HI    KJ    KH   
2 p2    HK    JK    IJ   
3 p3    JK    HI    JK   
4 p4    KI    HJ    IJ   

Data:

library(readr)

df <- read_table("X1   X2   X3   X4   X5   X6   X7
p1   H    I    K    J    K    H
p2   H    K    J    K    I    J
p3   J    K    H    I    J    K
p4   K    I    H    J    I    J")

Update:

If we want to start from X3 instead you'll need to change the code in two places. First, by not pivoting two columns (-c(X1, X2)) and then by subtracting 1 from the even columns instead (name %% 2 == 0). E.g.

library(tidyr)
library(dplyr)

df |>
    pivot_longer(-c(X1, X2),
                 names_prefix = "X",
                 names_transform = as.numeric) |>
    mutate(name = if_else(name %% 2 == 0, name - 1, name)) |>
    pivot_wider(names_from = name,
                names_prefix = "X",
                values_fn = ~ paste0(., collapse = ""))

Output:

# A tibble: 4 × 5
  X1    X2    X3    X5    X7   
  <chr> <chr> <chr> <chr> <chr>
1 p1    H     IK    JK    H    
2 p2    H     KJ    KI    J    
3 p3    J     KH    IJ    K    
4 p4    K     IH    JI    J    

(There is of course no X8 to combine with here.)

CodePudding user response:

A base solution:

df2 <- df[-1]
cbind(df[1],
  lapply(
    split(as.list(df2), paste0('V', ceiling(1:ncol(df2) / 2))),
    do.call, what = paste0
  )
)

#   X1 V1 V2 V3
# 1 p1 HI KJ KH
# 2 p2 HK JK IJ
# 3 p3 JK HI JK
# 4 p4 KI HJ IJ

CodePudding user response:

An option with dplyr:

df %>%
 transmute(X1,
           across(c(seq(2, length(.), 2)), 
                  ~ paste0(., get(names(cur_data())[match(cur_column(), names(cur_data()))   1])))) %>%
 rename_with(~ paste0("X", seq_along(.)), everything())

  X1 X2 X3 X4
1 p1 HI KJ KH
2 p2 HK JK IJ
3 p3 JK HI JK
4 p4 KI HJ IJ

CodePudding user response:

A general function in Base R:

df <- data.frame(
  X1 = c("p1", "p2", "p3", "p4"),
  X2 = c("H", "H", "J", "K"),
  X3 = c("I", "K", "K", "I"),
  X4 = c("K", "J", "H", "H"),
  X5 = c("J", "K", "I", "J"),
  X6 = c("K", "I", "J", "I"), 
  X7 = c("H", "J", "K", "J")
)

catcols <- function(df, start = 1, by = 2) {
  start1 <- start - 1
  by1 <- by - 1
  n <- ncol(df)
  setNames(
    cbind(
      cbind(
        df[, seq_len(start1)],
        mapply(
          function(i) do.call(paste0, df[,i:(i   by1)]),
          seq(start, n - by1, by)
        )
      ),
      df[, c(0, (n:1)[(n - start1) %% by])]
    ),
    names(df)[1:(ceiling((n - start1)/by)   start1)]
  )
}

catcols(df, 2)
#>   X1 X2 X3 X4
#> 1 p1 HI KJ KH
#> 2 p2 HK JK IJ
#> 3 p3 JK HI JK
#> 4 p4 KI HJ IJ
catcols(df, 3)
#>   X1 X2 X3 X4 X5
#> 1 p1  H IK JK  H
#> 2 p2  H KJ KI  J
#> 3 p3  J KH IJ  K
#> 4 p4  K IH JI  J
catcols(df)
#>    X1 X2 X3 X4
#> 1 p1H IK JK  H
#> 2 p2H KJ KI  J
#> 3 p3J KH IJ  K
#> 4 p4K IH JI  J
catcols(df, 2, 3)
#>   X1  X2  X3
#> 1 p1 HIK JKH
#> 2 p2 HKJ KIJ
#> 3 p3 JKH IJK
#> 4 p4 KIH JIJ
  •  Tags:  
  • r
  • Related