How to merge columns in to single column using R-CodePudding

how to convert this two column into one

    structure(list(a = c("Q1-2020", "Q1-2020", "Q1-2020", "Q2-2020", 
"Q2-2020", "Q2-2020", "Q3-2020", "Q3-2020", "Q3-2020", "Q4-2021", 
"Q4-2021", "Q4-2021"), b = c("Apr-2020", "May-2020", "Jun-2020", 
"Jul-2020", "Aug-2020", "Sep-2020", "Oct-2020", "Nov-2020", "Dec-2020", 
"Jan-2021", "Feb-2021", "Mar-2021")), row.names = c(1L, 21L, 
41L, 61L, 81L, 101L, 121L, 141L, 161L, 181L, 201L, 221L), class = "data.frame")

expected output

output = c("q1-2020", 
    "Apr-20", "May-20", "Jun-20", "q2-2020", "Jul-20", "Aug-20", 
    "Sep-20", "q3-2020", "Oct-20", "Nov-20", "Dec-20", "q4-2021", 
    "Jan-21", "Feb-21", "Mar-21")

CodePudding user response：

lapply over the unique "a"'s.

unlist(lapply(unique(dat$a), \(x) c(x, dat[dat$a == x, 'b'])))
#  [1] "Q1-2020"  "Apr-2020" "May-2020" "Jun-2020" "Q2-2020"  "Jul-2020"
#  [7] "Aug-2020" "Sep-2020" "Q3-2020"  "Oct-2020" "Nov-2020" "Dec-2020"
# [13] "Q4-2021"  "Jan-2021" "Feb-2021" "Mar-2021"

CodePudding user response：

We may replace the duplicated elements in 'a' with NA, while extracting the substring from 'b', transpose the data and remove the NAs with na.omit

out2 <- c(na.omit(c(t(transform(df1,
      a = tolower(replace(a, duplicated(a), NA)), 
      b = sub("-\\d{2}", "-", b))))))

-checking with OP's output

> identical(output, out2)
[1] TRUE

CodePudding user response：

We can make use of the regular structure of your data and just insert the unique value of df$a into df$b as in this great answer.

v <- unique(df$a)
c(rbind(v, matrix(df$b, ncol = length(v))))
#>  [1] "Q1-2020"  "Apr-2020" "May-2020" "Jun-2020" "Q2-2020"  "Jul-2020"
#>  [7] "Aug-2020" "Sep-2020" "Q3-2020"  "Oct-2020" "Nov-2020" "Dec-2020"
#> [13] "Q4-2021"  "Jan-2021" "Feb-2021" "Mar-2021"

or you can use unlist, after splitting your data frame by your repeated id column (I call this a "quasi nest"), then bind each unique id with the other column with lapply.

quasi_nest <- lapply(split(df, df$a), function(x) {
  c(unique(x$a), x$b)
}
)
unlist(quasi_nest, use.names = FALSE)
#>  [1] "Q1-2020"  "Apr-2020" "May-2020" "Jun-2020" "Q2-2020"  "Jul-2020"
#>  [7] "Aug-2020" "Sep-2020" "Q3-2020"  "Oct-2020" "Nov-2020" "Dec-2020"
#> [13] "Q4-2021"  "Jan-2021" "Feb-2021" "Mar-2021"

CodePudding user response：

We can try base R option

> c(apply(aggregate(. ~ a, df, c), 1, c))
 [1] "Q1-2020"  "Apr-2020" "May-2020" "Jun-2020" "Q2-2020"  "Jul-2020"
 [7] "Aug-2020" "Sep-2020" "Q3-2020"  "Oct-2020" "Nov-2020" "Dec-2020"
[13] "Q4-2021"  "Jan-2021" "Feb-2021" "Mar-2021"

> unlist(Map(c, names(lst <- with(df, split(b, a))), lst), use.names = FALSE)
 [1] "Q1-2020"  "Apr-2020" "May-2020" "Jun-2020" "Q2-2020"  "Jul-2020"
 [7] "Aug-2020" "Sep-2020" "Q3-2020"  "Oct-2020" "Nov-2020" "Dec-2020"
[13] "Q4-2021"  "Jan-2021" "Feb-2021" "Mar-2021"

Or, we can try the following igraph option

> library(igraph)

> do.call(c, Map(function(x) names(V(x)), decompose(graph_from_data_frame(df))))
 [1] "Q1-2020"  "Apr-2020" "May-2020" "Jun-2020" "Q2-2020"  "Jul-2020"
 [7] "Aug-2020" "Sep-2020" "Q3-2020"  "Oct-2020" "Nov-2020" "Dec-2020"
[13] "Q4-2021"  "Jan-2021" "Feb-2021" "Mar-2021"