how to convert this two column into one
structure(list(a = c("Q1-2020", "Q1-2020", "Q1-2020", "Q2-2020",
"Q2-2020", "Q2-2020", "Q3-2020", "Q3-2020", "Q3-2020", "Q4-2021",
"Q4-2021", "Q4-2021"), b = c("Apr-2020", "May-2020", "Jun-2020",
"Jul-2020", "Aug-2020", "Sep-2020", "Oct-2020", "Nov-2020", "Dec-2020",
"Jan-2021", "Feb-2021", "Mar-2021")), row.names = c(1L, 21L,
41L, 61L, 81L, 101L, 121L, 141L, 161L, 181L, 201L, 221L), class = "data.frame")
expected output
output = c("q1-2020",
"Apr-20", "May-20", "Jun-20", "q2-2020", "Jul-20", "Aug-20",
"Sep-20", "q3-2020", "Oct-20", "Nov-20", "Dec-20", "q4-2021",
"Jan-21", "Feb-21", "Mar-21")
CodePudding user response:
lapply
over the unique
"a"
's.
unlist(lapply(unique(dat$a), \(x) c(x, dat[dat$a == x, 'b'])))
# [1] "Q1-2020" "Apr-2020" "May-2020" "Jun-2020" "Q2-2020" "Jul-2020"
# [7] "Aug-2020" "Sep-2020" "Q3-2020" "Oct-2020" "Nov-2020" "Dec-2020"
# [13] "Q4-2021" "Jan-2021" "Feb-2021" "Mar-2021"
CodePudding user response:
We may replace
the duplicated
elements in 'a' with NA
, while extracting the substring from 'b', t
ranspose the data and remove the NA
s with na.omit
out2 <- c(na.omit(c(t(transform(df1,
a = tolower(replace(a, duplicated(a), NA)),
b = sub("-\\d{2}", "-", b))))))
-checking with OP's output
> identical(output, out2)
[1] TRUE
CodePudding user response:
We can make use of the regular structure of your data and just insert the unique value of df$a into df$b as in this great answer.
v <- unique(df$a)
c(rbind(v, matrix(df$b, ncol = length(v))))
#> [1] "Q1-2020" "Apr-2020" "May-2020" "Jun-2020" "Q2-2020" "Jul-2020"
#> [7] "Aug-2020" "Sep-2020" "Q3-2020" "Oct-2020" "Nov-2020" "Dec-2020"
#> [13] "Q4-2021" "Jan-2021" "Feb-2021" "Mar-2021"
or you can use unlist, after splitting your data frame by your repeated id column (I call this a "quasi nest"), then bind each unique id with the other column with lapply.
quasi_nest <- lapply(split(df, df$a), function(x) {
c(unique(x$a), x$b)
}
)
unlist(quasi_nest, use.names = FALSE)
#> [1] "Q1-2020" "Apr-2020" "May-2020" "Jun-2020" "Q2-2020" "Jul-2020"
#> [7] "Aug-2020" "Sep-2020" "Q3-2020" "Oct-2020" "Nov-2020" "Dec-2020"
#> [13] "Q4-2021" "Jan-2021" "Feb-2021" "Mar-2021"
CodePudding user response:
We can try base R option
> c(apply(aggregate(. ~ a, df, c), 1, c))
[1] "Q1-2020" "Apr-2020" "May-2020" "Jun-2020" "Q2-2020" "Jul-2020"
[7] "Aug-2020" "Sep-2020" "Q3-2020" "Oct-2020" "Nov-2020" "Dec-2020"
[13] "Q4-2021" "Jan-2021" "Feb-2021" "Mar-2021"
or
> unlist(Map(c, names(lst <- with(df, split(b, a))), lst), use.names = FALSE)
[1] "Q1-2020" "Apr-2020" "May-2020" "Jun-2020" "Q2-2020" "Jul-2020"
[7] "Aug-2020" "Sep-2020" "Q3-2020" "Oct-2020" "Nov-2020" "Dec-2020"
[13] "Q4-2021" "Jan-2021" "Feb-2021" "Mar-2021"
Or, we can try the following igraph
option
> library(igraph)
> do.call(c, Map(function(x) names(V(x)), decompose(graph_from_data_frame(df))))
[1] "Q1-2020" "Apr-2020" "May-2020" "Jun-2020" "Q2-2020" "Jul-2020"
[7] "Aug-2020" "Sep-2020" "Q3-2020" "Oct-2020" "Nov-2020" "Dec-2020"
[13] "Q4-2021" "Jan-2021" "Feb-2021" "Mar-2021"