Here's my dataframe:
df1 = structure(list(item = c("HY04SB", "HY04SB", "GSP8Y1", "GSP8Y1",
"9PLM0A", "9PLM0A"), weird = c("w'`tc7>", NA, NA, "($^-yk1%",
"jd(,*'7_", NA), simple = c(NA, 14661746, 58489634, NA, NA, NA
), code = c("WX&}Awx:65Dgn9A3", "WX&}Awx:65Dgn9A3", "$8@g44;T7w'{8e^=",
NA, NA, "(X<f..\\Js9da]CE")), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -6L))
# A tibble: 6 × 4
item weird simple code
<chr> <chr> <dbl> <chr>
1 HY04SB w'`tc7> NA "WX&}Awx:65Dgn9A3"
2 HY04SB NA 14661746 "WX&}Awx:65Dgn9A3"
3 GSP8Y1 NA 58489634 "$8@g44;T7w'{8e^="
4 GSP8Y1 ($^-yk1% NA NA
5 9PLM0A jd(,*'7_ NA NA
6 9PLM0A NA NA "(X<f..\\Js9da]CE"
This is what I would like to achieve:
structure(list(item = c("HY04SB", "GSP8Y1", "9PLM0A"), weird = c("w'`tc7>",
"($^-yk1%", "jd(,*'7_"), simple = c(14661746, 58489634, NA),
code = c("WX&}Awx:65Dgn9A3", "$8@g44;T7w'{8e^=", "(X<f..\\Js9da]CE"
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-3L))
# A tibble: 3 × 4
item weird simple code
<chr> <chr> <dbl> <chr>
1 HY04SB w'`tc7> 14661746 "WX&}Awx:65Dgn9A3"
2 GSP8Y1 ($^-yk1% 58489634 "$8@g44;T7w'{8e^="
3 9PLM0A jd(,*'7_ NA "(X<f..\\Js9da]CE"
I tried the solution from Filling missing values from other rows in group (including duplicates) but without any success and don't think it's what I want because it's not in the reduced dataframe with incomplete variables removed, and not sure how to retain one of the row with NA as it's more complete than previous duplicates...
new_df <- df1 %>%
group_by(item) %>%
mutate(valnew = str_c(weird, simple, code, sep=":")) %>%
select(-weird, -simple, -code) %>%
complete(ID, valnew = unique(valnew[!is.na(valnew)])) %>%
filter(!is.na(valnew)) %>%
separate(valnew, into = c('weird', 'simple', 'code'), sep=":") %>%
mutate(rn = row_number()) %>%
left_join(df1 %>%
select(ID)) %>%
filter(!duplicated(rn)) %>%
select(names(df1))
Can someone assist this, how to produce the result in 3 x 4 as shown above?
CodePudding user response:
You can use fil1
after grouping to give rows that share the same item the same information and then slice
to select n amount of observations per group
library(dplyr)
library(tidyr)
df1 |>
group_by(item) |>
fill(-item, .direction = "updown") |>
slice(1) |>
ungroup()
item weird simple code
<chr> <chr> <dbl> <chr>
1 9PLM0A jd(,*'7_ NA "(X<f..\\Js9da]CE"
2 GSP8Y1 ($^-yk1% 58489634 "$8@g44;T7w'{8e^="
3 HY04SB w'`tc7> 14661746 "WX&}Awx:65Dgn9A3"
CodePudding user response:
A solution with sapply
df1 = as_tibble(t(
sapply(unique(df1$item), function(X) # "loop" over items
sapply(colnames(df1), function(COL) # get unique value of each column else give NA
ifelse(length((REF = unique(df1[df1$item == X, COL][!is.na(df1[df1$item == X, COL])]))), REF, NA)))
))
df1<-readr::type_convert(df1) #auto change column type.