I have a dataframe with multiple NULL values. The class type of the columns are LIST not NUMERIC. Is it possible to replace all the NULL values with the median value of the column? I tried a manual way was to change the NULL value of the column 1 by 1, using the as.numeric function and subsequently apply the median () function. Is there a more efficient way to do this?
i1 <- sapply(pivot_table_1$`Start Working`, is.null)
pivot_table_1$`Start Working`[i1] <- 0
dput()
structure(list(Day = 1:31, `Start Sleeping` = list(0, 20, 35,
40, 50, 0, 40, 0, 0, 40, 50, 0, 0, 40, 0, 40, 35, 45, 0,
0, 65, 35, 40, 40, 0, 50, 40, 0, 0, 0, 0), `Stop Sleeping` = list(
440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440,
440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440,
440, 440, 440, 440, 440, 440, 440), `Start Working` = list(
490, 490, 490, 490, 0, 0, 490, 490, 490, 490, 490, 0, 0,
490, 490, 490, 490, 490, 0, 0, 490, 490, 490, 490, 490, 0,
0, 490, 490, 490, 490), `Stop Working` = list(1005, 1005,
1005, 1005, NULL, NULL, 965, 965, 965, 965, 965, NULL, NULL,
965, 965, 965, 965, 965, NULL, NULL, 965, 965, 965, 965,
965, NULL, NULL, 965, 965, 965, 965), Breakfast = list(690,
645, 615, 540, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
475, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 475, NULL,
NULL, NULL, NULL, NULL, 475, NULL, NULL, NULL, NULL, NULL),
Dinner = list(1390, 1360, 1285, 1270, 1390, NULL, 1140, 1140,
1130, 1135, 1130, NULL, 1165, 1140, 1130, 1135, 1130,
1140, 1140, 1180, NULL, 1145, 1135, 1140, 1135, 1160,
1140, 1140, NULL, 1140, NULL)), row.names = c(NA, -31L
), class = c("tbl_df", "tbl", "data.frame"))
CodePudding user response:
If you wish to keep the entries as length-one lists you can do:
pivot_table_1[] <- lapply(pivot_table_1, function(x) {
ifelse(lengths(x) == 1, x, list(median(unlist(x))))})
pivot_table_1
#> # A tibble: 31 x 7
#> Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working`
#> <int> <list> <list> <list> <list>
#> 1 1 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 2 2 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 3 3 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 4 4 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 5 5 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 6 6 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 7 7 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 8 8 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 9 9 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 10 10 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> # ... with 21 more rows, and 2 more variables: Breakfast <list>, Dinner <list>
Or, if you want them as numeric columns, do:
pivot_table_1[] <- lapply(pivot_table_1, function(x) {
unlist(ifelse(lengths(x) == 1, x, list(median(unlist(x)))))})
pivot_table_1
#> # A tibble: 31 x 7
#> Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working`
#> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 1 0 440 490 1005
#> 2 2 20 440 490 1005
#> 3 3 35 440 490 1005
#> 4 4 40 440 490 1005
#> 5 5 50 440 0 965
#> 6 6 0 440 0 965
#> 7 7 40 440 490 965
#> 8 8 0 440 490 965
#> 9 9 0 440 490 965
#> 10 10 40 440 490 965
#> # ... with 21 more rows, and 2 more variables: Breakfast <dbl>, Dinner <dbl>
Created on 2022-05-22 by the reprex package (v2.0.1)
CodePudding user response:
replace_na()
from tidyr
can be used to replace NULL
s in a list. (NULL
s are the list-column equivalent of NA
s)
library(tidyverse)
replace_na(df, map(keep(df, is.list), ~ median(unlist(.x))))
# # A tibble: 31 × 7
# Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working` Breakfast Dinner
# <int> <list> <list> <list> <list> <list> <list>
# 1 1 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 2 2 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 3 3 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 4 4 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 5 5 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 6 6 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 7 7 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 8 8 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 9 9 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 10 10 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# # … with 21 more rows
If you want these list-columns to be flattened, try
replace_na(df, map(keep(df, is.list), ~ median(unlist(.x)))) %>%
mutate(across(where(is.list), unlist))
# # A tibble: 31 × 7
# Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working` Breakfast Dinner
# <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 0 440 490 1005 690 1390
# 2 2 20 440 490 1005 645 1360
# 3 3 35 440 490 1005 615 1285
# 4 4 40 440 490 1005 540 1270
# 5 5 50 440 0 965 540 1390
# 6 6 0 440 0 965 540 1140
# 7 7 40 440 490 965 540 1140
# 8 8 0 440 490 965 540 1140
# 9 9 0 440 490 965 540 1130
# 10 10 40 440 490 965 540 1135