Home > Back-end >  Fill the data with group data
Fill the data with group data

Time:03-20

I have a data.table as following:-

k <- data.table(name = rep(letters[1:4], each = 5),
            year = rep(2015:2019, times = 4),
            values = c(19, 19, 18, NA, 19, 44, NA, NA, NA, NA, 33, 32, NA, NA, NA, NA, NA, NA, NA, NA))

k

#     name year values
#  1:    a 2015     19
#  2:    a 2016     19
#  3:    a 2017     18
#  4:    a 2018     NA
#  5:    a 2019     19
#  6:    b 2015     44
#  7:    b 2016     NA
#  8:    b 2017     NA
#  9:    b 2018     NA
# 10:    b 2019     NA
# 11:    c 2015     33
# 12:    c 2016     32
# 13:    c 2017     NA
# 14:    c 2018     NA
# 15:    c 2019     NA
# 16:    d 2015     NA
# 17:    d 2016     NA
# 18:    d 2017     NA
# 19:    d 2018     NA
# 20:    d 2019     NA

Each group is defined by the column name and each names has 5 year values.

I want to fill each NA values in the group by the first unique value in the group. Hence, my resulting data.table will be as following:-

#     name year values
#  1:    a 2015     19
#  2:    a 2016     19
#  3:    a 2017     18
#  4:    a 2018     19
#  5:    a 2019     19
#  6:    b 2015     44
#  7:    b 2016     44
#  8:    b 2017     44
#  9:    b 2018     44
# 10:    b 2019     44
# 11:    c 2015     33
# 12:    c 2016     32
# 13:    c 2017     33
# 14:    c 2018     33
# 15:    c 2019     33
# 16:    d 2015     NA
# 17:    d 2016     NA
# 18:    d 2017     NA
# 19:    d 2018     NA
# 20:    d 2019     NA

CodePudding user response:

You may use replace -

library(data.table)
k[, values := replace(values, is.na(values), first(values)), name]
k

#    name year values
# 1:    a 2015     19
# 2:    a 2016     19
# 3:    a 2017     18
# 4:    a 2018     19
# 5:    a 2019     19
# 6:    b 2015     44
# 7:    b 2016     44
# 8:    b 2017     44
# 9:    b 2018     44
#10:    b 2019     44
#11:    c 2015     33
#12:    c 2016     32
#13:    c 2017     33
#14:    c 2018     33
#15:    c 2019     33
#16:    d 2015     NA
#17:    d 2016     NA
#18:    d 2017     NA
#19:    d 2018     NA
#20:    d 2019     NA 

CodePudding user response:

Or with nafill:

k[,values:=nafill(values,fill=first(values,na.rm=T)),by=name][]

      name  year values
    <char> <int>  <num>
 1:      a  2015     19
 2:      a  2016     19
 3:      a  2017     18
 4:      a  2018     18
 5:      a  2019     19
 6:      b  2015     44
 7:      b  2016     44
 8:      b  2017     44

CodePudding user response:

A possible solution, using tidyverse:

library(data.table)
library(tidyverse)

k %>% 
  group_by(name) %>% 
  mutate(values = if_else(is.na(values), first(values), values)) %>% 
  ungroup() %>% as.data.table

#>     name year values
#>  1:    a 2015     19
#>  2:    a 2016     19
#>  3:    a 2017     18
#>  4:    a 2018     19
#>  5:    a 2019     19
#>  6:    b 2015     44
#>  7:    b 2016     44
#>  8:    b 2017     44
#>  9:    b 2018     44
#> 10:    b 2019     44
#> 11:    c 2015     33
#> 12:    c 2016     32
#> 13:    c 2017     33
#> 14:    c 2018     33
#> 15:    c 2019     33
#> 16:    d 2015     NA
#> 17:    d 2016     NA
#> 18:    d 2017     NA
#> 19:    d 2018     NA
#> 20:    d 2019     NA

Or using only data.table:

library(data.table)

k[, values := fifelse(is.na(values), first(values), values), by = name][]

#>     name year values
#>  1:    a 2015     19
#>  2:    a 2016     19
#>  3:    a 2017     18
#>  4:    a 2018     19
#>  5:    a 2019     19
#>  6:    b 2015     44
#>  7:    b 2016     44
#>  8:    b 2017     44
#>  9:    b 2018     44
#> 10:    b 2019     44
#> 11:    c 2015     33
#> 12:    c 2016     32
#> 13:    c 2017     33
#> 14:    c 2018     33
#> 15:    c 2019     33
#> 16:    d 2015     NA
#> 17:    d 2016     NA
#> 18:    d 2017     NA
#> 19:    d 2018     NA
#> 20:    d 2019     NA

CodePudding user response:

k %>% 
  group_by(name) %>% 
  mutate(values = replace_na(values, first(values))) %>% 
  ungroup



  name   year values
   <chr> <int>  <dbl>
 1 a      2015     19
 2 a      2016     19
 3 a      2017     18
 4 a      2018     19
 5 a      2019     19
 6 b      2015     44
 7 b      2016     44
 8 b      2017     44
 9 b      2018     44
10 b      2019     44
11 c      2015     33
12 c      2016     32
13 c      2017     33
14 c      2018     33
15 c      2019     33
16 d      2015     NA
17 d      2016     NA
18 d      2017     NA
19 d      2018     NA
20 d      2019     NA
  • Related