Home > front end >  How fill NA by condition?
How fill NA by condition?

Time:07-08

I have sales data by years and models. Here sales of J model in each year is missing. Now I want the following condition:

Fill NA of J model with a maximum value of sales in each year 100. For instance, max sale in 2015 was 984, so J has to be 984 100 in 2015

df <- data.frame (model  = c("A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J"),
                  Year = c(2015,2015,2015,2015,2015,2015,2015,2015,2015,2015,2016,2016,2016,2016,2016,2016,2016,2016,2016,2016,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2018,2018,2018,2018,2018,2018,2018,2018,2018,2018,2019,2019,2019,2019,2019,2019,2019,2019,2019,2019,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020),
                  sales = c(450,678,456,344,984,456,234,244,655,"NA",234,567,234,567,232,900,1005,1900,450,"NA",567,235,456,345,144,333,555,777,111,"NA",222,223,445,776,331,788,980,1003,456,"NA",345,2222,3456,456,678,8911,4560,4567,4566,"NA",6666,7777,8888,1233,1255,5677,3411,2344,6122,"NA"))

CodePudding user response:

You may try(NA is "NA" so it needed to be as numeric)

library(dplyr)
df %>%
  group_by(Year) %>%
  mutate(sales = as.numeric(sales)) %>%
  mutate(sales = ifelse(is.na(sales) & (model == "J"), max(sales, na.rm = T)   100, sales))

  model  Year sales
   <chr> <dbl> <dbl>
 1 A      2015   450
 2 B      2015   678
 3 C      2015   456
 4 D      2015   344
 5 E      2015   984
 6 F      2015   456
 7 G      2015   234
 8 H      2015   244
 9 I      2015   655
10 J      2015  1084
# … with 50 more rows

CodePudding user response:

base R option:

df <- data.frame (model  = c("A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J"),
                  Year = c(2015,2015,2015,2015,2015,2015,2015,2015,2015,2015,2016,2016,2016,2016,2016,2016,2016,2016,2016,2016,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2018,2018,2018,2018,2018,2018,2018,2018,2018,2018,2019,2019,2019,2019,2019,2019,2019,2019,2019,2019,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020),
                  sales = c(450,678,456,344,984,456,234,244,655,"NA",234,567,234,567,232,900,1005,1900,450,"NA",567,235,456,345,144,333,555,777,111,"NA",222,223,445,776,331,788,980,1003,456,"NA",345,2222,3456,456,678,8911,4560,4567,4566,"NA",6666,7777,8888,1233,1255,5677,3411,2344,6122,"NA"))


df$sales <- as.numeric(df$sales)
#> Warning: NAs introduced by coercion
df$sales <- with(df, ave(sales, Year, FUN = function(x) ifelse(is.na(x) & model == "J", max(x, na.rm = TRUE)   100, sales)))
df
#>    model Year sales
#> 1      A 2015   450
#> 2      B 2015   678
#> 3      C 2015   456
#> 4      D 2015   344
#> 5      E 2015   984
#> 6      F 2015   456
#> 7      G 2015   234
#> 8      H 2015   244
#> 9      I 2015   655
#> 10     J 2015  1084
#> 11     A 2016   450
#> 12     B 2016   678
#> 13     C 2016   456
#> 14     D 2016   344
#> 15     E 2016   984
#> 16     F 2016   456
#> 17     G 2016   234
#> 18     H 2016   244
#> 19     I 2016   655
#> 20     J 2016  2000
#> 21     A 2017   450
#> 22     B 2017   678
#> 23     C 2017   456
#> 24     D 2017   344
#> 25     E 2017   984
#> 26     F 2017   456
#> 27     G 2017   234
#> 28     H 2017   244
#> 29     I 2017   655
#> 30     J 2017   877
#> 31     A 2018   450
#> 32     B 2018   678
#> 33     C 2018   456
#> 34     D 2018   344
#> 35     E 2018   984
#> 36     F 2018   456
#> 37     G 2018   234
#> 38     H 2018   244
#> 39     I 2018   655
#> 40     J 2018  1103
#> 41     A 2019   450
#> 42     B 2019   678
#> 43     C 2019   456
#> 44     D 2019   344
#> 45     E 2019   984
#> 46     F 2019   456
#> 47     G 2019   234
#> 48     H 2019   244
#> 49     I 2019   655
#> 50     J 2019  9011
#> 51     A 2020   450
#> 52     B 2020   678
#> 53     C 2020   456
#> 54     D 2020   344
#> 55     E 2020   984
#> 56     F 2020   456
#> 57     G 2020   234
#> 58     H 2020   244
#> 59     I 2020   655
#> 60     J 2020  8988

Created on 2022-07-08 by the reprex package (v2.0.1)

  • Related