I have sales data by years and models. Here sales of J model in each year is missing. Now I want the following condition:
Fill NA of J model with a maximum value of sales in each year 100. For instance, max sale in 2015 was 984, so J has to be 984 100 in 2015
df <- data.frame (model = c("A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J"),
Year = c(2015,2015,2015,2015,2015,2015,2015,2015,2015,2015,2016,2016,2016,2016,2016,2016,2016,2016,2016,2016,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2018,2018,2018,2018,2018,2018,2018,2018,2018,2018,2019,2019,2019,2019,2019,2019,2019,2019,2019,2019,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020),
sales = c(450,678,456,344,984,456,234,244,655,"NA",234,567,234,567,232,900,1005,1900,450,"NA",567,235,456,345,144,333,555,777,111,"NA",222,223,445,776,331,788,980,1003,456,"NA",345,2222,3456,456,678,8911,4560,4567,4566,"NA",6666,7777,8888,1233,1255,5677,3411,2344,6122,"NA"))
CodePudding user response:
You may try(NA
is "NA"
so it needed to be as numeric
)
library(dplyr)
df %>%
group_by(Year) %>%
mutate(sales = as.numeric(sales)) %>%
mutate(sales = ifelse(is.na(sales) & (model == "J"), max(sales, na.rm = T) 100, sales))
model Year sales
<chr> <dbl> <dbl>
1 A 2015 450
2 B 2015 678
3 C 2015 456
4 D 2015 344
5 E 2015 984
6 F 2015 456
7 G 2015 234
8 H 2015 244
9 I 2015 655
10 J 2015 1084
# … with 50 more rows
CodePudding user response:
base R
option:
df <- data.frame (model = c("A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J","A","B","C","D","E","F","G","H","I","J"),
Year = c(2015,2015,2015,2015,2015,2015,2015,2015,2015,2015,2016,2016,2016,2016,2016,2016,2016,2016,2016,2016,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2018,2018,2018,2018,2018,2018,2018,2018,2018,2018,2019,2019,2019,2019,2019,2019,2019,2019,2019,2019,2020,2020,2020,2020,2020,2020,2020,2020,2020,2020),
sales = c(450,678,456,344,984,456,234,244,655,"NA",234,567,234,567,232,900,1005,1900,450,"NA",567,235,456,345,144,333,555,777,111,"NA",222,223,445,776,331,788,980,1003,456,"NA",345,2222,3456,456,678,8911,4560,4567,4566,"NA",6666,7777,8888,1233,1255,5677,3411,2344,6122,"NA"))
df$sales <- as.numeric(df$sales)
#> Warning: NAs introduced by coercion
df$sales <- with(df, ave(sales, Year, FUN = function(x) ifelse(is.na(x) & model == "J", max(x, na.rm = TRUE) 100, sales)))
df
#> model Year sales
#> 1 A 2015 450
#> 2 B 2015 678
#> 3 C 2015 456
#> 4 D 2015 344
#> 5 E 2015 984
#> 6 F 2015 456
#> 7 G 2015 234
#> 8 H 2015 244
#> 9 I 2015 655
#> 10 J 2015 1084
#> 11 A 2016 450
#> 12 B 2016 678
#> 13 C 2016 456
#> 14 D 2016 344
#> 15 E 2016 984
#> 16 F 2016 456
#> 17 G 2016 234
#> 18 H 2016 244
#> 19 I 2016 655
#> 20 J 2016 2000
#> 21 A 2017 450
#> 22 B 2017 678
#> 23 C 2017 456
#> 24 D 2017 344
#> 25 E 2017 984
#> 26 F 2017 456
#> 27 G 2017 234
#> 28 H 2017 244
#> 29 I 2017 655
#> 30 J 2017 877
#> 31 A 2018 450
#> 32 B 2018 678
#> 33 C 2018 456
#> 34 D 2018 344
#> 35 E 2018 984
#> 36 F 2018 456
#> 37 G 2018 234
#> 38 H 2018 244
#> 39 I 2018 655
#> 40 J 2018 1103
#> 41 A 2019 450
#> 42 B 2019 678
#> 43 C 2019 456
#> 44 D 2019 344
#> 45 E 2019 984
#> 46 F 2019 456
#> 47 G 2019 234
#> 48 H 2019 244
#> 49 I 2019 655
#> 50 J 2019 9011
#> 51 A 2020 450
#> 52 B 2020 678
#> 53 C 2020 456
#> 54 D 2020 344
#> 55 E 2020 984
#> 56 F 2020 456
#> 57 G 2020 234
#> 58 H 2020 244
#> 59 I 2020 655
#> 60 J 2020 8988
Created on 2022-07-08 by the reprex package (v2.0.1)