Im trying to replace NaN,Inf,-Inf
in my dataframe with NA
but it does not work.
agg<-structure(list(`Product family` = c("PF_10", "PF_101", "PF_102",
"PF_102", "PF_102", "PF_103", "PF_105", "PF_106", "PF_106", "PF_106",
"PF_107", "PF_110", "PF_110", "PF_111", "PF_112", "PF_112", "PF_113"
), `individual serving unit` = c("volume_6", "volume_16", "volume_11",
"volume_16", "volume_3", "volume_20", "volume_12", "volume_1",
"volume_52", "volume_79", "volume_16", "volume_1", "volume_80",
"volume_16", "volume_1", "volume_16", "volume_31"), Recipe = c("Rp_23",
"Rp_193", "Rp_194", "Rp_197", "Rp_197", "Rp_16", "Rp_210", "Rp_211",
"Rp_211", "Rp_211", "Rp_216", "Rp_101", "Rp_101", "Rp_231", "Rp_236",
"Rp_240", "Rp_241"), `number reference markets` = c(1, 2, 1,
1, 2, 1, 1, 1, 0, 2, 1, 2, 1, 1, 3, 4, 1), `Country Price` = c(" NA",
" 6.11", " NA", " NA", " NA", " 8.66", " NA", "13.49",
" 0.04", "23.27", " NA", " NA", "22.47", " NA", " NA",
" NA", " NA"), `Estimated reference price` = c("11.8", " 6.9",
" 6.9", " 6.9", " 4.9", "13.5", " 5.6", "13.3", " NaN", "21.6",
" 9.2", "16.2", "22.7", " 2.6", "13.9", " 4.0", "10.0"), `price difference (IRP Current)` = c(" NA",
" 13.58", " NA", " NA", " NA", " 55.43", " NA", " -1.11",
" NaN", " -7.09", " NA", " NA", " 0.93", " NA", " NA",
" NA", " NA"), `Median price` = c("11.8", " 6.9", " 6.9",
" 6.9", " 4.9", "13.5", " 5.6", "13.3", " NA", "21.6", " 9.2",
"16.2", "22.7", " 2.6", "13.6", " 4.7", "10.0"), `Average price` = c("11.8",
" 6.9", " 6.9", " 6.9", " 4.9", "13.5", " 5.6", "13.3", " NaN",
"21.6", " 9.2", "16.2", "22.7", " 2.6", "13.9", " 4.8", "10.0"
), `Minimun price` = c("11.8", " 6.2", " 6.9", " 6.9", " 3.7",
"13.5", " 5.6", "13.3", " Inf", "20.3", " 9.2", "15.8", "22.7",
" 2.6", "12.0", " 2.5", "10.0"), `Maximum price` = c("11.8",
" 7.7", " 6.9", " 6.9", " 6.1", "13.5", " 5.6", "13.3", "-Inf",
"22.9", " 9.2", "16.7", "22.7", " 2.6", "16.1", " 7.2", "10.0"
), `Percentage lowest of the maximum price` = c("100", " 80",
"100", "100", " 60", "100", "100", "100", "NaN", " 89", "100",
" 94", "100", "100", " 75", " 35", "100")), row.names = c(95L,
28L, 18L, 29L, 71L, 67L, 19L, 1L, 93L, 127L, 30L, 2L, 129L, 31L,
3L, 32L, 72L), class = "data.frame")
agg$`number reference markets`<-as.numeric(agg$`number reference markets`)
> agg$`Country Price`<-as.numeric(agg$`Country Price`)
> agg$`Estimated reference price`<-agg$`Estimated reference price`
> agg$`price difference (IRP Current)`<-as.numeric(agg$`price difference (IRP Current)`)
> agg$`Median price`<-agg$`Median price`
> agg$`Average price`<-agg$`Average price`
> agg$`Minimun price`<-as.numeric(agg$`Minimun price`)
> agg$`Maximum price`<-as.numeric(agg$`Maximum price`)
> agg$`Percentage lowest of the maximum price`<-as.numeric(agg$`Percentage lowest of the maximum price`)
agg <- agg %>% mutate_all(~ifelse(is.nan(.), NA, .))
my_data <- do.call(data.frame, # Replace Inf in data by NA
lapply(agg,
function(x) replace(x, is.infinite(x), NA)))
CodePudding user response:
Data
df <- data.frame(x = letters[1:4], y = c(1, 2, NaN, 4), z = c(5, Inf, 7, 8))
# x y z
# 1 a 1 5
# 2 b 2 Inf
# 3 c NaN 7
# 4 d 4 8
In R, NaN
and Inf
are both numeric (texts "NaN"
and "Inf"
are exceptions). You can use replace()
on all numeric columns.
library(dplyr)
df %>%
mutate(across(where(is.numeric),
~ replace(.x, is.nan(.x) | is.infinite(.x), NA)))
# # A tibble: 4 × 3
# x y z
# <chr> <dbl> <dbl>
# 1 a 1 5
# 2 b 2 NA
# 3 c NA 7
# 4 d 4 8
A general way to take character "NaN"
and "Inf"
into account:
df[df == "NaN" | df == "Inf"] <- NA
This works no matter numeric or character they are.