Home > Mobile >  Inf and NaN values cannot be replaced by NAs in dataframe
Inf and NaN values cannot be replaced by NAs in dataframe

Time:04-28

Im trying to replace NaN,Inf,-Inf in my dataframe with NA but it does not work.

agg<-structure(list(`Product family` = c("PF_10", "PF_101", "PF_102", 
"PF_102", "PF_102", "PF_103", "PF_105", "PF_106", "PF_106", "PF_106", 
"PF_107", "PF_110", "PF_110", "PF_111", "PF_112", "PF_112", "PF_113"
), `individual serving unit` = c("volume_6", "volume_16", "volume_11", 
"volume_16", "volume_3", "volume_20", "volume_12", "volume_1", 
"volume_52", "volume_79", "volume_16", "volume_1", "volume_80", 
"volume_16", "volume_1", "volume_16", "volume_31"), Recipe = c("Rp_23", 
"Rp_193", "Rp_194", "Rp_197", "Rp_197", "Rp_16", "Rp_210", "Rp_211", 
"Rp_211", "Rp_211", "Rp_216", "Rp_101", "Rp_101", "Rp_231", "Rp_236", 
"Rp_240", "Rp_241"), `number reference markets` = c(1, 2, 1, 
1, 2, 1, 1, 1, 0, 2, 1, 2, 1, 1, 3, 4, 1), `Country Price` = c("   NA", 
" 6.11", "   NA", "   NA", "   NA", " 8.66", "   NA", "13.49", 
" 0.04", "23.27", "   NA", "   NA", "22.47", "   NA", "   NA", 
"   NA", "   NA"), `Estimated reference price` = c("11.8", " 6.9", 
" 6.9", " 6.9", " 4.9", "13.5", " 5.6", "13.3", " NaN", "21.6", 
" 9.2", "16.2", "22.7", " 2.6", "13.9", " 4.0", "10.0"), `price difference (IRP Current)` = c("    NA", 
" 13.58", "    NA", "    NA", "    NA", " 55.43", "    NA", " -1.11", 
"   NaN", " -7.09", "    NA", "    NA", "  0.93", "    NA", "    NA", 
"    NA", "    NA"), `Median price` = c("11.8", " 6.9", " 6.9", 
" 6.9", " 4.9", "13.5", " 5.6", "13.3", "  NA", "21.6", " 9.2", 
"16.2", "22.7", " 2.6", "13.6", " 4.7", "10.0"), `Average price` = c("11.8", 
" 6.9", " 6.9", " 6.9", " 4.9", "13.5", " 5.6", "13.3", " NaN", 
"21.6", " 9.2", "16.2", "22.7", " 2.6", "13.9", " 4.8", "10.0"
), `Minimun price` = c("11.8", " 6.2", " 6.9", " 6.9", " 3.7", 
"13.5", " 5.6", "13.3", " Inf", "20.3", " 9.2", "15.8", "22.7", 
" 2.6", "12.0", " 2.5", "10.0"), `Maximum price` = c("11.8", 
" 7.7", " 6.9", " 6.9", " 6.1", "13.5", " 5.6", "13.3", "-Inf", 
"22.9", " 9.2", "16.7", "22.7", " 2.6", "16.1", " 7.2", "10.0"
), `Percentage lowest of the maximum price` = c("100", " 80", 
"100", "100", " 60", "100", "100", "100", "NaN", " 89", "100", 
" 94", "100", "100", " 75", " 35", "100")), row.names = c(95L, 
28L, 18L, 29L, 71L, 67L, 19L, 1L, 93L, 127L, 30L, 2L, 129L, 31L, 
3L, 32L, 72L), class = "data.frame")


agg$`number reference markets`<-as.numeric(agg$`number reference markets`)
> agg$`Country Price`<-as.numeric(agg$`Country Price`)
> agg$`Estimated reference price`<-agg$`Estimated reference price`
> agg$`price difference (IRP Current)`<-as.numeric(agg$`price difference (IRP Current)`)
> agg$`Median price`<-agg$`Median price`
> agg$`Average price`<-agg$`Average price`
> agg$`Minimun price`<-as.numeric(agg$`Minimun price`)
> agg$`Maximum price`<-as.numeric(agg$`Maximum price`)
> agg$`Percentage lowest of the maximum price`<-as.numeric(agg$`Percentage lowest of the maximum price`)


agg <- agg %>% mutate_all(~ifelse(is.nan(.), NA, .))
 
my_data <- do.call(data.frame,                      # Replace Inf in data by NA
                     lapply(agg,
                            function(x) replace(x, is.infinite(x), NA)))

CodePudding user response:

Data
df <- data.frame(x = letters[1:4], y = c(1, 2, NaN, 4), z = c(5, Inf, 7, 8))

#   x   y   z
# 1 a   1   5
# 2 b   2 Inf
# 3 c NaN   7
# 4 d   4   8

In R, NaN and Inf are both numeric (texts "NaN" and "Inf" are exceptions). You can use replace() on all numeric columns.

library(dplyr)

df %>%
  mutate(across(where(is.numeric),
                ~ replace(.x, is.nan(.x) | is.infinite(.x), NA)))

# # A tibble: 4 × 3
#   x         y     z
#   <chr> <dbl> <dbl>
# 1 a         1     5
# 2 b         2    NA
# 3 c        NA     7
# 4 d         4     8

A general way to take character "NaN" and "Inf" into account:

df[df == "NaN" | df == "Inf"] <- NA

This works no matter numeric or character they are.


  •  Tags:  
  • r
  • Related