Home > front end >  Trying to summarize all numeric variables based on character variable gives error
Trying to summarize all numeric variables based on character variable gives error

Time:03-15

This is my dataframe:

# Example data: one row per reagent entry with the amount demanded by the
# course and the amount available. Element names repeat, which is why the
# rows are later aggregated per name. (Looks like dput() output of a tibble.)
dp <- structure(
  list(
    `Element Name` = c(
      "Nitric acid (concentrated)", "Sulphuric acid(concentrated)",
      "2-hydroxybenzoic acid", "Acetic anhydride", "2-Naphthol",
      "Sodium Hydroxide", "Phenyl hydrazine hydrochloride", "Glucose",
      "Sodium acetate", "Aniline", "Zinc poweder", "2-amino-benzoic acid",
      "1.3-dihydroxybenzene", "Ethyl acetate", "hydroxy benzene",
      "phenyl methanol", "Sodium carbonate", "Potassium permanganate",
      "Sodium bisulfite.", "Hydrochloric acid (concentrated)",
      "Sodium nitrite", "Copper(II) sulfate", "Methyl orange", "EtOH",
      "Distilled water", "cuper ion", "ammonium hydroxide",
      "ammonium hydroxide", "Iron( III)", "Potassium Thiocyanate",
      "ferric ammonium sulfate", "Ammonium Sulfate", "sodium hypochlorite",
      "Acetic acid", "Phenolphthalein", "Sodium carbonate",
      "Sodum hydroxide", "Acetic acid", "Phenolphthalein", "Methyl orange",
      "Phosphoric acid", "Sodium carbonate", "Iron(II) sulfate",
      "Potassium permanganate", "Sulfuric Acid",
      "Barium Chloride.monoHydrate", "Distilled water", "nickel Sulphate",
      "Dimethyl glyoxime (DMG)", "Calsium chloride"
    ),
    DemandCourse = c(
      375, 1050, 300, 1614, 225, 75, 414, 414, 225, 450,
      111, 675, 105, 120, 375, 75, 75, 375, 150, 750,
      264, 975, 20, 250, 30, 25, 2500, 2500, 15, 730,
      25, 170, 75, 255, 10, 160, 144, 54, 15, 18,
      132, 48, 138, 36, 300, 2250, 45, 1500, 90, 999
    ),
    AmountsAv = c(
      1000, 3000, 4000, 1000, 750, 750, 2000, 5000, 150, 24000,
      450, 3000, 1400, 400, 400, 250, 250, 1000, 1000, 7500,
      6400, 900, 250, 1500, 20000, 50, 300, 4000, 200, 3000,
      500, 1200, 1000, 6000, 900, 250, 200, 6000, 900, 250,
      200, 250, 150, 1000, 15000, 3000, 20000, 1500, 600, 7500
    )
  ),
  row.names = c(NA, -50L),
  class = c("tbl_df", "tbl", "data.frame"),
  # Leftover "omit" attribute: each entry is an integer named after itself
  # (`24` = 24L, ...), which is exactly what setNames(nm = x) builds.
  # Presumably the rows dropped by an earlier na.omit() call.
  na.action = structure(
    setNames(nm = c(
      1L, 24L, 32L, 36L, 38L, 42L, 45L, 49L, 66L, 111L, 140L, 151L, 154L,
      164L, 169L, 171L, 175L, 185L, 193L, 227L, 252L, 253L, 256L, 257L,
      258L, 262L, 263L, 265L, 275L, 276L, 277L, 279L, 280L, 281L, 282L,
      283L, 284L, 285L, 286L, 288L, 289L, 290L, 291L, 292L, 293L, 298L,
      299L, 300L, 302L, 303L, 304L, 308L, 309L, 310L, 311L, 312L, 314L,
      315L, 316L, 317L, 318L, 319L, 323L, 325L
    )),
    class = "omit"
  )
)
 

and I'm trying to sum the numeric columns for each `Element Name`:

# Attempt to sum every column per element name.
# NOTE: this is the call that fails. Wrapping the column in c() makes
# group_by() create a *new* grouping column literally named
# c(`Element Name`), so the original character column `Element Name` is not a
# grouping column and across(everything()) hands it to sum().
dp <- dp %>% 
  group_by(c(`Element Name`)) %>% 
  summarise(across(everything(), sum, na.rm = TRUE))

but I get:

Error in `summarise()`:
! Problem while computing `..1 = across(everything(), sum, na.rm = TRUE)`.
i The error occurred in group 1: c(`Element Name`) = "1.3-dihydroxybenzene".
Caused by error in `across()`:
! Problem while computing column `Element Name`.
Caused by error in `fn()`:
! invalid 'type' (character) of argument
Run `rlang::last_error()` to see where the error occurred.

CodePudding user response:

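Just remove the `c()` wrapper in `group_by()`. `group_by(c(`Element Name`))` creates a new grouping column named ``c(`Element Name`)``, so the original character column is left among the columns that `across(everything())` tries to sum. Grouping by the bare column name excludes it from `across()`: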

library(dplyr)

# Sum every non-grouping column per element name.
# Grouping by the bare column (no c()) makes `Element Name` a grouping
# variable, so across(everything()) skips it and only sees the value columns.
# na.rm is passed inside a lambda because forwarding extra arguments through
# across()'s `...` is deprecated since dplyr 1.1.0.
dp %>%
  group_by(`Element Name`) %>%
  summarise(across(everything(), ~ sum(.x, na.rm = TRUE)))

-output

# A tibble: 42 × 3
   `Element Name`              DemandCourse AmountsAv
   <chr>                              <dbl>     <dbl>
 1 1.3-dihydroxybenzene                 105      1400
 2 2-amino-benzoic acid                 675      3000
 3 2-hydroxybenzoic acid                300      4000
 4 2-Naphthol                           225       750
 5 Acetic acid                          309     12000
 6 Acetic anhydride                    1614      1000
 7 ammonium hydroxide                  5000      4300
 8 Ammonium Sulfate                     170      1200
 9 Aniline                              450     24000
10 Barium Chloride.monoHydrate         2250      3000
# … with 32 more rows

CodePudding user response:

`across(everything())` is also trying to sum your first column, which is a character column. Group by the bare column name and restrict `across()` to the numeric columns instead:

# Same example data as in the question: one row per reagent entry with the
# amount demanded by the course and the amount available. Element names
# repeat, so the rows are aggregated per name below.
dp <- structure(
  list(
    `Element Name` = c(
      "Nitric acid (concentrated)", "Sulphuric acid(concentrated)",
      "2-hydroxybenzoic acid", "Acetic anhydride", "2-Naphthol",
      "Sodium Hydroxide", "Phenyl hydrazine hydrochloride", "Glucose",
      "Sodium acetate", "Aniline", "Zinc poweder", "2-amino-benzoic acid",
      "1.3-dihydroxybenzene", "Ethyl acetate", "hydroxy benzene",
      "phenyl methanol", "Sodium carbonate", "Potassium permanganate",
      "Sodium bisulfite.", "Hydrochloric acid (concentrated)",
      "Sodium nitrite", "Copper(II) sulfate", "Methyl orange", "EtOH",
      "Distilled water", "cuper ion", "ammonium hydroxide",
      "ammonium hydroxide", "Iron( III)", "Potassium Thiocyanate",
      "ferric ammonium sulfate", "Ammonium Sulfate", "sodium hypochlorite",
      "Acetic acid", "Phenolphthalein", "Sodium carbonate",
      "Sodum hydroxide", "Acetic acid", "Phenolphthalein", "Methyl orange",
      "Phosphoric acid", "Sodium carbonate", "Iron(II) sulfate",
      "Potassium permanganate", "Sulfuric Acid",
      "Barium Chloride.monoHydrate", "Distilled water", "nickel Sulphate",
      "Dimethyl glyoxime (DMG)", "Calsium chloride"
    ),
    DemandCourse = c(
      375, 1050, 300, 1614, 225, 75, 414, 414, 225, 450,
      111, 675, 105, 120, 375, 75, 75, 375, 150, 750,
      264, 975, 20, 250, 30, 25, 2500, 2500, 15, 730,
      25, 170, 75, 255, 10, 160, 144, 54, 15, 18,
      132, 48, 138, 36, 300, 2250, 45, 1500, 90, 999
    ),
    AmountsAv = c(
      1000, 3000, 4000, 1000, 750, 750, 2000, 5000, 150, 24000,
      450, 3000, 1400, 400, 400, 250, 250, 1000, 1000, 7500,
      6400, 900, 250, 1500, 20000, 50, 300, 4000, 200, 3000,
      500, 1200, 1000, 6000, 900, 250, 200, 6000, 900, 250,
      200, 250, 150, 1000, 15000, 3000, 20000, 1500, 600, 7500
    )
  ),
  row.names = c(NA, -50L),
  class = c("tbl_df", "tbl", "data.frame"),
  # Leftover "omit" attribute: each entry is an integer named after itself
  # (`24` = 24L, ...), which is exactly what setNames(nm = x) builds.
  # Presumably the rows dropped by an earlier na.omit() call.
  na.action = structure(
    setNames(nm = c(
      1L, 24L, 32L, 36L, 38L, 42L, 45L, 49L, 66L, 111L, 140L, 151L, 154L,
      164L, 169L, 171L, 175L, 185L, 193L, 227L, 252L, 253L, 256L, 257L,
      258L, 262L, 263L, 265L, 275L, 276L, 277L, 279L, 280L, 281L, 282L,
      283L, 284L, 285L, 286L, 288L, 289L, 290L, 291L, 292L, 293L, 298L,
      299L, 300L, 302L, 303L, 304L, 308L, 309L, 310L, 311L, 312L, 314L,
      315L, 316L, 317L, 318L, 319L, 323L, 325L
    )),
    class = "omit"
  )
)

library(dplyr)
#> Warning: package 'dplyr' was built under R version 4.1.2
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
# Sum only the numeric columns per element name.
# where(is.numeric) covers integer as well as double columns (is.double would
# silently skip integers) and keeps character columns away from sum().
# na.rm is passed inside a lambda because forwarding extra arguments through
# across()'s `...` is deprecated since dplyr 1.1.0.
dp %>%
  group_by(`Element Name`) %>%
  summarise(across(where(is.numeric), ~ sum(.x, na.rm = TRUE)))
#> # A tibble: 42 x 3
#>    `Element Name`              DemandCourse AmountsAv
#>    <chr>                              <dbl>     <dbl>
#>  1 1.3-dihydroxybenzene                 105      1400
#>  2 2-amino-benzoic acid                 675      3000
#>  3 2-hydroxybenzoic acid                300      4000
#>  4 2-Naphthol                           225       750
#>  5 Acetic acid                          309     12000
#>  6 Acetic anhydride                    1614      1000
#>  7 ammonium hydroxide                  5000      4300
#>  8 Ammonium Sulfate                     170      1200
#>  9 Aniline                              450     24000
#> 10 Barium Chloride.monoHydrate         2250      3000
#> # ... with 32 more rows

Created on 2022-03-14 by the reprex package (v2.0.1)

  •  Tags:  
  • r
  • Related