I would like to convert variable v1 from character to numeric. The values in v1 are numbers.
I tried:
v1 <- as.numeric(v1)
This changes the variable to numeric but changes all values with >3 digits to NA.
> dput(dat)
structure(list(X = c("Baldwin.County", "Banks.County", "Barrow.County",
"Bibb.County", "Butts.County", "Clarke.County", "Columbia.County",
"Dawson.County", "DeKalb.County", "Elbert.County", "Forsyth.County",
"Franklin.County", "Glascock.County", "Greene.County",
"Gwinnett.County",
"Habersham.County", "Hall.County", "Hancock.County", "Hart.County",
"Henry.County", "Jackson.County", "Jasper.County", "Jefferson.County",
"Jones.County", "Lamar.County", "Lincoln.County", "Lumpkin.County",
"McDuffie.County", "Madison.County", "Monroe.County", "Morgan.County",
"Newton.County", "Oconee.County", "Oglethorpe.County", "Putnam.County",
"Rabun.County", "Richmond.County", "Rockdale.County", "Spalding.County",
"Stephens.County", "Taliaferro.County", "Towns.County", "Union.County",
"Walton.County", "Warren.County", "Washington.County", "White.County",
"Wilkes.County", "Wilkinson.County"), total = c("11,936", "333",
"6,285", "50,801", "4,767", "21,606", "17,549", "117", "270,370",
"3,719", "5,508", "1,138", "185", "3,913", "159,130", "910",
"9,417", "4,687", "3,579", "65,739", "3,400", "2,037", "5,398",
"4,981", "3,210", "1,698", "366", "5,394", "1,757", "4,124",
"3,117", "30,641", "1,312", "1,799", "3,864", "252", "72,566",
"32,136", "14,073", "1,840", "742", "108", "155", "10,220", "2,221",
"7,293", "605", "2,801", "2,358"), malet = c("5,996", "166",
"2,957", "22,113", "2,889", "9,160", "8,268", "105", "118,932",
"1,688", "2,536", "511", " 54", "1,661", "71,095", "255", "4,410",
"2,605", "1,728", "28,442", "1,810", "960", "2,378", "2,358",
"1,426", "709", "178", "2,358", "916", "1,928", "1,325", "13,197",
"684", "820", "1,830", "209", "32,360", "13,739", "6,127", "852",
"358", " 41", "123", "4,545", "1,031", "3,528", "157", "1,255",
"1,089"), m1 = c("1,164", " 63", "476", "4,144", "1,050", "2,017",
"520", " 29", "13,043", "382", "130", "63", " 41", "365", "4,129",
" 35", "820", "1,134", "293", "2,430", "351", "215", "470", "180",
"156", "188", " 28", "630", "249", "606", "301", "1,681", "123",
"216", "206", " 49", "5,876", "1,012", "1,358", "53", " 97",
" 0", " 29", "954", "377", "896", " 48", "283", "94")), class =
"data.frame", row.names = c(NA, -49L))
CodePudding user response:
You can do this for all variables that contain numbers (but are not of type numeric) thus:
library(dplyr)
library(readr)
dat %>%
mutate(across(where( ~ any(str_detect(.,","))), ~ parse_number(.)))
X total malet m1
1 Baldwin.County 11936 5996 1164
2 Banks.County 333 166 63
3 Barrow.County 6285 2957 476
4 Bibb.County 50801 22113 4144
5 Butts.County 4767 2889 1050
6 Clarke.County 21606 9160 2017
7 Columbia.County 17549 8268 520
8 Dawson.County 117 105 29
9 DeKalb.County 270370 118932 13043
10 Elbert.County 3719 1688 382
11 Forsyth.County 5508 2536 130
12 Franklin.County 1138 511 63
13 Glascock.County 185 54 41
14 Greene.County 3913 1661 365
15 Gwinnett.County 159130 71095 4129
16 Habersham.County 910 255 35
17 Hall.County 9417 4410 820
18 Hancock.County 4687 2605 1134
19 Hart.County 3579 1728 293
20 Henry.County 65739 28442 2430
21 Jackson.County 3400 1810 351
22 Jasper.County 2037 960 215
23 Jefferson.County 5398 2378 470
24 Jones.County 4981 2358 180
25 Lamar.County 3210 1426 156
26 Lincoln.County 1698 709 188
27 Lumpkin.County 366 178 28
28 McDuffie.County 5394 2358 630
29 Madison.County 1757 916 249
30 Monroe.County 4124 1928 606
31 Morgan.County 3117 1325 301
32 Newton.County 30641 13197 1681
33 Oconee.County 1312 684 123
34 Oglethorpe.County 1799 820 216
35 Putnam.County 3864 1830 206
36 Rabun.County 252 209 49
37 Richmond.County 72566 32360 5876
38 Rockdale.County 32136 13739 1012
39 Spalding.County 14073 6127 1358
40 Stephens.County 1840 852 53
41 Taliaferro.County 742 358 97
42 Towns.County 108 41 0
43 Union.County 155 123 29
44 Walton.County 10220 4545 954
45 Warren.County 2221 1031 377
46 Washington.County 7293 3528 896
47 White.County 605 157 48
48 Wilkes.County 2801 1255 283
49 Wilkinson.County 2358 1089 94
Alternatively, you can first remove the comma and then explicitly convert to numeric, thus:
dat %>%
mutate(across(where( ~ any(str_detect(.,","))), ~ as.numeric(sub(",", "", .))))
If you add %>% str()
to the pipe you will see that all three variables that contain numbers have been converted to numeric.
If you want to convert to numeric just one particular column:
dat %>%
mutate(m1 = parse_number(m1))
or:
dat %>%
mutate(m1 = as.numeric(sub(",", "", m1)))