Apologies in advance as my understanding of user-defined functions in R is very limited (I've never been quite able to figure them out).
Basically, I would like to define a function that changes the class of my columns in the data frame. I have written the following code:
correct_var_types_func = function(data){
data[["Accident ID"]] = as.integer(data[["Accident ID"]])
data[["Accident Year"]] = as.integer(data[["Accident Year"]])
data[["City"]] = as.character(data[["City"]])
data[["Total Fatalities"]] = as.integer(data[["Total Fatalities"]])
data[["Total Number Injured"]] = as.integer(data[["Total Number Injured"]])
data[["Bicyclist Injuries"]] = as.integer(data[["Bicyclist Injuries"]])
data[["Bicyclist Fatalities"]] = as.integer(data[["Bicyclist Fatalities"]])
data[["Longitude"]] = as.numeric(data[["Longitude"]])
data[["Latitude"]] = as.numeric(data[["Latitide"]])
}
correct_var_types_func(test_data)
Running this code gives an error message Error in `[[<-.data.frame`(`*tmp*`, "Latitude", value = numeric(0)) : replacement has 0 rows, data has 50
I tried running the function again with the dataframe in quotes correct_var_types_function("test_data")
, but this gave me an error as well.
The test_data
dataframe is in my global environment. I have had similar issues with functions in the past and I am not quite sure how to resolve this so that the function does what I need it to do.
Thank you very much!
test_data = structure(list(`Accident ID` = c(6343597L, 6343740L, 6343743L,
6343752L, 6343831L, 6343846L, 6343870L, 6343900L, 6349938L, 6350439L,
6350519L, 6350521L, 6350552L, 6350558L, 6350562L, 6350596L, 6350598L,
6350629L, 6354470L, 6354480L), `Accident Year` = c("2011", "2011",
"2011", "2011", "2011", "2011", "2011", "2011", "2011", "2011",
"2011", "2011", "2011", "2011", "2011", "2011", "2011", "2011",
"2011", "2011"), City = c("Champaign-Urbana", "Champaign-Urbana",
"Champaign-Urbana", "Champaign-Urbana", "Champaign-Urbana", "Champaign-Urbana",
"Champaign-Urbana", "Champaign-Urbana", "Champaign-Urbana", "Champaign-Urbana",
"Champaign-Urbana", "Champaign-Urbana", "Champaign-Urbana", "Champaign-Urbana",
"Champaign-Urbana", "Champaign-Urbana", "Champaign-Urbana", "Champaign-Urbana",
"Champaign-Urbana", "Champaign-Urbana"), `Total Fatalities` = c("0",
"0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0", "0"), `Total Number Injured` = c("0",
"1", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "1", "3",
"0", "1", "0", "0", "2", "0"), `Bicyclist Injuries` = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `Bicyclist Fatalities` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Latitude = c("40.1102285458875",
"40.0981104733762", "40.0980895305079", "40.0981499831071", "40.1080218304802",
"40.1124497904284", "40.1314437866789", "40.132399221271", "40.074148834686",
"40.3168001708079", "39.9812000296446", "40.1192502269937", "40.024754913885",
"39.8986645952302", "40.1166326222204", "40.1360830131773", "40.0980934386924",
"40.3790963986935", "40.127649987084", "40.138010298808"), Longitude = c("-88.2421",
"-88.252", "-88.2467", "-88.2576", "-88.2963", "-88.205", "-88.197",
"-88.1966", "-88.2484", "-88.1833", "-88.2375", "-88.2469", "-88.256",
"-88.2707", "-88.2435", "-88.258", "-88.2446", "-87.9508", "-88.3001",
"-88.2389")), row.names = c(NA, 20L), class = "data.frame")
CodePudding user response:
Typo in my code...it runs correctly now. Thanks!
CodePudding user response:
A recommendation tangential to the typo: this might be a more robust function:
correct_var_types_func <- function(data) {
chr <- intersect(colnames(data),
c("City"))
int <- intersect(colnames(data),
c("Accident ID", "Accident Year", "Total Fatalities",
"Total Number Injured", "Bicyclist Injuries", "Bicyclist Fatalities"))
num <- intersect(colnames(data),
c("Longitude", "Latitude"))
missed <- setdiff(c(chr, int, num), colnames(data))
if (length(missed)) {
warning("missing column names: ",
paste(sQuote(missed, FALSE), collapse = ", "))
}
data[,chr] <- lapply(data[,chr,drop=FALSE], as.character)
data[,int] <- lapply(data[,int,drop=FALSE], as.character)
data[,num] <- lapply(data[,num,drop=FALSE], as.character)
data
}