Home > Blockchain >  Convert multiple columns from numeric to factor
Convert multiple columns from numeric to factor

Time:10-18

I thought this task would be simple, but I was surprised to find that it wasn't.

I have multiple selected columns with coded responses (Likert scales). I want to transform each of them into a factor variable with labelled factor levels (some of the levels were never chosen). The questionnaire is in German, which is why you probably won't be able to understand the labels.

# Recode the five-point Likert items (columns 3-21 and 23-25) as labelled
# factors. apply(..., 2, ...) calls the function once per column; the codes
# 0-4 are mapped to the German labels in the order given. The levels are
# listed explicitly because some of them were never chosen by respondents.
# NOTE(review): apply() works on a matrix rather than on the data frame's
# columns, which is presumably why the result comes back as character
# instead of factor -- confirm.
df[,c(3:21,23:25)] <- apply(df[,c(3:21,23:25)],2,
                                       function (x) factor(x,
                                                           levels = c(0,1,2,3,4),
                                                           labels = c("gar nicht",
                                                                      "gering",
                                                                      "eher schwach",
                                                                      "eher stark",
                                                                      "sehr stark"))) 
# Column 22 uses a different four-point scale (codes 0-3), so it is recoded
# separately from the Likert items above.
# NOTE(review): df[,22] on a plain data.frame presumably drops to a bare
# vector, which apply() cannot iterate over because it has no dimensions;
# the margin here is also 1 (rows) whereas the call above uses 2 (columns)
# -- verify.
df[,22] <- apply(df[,22],1,
                            function (x) factor(x,
                                                levels = c(0,1,2,3),
                                                labels = c("gar nicht",
                                                           "sofort",
                                                           "mittelfristig",
                                                           "langfristig")))

I need to handle the columns in two separate steps because of the different scales. Nevertheless, the code does not transform my data correctly: the resulting columns are character, not factor.

Here is my test data:

structure(list(ï..lfdNr = 1:20, company = c("Nationalpark Thayathal", 
"Naturpark Heidenreichsteiner Moor", "Naturpark Hohe Wand", "Tierpark Stadt Haag", 
"Ötscher Tropfsteinhöhle", "Carnuntum", "Stift Heiligenkreuz", 
"Ruine Kollmitz", "Schlosshof", "Retzer Erlebniskeller", "LOISIUM Weinwelt", 
"Bio Imkerei Stögerer", "Amethyst Welt Maissau", "Donau Niederösterreich tourismus", 
"Niederösterreich Bahnen", "Benediktinerstift Melk", "Kunstmeile Krems", 
"Die Garten Tulln", "Winzer Krems ", "Domäne Wachau"), A2_1_hitz = c(4L, 
NA, NA, 3L, NA, NA, 3L, 2L, 3L, NA, 3L, NA, 3L, NA, 2L, 3L, 3L, 
4L, 2L, 3L), A2_2_trock = c(3L, NA, NA, 3L, NA, NA, 3L, NA, 3L, 
NA, 2L, NA, 1L, NA, 2L, 4L, 3L, 4L, 2L, 3L), A2_3_reg = c(2L, 
NA, NA, 2L, NA, NA, 3L, 2L, 3L, NA, 3L, NA, 2L, NA, 3L, 4L, 2L, 
3L, 4L, 2L), A2_4_schnee = c(4L, NA, NA, 3L, NA, NA, NA, 3L, 
3L, NA, 1L, NA, 0L, NA, 4L, NA, 3L, 4L, 4L, 1L), B1_1_hitz = c(4L, 
NA, NA, 1L, NA, NA, NA, 3L, 3L, NA, 2L, NA, NA, NA, 2L, 3L, 2L, 
4L, 0L, 2L), B1_2_trock = c(3L, NA, NA, 2L, NA, NA, NA, NA, 3L, 
NA, 0L, NA, NA, NA, 2L, 3L, 2L, 4L, 3L, 1L), B1_3_reg = c(2L, 
NA, NA, 1L, NA, NA, NA, NA, 3L, NA, 3L, NA, NA, NA, 3L, 3L, 1L, 
2L, 3L, 3L), B1_4_schnee = c(1L, NA, NA, 0L, NA, NA, 0L, 0L, 
1L, NA, NA, NA, NA, NA, 4L, 1L, 0L, 4L, 0L, 0L), B2_1_nZuk = c(3L, 
NA, NA, 0L, NA, NA, NA, 0L, 0L, NA, 0L, NA, 0L, 3L, 3L, 0L, 3L, 
2L, 0L, 0L), B2_2_mZuk = c(3L, NA, NA, 0L, NA, NA, NA, 0L, 2L, 
NA, 2L, NA, 0L, 2L, 3L, 0L, 3L, 2L, 3L, 0L), B2_3_fZuk = c(3L, 
NA, NA, 2L, NA, NA, NA, NA, 2L, NA, 2L, NA, 0L, 2L, 3L, 0L, 3L, 
NA, 3L, 0L), C1_1_aktEin = c(2L, NA, NA, 1L, NA, NA, NA, NA, 
2L, NA, NA, NA, NA, NA, NA, 0L, 1L, 3L, 2L, 3L), C1_2_zukEin = c(3L, 
NA, NA, 2L, NA, NA, NA, NA, 3L, NA, NA, NA, NA, NA, NA, 0L, 2L, 
4L, 3L, 3L), C2_1_bisVer = c(2L, NA, NA, 1L, NA, NA, NA, NA, 
2L, NA, NA, NA, NA, NA, 2L, 2L, 1L, 3L, 2L, 2L), C2_2_zukVer = c(3L, 
NA, NA, 2L, NA, NA, NA, NA, 3L, NA, NA, NA, NA, NA, 2L, 2L, 2L, 
3L, 3L, 2L), C3_1_bisVer = c(NA, NA, NA, 1L, NA, NA, 2L, NA, 
3L, NA, NA, NA, NA, NA, 1L, 1L, 1L, NA, 2L, 2L), C3_2_zukVer = c(NA, 
NA, NA, 2L, NA, NA, 3L, NA, 3L, NA, NA, NA, NA, NA, 1L, 2L, 2L, 
NA, 3L, 2L), C4_1_EinKlim = c(NA, NA, NA, 2L, NA, NA, NA, NA, 
2L, NA, 2L, NA, NA, NA, 3L, 0L, 1L, NA, 3L, 1L), D1a_1_StÃ.rke = c(NA, 
NA, NA, 3L, NA, NA, NA, NA, 3L, NA, NA, NA, 3L, NA, 2L, 3L, 2L, 
3L, 3L, 3L), D1b_1_Dring = c(NA, NA, NA, NA, NA, NA, 2L, 3L, 
NA, NA, NA, NA, 2L, NA, 1L, 1L, 1L, 1L, 1L, 1L), D5_1_bestBed = c(NA, 
NA, NA, 0L, NA, NA, NA, NA, 3L, NA, NA, NA, NA, NA, NA, 2L, 1L, 
NA, 3L, 3L), E1_1_zuBesuch = c(NA, NA, NA, 2L, NA, NA, NA, NA, 
3L, NA, NA, NA, NA, NA, 4L, 1L, 4L, NA, 4L, NA), E1_2_wirtBed = c(NA, 
NA, NA, 3L, NA, NA, 3L, NA, 2L, NA, NA, NA, NA, NA, 1L, 1L, 4L, 
NA, 3L, NA)), row.names = c(NA, 20L), class = "data.frame")

Thanks, nadine

CodePudding user response:

We need `lapply` and not `apply`, because `apply` converts its input to a matrix, and a matrix can hold only a single class.

# Five-point Likert items: every column except 22.
# lapply() visits the data frame column by column and returns a list, so each
# column keeps its own class and is written back as a factor. (apply() would
# go through a matrix, which can hold only one type, and yield character.)
# All five levels are declared explicitly so that levels nobody chose are
# still present in the resulting factor.
df[, c(3:21, 23:25)] <- lapply(
  df[, c(3:21, 23:25)],
  function(x) {
    factor(x,
           levels = c(0, 1, 2, 3, 4),
           labels = c("gar nicht",
                      "gering",
                      "eher schwach",
                      "eher stark",
                      "sehr stark"))
  }
)

# Column 22 uses its own four-point scale. `[[` extracts the single column as
# a vector, so factor() can be applied to it directly without any *apply call.
df[[22]] <- factor(df[[22]],
                   levels = c(0, 1, 2, 3),
                   labels = c("gar nicht",
                              "sofort",
                              "mittelfristig",
                              "langfristig"))
  • Related