I have a very large dataset (150 variables, 100 of which are discrete), so I made a vector (factor_columns) containing the names of all 100 columns that I need to convert to factors, and then used mutate and as.factor:
# Convert every column named in factor_columns to a factor in one step.
# across() is the current dplyr replacement for the superseded mutate_at();
# all_of() marks factor_columns as an external character vector of names.
data <- data %>%
  mutate(across(all_of(factor_columns), as.factor))
but then, for all the binary variables, I get a factor with 2 levels, "0" and "1", whose underlying values are actually "ones" and "twos" instead of "zeroes" and "ones".
For some reason, R sometimes treats these as 0 and 1 and sometimes as 1 and 2.
I have only found ways to do this one variable at a time, but with this number of variables that is not feasible. Is there a better way to keep the factor levels as 0 and 1? (For instance, logistic regression within ggplot2 does not work properly because y isn't between 0 and 1, but between 1 and 2.)
Providing a part of dataset:
"rcp_sex","age","bmi","factor1","factor2","CKD_stage"
"F",36.0739219712526,31,"0","0",NA
"F",26.2778918548939,16.3,"0","0","3a"
"M",38.9541409993155,29.4,"0","0","2"
"F",31.0992470910335,28.3,"0","0",NA
"M",34.6721423682409,28.9,"0","0","3b"
"M",46.6146475017112,26.6,"0","0","3b"
"M",48.7912388774812,23.3,"0","0","3a"
"M",25.2402464065708,27.2,"0","0","2"
"M",48.35318275154,33.5,"0","0","4"
"M",23.4989733059548,31.9,"0","0","3b"
"M",35.3182751540041,26.6,"0","0","2"
"F",34.2368240930869,40.2,"0","0","4"
"F",39.8110882956879,24.3,"0","0","5"
"M",20.6324435318275,30.4,"0","0","2"
"M",36.3230663928816,33.6,"0","0",NA
"M",42.652977412731,27.7,"0","0","3b"
"F",31.315537303217,23.1,"0","0","4"
"M",46.8528405201916,NA,"0","0",NA
"M",19.3949349760438,22,"0","0",NA
"F",26.4804928131417,25.9,"0","0","4"
"M",30.3107460643395,25.4,"0","0","4"
"F",30.2833675564682,24.7,"0","0",NA
"F",26.4804928131417,29.7,"0","0","4"
"F",46.3846680355921,28.8,"1","0","3b"
"F",32.9034907597536,32.7,"0","0","4"
"M",22.9103353867214,26.2,"0","0","4"
"M",35.2361396303901,26,"1","1","2"
"M",35.2361396303901,26,"1","1","2"
"M",53.6180698151951,21.3,"0","0",NA
"M",27.2470910335387,28.5,"0","1","5"
"M",27.2470910335387,28.5,"0","1","5"
"F",36.1998631074606,20.4,"0","0","3b"
"F",35.6221765913758,25.8,"1","0",NA
"M",57.3716632443532,21.5,"0","0","2"
"M",32.4134154688569,24.5,"0","0","2"
"M",20.2847364818617,28.2,"0","0","2"
"F",52.6023271731691,30.5,"1","0","4"
"F",33.5058179329227,21.6,"0","0",NA
"M",22.2532511978097,29.4,"0","0",NA
"M",49.8097193702943,22.9,"0","0","2"
"F",33.5085557837098,22.7,"0","0","5"
"F",41.3853524982888,32.7,"0","0","3b"
"M",52.9253935660507,29,"0","0","3a"
"M",54.0232717316906,25.2,"0","0","3a"
"F",57.9931553730322,17.4,"0","0","3a"
"F",36.5311430527036,22,"0","0",NA
"F",53.6235455167693,23.3,"0","0","3a"
"M",46.7679671457906,25.5,"0","0","3a"
"M",51.4387405886379,34,"0","0","3a"
"M",57.2539356605065,20.4,"0","0","4"
"M",28.331279945243,24.8,"0","0","2"
"F",22.3928815879535,19.6,"0","0","3b"
"M",52.0355920602327,28.3,"0","0",NA
"M",39.4387405886379,19.4,"0","0","4"
"F",35.7618069815195,27.9,"0","0","3b"
"M",34.5516769336071,30.9,"0","0","3a"
"F",36.2765229295003,29.4,"1","0",NA
"M",38.6173853524983,27.4,"0","0","3a"
"F",50.9596167008898,24.2,"0","0",NA
"M",21.6509240246407,28.4,"0","0","3b"
"F",46.8966461327858,22.1,"0","0","3b"
"F",56.1998631074606,30.8,"0","0",NA
"F",53.5496235455168,24.2,"0","0",NA
"M",23.9260780287474,15.2,"0","0","3b"
"M",59.0800821355236,22,"0","0","4"
"F",50.7022587268994,61.7,"0","0","4"
"M",27.4223134839151,22.1,"0","0","4"
"M",43.192334017796,24.9,"0","0","3b"
"M",53.5468856947296,29.3,"0","0","3b"
"M",63.4086242299795,25.7,"0","0",NA
"M",47.0417522245038,25.5,"0","0","4"
"M",45.1937029431896,30.3,"0","0","3a"
"M",31.4661190965092,25.3,"0","0","4"
"F",47.4661190965092,35.4,"0","0",NA
"F",44.9555099247091,34.6,"0","0",NA
"F",54.9760438056126,29.6,"0","0",NA
"M",45.6892539356605,29.1,"0","0","2"
"M",50.0917180013689,26.8,"0","0","3b"
"F",31.895961670089,25.9,"0","0","3b"
"M",61.7248459958932,25.9,"0","0","4"
"M",24.5749486652977,38.3,"0","0","3a"
"M",48.3148528405202,39.8,"0","0","5"
"F",50.3819301848049,32.4,"0","0","3a"
"F",58.2943189596167,35.3,"0","0","3b"
"F",28.2765229295003,24.6,"0","1","3b"
"F",28.2765229295003,24.6,"0","1","3b"
"F",24.6954140999316,21.1,"0","0","2"
"F",23.4223134839151,17,"1","0","3a"
"M",42.7214236824093,26.3,"0","0","3b"
"M",54.6365503080082,27.8,"0","0","3b"
"M",32.7200547570157,32.5,"0","0","3b"
"M",32.0684462696783,23,"0","0","4"
"M",44.8542094455852,30.1,"0","0",NA
"M",36.5886379192334,29.5,"0","0","2"
"F",39.5154004106776,27.7,"0","0","3b"
"M",49.015742642026,22.4,"1","0","4"
"M",35.6796714579055,31.6,"0","0",NA
"F",33.6810403832991,35,"0","0","3b"
"F",30.5270362765229,28,"0","0","2"
"M",57.5797399041752,24.2,"0","0","3a"
"M",50.2751540041068,24.7,"0","0","3a"
"F",63.0636550308008,28.5,"0","0","3a"
"M",52.8186173853525,25.9,"0","0",NA
"F",58.5927446954141,24.4,"0","0","3b"
"M",57.5605749486653,24.1,"0","0","3a"
"F",60.7173169062286,24.6,"0","0","3b"
"F",56.2354551676934,31.4,"0","0","4"
"M",71.586584531143,22.8,"0","0","3b"
"M",56.1806981519507,22.1,"0","0","3b"
"M",45.1362080766598,33.8,"0","0","4"
"F",43.4907597535934,18.9,"0","0","3b"
"M",27.9698836413415,32.4,"0","0",NA
"M",21.2731006160164,24.2,"0","0",NA
"M",46.7898699520876,33.1,"0","0","3a"
"F",37.2292950034223,22.1,"0","0",NA
"M",44.3504449007529,37.4,"0","0","3b"
"M",53.5770020533881,30.5,"0","0","4"
"M",30.1601642710472,26.8,"0","0","3a"
"M",39.2991101984942,40.6,"0","0","3a"
"F",26.0917180013689,26.7,"1","0","3a"
"M",60.3285420944559,32.7,"0","0","4"
"F",64.0739219712526,25.8,"1","0",NA
"F",47.4852840520192,36.4,"0","0","4"
"F",47.6358658453114,43.3,"1","0","4"
"M",47.3921971252567,31.8,"0","0","3a"
"F",41.5906913073238,18.1,"0","0","2"
"M",46.6228610540726,36.6,"0","0","4"
"F",57.8918548939083,30.3,"0","0","3b"
"M",57.1964407939767,27.8,"0","0","4"
"M",31.2361396303901,26.6,"0","0","3b"
"F",57.8754277891855,29.1,"0","0","3b"
"M",38.0342231348392,27.9,"0","0",NA
"M",29.1416837782341,23.6,"0","0",NA
"M",44.8761122518823,28.9,"0","0","2"
"F",47.2169746748802,30.4,"1","0","3b"
"M",39.5920602327173,28.4,"0","0",NA
"M",46.7926078028747,29.8,"0","0","2"
"M",28.249144421629,59.5,"0","0","1"
"M",46.0533880903491,29.4,"0","0",NA
"F",63.5619438740589,28.3,"0","0","4"
"F",59.4852840520192,40.4,"0","0","3b"
"M",34.9459274469541,24.5,"0","0","3a"
"M",48.0739219712526,21.8,"1","0","4"
"F",55.8932238193018,25.3,"0","0","3b"
"F",36.7611225188227,22.3,"0","0","3b"
"M",45.1143052703628,25.8,"0","0","2"
"F",35.9151266255989,26.3,"0","0",NA
"M",57.4976043805613,27.4,"0","0","3b"
"F",57.5496235455168,23,"0","0","3b"
"F",59.2279260780287,25.9,"0","0","3a"
"M",58.4010951403149,30,"0","0",NA
"F",36.1697467488022,30.3,"0","0","3b"
"M",51.0006844626968,24.1,"1","0","4"
"M",63.564681724846,31.7,"0","0","3b"
"M",44.9555099247091,25.8,"0","0","3a"
"F",46.943189596167,47,"0","0","2"
"M",44.7063655030801,27.5,"0","0","2"
"M",35.192334017796,26,"1","0","1"
"M",63.1101984941821,24,"0","0",NA
"F",53.864476386037,32,"0","0","3b"
"M",41.9110198494182,20.5,"0","1","2"
"M",41.9110198494182,20.5,"0","1","2"
"F",62.9185489390828,30.4,"0","0","3b"
"M",54.2203969883641,21.1,"1","0","2"
"M",46.9404517453799,25.5,"0","0","3a"
"F",64.4736481861738,31.2,"0","0","3a"
"M",57.927446954141,33,"0","0",NA
"M",43.8247775496235,30.6,"0","0","4"
"F",34.0889801505818,25.7,"0","0","2"
"M",51.0499657768652,28.9,"1","0","2"
"M",32.974674880219,36.3,"1","0","3a"
"F",57.8179329226557,21.4,"0","1",NA
"F",57.8179329226557,21.4,"0","1",NA
"F",61.0841889117043,21.2,"0","0","3b"
"M",48.3340177960301,22.5,"0","0","3b"
"F",48.0985626283368,26.3,"0","0",NA
"F",54.2012320328542,36.7,"0","0","3b"
"M",59.2854209445585,31.8,"0","0","3b"
"M",40.7310061601643,31.6,"0","1",NA
"M",40.7310061601643,31.6,"0","1",NA
"M",60.8980150581793,29.9,"0","0",NA
"M",49.8316221765914,26.3,"0","0",NA
"M",26.3052703627652,19.1,"0","1","3b"
"M",26.3052703627652,19.1,"0","1","3b"
"M",35.937029431896,31.1,"0","0",NA
"M",60.4654346338125,28.4,"0","0","4"
"F",37.347022587269,21.5,"0","0","3b"
"F",66.880219028063,25.9,"0","0",NA
"M",41.3114305270363,34.8,"0","0","3a"
"M",69.7084188911704,31.3,"0","0",NA
"M",61.5660506502396,25.1,"1","0","4"
"F",54.9733059548255,23.8,"1","0","3b"
"M",59.2662559890486,35.1,"0","0","3b"
"M",41.6180698151951,28.1,"0","0","3b"
"F",47.6988364134155,29.2,"0","0",NA
"M",49.719370294319,37.4,"0","0","3a"
"F",57.3442847364819,25,"0","0","4"
"F",41.6591375770021,26.7,"0","0",NA
"M",43.6933607118412,27.5,"1","0","3b"
And the code:
# Names of the categorical columns that should be stored as factors.
factor_columns <- c("factor1", "factor2", "CKD_stage", "rcp_sex")

# Convert all of them at once. across() is the current dplyr replacement for
# the superseded mutate_at(); all_of() marks factor_columns as an external
# character vector of column names.
data <- data %>%
  mutate(across(all_of(factor_columns), as.factor))
One of the issues (but not the only one) I encountered is with this piece of code:
# Scatter plot of factor1 against age with a logistic (binomial glm) smooth.
# Layers must be joined with `+`, and the data frame has to be the first
# argument of ggplot(); aes() goes second. Note that y is still the factor
# column here, which is what triggers the problem described in the question.
ggplot(data, aes(x = age, y = factor1)) +
  geom_point() +
  stat_smooth(
    method = "glm",
    se = FALSE,
    method.args = list(family = binomial)
  )
A similar problem occurs when using the function getDescriptionStatsBy from the Gmisc package, which then gives the proportions as a sum of "ones" (which are actually "zeroes" in the original coding).
Thank you.
CodePudding user response:
With `ggplot` and `stat_smooth`, we need a continuous axis, which means not a factor. From the examples in the `?geom_smooth` documentation:
# To fit a logistic regression, you need to coerce the values to
# a numeric vector lying between 0 and 1.
# as.numeric() on a factor returns its 1-based integer codes, so subtracting 1
# maps the first level to 0 and the second to 1 (assumes Kyphosis is a
# two-level factor -- confirm with str(rpart::kyphosis)).
ggplot(rpart::kyphosis, aes(Age, as.numeric(Kyphosis) - 1))
# The remaining plot layers are presumably elided from the quoted example.
...
Following these instructions (this process would be the same no matter what your factor levels are, as long as there are 2 levels):
# Logistic (binomial glm) smooth of factor1 against age.
# as.numeric() on a factor returns its 1-based integer codes (1 and 2 for a
# two-level factor), so subtracting 1 puts y on the 0/1 scale that the
# binomial family requires. The layers must be chained with `+`; without it
# each line is evaluated as a separate expression and nothing is added to
# the plot.
ggplot(data, aes(x = age, y = as.numeric(factor1) - 1)) +
  geom_point() +
  stat_smooth(
    method = "glm",
    se = FALSE,
    method.args = list(family = binomial)
  )
Your problem is not with the factor, but with the subsequent methods. If you have more problems, I'd encourage you to ask more questions illustrating them. But this same technique will probably work: binary data can be left as integer 1s and 0s, there is often no reason to convert such columns to factors at all, and many methods that expect binary data may not expect factors.