I have a data.frame with 3 columns:
# Example survey data in long format: 15 participants x 7 questions = 105 rows.
# All three columns are character vectors; `Scores` holds the answers as
# strings and contains one empty string (row 23, a Q2 answer).
df <- data.frame(
  # The same 15 participant IDs, in the same order, for each of the 7 questions.
  ParticipantID = rep(
    c(
      "R_3m49eVj2HUtJUT7", "R_UQMpKBVCiSCXCvv", "R_11ZLsdTLX0zwe77",
      "R_1H1sGpFS6srJHZa", "R_2OU6OMBsaEyGdh9", "R_1Qrvncp9IyGCQhl",
      "R_2QJNX5jr05Exz5K", "R_3s5DrmDel4LJAlw", "R_Ubx6wZtZejtEUH7",
      "R_3kzBb3CRtJzBPEE", "R_3IRbEYxYDSexz84", "R_3EHnuMJ1b48jvA9",
      "R_QhIZ4MiZ0UVdUoF", "R_1Kg2e4K2IGunWoA", "R_2UYIgsOvy0UVsqT"
    ),
    times = 7L
  ),
  # "Q1" for the first 15 rows, "Q2" for the next 15, ... up to "Q7".
  Question = rep(paste0("Q", 1:7), each = 15L),
  # One line of 15 answers per question, in participant order.
  Scores = c(
    # Q1
    "4", "2", "2", "2", "2", "4", "2", "2", "3", "4", "3", "1", "1", "4", "1",
    # Q2
    "3", "3", "3", "3", "1", "3", "1", "", "3", "1", "3", "1", "3", "3", "1",
    # Q3
    "4", "4", "4", "4", "4", "4", "1", "4", "4", "2", "4", "4", "4", "4", "4",
    # Q4
    "2", "2", "2", "2", "2", "2", "2", "2", "3", "3", "2", "2", "3", "2", "2",
    # Q5
    "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4",
    # Q6
    "4", "3", "3", "3", "3", "4", "3", "3", "3", "4", "3", "4", "3", "3", "3",
    # Q7
    "4", "4", "3", "4", "4", "4", "3", "4", "4", "4", "4", "4", "4", "4", "4"
  ),
  # Keep the columns as character regardless of the R version's default.
  stringsAsFactors = FALSE
)
I want to change the values of the column `Scores`, which consists of character values that represent numbers (e.g. "1", "2", etc.), to the character values "correct" or "incorrect". For this, I am trying to write code that changes each value of `Scores` depending on both the value of `Scores` and the value of `Question`.
In pseudocode:
If `Question` is equal to "Q1" and `Scores` is equal to "3", change the value to "correct". Else, change it to "incorrect". Rows where `Question` is not "Q1" should keep their current value.
I am trying the following:
# Caveat: the combined test is FALSE for every row whose Question is not "Q1",
# so ifelse() labels those rows "incorrect" too instead of leaving them as-is.
# Also, the column in `df` is named `Scores`: reading `df$Score` only works
# through `$` partial matching, and assigning to `df$Score` adds a new column.
df$Score <- ifelse(df$Question == "Q1" & df$Score == "3", "correct", "incorrect")
However, this code seems to ignore the first condition: it changes all the values in `Scores` depending on the condition `Scores == "3"` (so it does this for all rows, and not only for those where `Question` is equal to "Q1").
I also tried:
# The right-hand side has one element per row of `df`, but the left-hand side
# selects only the Q1 rows; R then uses just the leading elements and warns.
# That lines up for Q1 only because the Q1 rows come first in `df`.
df$Scores[df$Question == "Q1"] <- ifelse(df$Scores == "3","correct","incorrect")
This works with a warning:
Warning message: In df$Scores[df$Question == "Q1"] <- ifelse(df$Scores == "3", "correct", : number of items to replace is not a multiple of replacement length
However, if I keep using the code above for Q2 and change the condition of the `ifelse` to, e.g., `Scores` being equal to "1", then everything gets the "incorrect" label.
This is as far as I could go. I could swear I am using the ifelse() function correctly, but apparently that's not the case.
Thanks in advance for the help.
CodePudding user response:
You're close with your last attempt; you just need to apply the same subset inside the `ifelse` call as well, as below:
# Relabel only the Q1 rows. The replacement values are computed from the Q1
# subset of `Scores`, so they line up one-to-one with the rows being replaced.
df$Scores <- replace(
  df$Scores,
  df$Question == "Q1",
  ifelse(df$Scores[df$Question == "Q1"] == "3", "correct", "incorrect")
)
Output:
df[1:20, ]
ParticipantID Question Scores
1 R_3m49eVj2HUtJUT7 Q1 incorrect
2 R_UQMpKBVCiSCXCvv Q1 incorrect
3 R_11ZLsdTLX0zwe77 Q1 incorrect
4 R_1H1sGpFS6srJHZa Q1 incorrect
5 R_2OU6OMBsaEyGdh9 Q1 incorrect
6 R_1Qrvncp9IyGCQhl Q1 incorrect
7 R_2QJNX5jr05Exz5K Q1 incorrect
8 R_3s5DrmDel4LJAlw Q1 incorrect
9 R_Ubx6wZtZejtEUH7 Q1 correct
10 R_3kzBb3CRtJzBPEE Q1 incorrect
11 R_3IRbEYxYDSexz84 Q1 correct
12 R_3EHnuMJ1b48jvA9 Q1 incorrect
13 R_QhIZ4MiZ0UVdUoF Q1 incorrect
14 R_1Kg2e4K2IGunWoA Q1 incorrect
15 R_2UYIgsOvy0UVsqT Q1 incorrect
16 R_3m49eVj2HUtJUT7 Q2 3
17 R_UQMpKBVCiSCXCvv Q2 3
18 R_11ZLsdTLX0zwe77 Q2 3
19 R_1H1sGpFS6srJHZa Q2 3
20 R_2OU6OMBsaEyGdh9 Q2 1
Alternatively (using pure `ifelse` logic while assigning to the full `Scores` vector, as in your first attempt), you could try a nested `ifelse`:
# Full-length assignment: rows that are not Q1 keep their current value, and
# Q1 rows become "correct" when the score is "3" and "incorrect" otherwise.
df$Scores <- ifelse(
  df$Question != "Q1",
  df$Scores,
  ifelse(df$Scores == "3", "correct", "incorrect")
)