I'm struggling with the following dplyr pipeline:
# Reshape every signal column into long form (one row per ID/condition/signal),
# then run a Shapiro-Wilk normality test for each COND x signals group.
# NOTE(review): the long column keeps pivot_longer()'s default name `value`;
# this presumably clashes with the `variable`/`value` columns that
# shapiro_test() builds internally on grouped data -- confirm against the
# rstatix source.
normality_table <- df_join %>%
  tidyr::pivot_longer(
    cols = -c(ID, GR, SES, COND, time),
    names_to = 'signals'
  ) %>%
  group_by(COND, signals) %>%
  shapiro_test(value)
After running it, I get the following error:
Error: Problem with `mutate()` column `data`.
i `data = map(.data$data, .f, ...)`.
x Must group by variables found in `.data`.
* Column `variable` is not found.
That is quite strange, because if I run only the first part of the pipeline, without the shapiro_test() call, it correctly returns this:
# Run only the reshaping step and print the result to inspect the
# long-format columns (`signals` and `value`).
normality_table <- df_join %>%
  tidyr::pivot_longer(
    cols = -c(ID, GR, SES, COND, time),
    names_to = 'signals'
  ) %>%
  print()
# A tibble: 900 x 7
ID GR SES COND time signals value
<chr> <chr> <chr> <fct> <chr> <chr> <dbl>
1 01 RP V NEG-CTR t1 P3FCz -11.6
2 01 RP V NEG-CTR t1 P3Cz -5.17
3 01 RP V NEG-CTR t1 P3Pz 11.9
4 01 RP V NEG-CTR t1 LPPearlyFCz -11.8
5 01 RP V NEG-CTR t1 LPPearlyCz -5.96
6 01 RP V NEG-CTR t1 LPPearlyPz 8.24
7 01 RP V NEG-CTR t1 LPP1FCz -5.67
8 01 RP V NEG-CTR t1 LPP1Cz -0.774
9 01 RP V NEG-CTR t1 LPP1Pz 9.99
10 01 RP V NEG-CTR t1 LPP2FCz -0.199
This confirms that the variable 'value' exists in the dataset. Does anyone know what is going wrong?
Here is the dataset I'm working on:
> dput(head(df_join))
structure(list(ID = c("01", "01", "01", "04", "04", "04"), GR = c("RP",
"RP", "RP", "RP", "RP", "RP"), SES = c("V", "V", "V", "V", "V",
"V"), COND = structure(c(1L, 2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR",
"NEG-NOC", "NEU-NOC"), class = "factor"), P3FCz = c(-11.6312151716924,
-11.1438413285935, -3.99591470944713, -0.314155675382471, 0.238885648959708,
5.03749946898385), P3Cz = c(-5.16524399006139, -5.53112490175437,
0.621502123415388, 2.23100741241039, 3.96990710862955, 7.75899775608441
), P3Pz = c(11.8802266972569, 12.1053426662461, 12.955441582096,
15.0981004360619, 15.4046229884164, 16.671036999147), LPPearlyFCz = c(-11.7785042972793,
-9.14927207125904, -7.58190508537766, -4.01515836011381, -6.60165385653499,
-2.02861964460179), LPPearlyCz = c(-5.96429031525769, -5.10918437158799,
-2.81732229625975, -1.43557366487622, -3.14872157912645, 0.160393685024631
), LPPearlyPz = c(8.23981597718437, 9.51261484648731, 9.42367409925817,
5.06332653216481, 5.02619159395405, 9.07903916629231), LPP1FCz = c(-5.67295796971287,
-4.3918290080777, -2.96652960658775, 0.159183652691071, -1.78361184935376,
1.97377908783621), LPP1Cz = c(-0.774461731301161, -0.650009462761383,
1.14010250644923, 1.51403741206392, 0.25571835554024, 3.76051565494304
), LPP1Pz = c(9.99385579756163, 11.1212652173052, 10.6989716871958,
3.7899021820967, 4.59413830322224, 8.52123662617732), LPP2FCz = c(-0.198736254963744,
-3.16101041766438, 0.895992279831378, 3.11042068112836, 2.27800090558473,
3.83846437952292), LPP2Cz = c(2.96437294922766, -2.12913230708907,
2.94619035115619, 3.44844607014521, 3.02403433835637, 4.7045767546583
), LPP2Pz = c(6.28027312932027, 5.24535230966772, 7.68162285335806,
1.08242973465635, 2.99896314000211, 5.36085942954182), time = c("t1",
"t2", "t3", "t1", "t2", "t3")), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
CodePudding user response:
# Six-row sample tibble (2 IDs x 3 COND levels) with identifier columns
# ID, GR, SES, COND, time and twelve numeric signal columns (P3*, LPP*).
xx <- structure(list(ID = c("01", "01", "01", "04", "04", "04"), GR = c("RP",
"RP", "RP", "RP", "RP", "RP"), SES = c("V", "V", "V", "V", "V",
"V"), COND = structure(c(1L, 2L, 3L, 1L, 2L, 3L), .Label = c("NEG-CTR",
"NEG-NOC", "NEU-NOC"), class = "factor"), P3FCz = c(-11.6312151716924,
-11.1438413285935, -3.99591470944713, -0.314155675382471, 0.238885648959708,
5.03749946898385), P3Cz = c(-5.16524399006139, -5.53112490175437,
0.621502123415388, 2.23100741241039, 3.96990710862955, 7.75899775608441
), P3Pz = c(11.8802266972569, 12.1053426662461, 12.955441582096,
15.0981004360619, 15.4046229884164, 16.671036999147), LPPearlyFCz = c(-11.7785042972793,
-9.14927207125904, -7.58190508537766, -4.01515836011381, -6.60165385653499,
-2.02861964460179), LPPearlyCz = c(-5.96429031525769, -5.10918437158799,
-2.81732229625975, -1.43557366487622, -3.14872157912645, 0.160393685024631
), LPPearlyPz = c(8.23981597718437, 9.51261484648731, 9.42367409925817,
5.06332653216481, 5.02619159395405, 9.07903916629231), LPP1FCz = c(-5.67295796971287,
-4.3918290080777, -2.96652960658775, 0.159183652691071, -1.78361184935376,
1.97377908783621), LPP1Cz = c(-0.774461731301161, -0.650009462761383,
1.14010250644923, 1.51403741206392, 0.25571835554024, 3.76051565494304
), LPP1Pz = c(9.99385579756163, 11.1212652173052, 10.6989716871958,
3.7899021820967, 4.59413830322224, 8.52123662617732), LPP2FCz = c(-0.198736254963744,
-3.16101041766438, 0.895992279831378, 3.11042068112836, 2.27800090558473,
3.83846437952292), LPP2Cz = c(2.96437294922766, -2.12913230708907,
2.94619035115619, 3.44844607014521, 3.02403433835637, 4.7045767546583
), LPP2Pz = c(6.28027312932027, 5.24535230966772, 7.68162285335806,
1.08242973465635, 2.99896314000211, 5.36085942954182), time = c("t1",
"t2", "t3", "t1", "t2", "t3")), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
The Shapiro-Wilk test statistic can only be computed on samples of more than 2 observations, and in this six-row sample each COND x signals group contains only 2.
Grouping by the COND variable alone should work, though:
# Shapiro-Wilk normality test per condition on the six-row sample `xx`.
#
# The long value column is named `score` instead of pivot_longer()'s default
# `value`. NOTE(review): on a grouped data frame rstatix::shapiro_test()
# appears to reshape internally into columns called `variable`/`value`; a user
# column already named `value` then leads to the
# "Column `variable` is not found" error -- confirm against the rstatix source.
#
# Only COND is grouped here because `xx` has just 2 rows per COND x signal and
# the test needs more than 2 observations; on the full data, add `signals` to
# group_by() to get one test per condition and signal.
#
# Returns one row per group with the tested variable, the W statistic and the
# p-value.
xx %>%
  tidyr::pivot_longer(
    cols = -c(ID, GR, SES, COND, time),
    names_to = "signals",
    values_to = "score"
  ) %>%
  group_by(COND) %>%
  rstatix::shapiro_test(score)