Issue:
I have created a table of summary of descriptive statistics for seven acoustic parameters that were measured in a spectrogram (see below). I would like to add a column that shows the number of observations in the data frame
that I'd like to add after the column Variable
.
I have tried using a combination of different dplyr code such as n = n()
, n = count(n())
, and n = length(n())
and I keep on getting the same error message (see below)
Please find dummy data below by using the function dput()
.
Would anyone be able to lend a hand?
Many thanks in advance if you can.
Error Message
Error in `dplyr::summarise()`:
! Problem while computing `..1 = across(...)`.
Caused by error:
! Invalid index: out of bounds
Run `rlang::last_error()` to see where the error occurred.
Summary Statistics Table
# A tibble: 9 × 11
variable Median Mean n SD SE Min Max q25 q75 CV
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
2 Low.Freq 30645 47718421. 7157763188 160229651. 13082696. 0 936779338 392. 5065917. 336.
3 High.Freq 6020. 33588147. 5038222034 126884782. 10360099. 0 825466852 78.5 941394. 378.
4 Peak.Freq 45487 74707306. 11206095904 202504621. 16534433. 0 999242982 436. 32466176. 271.
5 Delta.Freq 20268. 31612255. 4741838252 113350682. 9255044. 0 754038591 93.2 2282342. 359.
6 Delta.Time 16852. 64582719. 9687407814 208416077. 17017101. 0 946706344 70.5 4181862. 323.
7 Peak.Time 35342 64781815. 9717272204 190695860. 15570252. 1 964147297 790. 6424504. 294.
8 Start.Freq 39416. 54517987. 8177697991 173895386. 14198499. 0 940000382 77.2 2694535 319.
9 End.Freq 71317 41475068. 6221260243 132873661. 10849089. 1 856943893 430. 7667247. 320.
R-Code
#intall.packages(tidyr)
#intall.packages(dplyr)
library(dplyr)
library(tidyr)
#Function to calculate the coefficient of variation
cv <- function(x) 100*( sd(x)/mean(x))
#Produce the summary statistics table for the acoustic parameters
Summary_Statistics <- Dummy_Data[-1] %>%
dplyr::summarise(across(where(is.numeric), .fns =
list(n = length(n()),
Median = median,
Mean = mean,
n = sum,
SD = sd,
SE = ~sd(.)/sqrt(n()),
Min = min,
Max = max,
q25 = ~quantile(., 0.25),
q75 = ~quantile(., 0.75),
CV = cv
))) %>%
pivot_longer(everything(), names_sep = "_", names_to = c( "variable", ".value"))
#Neaten the results by producing a data frame
Final_Summary_Statistics<-as.data.frame(Summary_Statistics)
Dummy Data
structure(list(ID = 1:150, Low.Freq = c(435, 94103292, 1, 2688,
8471, 28818, 654755585, 468628164, 342491, 2288474, 3915, 411,
267864894, 3312618, 5383, 8989443, 1894, 534981, 9544861, 3437614,
475386, 7550764, 48744, 2317845, 5126197, 2445, 8, 557450, 450259742,
21006647, 9, 7234027, 59, 9, 605, 9199, 3022, 30218156, 46423,
38, 88, 396396244, 28934316, 7723, 95688045, 679354, 716352,
76289, 332826763, 6, 90975, 83103577, 9529, 229093, 42810, 5,
18175302, 1443751, 5831, 8303661, 86, 778, 23947, 8, 9829740,
2075838, 7434328, 82174987, 2, 94037071, 9638653, 5, 3, 65972,
0, 936779338, 4885076, 745, 8, 56456, 125140, 73043989, 516476,
7, 4440739, 612, 3966, 8, 9255, 84127, 96218, 5690, 56, 3561,
78738, 1803363, 809369, 7131, 0, 35502443, 88864, 94, 850, 89628,
261319, 4848, 97341913, 75, 82481, 806, 14, 7, 88, 912102577,
48, 169, 6949, 165, 35969, 44, 955, 9, 43090317, 3, 7905, 596,
7262304, 4246710, 68, 0, 6859264, 85725268, 985, 861158777, 95,
386, 8773, 765, 6798, 18, 9, 32472, 3, 647698334, 27918648, 836,
188837805, 968, 88656, 5823702), High.Freq = c(6071, 3210, 6,
7306092, 6919054, 666399, 78, 523880161, 4700783, 4173830, 30,
811, 341014, 780, 44749, 91, 201620707, 74, 1, 65422, 595, 89093186,
946520, 6940919, 655350, 4, 6, 618, 2006697, 889, 1398, 28769,
90519642, 984, 0, 296209525, 487088392, 5, 894, 529, 5, 99106,
2, 926017, 9078, 1, 21, 88601017, 575770, 48, 8431, 194, 62324996,
5, 81, 40634727, 806901520, 6818173, 3501, 91780, 36106039, 5834347,
58388837, 34, 3280, 6507606, 19, 402, 584, 76, 4078684, 199,
6881, 92251, 81715, 40, 327, 57764, 97668898, 2676483, 76, 4694,
817120, 51, 116712, 666, 3, 42841, 9724, 21, 4, 359, 2604, 22,
30490, 5640, 34, 51923625, 35544, 59644, 306266049, 27533545,
61, 2, 5970, 88728373, 664355064, 30548207, 254317, 868, 460,
80, 7003, 86, 916570, 3124, 24, 1217766, 1, 95912, 59634328,
0, 761341, 95, 5, 65269, 849, 30408, 99, 9, 78771, 1288, 70567,
4, 449, 77, 90, 4520867, 7492578, 825466852, 349445, 23, 9094,
20, 233144, 21, 82, 216201, 15988509, 8594969), Peak.Freq = c(87005561,
9102, 994839015, 42745869, 32840, 62737133, 2722, 24, 67404881,
999242982, 3048, 85315406, 703037627, 331264, 8403609, 3934064,
50578953, 370110665, 3414, 12657, 40, 432, 7707, 214, 68588962,
69467, 75, 500297, 704, 1, 102659072, 60896923, 4481230, 94124925,
60164619, 447, 580, 8, 172, 9478521, 20, 53, 3072127, 2160, 27301893,
8, 4263, 508, 712409, 50677, 522433683, 112844, 193385, 458269,
93578705, 22093131, 6, 9, 1690461, 0, 4, 652847, 44767, 21408,
5384, 304, 721, 651147, 2426, 586, 498289375, 945, 6, 816, 46207,
39135, 6621028, 66905, 26905085, 4098, 0, 14, 88, 530, 97809006,
90, 6, 260792844, 9, 833205723, 99467321, 5, 8455640, 54090,
2, 309, 299161148, 4952, 454824, 729805154, 395358183, 2643,
314568967, 80599, 34187604, 304, 86161503, 8685, 99892, 13687,
54, 98470989, 7, 55, 879113565, 106660, 902524584, 71116, 43689323,
863, 6930, 5, 965, 70107282, 29588, 9, 59, 39159, 64446665, 6034008,
96, 35850, 670200431, 17145, 311804, 31, 7747522, 2270162, 611,
36121267, 22900823, 3, 323, 844738, 956, 41433111, 22694748,
85899937, 42, 9497559), Delta.Freq = c(5, 78, 88553, 794, 5,
3859122, 782, 36, 8756801, 243169338, 817789, 8792384, 7431,
626921743, 9206, 95789, 7916, 8143453, 6, 4, 6363, 181125, 259618,
6751, 33, 37960, 0, 2, 599582228, 565585, 19, 48, 269450424,
70676581, 7830566, 4, 86484313, 21, 90899794, 2, 72356, 574280,
869544, 73418, 6468164, 2259, 5938505, 31329, 1249, 354, 8817,
3, 2568, 82809, 29836269, 5230, 37, 33752014, 79307, 1736, 8522076,
40, 2289135, 862, 801448, 8026, 5, 15, 4393771, 405914, 71098,
950288, 8319, 1396973, 832, 70, 1746, 61907, 8709547, 300750537,
45862, 91417085, 79892, 47765, 5477, 18, 4186, 2860, 754038591,
375, 53809223, 72, 136, 509, 232325, 13128104, 1692, 8581, 23,
7, 5, 1269784, 55451, 614683290, 645, 1, 16, 93, 19250054, 2695713,
78271, 7, 6, 691373, 0, 278, 42857, 8, 94900646, 27796603, 72716820,
12816272, 7738, 596087, 64091692, 2261964, 371, 978998, 127180699,
94, 60969, 23, 66, 13, 26408810, 4, 65, 8687, 72140, 566656,
263437292, 9088, 46496188, 215843, 510, 51989, 16736709, 147,
8823, 82), Delta.Time = c(1361082, 7926, 499, 5004, 3494530,
213, 64551179, 70, 797, 5, 72588, 86976, 5163, 635080, 3, 91,
919806257, 81443, 3135427, 4410972, 5810, 8, 46603718, 422, 1083626,
48, 15699890, 7, 90167635, 446459879, 2332071, 761660, 49218442,
381, 46, 493197, 46, 798597155, 45342274, 6265842, 6, 3445819,
351, 1761227, 214, 959, 908996387, 6, 3855, 9096604, 152664,
7970052, 32366926, 31, 5201618, 114, 7806411, 70, 239, 5065,
2, 1, 14472831, 122042249, 8, 495604, 29, 8965478, 2875, 959,
39, 9, 690, 933626665, 85294, 580093, 95934, 982058, 65244056,
137508, 29, 7621, 7527, 72, 2, 315, 6, 2413, 8625150, 51298109,
851, 890460, 160736, 6, 850842734, 2, 7, 76969113, 190536, 7855,
3, 946706344, 789239369, 93, 802838635, 291929, 55, 52, 6, 93601,
5, 179260, 44006, 220573, 42152733, 7, 534618382, 3, 768153,
206921, 59, 324331, 52641160, 74563, 24, 4, 24, 1046, 862, 71454,
98894864, 27686265, 72584615, 3, 106, 2, 67439, 6408, 2160359,
862, 998, 453, 26179, 5857483, 7796517, 659, 261, 0, 25779, 688592899
), Peak.Time = c(1465265, 452894, 545076172, 8226275, 5040875,
700530, 1, 3639, 20141, 71712131, 686, 923, 770569738, 69961,
737458636, 122403, 199502046, 6108, 907, 108078263, 7817, 4,
6, 69, 721, 786353, 87486, 1563, 876, 47599535, 79295722, 53,
7378, 591, 6607935, 954, 6295, 75514344, 5742050, 25647276, 449,
328566184, 4, 2, 2703, 21367543, 63429043, 708, 782, 909820,
478, 50, 922, 579882, 7850, 534, 2157492, 96, 6, 716, 5, 653290336,
447854237, 2, 31972263, 645, 7, 609909, 4054695, 455631, 4919894,
9, 72713, 9997, 84090765, 89742, 5, 5028, 4126, 23091, 81, 239635020,
3576, 898597785, 6822, 3798, 201999, 19624, 20432923, 18944093,
930720236, 1492302, 300122, 143633, 5152743, 417344, 813, 55792,
78, 14203776, 853437, 964147297, 895424, 57, 361, 68512, 5, 9395,
53562, 83436598, 6372, 33387, 51, 71, 2340, 246145, 58519, 15442937,
362336019, 213917, 5874213, 4550, 644668065, 3712371, 5928, 8833,
7, 2186023, 61627221, 37297, 716427989, 21387, 26639, 13428,
66290997, 6657, 16, 52256745, 1179, 578, 4602, 7, 5, 23530306,
545453, 6930, 9120798, 415, 229600784, 8862829), Start.Freq = c(426355,
22073538, 680374, 41771, 54, 6762844, 599171, 108, 257451851,
438814, 343045, 4702, 967787, 1937, 18, 89301735, 366, 90, 954,
7337732, 70891703, 4139, 10397931, 940000382, 7, 38376, 878528819,
6287, 738366, 31, 47, 5, 6, 77848, 2366508, 45, 3665842, 7252260,
6, 61, 3247, 448348, 1, 705132, 144, 7423637, 2, 497, 844927639,
78978, 914, 131, 7089563, 927, 9595581, 2774463, 1651, 73509280,
7, 35, 18, 96, 1, 92545512, 27354947, 7556, 65019, 7480, 71835,
8249, 64792, 71537, 349389666, 280244484, 82, 6, 40, 353872,
0, 103, 1255, 4752, 29, 76, 81185, 14, 9, 470775630, 818361265,
57947209, 44, 24, 41295, 4, 261449, 9931404, 773556640, 930717,
65007421, 341175, 287376310, 104, 17, 45616, 48, 0, 34436, 413591913,
799218, 81, 139678, 87142, 314040, 1, 486, 30120565, 95, 421252320,
586616452, 45, 2, 1470, 391034, 59182022, 75, 267101, 267, 2,
183169, 814298, 2, 68720184, 8256, 3, 26054, 391397, 5724, 61464405,
40455, 8066605, 1, 474008, 9577335, 1, 443959, 2454751, 2250,
30385546, 22091, 7973), End.Freq = c(71000996, 11613579, 71377155,
1942738, 8760748, 79, 455, 374, 8, 5, 2266932, 597833, 155488,
3020, 4, 554, 4, 16472, 1945649, 668181101, 649780, 22394365,
93060602, 172146, 20472, 23558847, 190513, 22759044, 44, 78450,
205621181, 218, 69916344, 23884, 66, 312148, 7710564, 4, 422,
744572, 651547554, 45554, 38493, 91055218, 38, 1116474, 2295482,
3001, 9, 3270, 141, 55595, 38451, 8660867, 14, 96, 345, 6, 44,
8235824, 910517, 1424326, 87102566, 53644, 667983, 565598, 84,
971, 555498297, 60431, 6597, 856943893, 607815536, 4406, 79,
7, 28978746, 7537295, 6, 633, 345860066, 802, 1035131, 602, 2740,
8065, 61370968, 429953765, 981507, 8105, 343787257, 44782, 64184,
12981359, 123367978, 818775, 123745614, 25345654, 3, 800889,
3087787, 2, 143387, 552269, 285146, 86, 95, 95207, 88122, 999,
7, 206341870, 11725048, 7398, 9686, 2913230, 771, 56634, 267,
87834983, 1910, 79718, 172348, 5042905, 47066, 88747, 9668242,
8175066, 51, 4578, 359354, 942, 2244547, 4799667, 44, 245003,
31132, 25, 58351180, 1, 24331, 74483974, 46771553, 46750, 6,
3, 14, 31554881, 1, 13), Species = c("Truncatus_Tursiops", "Truncatus_Tursiops",
"Truncatus_Tursiops", "Truncatus_Tursiops", "Truncatus_Tursiops",
"Truncatus_Tursiops", "Truncatus_Tursiops", "Truncatus_Tursiops",
"Truncatus_Tursiops", "Truncatus_Tursiops", "Truncatus_Tursiops",
"Truncatus_Tursiops", "Truncatus_Tursiops", "Truncatus_Tursiops",
"Truncatus_Tursiops", "Truncatus_Tursiops", "Truncatus_Tursiops",
"Truncatus_Tursiops", "Truncatus_Tursiops", "Truncatus_Tursiops",
"Truncatus_Tursiops", "Truncatus_Tursiops", "Truncatus_Tursiops",
"Truncatus_Tursiops", "Truncatus_Tursiops", "Truncatus_Tursiops",
"Truncatus_Tursiops", "Truncatus_Tursiops", "Delphinus_Delphinus",
"Delphinus_Delphinus", "Delphinus_Delphinus", "Delphinus_Delphinus",
"Delphinus_Delphinus", "Delphinus_Delphinus", "Delphinus_Delphinus",
"Delphinus_Delphinus", "Delphinus_Delphinus", "Delphinus_Delphinus",
"Delphinus_Delphinus", "Delphinus_Delphinus", "Delphinus_Delphinus",
"Delphinus_Delphinus", "Delphinus_Delphinus", "Delphinus_Delphinus",
"Delphinus_Delphinus", "Delphinus_Delphinus", "Delphinus_Delphinus",
"Delphinus_Delphinus", "Delphinus_Delphinus", "Delphinus_Delphinus",
"Delphinus_Delphinus", "Delphinus_Delphinus", "Delphinus_Delphinus",
"Delphinus_Delphinus", "Delphinus_Delphinus", "Delphinus_Delphinus",
"Delphinus_Delphinus", "Delphinus_Delphinus", "Grampus_griseus",
"Grampus_griseus", "Grampus_griseus", "Grampus_griseus", "Grampus_griseus",
"Grampus_griseus", "Grampus_griseus", "Grampus_griseus", "Grampus_griseus",
"Grampus_griseus", "Grampus_griseus", "Grampus_griseus", "Grampus_griseus",
"Grampus_griseus", "Grampus_griseus", "Grampus_griseus", "Grampus_griseus",
"Grampus_griseus", "Grampus_griseus", "Grampus_griseus", "Grampus_griseus",
"Grampus_griseus", "Grampus_griseus", "Grampus_griseus", "Grampus_griseus",
"Grampus_griseus", "Grampus_griseus", "Grampus_griseus", "Grampus_griseus",
"Truncatus_Tursiops", "Truncatus_Tursiops", "Truncatus_Tursiops",
"Truncatus_Tursiops", "Truncatus_Tursiops", "Truncatus_Tursiops",
"Truncatus_Tursiops", "Truncatus_Tursiops", "Delphinus_Delphinus",
"Delphinus_Delphinus", "Delphinus_Delphinus", "Delphinus_Delphinus",
"Delphinus_Delphinus", "Delphinus_Delphinus", "Delphinus_Delphinus",
"Delphinus_Delphinus", "Delphinus_Delphinus", "Delphinus_Delphinus",
"Delphinus_Delphinus", "Delphinus_Delphinus", "Delphinus_Delphinus",
"Delphinus_Delphinus", "Delphinus_Delphinus", "Delphinus_Delphinus",
"Grampus_griseus", "Grampus_griseus", "Grampus_griseus", "Grampus_griseus",
"Grampus_griseus", "Grampus_griseus", "Grampus_griseus", "Grampus_griseus",
"Grampus_griseus", "Grampus_griseus", "Delphinus_Delphinus",
"Delphinus_Delphinus", "Delphinus_Delphinus", "Delphinus_Delphinus",
"Delphinus_Delphinus", "Delphinus_Delphinus", "Delphinus_Delphinus",
"Delphinus_Delphinus", "Delphinus_Delphinus", "Delphinus_Delphinus",
"Grampus_griseus", "Grampus_griseus", "Grampus_griseus", "Grampus_griseus",
"Grampus_griseus", "Grampus_griseus", "Grampus_griseus", "Grampus_griseus",
"Grampus_griseus", "Grampus_griseus", "Grampus_griseus", "Grampus_griseus",
"Grampus_griseus", "Grampus_griseus", "Grampus_griseus", "Grampus_griseus",
"Truncatus_Tursiops", "Truncatus_Tursiops", "Truncatus_Tursiops"
)), class = "data.frame", row.names = c(NA, -150L))
CodePudding user response:
Could you do it like this? You're calculating n
twice, and you could put n()
after the across.
library(tidyverse)
tribble(~col1, ~col2, ~col3,
1, 2, 3,
2, 3, 4,
3, 4, 5
) |>
summarise(across(where(is.numeric),
list(Median = median,
Mean = mean,
nsum = sum)), n = n())
#> # A tibble: 1 × 10
#> col1_Median col1_Mean col1_nsum col2_Median col2_Mean col2_nsum col3_Median
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2 2 6 3 3 9 4
#> # … with 3 more variables: col3_Mean <dbl>, col3_nsum <dbl>, n <int>
Created on 2022-05-23 by the reprex package (v2.0.1)
CodePudding user response:
There's also package skimr
that can replace most of your code:
skimr::skim(Dummy_Data)
However it gives you the number of missing values instead of the number of non-missing values - you can mutate()
to fix that:
library(dplyr)
Dummy_Data |>
select(-c(ID, Species)) |>
skimr::skim() |>
mutate(N = nrow(Dummy_Data) - n_missing,
SE = numeric.sd/sqrt(N)) |>
as_tibble() |>
select(skim_variable, N, numeric.mean, numeric.p50, numeric.sd, SE, numeric.p0, numeric.p100, numeric.p25, numeric.p75)
Output:
#> # A tibble: 8 x 10
#> skim_variable N numeric.mean numeric.p50 numeric.sd SE numeric.p0
#> <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 Low.Freq 150 47718421. 30645 160229651. 13082696. 0
#> 2 High.Freq 150 33588147. 6020. 126884782. 10360099. 0
#> 3 Peak.Freq 150 74707306. 45487 202504621. 16534433. 0
#> 4 Delta.Freq 150 31612255. 20268. 113350682. 9255044. 0
#> 5 Delta.Time 150 64582719. 16852. 208416077. 17017101. 0
#> 6 Peak.Time 150 64781815. 35342 190695860. 15570252. 1
#> 7 Start.Freq 150 54517987. 39416. 173895386. 14198499. 0
#> 8 End.Freq 150 41475068. 71317 132873661. 10849089. 1
#> # ... with 3 more variables: numeric.p100 <dbl>, numeric.p25 <dbl>,
#> # numeric.p75 <dbl>
Not sure about the coefficient of variation but it looks like you can customise the function with skim_with()
.
CodePudding user response:
Edit: it appears that you're getting the error from when you are using a function within a function as is the case in length(n())
you have to use the lambda to ~
, also you had two n's, I changed one to sum
df[-1] %>%
dplyr::summarise(across(where(is.numeric), .fns =
list(n = ~length(n()),
Median = median,
Mean = mean,
sum = sum,
SD = sd,
SE = ~ sd(.x)/sqrt(n()),
Min = min,
Max = max,
q25 = ~ quantile(., 0.25),
q75 = ~ quantile(., 0.75),
CV = cv
))) %>%
pivot_longer(everything(), names_sep = "_", names_to = c( "variable", ".value"))
I think you might find it easier to use the geoup_by
into summarise
method after pivoting
df %>%
pivot_longer(cols = 2:(ncol(df) - 1), names_to = "variable") |>
group_by(variable) |>
summarise(mean = mean(value), SD = sd(value), Min = min(value), max = max(value), SE = sd(value)/sqrt(n()))