I want to limit each of the AI_ columns to those that are above a certain threshold (in this case 0.0), and then return the standard deviation of those that fall within the 1st and 99th (or 25th to 75th) percentile
I used a function that limits a vector x to those that are above the threshold, then gets the 1st to 99th percentile, and return the st dev of the values that fall within this range.
f <- function(x, thresh=0) {
x <- x[x>thresh]
lims = quantile(x, probs=c(0.01,0.99))
sd(x[between(x,lims[1],lims[2])])
}
and then:
AI %>% group_by(ID) %>% summarize(across(starts_with("AI"), f))
Then I got the following ERRORs
Error in summarize(., across(starts_with("AI"), f)) :
i The error occurred in group 11: ID = "ID19".
Caused by error in across()
:
! Problem while computing column AI_2
.
Caused by error in quantile.default()
:
! missing values and NaN's not allowed if 'na.rm' is FALSE
I also used the following code for to keep values between the 1st and 99 percentile but that only works for 1 column and does not include the treshold..
keep <- AI$AI_1 >= quantile(AI$AI_1, .01) & AI$AI_1 <= quantile(AI$AI_1, .99)
AI_2 <- AI[keep,]
This is a sample of the dataset (does not include NA values..):
structure(list(X = 1:100, x1.time = c("00:00:00", "00:01:00",
"00:02:00", "00:03:00", "00:04:00", "00:05:00", "00:06:00", "00:07:00",
"00:08:00", "00:09:00", "00:10:00", "00:11:00", "00:12:00", "00:13:00",
"00:14:00", "00:15:00", "00:16:00", "00:17:00", "00:18:00", "00:19:00",
"00:20:00", "00:21:00", "00:22:00", "00:23:00", "00:24:00", "00:25:00",
"00:26:00", "00:27:00", "00:28:00", "00:29:00", "00:30:00", "00:31:00",
"00:32:00", "00:33:00", "00:34:00", "00:35:00", "00:36:00", "00:37:00",
"00:38:00", "00:39:00", "00:40:00", "00:41:00", "00:42:00", "00:43:00",
"00:44:00", "00:45:00", "00:46:00", "00:47:00", "00:48:00", "00:49:00",
"00:50:00", "00:51:00", "00:52:00", "00:53:00", "00:54:00", "00:55:00",
"00:56:00", "00:57:00", "00:58:00", "00:59:00", "01:00:00", "01:01:00",
"01:02:00", "01:03:00", "01:04:00", "01:05:00", "01:06:00", "01:07:00",
"01:08:00", "01:09:00", "01:10:00", "01:11:00", "01:12:00", "01:13:00",
"01:14:00", "01:15:00", "01:16:00", "01:17:00", "01:18:00", "01:19:00",
"01:20:00", "01:21:00", "01:22:00", "01:23:00", "01:24:00", "01:25:00",
"01:26:00", "01:27:00", "01:28:00", "01:29:00", "01:30:00", "01:31:00",
"01:32:00", "01:33:00", "01:34:00", "01:35:00", "01:36:00", "01:37:00",
"01:38:00", "01:39:00"), AI_1 = c(0.16528026437454, 0.163995204250076,
0.164581401494747, 0.161694314834269, 0.163917689382925, 0.164007457424482,
0.165191747536324, 0.16530877672998, 0.164301659628605, 0.16242849405884,
0.163719381361245, 0.164336918744191, 0.163752563186272, 0.164085843486251,
0.16378316688897, 0.166833331781526, 0.16429177819724, 0.163654350013046,
0.162191321708722, 0.164125054995564, 0.163007832586539, 0.164642576607691,
0.164240497997558, 0.164368040327747, 0.165115677448188, 0.166446115241087,
0.164364226061044, 0.163980648381964, 0.164217299179925, 0.163697267390534,
0.165023149913166, 0.164963028992835, 0.164581942410383, 0.164102888598112,
0.164114359824609, 0.164909199734182, 0.1647723097995, 0.164333631722669,
0.163217992625673, 0.162930307326649, 0.164019924780975, 0.162721307047624,
0.164192622368622, 0.164757374919656, 0.164779535457602, 0.165821461484994,
0.162647621622753, 0.165919323130543, 0.164117042017469, 0.162910189831296,
0.164258105734382, 0.162312615318485, 0.16428182389575, 0.16213902859448,
0.162884408334533, 0.162198437052509, 0.165414446056782, 0.162873605934549,
0.162172656294551, 0.162135582764911, 0.163042752319553, 0.16224644975155,
0.164009684185798, 0.163110019752022, 0.163379927078646, 0.164012257170658,
0.164439089427949, 0.16224770327632, 0.164591945565879, 0.164431298827987,
0.165736844531931, 0.162629472542491, 0.164787476839252, 0.162719814852985,
0.164527472183818, 0.162039786720132, 0.1645545617745, 0.164705888656581,
0.16242985788503, 0.16077563386833, 0.164190837236036, 0.164459238508408,
0.162516082375934, 0.164605728678467, 0.163147995834175, 0.161349837954994,
0.163897611611084, 0.162861088963398, 0.162403920420443, 0.163557577227182,
0.164293649590329, 0.161513543124043, 0.160984587132739, 0.162398145567455,
0.16196526716528, 0.164749564473586, 0.160500524232274, 0.161814085721974,
0.161727028371301, 0.163082100005441), AI_2 = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), AI_3 = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), AI_4 = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0), AI_5 = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), AI_6 = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0), AI_7 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0), ID = c("ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1", "ID1",
"ID1", "ID1", "ID1", "ID1", "ID1")), row.names = c(NA, 100L), class = "data.frame")
Does anyone know how to solve this problem?
CodePudding user response:
Where df
is your data set.
library(tidyverse)
thresh <- 0
df %>%
pivot_longer(starts_with("AI_")) %>%
arrange(name, X) %>%
filter(value > thresh) %>%
group_by(name) %>%
mutate(
q1 = quantile(value, probs = 0.01),
q99 = quantile(value, probs = 0.99)
) %>%
filter(value >= q1, value <= q99) %>%
group_by(name, ID) %>%
summarise(sd = sd(value)) %>%
ungroup() %>%
pivot_wider(
id_cols = "ID",
names_from = "name",
values_from = "sd"
)