I have this type of data:
# Sample data for the question: 17 rows (row names 129 to 145) and six
# columns. Note that `Line` is stored as character, not as a number.
df <- structure(
  list(
    Line = c(
      "129", "130", "131", "132", "133", "134", "135", "136", "137",
      "138", "139", "140", "141", "142", "143", "144", "145"
    ),
    Actor = c(
      "R", "R", "R", "R", "R", "B", "R", "B", "B",
      "B", "M", "M", "M", "M", "M", "W", "M"
    ),
    Act_cat = c(
      "SpeechRec", "ver", "SpeechRec", "ges", "ges", "gaze", "ges",
      "gaze", "gaze", "gaze", "gaze", "gaze", "gaze", "gaze", "gaze",
      "gaze", "gaze"
    ),
    Activity = c(
      "hey", "dort drüben die sparrenburg", "schwert", "D-onset",
      "D-peak", "~", "D-retract", "@tum", "~", "@tum", "~", "@tum",
      "~", "@tum", "~", "~", "@tum"
    ),
    Starttime_ms = c(
      46616, 48825, 48865, 49220, 50080, 50730, 50900, 51009, 51191,
      51270, 51486, 51809, 52251, 52333, 53227, 53267, 53429
    ),
    Endtime_ms = c(
      47616, 53035, 49865, 50080, 50900, 51009, 52220, 51191, 51270,
      53474, 51808, 52250, 52332, 53226, 53428, 53524, 53606
    )
  ),
  row.names = 129:145,
  class = "data.frame"
)
What I need to do is slice/filter that subset of rows where `Starttime_ms` is >= the `Starttime_ms` of the pattern `sparrenburg` in column `Activity` and `Endtime_ms` is <= the `Endtime_ms` of the same pattern `sparrenburg` in column `Activity`.
I've tried these two subsetting methods but neither works correctly:
library(dplyr)
# Attempt 1 (does not give the wanted rows): `:` builds a sequence from a
# single start and a single end value, but each which() call here can return
# many row positions, so the range is not the intended one.
# NOTE(review): str_detect() comes from stringr, which library(dplyr) does not
# attach - confirm it is loaded elsewhere.
df %>% slice(which(Starttime_ms >= Starttime_ms[str_detect(Activity, "sparrenburg")])
:
which(Endtime_ms <= Endtime_ms[str_detect(Activity, "sparrenburg")]))
and:
# Attempt 2 (does not give the wanted rows): between(x, left, right) checks
# left <= x <= right, but here x is the character column Line and both bounds
# are logical comparison results rather than boundary values.
df %>% filter(between(Line,
Starttime_ms >= Starttime_ms[str_detect(Activity, "sparrenburg")],
Endtime_ms <= Endtime_ms[str_detect(Activity, "sparrenburg")]))
How can I subset so that the result is this:
130 130 R ver dort drüben die sparrenburg 48825 53035
131 131 R SpeechRec schwert 48865 49865
132 132 R ges D-onset 49220 50080
133 133 R ges D-peak 50080 50900
134 134 B gaze ~ 50730 51009
135 135 R ges D-retract 50900 52220
136 136 B gaze @tum 51009 51191
137 137 B gaze ~ 51191 51270
138 138 B gaze @tum 51270 53474
139 139 M gaze ~ 51486 51808
140 140 M gaze @tum 51809 52250
141 141 M gaze ~ 52251 52332
142 142 M gaze @tum 52333 53226
CodePudding user response:
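You were close with the conditions you set up, but you need to provide them to `dplyr::filter()` connected with the logical AND operator `&` to require both. Because you may have multiple rows that satisfy the condition `str_detect(Activity, "sparrenburg")`, you can just take the `min()` and `max()` to get the most extreme ones for the comparison. Note that rows 138 and 142 from your expected output are not returned, because their `Endtime_ms` values (53474 and 53226) are greater than the 53035 of the `sparrenburg` row.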
library(tidyverse)
# Sample data repeated from the question so that this example runs on its own.
df <- structure(
  list(
    Line = c("129", "130", "131", "132", "133", "134", "135", "136", "137",
             "138", "139", "140", "141", "142", "143", "144", "145"),
    Actor = c("R", "R", "R", "R", "R", "B", "R", "B", "B", "B", "M", "M",
              "M", "M", "M", "W", "M"),
    Act_cat = c("SpeechRec", "ver", "SpeechRec", "ges", "ges", "gaze",
                "ges", "gaze", "gaze", "gaze", "gaze", "gaze", "gaze",
                "gaze", "gaze", "gaze", "gaze"),
    Activity = c("hey", "dort drüben die sparrenburg", "schwert",
                 "D-onset", "D-peak", "~", "D-retract", "@tum", "~",
                 "@tum", "~", "@tum", "~", "@tum", "~", "~", "@tum"),
    Starttime_ms = c(46616, 48825, 48865, 49220, 50080, 50730, 50900,
                     51009, 51191, 51270, 51486, 51809, 52251, 52333,
                     53227, 53267, 53429),
    Endtime_ms = c(47616, 53035, 49865, 50080, 50900, 51009, 52220,
                   51191, 51270, 53474, 51808, 52250, 52332, 53226,
                   53428, 53524, 53606)
  ),
  row.names = 129:145,
  class = "data.frame"
)
# Keep only the rows whose Starttime_ms/Endtime_ms both fall inside the time
# span of the row(s) whose Activity matches "sparrenburg".
# min()/max() reduce each bound to a single value in case several rows match;
# the two conditions are joined with `&` so that a row must satisfy both.
df %>%
  filter(
    Starttime_ms >=
      min(Starttime_ms[str_detect(Activity, "sparrenburg")], na.rm = TRUE) &
      Endtime_ms <=
        max(Endtime_ms[str_detect(Activity, "sparrenburg")], na.rm = TRUE)
  )
#> Line Actor Act_cat Activity Starttime_ms Endtime_ms
#> 1 130 R ver dort drüben die sparrenburg 48825 53035
#> 2 131 R SpeechRec schwert 48865 49865
#> 3 132 R ges D-onset 49220 50080
#> 4 133 R ges D-peak 50080 50900
#> 5 134 B gaze ~ 50730 51009
#> 6 135 R ges D-retract 50900 52220
#> 7 136 B gaze @tum 51009 51191
#> 8 137 B gaze ~ 51191 51270
#> 9 139 M gaze ~ 51486 51808
#> 10 140 M gaze @tum 51809 52250
#> 11 141 M gaze ~ 52251 52332
Created on 2022-02-16 by the reprex package (v2.0.1)
CodePudding user response:
You can do that with base indexing.
# Flag the reference row(s) once instead of running the pattern match twice.
is_ref <- stringr::str_detect(df$Activity, "sparrenburg")
# min()/max() collapse each bound to a single value, so the comparisons below
# do not silently recycle if the pattern matches more than one row.
starttime <- min(df[["Starttime_ms"]][is_ref], na.rm = TRUE)
stoptime <- max(df[["Endtime_ms"]][is_ref], na.rm = TRUE)
# `[[` extracts plain vectors, so the row index is an ordinary logical vector
# rather than the one-column logical matrix that comparing df["..."] yields.
slice_df <- df[
  df[["Starttime_ms"]] >= starttime & df[["Endtime_ms"]] <= stoptime,
]
> slice_df
Line Actor Act_cat Activity Starttime_ms Endtime_ms
130 130 R ver dort drüben die sparrenburg 48825 53035
131 131 R SpeechRec schwert 48865 49865
132 132 R ges D-onset 49220 50080
133 133 R ges D-peak 50080 50900
134 134 B gaze ~ 50730 51009
135 135 R ges D-retract 50900 52220
136 136 B gaze @tum 51009 51191
137 137 B gaze ~ 51191 51270
139 139 M gaze ~ 51486 51808
140 140 M gaze @tum 51809 52250
141 141 M gaze ~ 52251 52332