I have the following data set:
structure(list(count = c("0-0", "1-0", "2-0", "2-1", "0-0", "0-1",
"0-2", "1-2", "1-2", "0-0", "0-1", "1-1", "1-2", "2-2", "2-2",
"0-0", "1-0", "1-1", "2-1", "3-1", "3-2", "0-0", "1-0", "1-1",
"0-0", "0-1", "1-1", "1-2", "0-0", "1-0", "1-1", "0-0", "0-1",
"0-0", "1-0", "1-1", "1-2", "0-0", "0-1", "0-2", "0-0", "0-1",
"0-2", "1-2", "1-2", "0-0", "0-0", "0-1", "0-0", "0-0", "0-0",
"1-0", "2-0", "0-0", "1-0", "2-0", "3-0", "0-0", "0-0", "1-0",
"1-1", "0-0", "0-0", "1-0", "2-0", "0-0", "0-1", "0-2", "0-2",
"0-0", "1-0", "1-1", "2-1", "2-2", "2-2", "0-0", "1-0", "2-0",
"2-1", "2-2", "0-0", "0-1", "0-0", "0-0", "0-1", "0-2", "0-2",
"1-2", "2-2", "0-0", "1-0", "1-1", "0-0", "1-0", "0-0", "0-1",
"1-1", "1-2"), pitchResult = c("Ball", "Ball",
"Foul", "Ground Out", "Foul", "Strike Looking", "Ball", "Foul",
"Ground Out", "Strike Looking", "Ball", "Foul", "Ball", "Foul",
"Ground Out", "Ball", "Strike Looking", "Ball", "Ball", "Strike Swinging",
"Single on a Fly Ball", "Ball", "Strike Swinging", "Double Play",
"Strike Looking", "Ball", "Strike Looking", "Ground Out", "Ball",
"Strike Swinging", "Ground Out", "Foul", "Single on a Fly Ball",
"Ball", "Strike Swinging", "Strike Swinging", "Strikeout (Swinging)",
"Strike Looking", "Foul", "Strikeout (Swinging)", "Strike Looking",
"Strike Looking", "Ball", "Foul", "Fly Out", "Fly Out", "Strike Looking",
"Fly Out", "Double on a Fly Ball", "Hit By Pitch", "Ball", "Ball",
"Fly Out", "Ball", "Ball", "Ball", "Walk", "Double Play", "Ball",
"Strike Looking", "Single on a Ground Ball", "Fly Out", "Ball",
"Ball", "Fly Out", "Strike Looking", "Foul", "Foul", "Single on a Ground Ball",
"Ball", "Strike Looking", "Ball", "Foul", "Foul", "Home Run on a 402.65 ft Fly Ball",
"Ball", "Ball", "Strike Swinging", "Foul", "Fly Out", "Strike Swinging",
"Line Out", "Fly Out", "Strike Looking", "Foul", "Foul", "Ball",
"Ball", "Single on a Ground Ball", "Ball", "Strike Looking",
"Fielder's Choice", "Ball", "Ground Out", "Foul", "Ball", "Strike Swinging",
"Single on a Line Drive"), gameId = c(536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L, 536158720L, 536158720L, 536158720L, 536158720L, 536158720L,
536158720L), inn = c("Top 1", "Top 1",
"Top 1", "Top 1", "Top 1", "Top 1", "Top 1", "Top 1", "Top 1",
"Top 1", "Top 1", "Top 1", "Top 1", "Top 1", "Top 1", "Top 2",
"Top 2", "Top 2", "Top 2", "Top 2", "Top 2", "Top 2", "Top 2",
"Top 2", "Top 2", "Top 2", "Top 2", "Top 2", "Top 3", "Top 3",
"Top 3", "Top 3", "Top 3", "Top 3", "Top 3", "Top 3", "Top 3",
"Top 3", "Top 3", "Top 3", "Top 4", "Top 4", "Top 4", "Top 4",
"Top 4", "Top 4", "Top 4", "Top 4", "Top 5", "Top 5", "Top 5",
"Top 5", "Top 5", "Top 5", "Top 5", "Top 5", "Top 5", "Top 5",
"Top 6", "Top 6", "Top 6", "Top 6", "Top 6", "Top 6", "Top 6",
"Top 6", "Top 6", "Top 6", "Top 6", "Top 6", "Top 6", "Top 6",
"Top 6", "Top 6", "Top 6", "Top 6", "Top 6", "Top 6", "Top 6",
"Top 6", "Top 7", "Top 7", "Top 7", "Top 7", "Top 7", "Top 7",
"Top 7", "Top 7", "Top 7", "Top 7", "Top 7", "Top 7", "Top 8",
"Top 8", "Top 8", "Top 8", "Top 8", "Top 8"
), batter = c("Player A", "Player A", "Player A", "Player A", "Player B", "Player B",
"Player B", "Player B", "Player B", "Player C", "Player C", "Player C", "Player C",
"Player C", "Player C", "Player D", "Player D", "Player D", "Player D", "Player D",
"Player D", "Player E", "Player E", "Player E", "Player F", "Player F", "Player F",
"Player F", "Player G", "Player G", "Player G", "Player H", "Player H", "Player I",
"Player I", "Player I", "Player I", "Player A", "Player A", "Player A", "Player B",
"Player B", "Player B", "Player B", "Player B", "Player C", "Player D", "Player D",
"Player E", "Player F", "Player G", "Player G", "Player G", "Player H", "Player H",
"Player H", "Player H", "Player I", "Player A", "Player A", "Player A", "Player B",
"Player C", "Player C", "Player C", "Player D", "Player D", "Player D", "Player D",
"Player E", "Player E", "BPlayer E", "Player E", "Player E", "Player E", "Player F", "Player F",
"Player F", "Player F", "Player F", "Player G", "Player G", "Player H", "Player I",
"Player I", "Player I", "Player I", "Player I", "Player I", "Player A", "Player A", "Player A",
"Player B", "Player B", "Player C", "Player C", "Player C", "Player C"), pitcher = c("Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1", "Player 1",
"Player 2", "Player 2", "Player 2", "Player 2", "Player 2", "Player 2",
"Player 2", "Player 2", "Player 2", "Player 2", "Player 2", "Player 2",
"Player 2", "Player 2", "Player 2", "Player 2"
), bb = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA,
100L), class = "data.frame")
Players A-I take at-bats (groups of pitches) against Players 1 and 2. If a batter walked or was hit by a pitch during an at-bat, this is marked in the bb column on the row of the pitch where that event occurred. What I want is for the bb column to show a 1 on every row of that at-bat, instead of only on the row where the event happened.
CodePudding user response:
I am not familiar with baseball rules, but I tried to get a result that matches your description. Note that your data has 100 rows, but the 99th and 100th rows are "NA", so I only use rows 1 to 98. Here is my attempt:
# Flag every pitch of an at-bat (same batter, same inning) with 1 when any
# pitch in that at-bat has bb == 1, otherwise 0.
library(dplyr)
dat[1:98, ] %>%
  group_by(batter, inn) %>%
  # any() is evaluated once per group, so all rows of the group share the flag.
  # The earlier str_detect(pitchResult, "[Walk|Hit]") test was dropped: the
  # brackets make it a character class (any single one of W, a, l, k, |, H,
  # i, t) rather than "Walk or Hit", and it was applied per row, so it could
  # reset the flag to 0 on individual rows inside a flagged at-bat.
  mutate(wanted_bb = as.numeric(any(bb == 1))) %>%
  select(-bb) %>%
  as.data.frame()
The result:
count pitchResult gameId inn batter pitcher wanted_bb
1 0-0 Ball 536158720 Top 1 Player A Player 1 0
2 1-0 Ball 536158720 Top 1 Player A Player 1 0
3 2-0 Foul 536158720 Top 1 Player A Player 1 0
4 2-1 Ground Out 536158720 Top 1 Player A Player 1 0
5 0-0 Foul 536158720 Top 1 Player B Player 1 0
6 0-1 Strike Looking 536158720 Top 1 Player B Player 1 0
7 0-2 Ball 536158720 Top 1 Player B Player 1 0
8 1-2 Foul 536158720 Top 1 Player B Player 1 0
9 1-2 Ground Out 536158720 Top 1 Player B Player 1 0
10 0-0 Strike Looking 536158720 Top 1 Player C Player 1 0
11 0-1 Ball 536158720 Top 1 Player C Player 1 0
12 1-1 Foul 536158720 Top 1 Player C Player 1 0
13 1-2 Ball 536158720 Top 1 Player C Player 1 0
14 2-2 Foul 536158720 Top 1 Player C Player 1 0
15 2-2 Ground Out 536158720 Top 1 Player C Player 1 0
16 0-0 Ball 536158720 Top 2 Player D Player 1 0
17 1-0 Strike Looking 536158720 Top 2 Player D Player 1 0
18 1-1 Ball 536158720 Top 2 Player D Player 1 0
19 2-1 Ball 536158720 Top 2 Player D Player 1 0
20 3-1 Strike Swinging 536158720 Top 2 Player D Player 1 0
21 3-2 Single on a Fly Ball 536158720 Top 2 Player D Player 1 0
22 0-0 Ball 536158720 Top 2 Player E Player 1 0
23 1-0 Strike Swinging 536158720 Top 2 Player E Player 1 0
24 1-1 Double Play 536158720 Top 2 Player E Player 1 0
25 0-0 Strike Looking 536158720 Top 2 Player F Player 1 0
26 0-1 Ball 536158720 Top 2 Player F Player 1 0
27 1-1 Strike Looking 536158720 Top 2 Player F Player 1 0
28 1-2 Ground Out 536158720 Top 2 Player F Player 1 0
29 0-0 Ball 536158720 Top 3 Player G Player 1 0
30 1-0 Strike Swinging 536158720 Top 3 Player G Player 1 0
31 1-1 Ground Out 536158720 Top 3 Player G Player 1 0
32 0-0 Foul 536158720 Top 3 Player H Player 1 0
33 0-1 Single on a Fly Ball 536158720 Top 3 Player H Player 1 0
34 0-0 Ball 536158720 Top 3 Player I Player 1 0
35 1-0 Strike Swinging 536158720 Top 3 Player I Player 1 0
36 1-1 Strike Swinging 536158720 Top 3 Player I Player 1 0
37 1-2 Strikeout (Swinging) 536158720 Top 3 Player I Player 1 0
38 0-0 Strike Looking 536158720 Top 3 Player A Player 1 0
39 0-1 Foul 536158720 Top 3 Player A Player 1 0
40 0-2 Strikeout (Swinging) 536158720 Top 3 Player A Player 1 0
41 0-0 Strike Looking 536158720 Top 4 Player B Player 1 0
42 0-1 Strike Looking 536158720 Top 4 Player B Player 1 0
43 0-2 Ball 536158720 Top 4 Player B Player 1 0
44 1-2 Foul 536158720 Top 4 Player B Player 1 0
45 1-2 Fly Out 536158720 Top 4 Player B Player 1 0
46 0-0 Fly Out 536158720 Top 4 Player C Player 1 0
47 0-0 Strike Looking 536158720 Top 4 Player D Player 1 0
48 0-1 Fly Out 536158720 Top 4 Player D Player 1 0
49 0-0 Double on a Fly Ball 536158720 Top 5 Player E Player 1 0
50 0-0 Hit By Pitch 536158720 Top 5 Player F Player 1 1
51 0-0 Ball 536158720 Top 5 Player G Player 1 0
52 1-0 Ball 536158720 Top 5 Player G Player 1 0
53 2-0 Fly Out 536158720 Top 5 Player G Player 1 0
54 0-0 Ball 536158720 Top 5 Player H Player 1 1
55 1-0 Ball 536158720 Top 5 Player H Player 1 1
56 2-0 Ball 536158720 Top 5 Player H Player 1 1
57 3-0 Walk 536158720 Top 5 Player H Player 1 1
58 0-0 Double Play 536158720 Top 5 Player I Player 1 0
59 0-0 Ball 536158720 Top 6 Player A Player 1 0
60 1-0 Strike Looking 536158720 Top 6 Player A Player 1 0
61 1-1 Single on a Ground Ball 536158720 Top 6 Player A Player 1 0
62 0-0 Fly Out 536158720 Top 6 Player B Player 1 0
63 0-0 Ball 536158720 Top 6 Player C Player 1 0
64 1-0 Ball 536158720 Top 6 Player C Player 1 0
65 2-0 Fly Out 536158720 Top 6 Player C Player 1 0
66 0-0 Strike Looking 536158720 Top 6 Player D Player 1 0
67 0-1 Foul 536158720 Top 6 Player D Player 1 0
68 0-2 Foul 536158720 Top 6 Player D Player 1 0
69 0-2 Single on a Ground Ball 536158720 Top 6 Player D Player 1 0
70 0-0 Ball 536158720 Top 6 Player E Player 1 0
71 1-0 Strike Looking 536158720 Top 6 Player E Player 1 0
72 1-1 Ball 536158720 Top 6 BPlayer E Player 1 0
73 2-1 Foul 536158720 Top 6 Player E Player 1 0
74 2-2 Foul 536158720 Top 6 Player E Player 1 0
75 2-2 Home Run on a 402.65 ft Fly Ball 536158720 Top 6 Player E Player 1 0
76 0-0 Ball 536158720 Top 6 Player F Player 1 0
77 1-0 Ball 536158720 Top 6 Player F Player 1 0
78 2-0 Strike Swinging 536158720 Top 6 Player F Player 1 0
79 2-1 Foul 536158720 Top 6 Player F Player 1 0
80 2-2 Fly Out 536158720 Top 6 Player F Player 1 0
81 0-0 Strike Swinging 536158720 Top 7 Player G Player 1 0
82 0-1 Line Out 536158720 Top 7 Player G Player 1 0
83 0-0 Fly Out 536158720 Top 7 Player H Player 2 0
84 0-0 Strike Looking 536158720 Top 7 Player I Player 2 0
85 0-1 Foul 536158720 Top 7 Player I Player 2 0
86 0-2 Foul 536158720 Top 7 Player I Player 2 0
87 0-2 Ball 536158720 Top 7 Player I Player 2 0
88 1-2 Ball 536158720 Top 7 Player I Player 2 0
89 2-2 Single on a Ground Ball 536158720 Top 7 Player I Player 2 0
90 0-0 Ball 536158720 Top 7 Player A Player 2 0
91 1-0 Strike Looking 536158720 Top 7 Player A Player 2 0
92 1-1 Fielder's Choice 536158720 Top 7 Player A Player 2 0
93 0-0 Ball 536158720 Top 8 Player B Player 2 0
94 1-0 Ground Out 536158720 Top 8 Player B Player 2 0
95 0-0 Foul 536158720 Top 8 Player C Player 2 0
96 0-1 Ball 536158720 Top 8 Player C Player 2 0
97 1-1 Strike Swinging 536158720 Top 8 Player C Player 2 0
98 1-2 Single on a Line Drive 536158720 Top 8 Player C Player 2 0
CodePudding user response:
An approach similar to Abdur Rohman's, but with a slightly different interpretation of your description:
library(dplyr)
library(stringr)
# An at-bat is treated as a run of consecutive rows with the same batter:
# grp increases by one each time the batter differs from the previous row.
# The first row has no previous row, so its NA comparison is turned into
# FALSE by coalesce().
df[1:98, ] %>%
  group_by(batter, grp = cumsum(coalesce(batter != lag(batter), FALSE))) %>%
  # any() gives one TRUE/FALSE per at-bat; as.integer() converts it to the
  # 1/0 flag instead of leaving a logical column.
  mutate(new_bb = as.integer(any(str_detect(pitchResult, "Walk|Hit")))) %>%
  ungroup() %>%
  select(-grp)
This returns
# A tibble: 98 x 8
count pitchResult gameId inn batter pitcher bb new_bb
<chr> <chr> <int> <chr> <chr> <chr> <dbl> <int>
1 0-0 Ball 536158720 Top 1 Player A Player 1 0 0
2 1-0 Ball 536158720 Top 1 Player A Player 1 0 0
3 2-0 Foul 536158720 Top 1 Player A Player 1 0 0
4 2-1 Ground Out 536158720 Top 1 Player A Player 1 0 0
5 0-0 Foul 536158720 Top 1 Player B Player 1 0 0
6 0-1 Strike Looking 536158720 Top 1 Player B Player 1 0 0
7 0-2 Ball 536158720 Top 1 Player B Player 1 0 0
8 1-2 Foul 536158720 Top 1 Player B Player 1 0 0
9 1-2 Ground Out 536158720 Top 1 Player B Player 1 0 0
10 0-0 Strike Looking 536158720 Top 1 Player C Player 1 0 0
...
48 0-1 Fly Out 536158720 Top 4 Player D Player 1 0 0
49 0-0 Double on a Fly Ball 536158720 Top 5 Player E Player 1 0 0
50 0-0 Hit By Pitch 536158720 Top 5 Player F Player 1 1 1
51 0-0 Ball 536158720 Top 5 Player G Player 1 0 0
52 1-0 Ball 536158720 Top 5 Player G Player 1 0 0
53 2-0 Fly Out 536158720 Top 5 Player G Player 1 0 0
54 0-0 Ball 536158720 Top 5 Player H Player 1 0 1
55 1-0 Ball 536158720 Top 5 Player H Player 1 0 1
56 2-0 Ball 536158720 Top 5 Player H Player 1 0 1
57 3-0 Walk 536158720 Top 5 Player H Player 1 1 1
58 0-0 Double Play 536158720 Top 5 Player I Player 1 0 0
59 0-0 Ball 536158720 Top 6 Player A Player 1 0 0
...
95 0-0 Foul 536158720 Top 8 Player C Player 2 0 0
96 0-1 Ball 536158720 Top 8 Player C Player 2 0 0
97 1-1 Strike Swinging 536158720 Top 8 Player C Player 2 0 0
98 1-2 Single on a Line Drive 536158720 Top 8 Player C Player 2 0 0
I don't know whether the last batter of one inning (the inn column) can also be the first batter of the next inning. If that can happen, you need to include inn
in the group_by
statement.