I have a CSV
data in which I want to merge every two rows
in each of the columns. The code below works fine on other CSVs
. However, it returns an error
while mutating
with this particular dataset.
How can I fix this? The purpose is to merge every two rows in each of the columns in this dataset.
Error:
Error in mutate()
:
! Problem while computing id = rep(1:(n()/2), each = 2)
.
x id
must be size 179 or 1, not 178.
Code:
df = df%>% mutate(id = rep(1:(n()/2), each = 2)) %>%
group_by(id) %>%
summarize(across(date_received:permit,
~trimws(paste0(.x, collapse = " "))))
Data:
structure(list(date_received = c("Existing/Pool", "1/26/2021",
"Existing ", "2/3/2021", "Existing", "2/10/2021", "Abandonment",
"2/11/2021", "Holding Tank", "2/11/2021", "Existing/Additon",
"2/19/2021", "Abandonment", "2/23/2021", "Existing ", "3/2/2021",
"Existing", "3/2/2021", "Existing", "3/2/2021", "Existing/Pool",
"3/3/2021", "Abandonment", "2/11/2021", "Abandonment", "3/8/2021",
"Existing", "3/10/2021", "Abandonment", "3/10/2021", "Abandonment",
"3/11/2021", "Abandonment", "3/25/2021", "Abandonment", "3/30/2021",
"Abandonment", "4/6/2021", "Abandonment", "4/2/2021", "Abandonment",
"4/7/2021", "Abandonment", "4/16/2021", "Holding Tank", "4/19/2021",
"Existing/Pool", "4/21/2021", "Holding Tank", "4/26/2021", "Abandonment",
"4/27/2021", "Existing Addition", "4/1/2020", "Portables", "4/30/2021",
"Existing ", "5/17/2021", "Existing", "5/27/2021", "Existing",
"5/27/2021", "Abandonment ", "6/2/2021", "Existing", "6/4/2021",
"Abandonment", "6/9/2021", "Portables", "6/14/2021", "Pool Addition",
"6/28/2021", "Existing Addition", "6/28/20201", "Abandonment",
"6/29/2021", "Holding Tank", "6/21/2021", "Abandonment", "7/2/2021",
"Pool Addition", "7/6/2021", "Holding Tank", "7/9/2021", "Abandonment",
"7/15/2021", "Abandonment", "7/16/2021", "Holding Tank", "7/27/2021",
"Abandonment", "7/28/2021", "Holding Tank", "7/28/2021", "Existing",
"8/3/2021", "Abandonment", "8/3/2021", "Existing", "8/17/2021",
"Abandonment", "9/8/2021", "Abandonment", "9/14/2021", "Existing",
"8/19/2021", "Existing ", "9/20/2021", "Restroom", "9/20/2021",
"Holding Tank", "9/22/2021", "Existing", "10/7/2021", "Holding Tank",
"10/5/2021", "Existing", "10/8/2021", "Existing", "10/13/2021",
"Holding Tank", "10/15/2021", "Abandonment", "10/18/2021", "Abandonment",
"10/19/2021", "Holding Tank", "10/22/2021", "Abandonment", "10/25/2021",
"Holding Tank", "10/25/2021", "Restroom", "10/27/2021", "Abandonment",
"11/1/2021", "Restroom", "11/4/2021", "Abandonment", "11/4/2021",
"Existing", "6/14/2021", "Existing", "11/9/2021", "Holding Tank",
"11/10/2021", "Abandonment", "11/18/2021", "Abandonment", "11/18/2021",
"Abandonment", "11/18/2021", "Abandonment", "11/18/2021", "Existing",
"11/22/2021", "Abandonment ", "11/30/2021", "Abandonment ", "Abandonment ",
"11/30/2021", "Abandonment ", "11/30/2021", "Abandonment ", "11/30/2021",
"Abandonment ", "11/30/2021", "Abandonment ", "11/30/2021", "Abandonment ",
"11/17/2021", "Abandonment", "12/3/2021", "Abandonment", "12/3/2021",
"Abandonment", "12/13/2021", "Abandonment", "12/14/2021"), date_approved = c("HC",
"1/27/2021", "CE", "2/3/2021", "NC", "2/10/2021", "VV", "2/11/2021",
"SB", "2/18/2021", "VV", "2/19/2021", "HC", "2/23/2021", "CE",
"3/2/2021", "NC", "3/2/2021", "NC", "3/2/2021", "HC", "3/3/2021",
"", "2/19/2021", "", "3/8/2021", "NC", "3/10/2021", "NC", "3/10/2021",
"HC", "3/11/2021", "HC", "3/25/2021", "NC", "3/30/2021", "SB",
"4/6/2021", "NC", "4/2/2021", "NC", "4/7/2021", "HC", "4/19/2021",
"NC", "4/20/2021", "VV", "4/21/2021", "VV", "4/26/2021", "NC",
"4/27/2021", "SB", "4/28/2021", "CE", "5/3/2021", "", "5/17/2021",
"VV", "5/27/2021", "VV", "5/27/2021", "", "6/3/2021", "", "6/4/2021",
"", "6/9/2021", "NC", "6/14/2021", "SB", "6/28/2021", "", "6/28/2021",
"", "6/29/2021", "", "7/1/2021", "VV", "7/6/2021", "SB", "7/6/2021",
"VV", "7/9/2021", "SB", "7/15/2021", "HC", "7/16/2021", "NC",
"7/27/2021", "HC", "7/28/2021", "HC", "7/28/2021", "", "8/3/2021",
"", "8/4/2021", "", "8/17/2021", "SB", "9/8/2021", "NC", "9/14/2021",
"", "9/14/2021", "NC", "9/20/2021", "CE", "9/20/2021", "NC",
"9/22/2021", "", "10/7/2021", "", "10/7/2021", "", "10/8/2021",
"SB", "10/13/2021", "", "10/15/2021", "", "10/19/2021", "HC",
"10/19/2021", "HC", "10/22/2021", "", "10/25/2021", "", "10/25/2021",
"", "10/28/2021", "", "11/1/2021", "", "11/4/2021", "", "11/5/2021",
"", "11/9/2021", "", "11/9/2021", "", "", "", "11/18/2021", "",
"11/18/2021", "", "11/18/2021", "", "11/18/2021", "", "11/22/2021",
"", "11/30/2021", "", "", "11/30/2021", "", "11/30/2021", "",
"11/30/2021", "", "11/30/2021", "", "11/30/2021", "", "11/17/2021",
"", "12/3/2021", "", "12/3/2021", "", "12/3/2021", "", "12/15/2021"
), ap = c("", "AP-21-001", "", "AP-21-002", "", "AP-21-003",
"", "AP-21-004", "", "AP-21-005", "", "AP-21-006", "", "AP-21-007",
"", "AP-21-008", "", "AP-21-009", "", "AP-21-010", "", "AP-21-011",
"", "AP-21-012", "", "AP-21-013", "", "AP-21-014", "", "AP-21-015",
"", "AP-21-016", "", "AP-21-017", "", "AP-21-018", "", "AP-21-019",
"", "AP-21-020", "", "AP-21-021", "", "AP-21-022", "", "AP-21-023",
"", "AP-21-024", "", "AP-21-025", "", "AP-21-026", "", "AP-21-027",
"", "AP-21-028", "", "AP-21-029", "", "AP-21-030", "", "AP-21-031",
"", "AP-21-032", "", "AP-21-033", "", "AP-21-034", "", "AP-21-034",
"", "AP-21-035", "", "AP-21-036", "", "AP-21-037", "", "AP-21-038",
"", "AP-21-039", "", "AP-21-040", "", "AP-21-041", "", "AP-21-042",
"", "AP-21-043", "", "AP-21-044", "", "AP-21-045", "", "AP-21-046",
"", "AP-21-047", "", "AP-21-048", "", "AP-21-049", "", "AP-21-050",
"", "AP-21-051", "", "AP-21-052", "", "AP-21-053", "", "AP-21-054",
"", "AP-21-055", "", "AP-21-056", "", "AP-21-057", "", "AP-21-058",
"", "AP-21-059", "", "AP-21-060", "", "AP-21-061", "", "AP-21-062",
"", "AP-21-063", "", "AP-21-064", "", "AP-21-065", "", "AP-21-066",
"", "AP-21-067", "", "AP-21-068", "", "AP-21-069", "", "AP-21-070",
"", "AP-21-071", "", "AP-21-072", "", "AP-21-073", "", "AP-21-074",
"", "AP-21-075", "", "AP-21-076", "", "AP-21-077", "", "AP-21-078",
"", "", "AP-21-080", "", "AP-21-081", "", "AP-21-082", "", "AP-21-083",
"", "AP-21-084", "", "AP-21-085", "", "AP-21-086", "", "AP-21-087",
"", "AP-21-088", "", "AP-21-089"), permit = c("", "06-SE-2224986",
"", "06-SE-2227597", "", "06-SM-2239687", "", "06-", "", "06-SM-2241767",
"", "06-SE-2243142", "", "06-SE-2244287", "", "06-SE-2246427",
"", "06-SM-2246414", "", "06-SM-2246400", "", "06-SE-2247078",
"", "06-SM-2242491", "", "06-SM-2248390", "", "06-SM-2249588",
"", "06-SM-2249687", "", "06-SE-2250115", "", "06-SE-2254824",
"", "06-SM-2255976", "", "06-SM-2269208", "", "06-SM-2267853",
"", "06-SM-2269667", "", "06-SM-2273781", "", "06-SM-2273781",
"", "06-SE-2274109", "", "06-SE-2275335", "", "06-SM-2276119",
"", "06-SM-2045833", "", "06-SE-2278410", "", "06-SM-2292821",
"", "06-SE-2296438", "", "06-SE-", "", "06-SM-2308253", "", "06-SE-2308815",
"", "06-SM-2310625", "", "06-SM-2310987", "", "06-SM-2316223",
"", "", "", "06-SE-2316719", "", "06-SM-2327886", "", "06-SE-2328314",
"", "06-SM-2328437", "", "06-SE-2329714", "", "06-SM-2331651",
"", "06-SE-2332160", "", "06-SM-2335477", "", "06-SE-2335855",
"", "06-SE-2335957", "", "06-SE-2337766", "", "06-SE-2338114",
"", "06-SE-2351769", "", "06-SM-2368036", "", "06-SM-2369869",
"", "06-SE-23651769", "", "06-SM-2371532", "", "06-SE-2371710",
"", "06-SM-237238", "", "06-SM-2407385", "", "06-SM-2407543",
"", "06-SM-2407855", "", "06-SM-2409931", "", "06-SE-2410182",
"", "06-SE-2411071", "", "06-SE-2411268", "", "06-SE-2412725",
"", "06-SE-2413035", "", "06-SE-2413090", "", "06-SE-2413883",
"", "06-SM-2415394", "", "06-SE-2416802", "", "06-SE-2417359",
"", "06-SM-2408057", "", "06-SM-2418381", "", "06-SE-2418776",
"", "06-SM-2421477", "", "06-SM-2421473", "", "06-SM-", "", "06-SM-",
"", "06-SM-2410994", "Pool", "AP1766846", "Classroom E", "Classroom F",
"AP1766856", "Classroom C", "AP1766858", "Classroom A", "AP1766862",
"Landscape Area", "AP1766864", "Classroom B", "AO1766867", "",
"06-SE-2420566", "", "06-E-2425187", "", "", "", "06-SM-2424110",
"", "06-SE-2428789")), class = "data.frame", row.names = c(NA,
-179L))
CodePudding user response:
To assign the 'id', you can increase the count of rows by 1, replicate each number by twice and then select the first n
.
df%>% mutate(id = rep(1:((n() 1)/2), each = 2)[1:n()]) %>% group_by(id) %>%
summarize(across(date_received:permit,
~trimws(paste0(.x, collapse = " "))))
Result:
# A tibble: 90 x 5
id date_received date_approved ap permit
<int> <chr> <chr> <chr> <chr>
1 1 Existing/Pool 1/26/2021 HC 1/27/2021 AP-21-001 06-SE-2224986
2 2 Existing 2/3/2021 CE 2/3/2021 AP-21-002 06-SE-2227597
3 3 Existing 2/10/2021 NC 2/10/2021 AP-21-003 06-SM-2239687
4 4 Abandonment 2/11/2021 VV 2/11/2021 AP-21-004 06-
5 5 Holding Tank 2/11/2021 SB 2/18/2021 AP-21-005 06-SM-2241767
6 6 Existing/Additon 2/19/2021 VV 2/19/2021 AP-21-006 06-SE-2243142
7 7 Abandonment 2/23/2021 HC 2/23/2021 AP-21-007 06-SE-2244287
8 8 Existing 3/2/2021 CE 3/2/2021 AP-21-008 06-SE-2246427
9 9 Existing 3/2/2021 NC 3/2/2021 AP-21-009 06-SM-2246414
10 10 Existing 3/2/2021 NC 3/2/2021 AP-21-010 06-SM-2246400
# ... with 80 more rows
CodePudding user response:
Use group_by(id = (1:n() 1) %/% 2)
:
library(dplyr)
df %>%
group_by(id = (1:n() 1) %/% 2) %>%
summarize(across(date_received:permit,
~ trimws(paste0(.x, collapse = " "))))
# # A tibble: 90 × 5
# id date_received date_approved ap permit
# <dbl> <chr> <chr> <chr> <chr>
# 1 1 Existing/Pool 1/26/2021 HC 1/27/2021 AP-21-001 06-SE-2224986
# 2 2 Existing 2/3/2021 CE 2/3/2021 AP-21-002 06-SE-2227597
# 3 3 Existing 2/10/2021 NC 2/10/2021 AP-21-003 06-SM-2239687
# 4 4 Abandonment 2/11/2021 VV 2/11/2021 AP-21-004 06-
# 5 5 Holding Tank 2/11/2021 SB 2/18/2021 AP-21-005 06-SM-2241767
# 6 6 Existing/Additon 2/19/2021 VV 2/19/2021 AP-21-006 06-SE-2243142
# 7 7 Abandonment 2/23/2021 HC 2/23/2021 AP-21-007 06-SE-2244287
# 8 8 Existing 3/2/2021 CE 3/2/2021 AP-21-008 06-SE-2246427
# 9 9 Existing 3/2/2021 NC 3/2/2021 AP-21-009 06-SM-2246414
# 10 10 Existing 3/2/2021 NC 3/2/2021 AP-21-010 06-SM-2246400
# # … with 80 more rows
%/%
indicates integer division. If n()
is 15, the grouping variable will gives
(1:15 1) %/% 2
# [1] 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8