I try to create a data.frame
which shows the reduction between all time series columns for each row in R. I only want to capture the negative difference (reduction) and set the positive difference to zero.
To illustrate it, I created df1
as the starting point and I try to achieve df2
.
library(data.table)
df1 = data.table(
ID = c("a1", "a2", "a3", "a4", "a5", "a6", "a7"),
"string1" = c("x2", "g3", "n2", "m3", "2w", "ps2", "kg2"),
"2018" = c(3,5,11,3,9,22,6),
"2019" = c(3,5,6,21,1,4,0),
"2020" = c(0,4,13,9,16,7,9),
"2021" = c(4,0,3,8,5,4,6),
"string2" = c("si", "q2", "oq", "mx", "ix", "p2", "2q"))
ID string1 2018 2019 2020 2021 string2
1: a1 x2 3 3 0 4 si
2: a2 g3 5 5 4 0 q2
3: a3 n2 11 6 13 3 oq
4: a4 m3 3 21 9 8 mx
5: a5 2w 9 1 16 5 ix
6: a6 ps2 22 4 7 4 p2
7: a7 kg2 6 0 9 6 2q
df2 = data.table(
ID = c("a1", "a2", "a3", "a4", "a5", "a6", "a7"),
"string1" = c("x2", "g3", "n2", "m3", "2w", "ps2", "kg2"),
"2018" = c(0,0,0,0,0,0,0),
"2019" = c(0,0,5,0,8,18,6),
"2020" = c(3,1,0,12,0,0,0),
"2021" = c(0,4,10,1,11,3,3),
"string2" = c("si", "q2", "oq", "mx", "ix", "p2", "2q"))
ID string1 2018 2019 2020 2021 string2
1: a1 x2 0 0 3 0 si
2: a2 g3 0 0 1 4 q2
3: a3 n2 0 5 0 10 oq
4: a4 m3 0 0 12 1 mx
5: a5 2w 0 8 0 11 ix
6: a6 ps2 0 18 0 3 p2
7: a7 kg2 0 6 0 3 2q
Is there a more efficient way than iterating with a for loop?
Thanks a lot!
CodePudding user response:
df1.melt <- melt(df1, measure.vars = patterns("^[0-9]{4}$"),
variable.factor = FALSE, variable.name = "year")
setkey(df1.melt, ID, year)
df1.melt[, val.diff := value - shift(value, type = "lag", fill = value[1]), by = .(ID)]
df1.melt[val.diff > 0, val.diff := 0]
df1.melt[val.diff < 0, val.diff := abs(val.diff)]
dcast(df1.melt, ID string1 string2 ~ year, value.var = "val.diff")
# ID string1 string2 2018 2019 2020 2021
# 1: a1 x2 si 0 0 3 0
# 2: a2 g3 q2 0 0 1 4
# 3: a3 n2 oq 0 5 0 10
# 4: a4 m3 mx 0 0 12 1
# 5: a5 2w ix 0 8 0 11
# 6: a6 ps2 p2 0 18 0 3
# 7: a7 kg2 2q 0 6 0 3
CodePudding user response:
library(tidyr)
library(dplyr)
spec <-
df1 %>%
build_longer_spec(matches("\\d{4}"),
names_to = "year", values_to = "value")
df1 %>%
pivot_longer_spec(spec) %>%
group_by(ID) %>%
arrange(ID, year) %>%
mutate(value = {
diff = c(0, - diff(value))
diff * (diff > 0)
}) %>%
pivot_wider_spec(spec)
#> # A tibble: 7 x 7
#> # Groups: ID [7]
#> ID string1 string2 `2018` `2019` `2020` `2021`
#> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 a1 x2 si 0 0 3 0
#> 2 a2 g3 q2 0 0 1 4
#> 3 a3 n2 oq 0 5 0 10
#> 4 a4 m3 mx 0 0 12 1
#> 5 a5 2w ix 0 8 0 11
#> 6 a6 ps2 p2 0 18 0 3
#> 7 a7 kg2 2q 0 6 0 3
CodePudding user response:
A bit shorter solution with data.table
:
df1[
,
c(4:6) := df1[, 4:6] - df1[, 3:5]
][
,
c(3:6) := lapply(.SD, \(x) { ifelse(x > 0, 0, abs(x)) })
,
.SDcols = c(3:6)
][]
# ID string1 2018 2019 2020 2021 string2
# 1: a1 x2 0 0 3 0 si
# 2: a2 g3 0 0 1 4 q2
# 3: a3 n2 0 5 0 10 oq
# 4: a4 m3 0 0 12 1 mx
# 5: a5 2w 0 8 0 11 ix
# 6: a6 ps2 0 18 0 3 p2
# 7: a7 kg2 0 6 0 3 2q