I have this dataframe with two columns, one index and one date.
I would like to add another column of sequences, the column will contain 20 x "C" and 10 x "Fo" for each index, if an index has more or fewer rows, that sequence will be limited or extended while maintaining the periodicity.
My daset
df = structure(list(Index = c(4885L, 4885L, 4885L, 4885L, 4885L, 4885L,
4885L, 4885L, 4885L, 4885L, 4885L, 4885L, 4885L, 4885L, 4885L,
4885L, 4885L, 4885L, 4885L, 4885L, 4885L, 4885L, 4885L, 4885L,
4885L, 4885L, 4885L, 4885L, 4885L, 4885L, 4885L, 5109L, 5109L,
5109L, 5109L, 5109L, 5109L, 5109L, 5693L, 5693L, 5693L, 5693L,
5693L, 5693L, 5693L, 5693L, 5693L, 5693L, 5693L, 5693L, 5693L,
5693L, 5693L, 5693L, 5693L, 5693L, 5693L, 5693L, 5693L, 5693L,
5693L, 5693L, 5693L, 5693L, 5693L, 5693L, 5693L, 5693L, 5693L,
5986L, 5986L, 5986L, 5986L, 5986L, 5986L, 5986L, 5986L, 5986L,
5986L, 5986L, 5986L, 5986L, 5986L, 5986L, 5986L, 5986L, 5986L,
5986L, 5986L, 5986L, 5986L, 5986L, 5986L, 5986L, 5986L, 5986L,
5986L, 5986L, 5986L, 5986L), date = structure(c(18892, 18896,
18900, 18904, 18908, 18912, 18916, 18920, 18924, 18928, 18932,
18936, 18940, 18944, 18948, 18952, 18956, 18960, 18964, 18968,
18972, 18976, 18980, 18984, 18988, 18992, 18996, 19000, 19004,
19008, 19012, 18893, 18897, 18901, 18905, 18909, 18913, 18917,
18891, 18895, 18899, 18903, 18907, 18911, 18915, 18919, 18923,
18927, 18931, 18935, 18939, 18943, 18947, 18951, 18955, 18959,
18963, 18967, 18971, 18975, 18979, 18983, 18987, 18991, 18995,
18999, 19003, 19007, 19011, 18892, 18896, 18900, 18904, 18908,
18912, 18916, 18920, 18924, 18928, 18932, 18936, 18940, 18944,
18948, 18952, 18956, 18960, 18964, 18968, 18972, 18976, 18980,
18984, 18988, 18992, 18996, 19000, 19004, 19008, 19012), class = "Date")), row.names = c(560L,
564L, 568L, 572L, 576L, 580L, 584L, 588L, 592L, 596L, 600L, 604L,
608L, 612L, 616L, 620L, 624L, 628L, 632L, 636L, 640L, 644L, 648L,
652L, 656L, 660L, 664L, 668L, 672L, 676L, 680L, 957L, 961L, 965L,
969L, 973L, 977L, 981L, 1842L, 1846L, 1850L, 1854L, 1858L, 1862L,
1866L, 1870L, 1874L, 1878L, 1882L, 1886L, 1890L, 1894L, 1898L,
1902L, 1906L, 1910L, 1914L, 1918L, 1922L, 1926L, 1930L, 1934L,
1938L, 1942L, 1946L, 1950L, 1954L, 1958L, 1962L, 2827L, 2831L,
2835L, 2839L, 2843L, 2847L, 2851L, 2855L, 2859L, 2863L, 2867L,
2871L, 2875L, 2879L, 2883L, 2887L, 2891L, 2895L, 2899L, 2903L,
2907L, 2911L, 2915L, 2919L, 2923L, 2927L, 2931L, 2935L, 2939L,
2943L, 2947L), class = "data.frame")
Plan = c(rep("C",20), rep("FO",20))
Any suggestion?
CodePudding user response:
We can use rep
after grouping by 'Index'
library(dplyr)
df <- df %>%
group_by(Index) %>%
mutate(Seq = rep(rep(c("C", "Fo"), c(20, 10)), length.out = n())) %>%
ungroup
-output
df
# A tibble: 100 × 3
Index date Seq
<int> <date> <chr>
1 4885 2021-09-22 C
2 4885 2021-09-26 C
3 4885 2021-09-30 C
4 4885 2021-10-04 C
5 4885 2021-10-08 C
6 4885 2021-10-12 C
7 4885 2021-10-16 C
8 4885 2021-10-20 C
9 4885 2021-10-24 C
10 4885 2021-10-28 C
# … with 90 more rows