I would like to add a column to my dataset that assigns each date a number based on the week it falls in.
So for day 1, day 2, ..., day 7 the column would hold the value 1 (the first week), for day 8, day 9, ..., day 14 it would hold the value 2, and so on.
What would be the best way to add that column?
dput(head(sdata0))
structure(list(date = structure(c(18628, 18629, 18630, 18631,
18632, 18633), class = "Date"), launches = c(-0.423325435196192,
-0.95406180171082, -0.95406180171082, -0.95406180171082, 0.107410931318437,
-0.423325435196192), pledged = c(-0.242997575062835, -0.300759417946595,
-0.300759417946595, -0.300759417946595, 0.120035260531115, -0.103075942164302
), backers = c(-0.124417670254619, -0.269239525943361, -0.269239525943361,
-0.269239525943361, 0.0620404689446357, -0.0918327527246523),
total_goal = c(-0.314834573033319, -0.33600837985916, -0.33600837985916,
-0.33600837985916, -0.205436571099805, -0.283073862794557
), mean_goal = c(-0.350195946618206, -0.422316295398803,
-0.422316295398803, -0.422316295398803, -0.199945219991962,
-0.24201542344731), US = c(0.179454667531907, -0.720497098001238,
-0.720497098001238, -0.720497098001238, 0.179454667531907,
-0.720497098001238), `number of success` = c(0.23782061224498,
-0.594551530612449, -0.594551530612449, -0.594551530612449,
1.07019275510241, 0.23782061224498), duration_days = c(-0.0399540270332042,
-1.6958261375219, -1.6958261375219, -1.6958261375219, 0.0152417099830856,
-0.0399540270332042), Twitter = c(-2.35635395414648, -1.37949565613006,
-2.47410026685382, -1.21813959797556, -0.995729896195041,
-1.226861547065), replies = c(-1.11872430995012, -0.454408610464075,
-1.06845177052955, -0.874543404193084, -1.24799655417443,
-0.906861465249162), likes = c(-0.812127568832484, -0.63113030668481,
-1.40968119485432, -1.1127549475184, -1.2106558412922, -1.22498280135666
), retweets = c(-0.606241425199139, -0.766152931679175, -1.64441036779204,
-1.39868247694445, -1.31077301003134, -1.3509601949059),
group_date = c("01", "01", "01", "01", "01", "01")), row.names = c(NA,
6L), class = "data.frame")
CodePudding user response:
You can use the `week()` function from the lubridate package like this:
library(dplyr)
library(lubridate)

# `date` is already a Date column, so it can be passed to week() directly;
# no re-parsing with ymd() is needed.
# week() numbers weeks as consecutive 7-day periods counted from January 1st
# of each date's year (see ?lubridate::week).
sdata0 %>%
  mutate(week_number = week(date))
#> date launches pledged backers total_goal mean_goal US
#> 1 2021-01-01 -0.4233254 -0.2429976 -0.12441767 -0.3148346 -0.3501959 0.1794547
#> 2 2021-01-02 -0.9540618 -0.3007594 -0.26923953 -0.3360084 -0.4223163 -0.7204971
#> 3 2021-01-03 -0.9540618 -0.3007594 -0.26923953 -0.3360084 -0.4223163 -0.7204971
#> 4 2021-01-04 -0.9540618 -0.3007594 -0.26923953 -0.3360084 -0.4223163 -0.7204971
#> 5 2021-01-05 0.1074109 0.1200353 0.06204047 -0.2054366 -0.1999452 0.1794547
#> 6 2021-01-06 -0.4233254 -0.1030759 -0.09183275 -0.2830739 -0.2420154 -0.7204971
#> number of success duration_days Twitter replies likes retweets
#> 1 0.2378206 -0.03995403 -2.3563540 -1.1187243 -0.8121276 -0.6062414
#> 2 -0.5945515 -1.69582614 -1.3794957 -0.4544086 -0.6311303 -0.7661529
#> 3 -0.5945515 -1.69582614 -2.4741003 -1.0684518 -1.4096812 -1.6444104
#> 4 -0.5945515 -1.69582614 -1.2181396 -0.8745434 -1.1127549 -1.3986825
#> 5 1.0701928 0.01524171 -0.9957299 -1.2479966 -1.2106558 -1.3107730
#> 6 0.2378206 -0.03995403 -1.2268615 -0.9068615 -1.2249828 -1.3509602
#> group_date week_number
#> 1 01 1
#> 2 01 1
#> 3 01 1
#> 4 01 1
#> 5 01 1
#> 6 01 1
Created on 2022-07-30 by the reprex package (v2.0.1)
CodePudding user response:
Base R approach without any dependencies:
# Format the Date column as its ISO 8601 week number (%V). The result is a
# zero-padded character column, e.g. "53" or "01".
sdata0[["week_number"]] <- format(sdata0[["date"]], "%V")
sdata0["week_number"]
#> week_number
#> 1 53
#> 2 53
#> 3 53
#> 4 01
#> 5 01
#> 6 01
Also have a look at `%U` and `%W` in `?strptime` if you need week numbers following the US or UK conventions instead of ISO 8601.