A subset of my dataset looks as follows (the full dataset has all the countries of the world):
# Sample data as an R literal (looks like dput() output -- TODO confirm).
# It is a grouped tibble (class "grouped_df") with 10 rows, all for
# Afghanistan, one row per year from 1996 to 2005, grouped by `Country`.
# Column positions that matter for positional indexing: `Time` is column 3
# and `Debt (IMF)` is column 16. Note the capital "D" in `Debt (IMF)`; R
# column names are case-sensitive.
# The expression is not assigned to a name here; assign it (e.g.
# `dataset5 <- structure(...)`) to work with it.
structure(list(Country = c("Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",
"Afghanistan", "Afghanistan"), CountryCode = c("AFG", "AFG",
"AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG"), Time = c(1996,
1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005), `Time Code` = c("YR1996",
"YR1997", "YR1998", "YR1999", "YR2000", "YR2001", "YR2002", "YR2003",
"YR2004", "YR2005"), GDPpc_growth = c(NA, NA, NA, NA, NA, NA,
NA, 3.86838029515866, -2.87520316702623, 7.20796721836321), GDP_pc = c(NA,
NA, NA, NA, NA, NA, 1189.78466765718, 1235.81006329565, 1200.27801321734,
1286.79365893927), Pgrowth = c(4.0194777158615, 2.63650176396731,
1.9473438616857, 2.17042851112236, 2.97505722281038, 3.90280496415438,
4.4967187466326, 4.66834379545461, 4.32155951673842, 3.68269988149014
), Gross_savings = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_),
Inflation = c(NA, NA, NA, NA, NA, NA, NA, 11.655238211175,
11.2714320712639, 10.9127735539374), Unemployment = c(10.9619998931885,
10.7829999923706, 10.8020000457764, 10.8090000152588, 10.8059997558594,
10.8090000152588, 11.2569999694824, 11.1409997940063, 10.9879999160767,
11.2170000076294), Crime = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), Health = c(NA, NA, NA, NA, NA, NA, 0.08418062,
0.65096337, 0.5429256, 0.5291841), Health_new = c(NA, NA,
NA, NA, NA, NA, 1.21245611, 5.45767879, 3.60296822, 3.37097836
), CO2 = c(1180, 1100, 1040, 810, 760, 730, 1029.99997138977,
1220.00002861023, 1029.99997138977, 1549.99995231628), `Debt (WorldBank)` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), `Debt (IMF)` = c(NA, NA, NA,
NA, NA, NA, 345.97748, 270.60236, 244.96669, 206.35601),
Politics = c(-1.94518780708313, NA, -1.9237864613533, NA,
-1.96282829840978, NA, -1.63204962015152, -1.4781574010849,
-1.49412107467651, -1.52730602025986), Migration = c(27.194,
6.129, 35.74, 85.758, -1007.135, -192.286, 1327.074, 388.632,
-248.616, 252.185), GDPpc_log = c(NA, NA, NA, NA, NA, NA,
7.08152761818328, 7.11948195573634, 7.09030848662408, 7.15990886757784
), initial_year = c(NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_), GDP_1996_log = c(NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), Unemployment_log = c(2.39443473688013,
2.3779708191924, 2.37973130640847, 2.38037912184574, 2.38010151282881,
2.38037912184574, 2.42099015466168, 2.41063197858748, 2.3968037605953,
2.41743048534325), Crime_log = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), Health_new_log = c(NA, NA, NA, NA, NA, NA, 0.192648145236196,
1.69702356932679, 1.28175801129963, 1.21520301677122), CO2_log = c(7.07326971745971,
7.00306545878646, 6.94697599213542, 6.69703424766648, 6.63331843328038,
6.59304453414244, 6.93731405344676, 7.10660616117831, 6.93731405344676,
7.3460101791496), Migration_5 = c(NA, NA, NA, NA, NA, 27.194,
6.129, 35.74, 85.758, -1007.135), initial_debt = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), initial_debt_log = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), debt_2001 = c(2001, 2001,
2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001)), row.names = c(NA,
-10L), groups = structure(list(Country = "Afghanistan", .rows = structure(list(
1:10), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr",
"list"))), row.names = c(NA, -1L), class = c("tbl_df", "tbl",
"data.frame"), .drop = TRUE), class = c("grouped_df", "tbl_df",
"tbl", "data.frame"))
I would like to create a new column that holds, for every country, that country's debt value from the year 2001 only. That is, the first 26 rows (Afghanistan, 1996 to 2021) would all show Afghanistan's 2001 debt value, the next 26 rows would all show the next country's 2001 debt value, and so on.
I did this for the initial debt (the 1996 value) with the code below, but I do not know how to change it so that it uses the 2001 value instead.
# Fill `initial_debt` with each country's `Debt (IMF)` value from that
# country's earliest year in the data, repeated on all of its rows.
# Start from NA rather than a placeholder year, so a row that matches no
# country can never show a year as if it were a debt value.
dataset5$initial_debt <- NA_real_
for (country in unique(dataset5$Country)) {
  country_rows <- which(dataset5$Country == country)
  # An NA country name matches no rows; skip it instead of calling min()
  # on an empty vector.
  if (length(country_rows) == 0L) {
    next
  }
  # Earliest year observed for this country (1996 in the sample data).
  first_year <- min(dataset5$Time[country_rows], na.rm = TRUE)
  # Look the year up inside this country's own rows, so the result does not
  # depend on row order or on every country starting in the same year.
  first_row <- country_rows[match(first_year, dataset5$Time[country_rows])]
  # `[[` returns a plain vector for data frames and tibbles alike, so the
  # column stays numeric.
  dataset5$initial_debt[country_rows] <- dataset5[["Debt (IMF)"]][first_row]
}
Is there another solution to my problem?
I hope that I have explained my question clearly enough.
CodePudding user response:
Just group by Country, and assign to your new column the `Debt (IMF)` value for which Time == 2001.
library(dplyr)
# Add `debt_2001`: each country's `Debt (IMF)` value from the year 2001,
# repeated on every row of that country.
# The column name is case-sensitive and must be back-quoted because it
# contains a space and parentheses.
# match() gives the position of the first 2001 row within the group, or NA
# when the country has no 2001 observation, so such countries get NA instead
# of making mutate() fail on a zero-length result.
# The result is stored back into `dataset5` and ungrouped so later steps do
# not silently run per country.
dataset5 <- dataset5 %>%
  group_by(Country) %>%
  mutate(debt_2001 = `Debt (IMF)`[match(2001, Time)]) %>%
  ungroup()