Home > Software engineering >  Creating a column with value of other columns in R
Creating a column with value of other columns in R

Time:12-06

A subset of my dataset looks as follows (the full dataset has all the countries of the world):

# dput() output of a 10-row sample of the data: Afghanistan, years 1996-2005,
# stored as a tibble grouped by Country (class "grouped_df"). The expression is
# not assigned to a name here; to reproduce, assign it first (presumably to
# `dataset5`, the name used in the code further down -- confirm).
structure(list(Country = c("Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", 
"Afghanistan", "Afghanistan"), CountryCode = c("AFG", "AFG", 
"AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG"), Time = c(1996, 
1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005), `Time Code` = c("YR1996", 
"YR1997", "YR1998", "YR1999", "YR2000", "YR2001", "YR2002", "YR2003", 
"YR2004", "YR2005"), GDPpc_growth = c(NA, NA, NA, NA, NA, NA, 
NA, 3.86838029515866, -2.87520316702623, 7.20796721836321), GDP_pc = c(NA, 
NA, NA, NA, NA, NA, 1189.78466765718, 1235.81006329565, 1200.27801321734, 
1286.79365893927), Pgrowth = c(4.0194777158615, 2.63650176396731, 
1.9473438616857, 2.17042851112236, 2.97505722281038, 3.90280496415438, 
4.4967187466326, 4.66834379545461, 4.32155951673842, 3.68269988149014
), Gross_savings = c(NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), 
    Inflation = c(NA, NA, NA, NA, NA, NA, NA, 11.655238211175, 
    11.2714320712639, 10.9127735539374), Unemployment = c(10.9619998931885, 
    10.7829999923706, 10.8020000457764, 10.8090000152588, 10.8059997558594, 
    10.8090000152588, 11.2569999694824, 11.1409997940063, 10.9879999160767, 
    11.2170000076294), Crime = c(NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_), Health = c(NA, NA, NA, NA, NA, NA, 0.08418062, 
    0.65096337, 0.5429256, 0.5291841), Health_new = c(NA, NA, 
    NA, NA, NA, NA, 1.21245611, 5.45767879, 3.60296822, 3.37097836
    ), CO2 = c(1180, 1100, 1040, 810, 760, 730, 1029.99997138977, 
    1220.00002861023, 1029.99997138977, 1549.99995231628), `Debt (WorldBank)` = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_), `Debt (IMF)` = c(NA, NA, NA, 
    NA, NA, NA, 345.97748, 270.60236, 244.96669, 206.35601), 
    Politics = c(-1.94518780708313, NA, -1.9237864613533, NA, 
    -1.96282829840978, NA, -1.63204962015152, -1.4781574010849, 
    -1.49412107467651, -1.52730602025986), Migration = c(27.194, 
    6.129, 35.74, 85.758, -1007.135, -192.286, 1327.074, 388.632, 
    -248.616, 252.185), GDPpc_log = c(NA, NA, NA, NA, NA, NA, 
    7.08152761818328, 7.11948195573634, 7.09030848662408, 7.15990886757784
    ), initial_year = c(NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), GDP_1996_log = c(NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_), Unemployment_log = c(2.39443473688013, 
    2.3779708191924, 2.37973130640847, 2.38037912184574, 2.38010151282881, 
    2.38037912184574, 2.42099015466168, 2.41063197858748, 2.3968037605953, 
    2.41743048534325), Crime_log = c(NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_), Health_new_log = c(NA, NA, NA, NA, NA, NA, 0.192648145236196, 
    1.69702356932679, 1.28175801129963, 1.21520301677122), CO2_log = c(7.07326971745971, 
    7.00306545878646, 6.94697599213542, 6.69703424766648, 6.63331843328038, 
    6.59304453414244, 6.93731405344676, 7.10660616117831, 6.93731405344676, 
    7.3460101791496), Migration_5 = c(NA, NA, NA, NA, NA, 27.194, 
    6.129, 35.74, 85.758, -1007.135), initial_debt = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_), initial_debt_log = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_), debt_2001 = c(2001, 2001, 
    2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001)), row.names = c(NA, 
-10L), groups = structure(list(Country = "Afghanistan", .rows = structure(list(
    1:10), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr", 
"list"))), row.names = c(NA, -1L), class = c("tbl_df", "tbl", 
"data.frame"), .drop = TRUE), class = c("grouped_df", "tbl_df", 
"tbl", "data.frame"))

I would like to create a new column that holds, for every country, the debt value of the year 2001 only. By this I mean that the first 26 rows (Afghanistan, 1996 to 2021) would all show Afghanistan's 2001 debt value, the next 26 rows would all show the next country's 2001 debt value, and so on.

I did this for the initial debt of 1996, but I do not know how to change the code so that it gives me what I want.

# Fill `initial_debt` with each country's `Debt (IMF)` value from that
# country's first observed year (1996 according to the original comments),
# repeated on every row of the country.
#
# Columns are addressed by name instead of by position (3 was `Time`, 16 was
# `Debt (IMF)`), so the code keeps working when columns are added or
# reordered, and `[[` / `$` return plain vectors even when `dataset5` is a
# (grouped) tibble. To use a fixed year such as 2001 instead of the first
# year, replace `first_year` below with that year.
country <- as.character(dataset5$Country)

# First observed year per country, repeated for each of that country's rows.
first_year <- ave(dataset5$Time, country, FUN = min)

# Row positions holding each country's first-year observation; which() drops
# comparisons that evaluate to NA.
first_rows <- which(dataset5$Time == first_year)

# match() maps every row to its own country's first-year row (NA when there is
# none), so no loop or running counter tied to the row order is needed.
dataset5$initial_debt <-
  dataset5[["Debt (IMF)"]][first_rows[match(country, country[first_rows])]]

Is there another solution to my problem?

I hope that I have explained my question clearly enough.

CodePudding user response:

Just group by Country, and set the new column to the `Debt (IMF)` value of the row for which Time == 2001.

library(dplyr)

# Per country, copy the `Debt (IMF)` value of the year 2001 onto every row.
# Column names are case-sensitive, so the column must be spelled `Debt (IMF)`
# exactly as in the data. match() returns a single position (NA when a country
# has no 2001 row), so the indexed value always has length 1 and recycles
# cleanly within the group instead of raising a size error.
df %>%
  group_by(Country) %>%
  mutate(debt_2001 = `Debt (IMF)`[match(2001, Time)])
  • Related