I have a data frame that looks like this (`dput()` output):
structure(list(date = c("01dec2013", "01jul2003", "01nov2008",
"01dec2017", "01dec2017", "01dec2003"), company = c("Shwe Taung",
"PetroChina Exploration and Development", "Repsol SA", "Repsol SA",
"Ipsen Pharmaceutical", "Ceva Laval"), parent_company = c("Shwe Taung",
"China National Petroleum (CNPC)", "Repsol SA", "Repsol SA",
"Ipsen Pharmaceutical", "Ceva Sante Animale"), Website = c("www.shwetaunggroup.com",
"www.cnpc.com.cn", "www.repsol.com", "www.repsol.com", "www.ipsen.com",
"www.ceva.com"), revenues_usd_ml = c(NA, 394554.53, 53215.45,
53215.45, 1760.671, 967.152), Headcount = c(NA, 1396144L, 24634L,
24634L, NA, 3500L), r_d_exp = c(NA, NA, 77.67, 77.67, NA, NA),
est_year = c(NA, 1988L, 1927L, 1927L, 1929L, 1989L), o_country = c("Myanmar",
"China", "Spain", "Spain", "France", "France"), o_state = c("Rangoon (Yangon)",
"Beijing Municipality", "Comunidad de Madrid", "Comunidad de Madrid",
"Ile-de-France", "Sud-Ouest (FR)"), o_admin = c("Not Specified",
"Not Specified", "Madrid", "Madrid", "Ile-de-France", "Not Specified"
), o_city = c("Rangoon (Yangon)", "Beijing", "Madrid", "Madrid",
"Paris", "Not Specified"), country = c("Algeria", "Algeria",
"Algeria", "Algeria", "Algeria", "Algeria"), state = c("Adrar",
"Adrar", "Adrar", "Adrar", "Adrar", "Adrar"), region = c("Not Specified",
"Not Specified", "Not Specified", "Not Specified", "Not Specified",
"Not Specified"), city = c("Adrar", "Adrar", "Reggane", "Reggane",
"Sidi Abdallah", "Sidi Abdallah"), free_zone = c("", "",
"", "", "", ""), relocation = c("", "", "", "", "", ""),
sector = c("Building materials", "Coal, oil & gas", "Coal, oil & gas",
"Coal, oil & gas", "Pharmaceuticals", "Healthcare"), sub_sector = c("Cement & concrete products",
"Oil & gas extraction", "Oil & gas extraction", "Oil & gas extraction",
"Pharmaceutical preparations", "Other (Healthcare)"), cluster = c("Construction",
"Energy", "Energy", "Energy", "Life sciences", "Life sciences"
), activity = c("Manufacturing", "Extraction", "Extraction",
"Extraction", "Manufacturing", "Manufacturing"), fdi_jobs = c(351L,
145L, 235L, 227L, 150L, 45L), est_fdi_jobs = c("Yes", "Yes",
"Yes", "Yes", "No", "No"), capital = c(139.9, 350, 565, 299.7,
29.55, 2.5), est_capital = c("Yes", "No", "No", "Yes", "No",
"No"), fdi_type = c("New", "New", "New", "Expansion", "New",
"New"), fdi_status = c("Announced", "Announced", "Announced",
"Opened", "Announced", "Opened"), year = c(2013L, 2003L,
2008L, 2017L, 2017L, 2003L), code_d = c("012", "012", "012",
"012", "012", "012"), income_d = c("MIDLW", "MIDLW", "MIDLW",
"MIDLW", "MIDLW", "MIDLW"), continent_d = c("Africa", "Africa",
"Africa", "Africa", "Africa", "Africa"), lang_d = c("Arabic",
"Arabic", "Arabic", "Arabic", "Arabic", "Arabic"), landlocked = c(0L,
0L, 0L, 0L, 0L, 0L), iso_d = c("DZA", "DZA", "DZA", "DZA",
"DZA", "DZA"), isic = c("26", "11", "11", "11", "24", "85"
), isic4 = c(2695, 1110, 1110, 1110, 2411, 8519), sector_eora = c("Petroleum, Chemical and Non-Metallic Mineral Products",
"Mining and Quarrying", "Mining and Quarrying", "Mining and Quarrying",
"Petroleum, Chemical and Non-Metallic Mineral Products",
"Mining and Quarrying")), datalabel = "", time.stamp = "24 May 2021 12:23", formats = c("1s",
"5s", "5s", "5s", ".0g", ".0g", ".0g", ".0g",
"(s", "Rs", "Ts", "1s", "Ds", "Qs", "Is", "Ss",
"ps", "(s", ")s", "