Home > Back-end >  Unexpected Result with Left Join in R
Unexpected Result with Left Join in R

Time:09-26

I have 2 databases obviously. For the first database, I've already modified it to display how I want it to. The output is simply the country codes and the country. But what I'm expecting is a new column, matching the government type to the countries that I've already displayed in my first data frame using pivot_wider.

library('WDI')
gdpDF2 <- (dat = WDI(indicator='NY.GDP.PCAP.KD', country=c('DZ','GU','NE', 'PR', 'PT'), start=2000, end=2016))
gdpDF2  %>%
    pivot_wider(
        names_from = year,
        values_from = NY.GDP.PCAP.KD,
        names_prefix = 'gdp.',
        id_cols=c(iso2c, country)
    )

For the second database, I'm trying to add another column that shows a new element of data, only matched to the 5 countries that I've selected (the second database has more than 5 countries).

Here's what I tried for the join:

gov_type <- read.csv(newdata.csv)

gdpDF2 <- gov_type %>%
    left_join(gov_type, by = "Country") %>%
    select(country)

*Note: "country" in the second dataframe is all lowercase, and "Country" in the first data frame is sentence case.

Appreciate any help on how to fix this or any links to some good tutorials that go deeper than showing simple data sets.

*Screenshot is attached of the last column I'm trying to addenter image description here

Dput for the first data set:

structure(list(iso2c = c("DZ", "GU", "NE", "PR", "PT"), country = c("Algeria", 
"Guam", "Niger", "Puerto Rico", "Portugal"), gdp.2016 = structure(c(4224.03726253056, 
35652.804761085, 492.563968122591, 29961.7528224649, 19700.9100016775
), label = "GDP per capita (constant 2015 US$)"), gdp.2015 = structure(c(4177.8895415169, 
35829.2503598989, 484.153137350801, 29763.4883013861, 19250.1065376852
), label = "GDP per capita (constant 2015 US$)"), gdp.2014 = structure(c(4112.0760239586, 
35722.1143432164, 482.136224762006, 29554.6244202874, 18833.0518281692
), label = "GDP per capita (constant 2015 US$)"), gdp.2013 = structure(c(4042.92363833722, 
35222.3885175896, 470.086463162402, 29426.1576026315, 18584.5540451703
), label = "GDP per capita (constant 2015 US$)"), gdp.2012 = structure(c(4012.36150560736, 
34728.8937132729, 464.147736697598, 29180.4121304848, 18654.9575281944
), label = "GDP per capita (constant 2015 US$)"), gdp.2011 = structure(c(3956.8958150634, 
34067.1937684541, 436.549667995126, 28821.0227981922, 19365.1816488681
), label = "GDP per capita (constant 2015 US$)"), gdp.2010 = structure(c(3918.48641960904, 
34094.348815539, 443.35126539427, 28592.1219724064, 19670.3610686333
), label = "GDP per capita (constant 2015 US$)"), gdp.2009 = structure(c(3851.21377724014, 
33383.1268615286, 424.326789418745, 28565.8122674875, 19343.2800499088
), label = "GDP per capita (constant 2015 US$)"), gdp.2008 = structure(c(3856.40765950165, 
33278.0733411376, 432.319391275196, 28976.1760245988, 19985.6983319711
), label = "GDP per capita (constant 2015 US$)"), gdp.2007 = structure(c(3828.1484762911, 
32653.3078657691, 416.746825011148, 29347.8915630732, 19950.8440966199
), label = "GDP per capita (constant 2015 US$)"), gdp.2006 = structure(c(3760.15536841842, 
32461.448494029, 419.515783828859, 29519.7845120471, 19501.2330070103
), label = "GDP per capita (constant 2015 US$)"), gdp.2005 = structure(c(3752.09616063379, 
33743.741405003, 411.127549538442, 29815.2627072972, 19224.0346513314
), label = "GDP per capita (constant 2015 US$)"), gdp.2004 = structure(c(3592.64530105789, 
32622.1336880318, 397.592403828298, 30375.8367359555, 19110.3208431632
), label = "GDP per capita (constant 2015 US$)"), gdp.2003 = structure(c(3490.22324716767, 
30704.953260077, 411.120617079398, 27937.8599606904, 18819.4437376393
), label = "GDP per capita (constant 2015 US$)"), gdp.2002 = structure(c(3297.61281111481, 
30543.8238259879, 417.504598917128, 27940.4259935603, 19067.6554332118
), label = "GDP per capita (constant 2015 US$)"), gdp.2001 = structure(c(3162.81150705065, 
NA, 412.785384537547, 27722.4102313948, 19025.6956905388), label = "GDP per capita (constant 2015 US$)"), 
    gdp.2000 = structure(c(3111.17616898069, NA, 399.072792636895, 
    26132.0036445504, 18795.0311479834), label = "GDP per capita (constant 2015 US$)")), row.names = c(NA, 
-5L), class = c("tbl_df", "tbl", "data.frame"))

Dput for the second dataset:

structure(list(Country = c("Afghanistan", "Albania", "Algeria", 
"Andorra", "Angola", "Antigua and Barbuda", "Argentina", "Armenia", 
"Aruba", "Australia", "Austria", "Azerbaijan", "Bahamas, The", 
"Bahrain", "Bangladesh", "Barbados", "Belarus", "Belgium", "Belize", 
"Benin", "Bermuda", "Bhutan", "Bolivia", "Bosnia and Herzegovina", 
"Botswana", "Brazil", "Brunei", "Bulgaria", "Burkina Faso", "Burma", 
"Burundi", "Cambodia", "Cameroon", "Canada", "Cape Verde", "Cayman Islands", 
"Central African Republic", "Chad", "Chile", "China", "Colombia", 
"Comoros", "Congo, Democratic Republic of the", "Congo, Republic of the", 
"Cook Islands", "Costa Rica", "Cote d'Ivoire", "Croatia", "Cuba", 
"Curacao", "Cyprus", "Czech Republic", "Denmark", "Djibouti", 
"Dominica", "Dominican Republic", "Ecuador", "Egypt", "El Salvador", 
"Equatorial Guinea", "Eritrea", "Estonia", "Ethiopia", "Fiji", 
"Finland", "France", "French Polynesia", "Gabon", "Gambia, The", 
"Georgia", "Germany", "Ghana", "Greece", "Greenland", "Grenada", 
"Guatemala", "Guernsey", "Guinea", "Guinea-Bissau", "Guyana", 
"Haiti", "Holy See (Vatican City)", "Honduras", "Hong Kong", 
"Hungary", "Iceland", "India", "Indonesia", "Iran", "Iraq", "Ireland", 
"Isle of Man", "Israel", "Italy", "Jamaica", "Japan", "Jersey", 
"Jordan", "Kazakhstan", "Kenya", "Kiribati", "Korea, North", 
"Korea, South", "Kosovo", "Kuwait", "Kyrgyzstan", "Laos", "Latvia", 
"Lebanon", "Lesotho", "Liberia", "Libya", "Liechtenstein", "Lithuania", 
"Luxembourg", "Macau", "Macedonia", "Madagascar", "Malawi", "Malaysia", 
"Maldives", "Mali", "Malta", "Marshall Islands", "Mauritania", 
"Mauritius", "Mexico", "Micronesia, Federated States of", "Moldova", 
"Monaco", "Mongolia", "Montenegro", "Morocco", "Mozambique", 
"Namibia", "Nauru", "Nepal", "Netherlands", "New Caledonia", 
"New Zealand", "Nicaragua", "Niger", "Nigeria", "Niue", "Northern Mariana Islands", 
"Norway", "Oman", "Pakistan", "Palau", "Panama", "Papua New Guinea", 
"Paraguay", "Peru", "Philippines", "Poland", "Portugal", "Puerto Rico", 
"Qatar", "Romania", "Russia", "Rwanda", "Saint Kitts and Nevis", 
"Saint Lucia", "Saint Pierre and Miquelon", "Saint Vincent and the Grenadines", 
"Samoa", "San Marino", "Sao Tome and Principe", "Saudi Arabia", 
"Senegal", "Serbia", "Seychelles", "Sierra Leone", "Singapore", 
"Sint Maarten", "Slovakia", "Slovenia", "Solomon Islands", "Somalia", 
"South Africa", "South Sudan", "Spain", "Sri Lanka", "Sudan", 
"Suriname", "Swaziland", "Sweden", "Switzerland", "Syria", "Taiwan", 
"Tajikistan", "Tanzania", "Thailand", "Timor-Leste", "Togo", 
"Tonga", "Trinidad and Tobago", "Tunisia", "Turkey", "Turkmenistan", 
"Tuvalu", "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom", 
"United States", "Uruguay", "Uzbekistan", "Vanuatu", "Venezuela", 
"Vietnam", "Wallis and Futuna", "Yemen", "Zambia", "Zimbabwe"
), GovernmentType = c("Islamic republic", "parliamentary democracy", 
"republic", "parliamentary democracy", "republic; multiparty presidential regime", 
"constitutional monarchy with a parliamentary system of government and a Commonwealth realm", 
"republic", "republic", "parliamentary democracy", "federal parliamentary democracy and a Commonwealth realm", 
"federal republic", "republic", "constitutional parliamentary democracy and a Commonwealth realm", 
"constitutional monarchy", "parliamentary democracy", "parliamentary democracy and a Commonwealth realm", 
"republic in name, although in fact a dictatorship", "federal parliamentary democracy under a constitutional monarchy", 
"parliamentary democracy and a Commonwealth realm", "republic", 
"parliamentary; self-governing territory", "constitutional monarchy", 
"republic; note - the new constitution defines Bolivia as a \\Social Unitarian State\\", 
"emerging federal democratic republic", "parliamentary republic", 
"federal republic", "constitutional sultanate (locally known as Malay Islamic Monarchy)", 
"parliamentary democracy", "parliamentary republic", "parliamentary government took power in March 2011", 
"republic", "multiparty democracy under a constitutional monarchy", 
"republic; multiparty presidential regime", "a parliamentary democracy, a federation, and a constitutional monarchy", 
"republic", "parliamentary democracy", "republic", "republic", 
"republic", "Communist state", "republic; executive branch dominates government structure", 
"republic", "republic", "republic", "self-governing parliamentary democracy", 
"democratic republic", "republic; multiparty presidential regime established 1960", 
"parliamentary democracy", "Communist state", "parliamentary", 
"republic\xa0", "parliamentary democracy", "constitutional monarchy", 
"republic", "parliamentary democracy", "democratic republic", 
"republic", "republic", "republic", "republic", "transitional government\xa0", 
"parliamentary republic", "federal republic", "republic", "republic", 
"republic", "parliamentary representative democratic French overseas collectivity", 
"republic; multiparty presidential regime", "republic", "republic", 
"federal republic", "constitutional democracy", "parliamentary republic", 
"parliamentary democracy within a constitutional monarchy", "parliamentary democracy and a Commonwealth realm", 
"constitutional democratic republic", "parliamentary democracy", 
"republic", "republic", "republic", "republic", "ecclesiastical", 
"democratic constitutional republic", "limited democracy", "parliamentary democracy", 
"constitutional republic", "federal republic", "republic", "theocratic republic", 
"parliamentary democracy", "republic, parliamentary democracy", 
"parliamentary democracy", "parliamentary democracy", "republic", 
"constitutional parliamentary democracy and a Commonwealth realm", 
"a parliamentary government with a constitutional monarchy", 
"parliamentary democracy", "constitutional monarchy", "republic; authoritarian presidential rule, with little power outside the executive branch", 
"republic", "republic", "Communist state one-man dictatorship", 
"republic", "republic", "constitutional emirate", "republic", 
"Communist state", "parliamentary democracy", "republic", "parliamentary constitutional monarchy", 
"republic", "operates under a transitional government", "hereditary constitutional monarchy", 
"parliamentary democracy", "constitutional monarchy", "limited democracy", 
"parliamentary democracy", "republic", "multiparty democracy", 
"constitutional monarchy\xa0", "republic", "republic", "republic", 
"constitutional government in free association with the US; the Compact of Free Association entered into force on 21 October 1986 and the Amended Compact entered into force in May 2004", 
"military junta", "parliamentary democracy", "federal republic", 
"constitutional government in free association with the US; the Compact of Free Association entered into force on 3 November 1986 and the Amended Compact entered into force in May 2004", 
"republic", "constitutional monarchy", "parliamentary", "republic", 
"constitutional monarchy", "republic", "republic", "republic", 
"federal democratic republic", "constitutional monarchy", "parliamentary representative democracy", 
"parliamentary democracy and a Commonwealth realm", "republic", 
"republic", "federal republic", "self-governing parliamentary democracy", 
"commonwealth; self-governing with locally elected governor, lieutenant governor, and legislature", 
"constitutional monarchy", "monarchy", "federal republic", "constitutional government in free association with the US; the Compact of Free Association entered into force on 1 October 1994", 
"constitutional democracy", "constitutional parliamentary democracy and a Commonwealth realm", 
"constitutional republic", "constitutional republic", "republic", 
"republic", "republic; parliamentary democracy", "commonwealth", 
"emirate", "republic", "federation", "republic; presidential, multiparty system", 
"parliamentary democracy and a Commonwealth realm", "parliamentary democracy and a Commonwealth realm", 
"parliamentary representative democracy", "parliamentary democracy and a Commonwealth realm", 
"parliamentary democracy", "republic", "republic", "monarchy", 
"republic", "republic", "republic", "constitutional democracy", 
"parliamentary republic", "parliamentary", "parliamentary democracy", 
"parliamentary republic", "parliamentary democracy and a Commonwealth realm", 
"in the process of building a federated parliamentary republic", 
"republic", "republic", "parliamentary monarchy", "republic", 
"Federal republic", "constitutional democracy", "monarchy", "constitutional monarchy", 
"formally a confederation but similar in structure to a federal republic", 
"republic under an authoritarian regime", "multiparty democracy", 
"republic", "republic", "constitutional monarchy", "republic", 
"republic under transition to multiparty democratic rule", "constitutional monarchy", 
"parliamentary democracy", "republic", "republican parliamentary democracy", 
"defines itself as a secular democracy and a presidential republic; in actuality displays authoritarian presidential rule, with power concentrated within the presidential administration", 
"parliamentary democracy and a Commonwealth realm", "republic", 
"republic", "federation with specified powers delegated to the UAE federal government and other powers reserved to member emirates", 
"constitutional monarchy and Commonwealth realm", "Constitution-based federal republic; strong democratic tradition", 
"constitutional republic", "republic; authoritarian presidential rule, with little power outside the executive branch", 
"parliamentary republic", "federal republic", "Communist state", 
"parliamentary representive democratic French overseas collectivity", 
"republic", "republic", "parliamentary democracy")), class = "data.frame", row.names = c(NA, 
-215L))

CodePudding user response:

You apparently want to add a 'government_type' indicator to a table created by pivot_wider. (At least that seems to be the goal.) You have a few errors, some in R syntax and some in the logic of the join (if I understand the goal.)

First: Learn to assign intermediate results to object names:

gdpDF3 <- gdpDF2  %>%
    pivot_wider(
        names_from = year,
        values_from = NY.GDP.PCAP.KD,
        names_prefix = 'gdp.',
        id_cols=c(iso2c, country)
    )

Then: Your left_join only merged gov_type to gov_type, not a particularly useful strategy here. Rather merge gdpDF3 to gov_type. And use correct column names because "country" (in gdpDF3) is NOT "Country" (in gov_type).

gdpDF4 <- gdpDF3 %>%
    left_join(gov_type, by = c( "country"="Country") )

head(gdpDF4)
# A tibble: 5 × 20
  iso2c country     gdp.2016 gdp.2015 gdp.2…¹ gdp.2…² gdp.2…³ gdp.2…⁴ gdp.2…⁵ gdp.2…⁶ gdp.2…⁷ gdp.2…⁸ gdp.2…⁹ gdp.2…˟ gdp.2…˟ gdp.2…˟
  <chr> <chr>          <dbl>    <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
1 DZ    Algeria        4224.    4178.   4112.   4043.   4012.   3957.   3918.   3851.   3856.   3828.   3760.   3752.   3593.   3490.
2 GU    Guam          35653.   35829.  35722.  35222.  34729.  34067.  34094.  33383.  33278.  32653.  32461.  33744.  32622.  30705.
3 NE    Niger           493.     484.    482.    470.    464.    437.    443.    424.    432.    417.    420.    411.    398.    411.
4 PR    Puerto Rico   29962.   29763.  29555.  29426.  29180.  28821.  28592.  28566.  28976.  29348.  29520.  29815.  30376.  27938.
5 PT    Portugal      19701.   19250.  18833.  18585.  18655.  19365.  19670.  19343.  19986.  19951.  19501.  19224.  19110.  18819.
# … with 4 more variables: gdp.2002 <dbl>, gdp.2001 <dbl>, gdp.2000 <dbl>, GovernmentType <chr>, and abbreviated variable names
#   ¹​gdp.2014, ²​gdp.2013, ³​gdp.2012, ⁴​gdp.2011, ⁵​gdp.2010, ⁶​gdp.2009, ⁷​gdp.2008, ⁸​gdp.2007, ⁹​gdp.2006, ˟​gdp.2005, ˟​gdp.2004,
#   ˟​gdp.2003
# ℹ Use `colnames()` to see all variable names

CodePudding user response:

I'm not sure if I understood your question correctly, but here is the answer if so. First, I just renamed your variable in the first data frame to "Country", as R won't recognize this as the same variable otherwise:

#### Change Name of Data 1 ####
data1 <- data1 %>% 
  rename(Country = country)

glimpse(data1)

Glimpsing at the data, this works:

Rows: 5
Columns: 19
$ iso2c    <chr> "DZ", "GU", "NE", "PR", "PT"
$ Country  <chr> "Algeria", "Guam", "Niger", "Puerto Rico", "Portugal"
$ gdp.2016 <dbl> 4224.037, 35652.805, 492.564, 29961.753, 19700.910
$ gdp.2015 <dbl> 4177.8895, 35829.2504, 484.1531, 29763.4883, 19250.10…
$ gdp.2014 <dbl> 4112.0760, 35722.1143, 482.1362, 29554.6244, 18833.05…
$ gdp.2013 <dbl> 4042.9236, 35222.3885, 470.0865, 29426.1576, 18584.55…
$ gdp.2012 <dbl> 4012.3615, 34728.8937, 464.1477, 29180.4121, 18654.95…
$ gdp.2011 <dbl> 3956.8958, 34067.1938, 436.5497, 28821.0228, 19365.18…
$ gdp.2010 <dbl> 3918.4864, 34094.3488, 443.3513, 28592.1220, 19670.36…
$ gdp.2009 <dbl> 3851.2138, 33383.1269, 424.3268, 28565.8123, 19343.28…
$ gdp.2008 <dbl> 3856.4077, 33278.0733, 432.3194, 28976.1760, 19985.69…
$ gdp.2007 <dbl> 3828.1485, 32653.3079, 416.7468, 29347.8916, 19950.84…
$ gdp.2006 <dbl> 3760.1554, 32461.4485, 419.5158, 29519.7845, 19501.23…
$ gdp.2005 <dbl> 3752.0962, 33743.7414, 411.1275, 29815.2627, 19224.03…
$ gdp.2004 <dbl> 3592.6453, 32622.1337, 397.5924, 30375.8367, 19110.32…
$ gdp.2003 <dbl> 3490.2232, 30704.9533, 411.1206, 27937.8600, 18819.44…
$ gdp.2002 <dbl> 3297.6128, 30543.8238, 417.5046, 27940.4260, 19067.65…
$ gdp.2001 <dbl> 3162.8115, NA, 412.7854, 27722.4102, 19025.6957
$ gdp.2000 <dbl> 3111.1762, NA, 399.0728, 26132.0036, 18795.0311

Then I turned your second dataset into a tibble, as it was a bit difficult to read:

#### Transform Data 2 Into Tibble ####
data2 <- as_tibble(data2) 

glimpse(data2)

Glimpsing again, it looked okay:

Rows: 215
Columns: 2
$ Country        <chr> "Afghanistan", "Albania", "Algeria", "Andorra",…
$ GovernmentType <chr> "Islamic republic", "parliamentary democracy", …

Then the join was fairly easy to make:

#### Join ####
data.join <- data1 %>% 
  left_join(data2, 
            by = "Country")

glimpse(data.join)

As seen below, gov type is at the end:

Rows: 5
Columns: 20
$ iso2c          <chr> "DZ", "GU", "NE", "PR", "PT"
$ Country        <chr> "Algeria", "Guam", "Niger", "Puerto Rico", "Port…
$ gdp.2016       <dbl> 4224.037, 35652.805, 492.564, 29961.753, 19700.9…
$ gdp.2015       <dbl> 4177.8895, 35829.2504, 484.1531, 29763.4883, 192…
$ gdp.2014       <dbl> 4112.0760, 35722.1143, 482.1362, 29554.6244, 188…
$ gdp.2013       <dbl> 4042.9236, 35222.3885, 470.0865, 29426.1576, 185…
$ gdp.2012       <dbl> 4012.3615, 34728.8937, 464.1477, 29180.4121, 186…
$ gdp.2011       <dbl> 3956.8958, 34067.1938, 436.5497, 28821.0228, 193…
$ gdp.2010       <dbl> 3918.4864, 34094.3488, 443.3513, 28592.1220, 196…
$ gdp.2009       <dbl> 3851.2138, 33383.1269, 424.3268, 28565.8123, 193…
$ gdp.2008       <dbl> 3856.4077, 33278.0733, 432.3194, 28976.1760, 199…
$ gdp.2007       <dbl> 3828.1485, 32653.3079, 416.7468, 29347.8916, 199…
$ gdp.2006       <dbl> 3760.1554, 32461.4485, 419.5158, 29519.7845, 195…
$ gdp.2005       <dbl> 3752.0962, 33743.7414, 411.1275, 29815.2627, 192…
$ gdp.2004       <dbl> 3592.6453, 32622.1337, 397.5924, 30375.8367, 191…
$ gdp.2003       <dbl> 3490.2232, 30704.9533, 411.1206, 27937.8600, 188…
$ gdp.2002       <dbl> 3297.6128, 30543.8238, 417.5046, 27940.4260, 190…
$ gdp.2001       <dbl> 3162.8115, NA, 412.7854, 27722.4102, 19025.6957
$ gdp.2000       <dbl> 3111.1762, NA, 399.0728, 26132.0036, 18795.0311
$ GovernmentType <chr> "republic", NA, "republic", "commonwealth", "rep…
  • Related