I have a dataframe that looks like so
Continent_Name Continent_Code Country_Name Two_Letter_Coun~
<chr> <chr> <chr> <chr>
1 Asia AS Afghanistan, Islami~ AF
2 Europe EU Albania, Republic of AL
3 Antarctica AN Antarctica (the ter~ AQ
4 Africa AF Algeria, People's D~ DZ
5 Oceania OC American Samoa AS
6 Europe EU Andorra, Principali~ AD
I am trying to make a list with all 7 continents and store every country that belongs to a continent in that continent's group. In other words, I want a list of the 7 continents in which each continent's countries are kept together in their own group.
What is a good way to go about this?
CodePudding user response:
You can use `split`. With a data.frame `d`, that would be something like
# one data frame per continent, collected in a list named by continent
x <- split(d, f = d[["Continent_Name"]])
Or perhaps
# one vector of country names per continent, collected in a list named by continent
x <- split(d[["Country_Name"]], f = d[["Continent_Name"]])
CodePudding user response:
Depending on your downstream application, different output formats may be most useful. Here are a few examples using the sample `gapminder` dataset to simulate your situation.
library(tidyverse)
library(gapminder)
# build a stand-in for the data in the question: one row per continent, with
# that continent's distinct country names collapsed into a single
# comma-separated string
g <- summarise(
  group_by(distinct(gapminder, continent, country), continent),
  country = paste0(country, collapse = ", ")
)
g
#> # A tibble: 5 x 2
#> continent country
#> <fct> <chr>
#> 1 Africa Algeria, Angola, Benin, Botswana, Burkina Faso, Burundi, Cameroon, ~
#> 2 Americas Argentina, Bolivia, Brazil, Canada, Chile, Colombia, Costa Rica, Cu~
#> 3 Asia Afghanistan, Bahrain, Bangladesh, Cambodia, China, Hong Kong, China~
#> 4 Europe Albania, Austria, Belgium, Bosnia and Herzegovina, Bulgaria, Croati~
#> 5 Oceania Australia, New Zealand
# simplest option: one single-row tibble per continent, leaving the collapsed
# country string untouched
group_split(g, continent)
#> <list_of<
#> tbl_df<
#> continent: factor<be586>
#> country : character
#> >
#> >[5]>
#> [[1]]
#> # A tibble: 1 x 2
#> continent country
#> <fct> <chr>
#> 1 Africa Algeria, Angola, Benin, Botswana, Burkina Faso, Burundi, Cameroon, ~
#>
#> [[2]]
#> # A tibble: 1 x 2
#> continent country
#> <fct> <chr>
#> 1 Americas Argentina, Bolivia, Brazil, Canada, Chile, Colombia, Costa Rica, Cu~
#>
#> [[3]]
#> # A tibble: 1 x 2
#> continent country
#> <fct> <chr>
#> 1 Asia Afghanistan, Bahrain, Bangladesh, Cambodia, China, Hong Kong, China~
#>
#> [[4]]
#> # A tibble: 1 x 2
#> continent country
#> <fct> <chr>
#> 1 Europe Albania, Austria, Belgium, Bosnia and Herzegovina, Bulgaria, Croati~
#>
#> [[5]]
#> # A tibble: 1 x 2
#> continent country
#> <fct> <chr>
#> 1 Oceania Australia, New Zealand
# split by continent, then expand each collapsed string into one row per name,
# giving a list of data.frames
# NOTE: the separator ", " also occurs inside some names, so those names are
# cut apart as well (see "Hong Kong" / "China" in rows 6-7 of the Asia tibble
# in the output below)
map(
  group_split(g, continent),
  function(tbl) separate_rows(tbl, country, sep = ", ")
)
#> [[1]]
#> # A tibble: 54 x 2
#> continent country
#> <fct> <chr>
#> 1 Africa Algeria
#> 2 Africa Angola
#> 3 Africa Benin
#> 4 Africa Botswana
#> 5 Africa Burkina Faso
#> 6 Africa Burundi
#> 7 Africa Cameroon
#> 8 Africa Central African Republic
#> 9 Africa Chad
#> 10 Africa Comoros
#> # ... with 44 more rows
#>
#> [[2]]
#> # A tibble: 25 x 2
#> continent country
#> <fct> <chr>
#> 1 Americas Argentina
#> 2 Americas Bolivia
#> 3 Americas Brazil
#> 4 Americas Canada
#> 5 Americas Chile
#> 6 Americas Colombia
#> 7 Americas Costa Rica
#> 8 Americas Cuba
#> 9 Americas Dominican Republic
#> 10 Americas Ecuador
#> # ... with 15 more rows
#>
#> [[3]]
#> # A tibble: 37 x 2
#> continent country
#> <fct> <chr>
#> 1 Asia Afghanistan
#> 2 Asia Bahrain
#> 3 Asia Bangladesh
#> 4 Asia Cambodia
#> 5 Asia China
#> 6 Asia Hong Kong
#> 7 Asia China
#> 8 Asia India
#> 9 Asia Indonesia
#> 10 Asia Iran
#> # ... with 27 more rows
#>
#> [[4]]
#> # A tibble: 30 x 2
#> continent country
#> <fct> <chr>
#> 1 Europe Albania
#> 2 Europe Austria
#> 3 Europe Belgium
#> 4 Europe Bosnia and Herzegovina
#> 5 Europe Bulgaria
#> 6 Europe Croatia
#> 7 Europe Czech Republic
#> 8 Europe Denmark
#> 9 Europe Finland
#> 10 Europe France
#> # ... with 20 more rows
#>
#> [[5]]
#> # A tibble: 2 x 2
#> continent country
#> <fct> <chr>
#> 1 Oceania Australia
#> 2 Oceania New Zealand
# expand the collapsed names per continent, then stack the pieces back into a
# single long data.frame
# NOTE: names that themselves contain ", " are split into several rows by
# sep = ", ", so the row count printed below includes those extra rows
bind_rows(
  map(
    group_split(g, continent),
    function(tbl) separate_rows(tbl, country, sep = ", ")
  )
)
#> # A tibble: 148 x 2
#> continent country
#> <fct> <chr>
#> 1 Africa Algeria
#> 2 Africa Angola
#> 3 Africa Benin
#> 4 Africa Botswana
#> 5 Africa Burkina Faso
#> 6 Africa Burundi
#> 7 Africa Cameroon
#> 8 Africa Central African Republic
#> 9 Africa Chad
#> 10 Africa Comoros
#> # ... with 138 more rows
Created on 2022-04-03 by the reprex package (v2.0.1)