Home > Software engineering > From a list of lists to a tibble in long format
From a list of lists to a tibble in long format

Time:10-20

I have this list of character vectors:

# Country codes grouped by region. The list is unnamed, so a region is
# identified only by the position (1-7) of its character vector.
regions <- list(
  # region 1
  c(
    "AO", "BI", "BW", "DJ", "DZ", "ET", "GH", "GM", "KE", "LS", "MA", "MR",
    "MW", "MZ", "NG", "SL", "SZ", "TN", "TZ", "UG", "ZA", "ZM", "ZR", "ZW"
  ),
  # region 2
  c(
    "BD", "CN", "HK", "ID", "IN", "KR", "MU", "MY", "PH", "PK", "SG", "TH",
    "TW", "VN", "AU", "NZ"
  ),
  # region 3
  c(
    "AT", "BA", "BE", "BL", "CH", "CI", "CT", "CY", "CZ", "DE", "DK", "EO",
    "ES", "EU", "FI", "FR", "GA", "GB", "GG", "GR", "HU", "IC", "IE", "IT",
    "JR", "KZ", "LN", "LU", "LV", "MK", "MT", "NL", "NO", "PO", "PT", "RM",
    "RS", "RU", "SE", "SJ", "SX", "TR", "UR"
  ),
  # region 4
  c(
    "AD", "AE", "BH", "DU", "EG", "IQ", "IR", "IS", "JO", "KW", "LB", "OM",
    "PS", "QA", "SA", "SY", "YE"
  ),
  # region 5
  c("CA", "KY", "US"),
  # region 6
  c("AR", "BR", "CB", "CL", "EC", "MX", "PA", "PE", "PY", "UY", "VE"),
  # region 7
  c("JP")
)

The list has 7 elements, and I would like this data as a tibble in long format, with a region ID identifying which element each country code came from:

country | region_id
"AO"    |   1
"BI"    |   1
"BW"    |   1
...
"BD"    |   2
"CN"    |   2
"HK"    |   2
...

CodePudding user response:

Use enframe() (from tibble) followed by unnest() (from tidyr):

library(tibble)
library(tidyr) # unnest() is exported by tidyr, not by tibble

# enframe() turns the list into a two-column tibble: `region_id` holds each
# element's position (the list is unnamed) and `country` is a list-column of
# the character vectors. unnest() then expands it to one row per country code.
enframe(regions, name = "region_id", value = "country") |>
  unnest(country)

In base R, name the list elements and use stack():

# Label each element with its position, then stack the named list into a
# two-column data frame. stack() calls its columns `values` (the country
# codes) and `ind` (a factor built from the element names).
stack(setNames(regions, seq_along(regions)))

output

    region_id country
1           1      AO
2           1      BI
3           1      BW
4           1      DJ
5           1      DZ
6           1      ET
7           1      GH
8           1      GM
9           1      KE
10          1      LS
11          1      MA
12          1      MR
13          1      MW
14          1      MZ
15          1      NG
16          1      SL
17          1      SZ
18          1      TN
19          1      TZ
20          1      UG
21          1      ZA
22          1      ZM
23          1      ZR
24          1      ZW
25          2      BD
26          2      CN
27          2      HK
28          2      ID
29          2      IN
30          2      KR
31          2      MU
32          2      MY
33          2      PH
34          2      PK
35          2      SG
36          2      TH
37          2      TW
38          2      VN
39          2      AU
40          2      NZ
41          3      AT
42          3      BA
43          3      BE
44          3      BL
45          3      CH
46          3      CI
47          3      CT
48          3      CY
49          3      CZ
50          3      DE
51          3      DK
52          3      EO
53          3      ES
54          3      EU
55          3      FI
56          3      FR
57          3      GA
58          3      GB
59          3      GG
60          3      GR
61          3      HU
62          3      IC
63          3      IE
64          3      IT
65          3      JR
66          3      KZ
67          3      LN
68          3      LU
69          3      LV
70          3      MK
71          3      MT
72          3      NL
73          3      NO
74          3      PO
75          3      PT
76          3      RM
77          3      RS
78          3      RU
79          3      SE
80          3      SJ
81          3      SX
82          3      TR
83          3      UR
84          4      AD
85          4      AE
86          4      BH
87          4      DU
88          4      EG
89          4      IQ
90          4      IR
91          4      IS
92          4      JO
93          4      KW
94          4      LB
95          4      OM
96          4      PS
97          4      QA
98          4      SA
99          4      SY
100         4      YE
101         5      CA
102         5      KY
103         5      US
104         6      AR
105         6      BR
106         6      CB
107         6      CL
108         6      EC
109         6      MX
110         6      PA
111         6      PE
112         6      PY
113         6      UY
114         6      VE
115         7      JP

CodePudding user response:

You can use rep() together with lengths() to unnest the list:

# Flatten the list into one vector of country codes, and repeat each element's
# position once per code it contains so both columns line up row by row.
data.frame(
  country = unlist(regions),
  region_id = rep(seq_along(regions), times = lengths(regions))
)

which gives

    country region_id
1        AO         1
2        BI         1
3        BW         1
4        DJ         1
5        DZ         1
6        ET         1
7        GH         1
8        GM         1
9        KE         1
10       LS         1
11       MA         1
12       MR         1
13       MW         1
14       MZ         1
15       NG         1
16       SL         1
17       SZ         1
18       TN         1
19       TZ         1
20       UG         1
21       ZA         1
22       ZM         1
23       ZR         1
24       ZW         1
25       BD         2
26       CN         2
27       HK         2
28       ID         2
29       IN         2
30       KR         2
31       MU         2
32       MY         2
33       PH         2
34       PK         2
35       SG         2
36       TH         2
37       TW         2
38       VN         2
39       AU         2
40       NZ         2
41       AT         3
42       BA         3
43       BE         3
44       BL         3
45       CH         3
46       CI         3
47       CT         3
48       CY         3
49       CZ         3
50       DE         3
51       DK         3
52       EO         3
53       ES         3
54       EU         3
55       FI         3
56       FR         3
57       GA         3
58       GB         3
59       GG         3
60       GR         3
61       HU         3
62       IC         3
63       IE         3
64       IT         3
65       JR         3
66       KZ         3
67       LN         3
68       LU         3
69       LV         3
70       MK         3
71       MT         3
72       NL         3
73       NO         3
74       PO         3
75       PT         3
76       RM         3
77       RS         3
78       RU         3
79       SE         3
80       SJ         3
81       SX         3
82       TR         3
83       UR         3
84       AD         4
85       AE         4
86       BH         4
87       DU         4
88       EG         4
89       IQ         4
90       IR         4
91       IS         4
92       JO         4
93       KW         4
94       LB         4
95       OM         4
96       PS         4
97       QA         4
98       SA         4
99       SY         4
100      YE         4
101      CA         5
102      KY         5
103      US         5
104      AR         6
105      BR         6
106      CB         6
107      CL         6
108      EC         6
109      MX         6
110      PA         6
111      PE         6
112      PY         6
113      UY         6
114      VE         6
115      JP         7
  • Related