Home > Mobile >  Nested list to dataframe using tidyverse
Nested list to dataframe using tidyverse

Time:03-23

I have a nested list from reading a JSON that stores logging info from a video game. The time element of the list is a simple vector, while inputManagerStates and syncedProperties are lists that may contain 0 or more elements.

I am trying to get the data into tabular (long format).

UPDATE: The list

test_list <- 
  list(list(time = 9.92405605316162, inputManagerStates = list(), 
syncedProperties = list()), list(time = 9.9399995803833, 
inputManagerStates = list(list(inputId = "InputY", buttonState = FALSE, 
    axisValue = 0), list(inputId = "InputX", buttonState = FALSE, 
    axisValue = 0.0501395985484123), list(inputId = "xPos", 
    buttonState = FALSE, axisValue = 5), list(inputId = "yPos", 
    buttonState = FALSE, axisValue = 0.0799999982118607), 
    list(inputId = "zPos", buttonState = FALSE, axisValue = 0), 
    list(inputId = "xRot", buttonState = FALSE, axisValue = 0), 
    list(inputId = "yRot", buttonState = FALSE, axisValue = -0.70664256811142), 
    list(inputId = "zRot", buttonState = FALSE, axisValue = 0), 
    list(inputId = "wRot", buttonState = FALSE, axisValue = 0.707570731639862)), 
syncedProperties = list(list(name = "timeStamp", value = "97,2"))), 
list(time = 9.95659446716309, inputManagerStates = list(list(
    inputId = "InputY", buttonState = FALSE, axisValue = 0), 
    list(inputId = "InputX", buttonState = FALSE, axisValue = 0.0993990004062653), 
    list(inputId = "xPos", buttonState = FALSE, axisValue = 5), 
    list(inputId = "yPos", buttonState = FALSE, axisValue = 0.0799999982118607), 
    list(inputId = "zPos", buttonState = FALSE, axisValue = 0), 
    list(inputId = "xRot", buttonState = FALSE, axisValue = 0), 
    list(inputId = "yRot", buttonState = FALSE, axisValue = -0.705721318721771), 
    list(inputId = "zRot", buttonState = FALSE, axisValue = 0), 
    list(inputId = "wRot", buttonState = FALSE, axisValue = 0.708489596843719)), 
    syncedProperties = list(list(name = "timeStamp", value = "97,21667"))), 
list(time = 20.0626411437988, inputManagerStates = list(list(
    inputId = "InputY", buttonState = FALSE, axisValue = 0.601816594600677), 
    list(inputId = "InputX", buttonState = FALSE, axisValue = 0), 
    list(inputId = "xPos", buttonState = FALSE, axisValue = -1.31777036190033), 
    list(inputId = "yPos", buttonState = FALSE, axisValue = 0.0800001174211502), 
    list(inputId = "zPos", buttonState = FALSE, axisValue = 6.08214092254639), 
    list(inputId = "xRot", buttonState = FALSE, axisValue = 0), 
    list(inputId = "yRot", buttonState = FALSE, axisValue = -0.391442984342575), 
    list(inputId = "zRot", buttonState = FALSE, axisValue = 0), 
    list(inputId = "wRot", buttonState = FALSE, axisValue = 0.920202374458313)), 
    syncedProperties = list(list(name = "timeStamp", value = "107,3167"), 
        list(name = "previousGameState", value = "1"), list(
            name = "newGameState", value = "2"))))

What I tried

  library(tidyverse)
    
  test_list %>% 
  tibble::enframe(name = "epoch", value = "value") %>% 
  tidyr::unnest_wider(value) %>%
  tidyr::unnest(inputManagerStates, keep_empty = TRUE) %>%
  tidyr::unnest(syncedProperties, keep_empty = TRUE) %>%
  tidyr::unnest_wider(syncedProperties) 

# A tibble: 19 x 5
   epoch  time inputManagerStates name      value   
   <int> <dbl> <list>             <chr>     <chr>   
 1     1  9.92 <NULL>             NA        NA      
 2     2  9.94 <named list [3]>   timeStamp 97,2    
 3     2  9.94 <named list [3]>   timeStamp 97,2    
 4     2  9.94 <named list [3]>   timeStamp 97,2    
 5     2  9.94 <named list [3]>   timeStamp 97,2    
 6     2  9.94 <named list [3]>   timeStamp 97,2    
 7     2  9.94 <named list [3]>   timeStamp 97,2    
 8     2  9.94 <named list [3]>   timeStamp 97,2    
 9     2  9.94 <named list [3]>   timeStamp 97,2    
10     2  9.94 <named list [3]>   timeStamp 97,2    
11     3  9.96 <named list [3]>   timeStamp 97,21667
12     3  9.96 <named list [3]>   timeStamp 97,21667
13     3  9.96 <named list [3]>   timeStamp 97,21667
14     3  9.96 <named list [3]>   timeStamp 97,21667
15     3  9.96 <named list [3]>   timeStamp 97,21667
16     3  9.96 <named list [3]>   timeStamp 97,21667
17     3  9.96 <named list [3]>   timeStamp 97,21667
18     3  9.96 <named list [3]>   timeStamp 97,21667
19     3  9.96 <named list [3]>   timeStamp 97,21667

I managed to get syncedProperties into columns (I will drop column name and rename value to timeStamp). I am having trouble with inputManagerStates list.

Thank you in advance.

CodePudding user response:

Perhaps this helps

library(dplyr)
library(purrr)
library(tidyr)
library(jsonlite)
toJSON(test_list) %>%
  fromJSON(flatten = TRUE) %>%
  unnest(everything(), keep_empty = TRUE) %>% 
  mutate(across(where(is.list), 
       ~ map(.x, ~ if(is.null(.x)) NA else .x))) %>% 
  unnest(where(is.list))

-output

# A tibble: 19 × 6
    time inputId buttonState axisValue name      value   
   <dbl> <chr>   <lgl>           <dbl> <chr>     <chr>   
 1  9.92 <NA>    NA            NA      <NA>      <NA>    
 2  9.94 InputY  FALSE          0      timeStamp 97,2    
 3  9.94 InputX  FALSE          0.0501 timeStamp 97,2    
 4  9.94 xPos    FALSE          5      timeStamp 97,2    
 5  9.94 yPos    FALSE          0.08   timeStamp 97,2    
 6  9.94 zPos    FALSE          0      timeStamp 97,2    
 7  9.94 xRot    FALSE          0      timeStamp 97,2    
 8  9.94 yRot    FALSE         -0.707  timeStamp 97,2    
 9  9.94 zRot    FALSE          0      timeStamp 97,2    
10  9.94 wRot    FALSE          0.708  timeStamp 97,2    
11  9.96 InputY  FALSE          0      timeStamp 97,21667
12  9.96 InputX  FALSE          0.0994 timeStamp 97,21667
13  9.96 xPos    FALSE          5      timeStamp 97,21667
14  9.96 yPos    FALSE          0.08   timeStamp 97,21667
15  9.96 zPos    FALSE          0      timeStamp 97,21667
16  9.96 xRot    FALSE          0      timeStamp 97,21667
17  9.96 yRot    FALSE         -0.706  timeStamp 97,21667
18  9.96 zRot    FALSE          0      timeStamp 97,21667
19  9.96 wRot    FALSE          0.708  timeStamp 97,21667

With the new dataset, the unnest can be done separately

toJSON(test_list) %>%
     fromJSON(flatten = TRUE) %>% 
     unnest(inputManagerStates, keep_empty = TRUE) %>% 
     unnest(syncedProperties, keep_empty = TRUE) %>% 
     unnest(everything())
# A tibble: 46 × 6
    time inputId buttonState axisValue name      value
   <dbl> <chr>   <lgl>           <dbl> <chr>     <chr>
 1  9.92 NA      NA            NA      NA        NA   
 2  9.94 InputY  FALSE          0      timeStamp 97,2 
 3  9.94 InputX  FALSE          0.0501 timeStamp 97,2 
 4  9.94 xPos    FALSE          5      timeStamp 97,2 
 5  9.94 yPos    FALSE          0.08   timeStamp 97,2 
 6  9.94 zPos    FALSE          0      timeStamp 97,2 
 7  9.94 xRot    FALSE          0      timeStamp 97,2 
 8  9.94 yRot    FALSE         -0.707  timeStamp 97,2 
 9  9.94 zRot    FALSE          0      timeStamp 97,2 
10  9.94 wRot    FALSE          0.708  timeStamp 97,2 
# … with 36 more rows

CodePudding user response:

@akrun provided a wonderful answer. I have updated my previous question with a list element that comprises lists with different number of elements.

I did not realize my attempted solution is so near the output I desired. I will post it here just in case others may use it.

test_list %>% 
  tibble::enframe(name = "epoch", value = "value") %>% 
  tidyr::unnest_wider(value) %>%
  tidyr::unnest(inputManagerStates, keep_empty = TRUE) %>%
  tidyr::unnest(syncedProperties, keep_empty = TRUE) %>%
  tidyr::unnest_wider(syncedProperties) %>%
  tidyr::unnest_wider(inputManagerStates) 

Output:

# A tibble: 46 x 7
   epoch  time inputId buttonState axisValue name      value
   <int> <dbl> <chr>   <lgl>           <dbl> <chr>     <chr>
 1     1  9.92 NA      NA            NA      NA        NA   
 2     2  9.94 InputY  FALSE          0      timeStamp 97,2 
 3     2  9.94 InputX  FALSE          0.0501 timeStamp 97,2 
 4     2  9.94 xPos    FALSE          5      timeStamp 97,2 
 5     2  9.94 yPos    FALSE          0.0800 timeStamp 97,2 
 6     2  9.94 zPos    FALSE          0      timeStamp 97,2 
 7     2  9.94 xRot    FALSE          0      timeStamp 97,2 
 8     2  9.94 yRot    FALSE         -0.707  timeStamp 97,2 
 9     2  9.94 zRot    FALSE          0      timeStamp 97,2 
10     2  9.94 wRot    FALSE          0.708  timeStamp 97,2 
# ... with 36 more rows
  • Related