I have a nested list from reading a JSON that stores logging info from a video game. The time
element of the list is a simple vector, while inputManagerStates
and syncedProperties
are lists that may contain 0 or more elements.
I am trying to get the data into tabular (long format).
UPDATE: The list
test_list <-
list(list(time = 9.92405605316162, inputManagerStates = list(),
syncedProperties = list()), list(time = 9.9399995803833,
inputManagerStates = list(list(inputId = "InputY", buttonState = FALSE,
axisValue = 0), list(inputId = "InputX", buttonState = FALSE,
axisValue = 0.0501395985484123), list(inputId = "xPos",
buttonState = FALSE, axisValue = 5), list(inputId = "yPos",
buttonState = FALSE, axisValue = 0.0799999982118607),
list(inputId = "zPos", buttonState = FALSE, axisValue = 0),
list(inputId = "xRot", buttonState = FALSE, axisValue = 0),
list(inputId = "yRot", buttonState = FALSE, axisValue = -0.70664256811142),
list(inputId = "zRot", buttonState = FALSE, axisValue = 0),
list(inputId = "wRot", buttonState = FALSE, axisValue = 0.707570731639862)),
syncedProperties = list(list(name = "timeStamp", value = "97,2"))),
list(time = 9.95659446716309, inputManagerStates = list(list(
inputId = "InputY", buttonState = FALSE, axisValue = 0),
list(inputId = "InputX", buttonState = FALSE, axisValue = 0.0993990004062653),
list(inputId = "xPos", buttonState = FALSE, axisValue = 5),
list(inputId = "yPos", buttonState = FALSE, axisValue = 0.0799999982118607),
list(inputId = "zPos", buttonState = FALSE, axisValue = 0),
list(inputId = "xRot", buttonState = FALSE, axisValue = 0),
list(inputId = "yRot", buttonState = FALSE, axisValue = -0.705721318721771),
list(inputId = "zRot", buttonState = FALSE, axisValue = 0),
list(inputId = "wRot", buttonState = FALSE, axisValue = 0.708489596843719)),
syncedProperties = list(list(name = "timeStamp", value = "97,21667"))),
list(time = 20.0626411437988, inputManagerStates = list(list(
inputId = "InputY", buttonState = FALSE, axisValue = 0.601816594600677),
list(inputId = "InputX", buttonState = FALSE, axisValue = 0),
list(inputId = "xPos", buttonState = FALSE, axisValue = -1.31777036190033),
list(inputId = "yPos", buttonState = FALSE, axisValue = 0.0800001174211502),
list(inputId = "zPos", buttonState = FALSE, axisValue = 6.08214092254639),
list(inputId = "xRot", buttonState = FALSE, axisValue = 0),
list(inputId = "yRot", buttonState = FALSE, axisValue = -0.391442984342575),
list(inputId = "zRot", buttonState = FALSE, axisValue = 0),
list(inputId = "wRot", buttonState = FALSE, axisValue = 0.920202374458313)),
syncedProperties = list(list(name = "timeStamp", value = "107,3167"),
list(name = "previousGameState", value = "1"), list(
name = "newGameState", value = "2"))))
What I tried
library(tidyverse)
test_list %>%
tibble::enframe(name = "epoch", value = "value") %>%
tidyr::unnest_wider(value) %>%
tidyr::unnest(inputManagerStates, keep_empty = TRUE) %>%
tidyr::unnest(syncedProperties, keep_empty = TRUE) %>%
tidyr::unnest_wider(syncedProperties)
# A tibble: 19 x 5
epoch time inputManagerStates name value
<int> <dbl> <list> <chr> <chr>
1 1 9.92 <NULL> NA NA
2 2 9.94 <named list [3]> timeStamp 97,2
3 2 9.94 <named list [3]> timeStamp 97,2
4 2 9.94 <named list [3]> timeStamp 97,2
5 2 9.94 <named list [3]> timeStamp 97,2
6 2 9.94 <named list [3]> timeStamp 97,2
7 2 9.94 <named list [3]> timeStamp 97,2
8 2 9.94 <named list [3]> timeStamp 97,2
9 2 9.94 <named list [3]> timeStamp 97,2
10 2 9.94 <named list [3]> timeStamp 97,2
11 3 9.96 <named list [3]> timeStamp 97,21667
12 3 9.96 <named list [3]> timeStamp 97,21667
13 3 9.96 <named list [3]> timeStamp 97,21667
14 3 9.96 <named list [3]> timeStamp 97,21667
15 3 9.96 <named list [3]> timeStamp 97,21667
16 3 9.96 <named list [3]> timeStamp 97,21667
17 3 9.96 <named list [3]> timeStamp 97,21667
18 3 9.96 <named list [3]> timeStamp 97,21667
19 3 9.96 <named list [3]> timeStamp 97,21667
I managed to get syncedProperties
into columns (I will drop column name
and rename value
to timeStamp). I am having trouble with inputManagerStates
list.
Thank you in advance.
CodePudding user response:
Perhaps this helps
library(dplyr)
library(purrr)
library(tidyr)
library(jsonlite)
toJSON(test_list) %>%
fromJSON(flatten = TRUE) %>%
unnest(everything(), keep_empty = TRUE) %>%
mutate(across(where(is.list),
~ map(.x, ~ if(is.null(.x)) NA else .x))) %>%
unnest(where(is.list))
-output
# A tibble: 19 × 6
time inputId buttonState axisValue name value
<dbl> <chr> <lgl> <dbl> <chr> <chr>
1 9.92 <NA> NA NA <NA> <NA>
2 9.94 InputY FALSE 0 timeStamp 97,2
3 9.94 InputX FALSE 0.0501 timeStamp 97,2
4 9.94 xPos FALSE 5 timeStamp 97,2
5 9.94 yPos FALSE 0.08 timeStamp 97,2
6 9.94 zPos FALSE 0 timeStamp 97,2
7 9.94 xRot FALSE 0 timeStamp 97,2
8 9.94 yRot FALSE -0.707 timeStamp 97,2
9 9.94 zRot FALSE 0 timeStamp 97,2
10 9.94 wRot FALSE 0.708 timeStamp 97,2
11 9.96 InputY FALSE 0 timeStamp 97,21667
12 9.96 InputX FALSE 0.0994 timeStamp 97,21667
13 9.96 xPos FALSE 5 timeStamp 97,21667
14 9.96 yPos FALSE 0.08 timeStamp 97,21667
15 9.96 zPos FALSE 0 timeStamp 97,21667
16 9.96 xRot FALSE 0 timeStamp 97,21667
17 9.96 yRot FALSE -0.706 timeStamp 97,21667
18 9.96 zRot FALSE 0 timeStamp 97,21667
19 9.96 wRot FALSE 0.708 timeStamp 97,21667
With the new dataset, the unnest
can be done separately
toJSON(test_list) %>%
fromJSON(flatten = TRUE) %>%
unnest(inputManagerStates, keep_empty = TRUE) %>%
unnest(syncedProperties, keep_empty = TRUE) %>%
unnest(everything())
# A tibble: 46 × 6
time inputId buttonState axisValue name value
<dbl> <chr> <lgl> <dbl> <chr> <chr>
1 9.92 NA NA NA NA NA
2 9.94 InputY FALSE 0 timeStamp 97,2
3 9.94 InputX FALSE 0.0501 timeStamp 97,2
4 9.94 xPos FALSE 5 timeStamp 97,2
5 9.94 yPos FALSE 0.08 timeStamp 97,2
6 9.94 zPos FALSE 0 timeStamp 97,2
7 9.94 xRot FALSE 0 timeStamp 97,2
8 9.94 yRot FALSE -0.707 timeStamp 97,2
9 9.94 zRot FALSE 0 timeStamp 97,2
10 9.94 wRot FALSE 0.708 timeStamp 97,2
# … with 36 more rows
CodePudding user response:
@akrun provided a wonderful answer. I have updated my previous question with a list element that comprises lists with different number of elements.
I did not realize my attempted solution is so near the output I desired. I will post it here just in case others may use it.
test_list %>%
tibble::enframe(name = "epoch", value = "value") %>%
tidyr::unnest_wider(value) %>%
tidyr::unnest(inputManagerStates, keep_empty = TRUE) %>%
tidyr::unnest(syncedProperties, keep_empty = TRUE) %>%
tidyr::unnest_wider(syncedProperties) %>%
tidyr::unnest_wider(inputManagerStates)
Output:
# A tibble: 46 x 7
epoch time inputId buttonState axisValue name value
<int> <dbl> <chr> <lgl> <dbl> <chr> <chr>
1 1 9.92 NA NA NA NA NA
2 2 9.94 InputY FALSE 0 timeStamp 97,2
3 2 9.94 InputX FALSE 0.0501 timeStamp 97,2
4 2 9.94 xPos FALSE 5 timeStamp 97,2
5 2 9.94 yPos FALSE 0.0800 timeStamp 97,2
6 2 9.94 zPos FALSE 0 timeStamp 97,2
7 2 9.94 xRot FALSE 0 timeStamp 97,2
8 2 9.94 yRot FALSE -0.707 timeStamp 97,2
9 2 9.94 zRot FALSE 0 timeStamp 97,2
10 2 9.94 wRot FALSE 0.708 timeStamp 97,2
# ... with 36 more rows