I read the data from a json file using this option
library("rjson")
result <- fromJSON(file = "input.json")
And the result is a list.
I would like to convert this list into a dataframe with colum names.
However, the list has many nested options. How is it possible to convert them into a dataframe?
Here an example of the first json element:
{ "id": 6119227, "node_id": "MDEwOlJlcG9zaXRvcnk2MTE5MjI3", "name": "sentiment", "full_name": "thisandagain/sentiment", "private": false, "owner": { "login": "thisandagain", "id": 747641, "node_id": "MDQ6VXNlcjc0NzY0MQ==", "avatar_url": "https://avatars.githubusercontent.com/u/747641?v=4", "gravatar_id": "", "url": "https://api.github.com/users/thisandagain", "html_url": "https://github.com/thisandagain", "followers_url": "https://api.github.com/users/thisandagain/followers", "following_url": "https://api.github.com/users/thisandagain/following{/other_user}", "gists_url": "https://api.github.com/users/thisandagain/gists{/gist_id}", "starred_url": "https://api.github.com/users/thisandagain/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/thisandagain/subscriptions", "organizations_url": "https://api.github.com/users/thisandagain/orgs", "repos_url": "https://api.github.com/users/thisandagain/repos", "events_url": "https://api.github.com/users/thisandagain/events{/privacy}", "received_events_url": "https://api.github.com/users/thisandagain/received_events", "type": "User", "site_admin": false }, "html_url": "https://github.com/thisandagain/sentiment", "description": "AFINN-based sentiment analysis for Node.js.", "fork": false, "url": "https://api.github.com/repos/thisandagain/sentiment", "forks_url": "https://api.github.com/repos/thisandagain/sentiment/forks", "keys_url": "https://api.github.com/repos/thisandagain/sentiment/keys{/key_id}", "collaborators_url": "https://api.github.com/repos/thisandagain/sentiment/collaborators{/collaborator}", "teams_url": "https://api.github.com/repos/thisandagain/sentiment/teams", "hooks_url": "https://api.github.com/repos/thisandagain/sentiment/hooks", "issue_events_url": "https://api.github.com/repos/thisandagain/sentiment/issues/events{/number}", "events_url": "https://api.github.com/repos/thisandagain/sentiment/events", "assignees_url": "https://api.github.com/repos/thisandagain/sentiment/assignees{/user}", "branches_url": "https://api.github.com/repos/thisandagain/sentiment/branches{/branch}", "tags_url": "https://api.github.com/repos/thisandagain/sentiment/tags", "blobs_url": "https://api.github.com/repos/thisandagain/sentiment/git/blobs{/sha}", "git_tags_url": "https://api.github.com/repos/thisandagain/sentiment/git/tags{/sha}", "git_refs_url": "https://api.github.com/repos/thisandagain/sentiment/git/refs{/sha}", "trees_url": "https://api.github.com/repos/thisandagain/sentiment/git/trees{/sha}", "statuses_url": "https://api.github.com/repos/thisandagain/sentiment/statuses/{sha}", "languages_url": "https://api.github.com/repos/thisandagain/sentiment/languages", "stargazers_url": "https://api.github.com/repos/thisandagain/sentiment/stargazers", "contributors_url": "https://api.github.com/repos/thisandagain/sentiment/contributors", "subscribers_url": "https://api.github.com/repos/thisandagain/sentiment/subscribers", "subscription_url": "https://api.github.com/repos/thisandagain/sentiment/subscription", "commits_url": "https://api.github.com/repos/thisandagain/sentiment/commits{/sha}", "git_commits_url": "https://api.github.com/repos/thisandagain/sentiment/git/commits{/sha}", "comments_url": "https://api.github.com/repos/thisandagain/sentiment/comments{/number}", "issue_comment_url": "https://api.github.com/repos/thisandagain/sentiment/issues/comments{/number}", "contents_url": "https://api.github.com/repos/thisandagain/sentiment/contents/{ path}", "compare_url": "https://api.github.com/repos/thisandagain/sentiment/compare/{base}...{head}", "merges_url": "https://api.github.com/repos/thisandagain/sentiment/merges", "archive_url": "https://api.github.com/repos/thisandagain/sentiment/{archive_format}{/ref}", "downloads_url": "https://api.github.com/repos/thisandagain/sentiment/downloads", "issues_url": "https://api.github.com/repos/thisandagain/sentiment/issues{/number}", "pulls_url": "https://api.github.com/repos/thisandagain/sentiment/pulls{/number}", "milestones_url": "https://api.github.com/repos/thisandagain/sentiment/milestones{/number}", "notifications_url": "https://api.github.com/repos/thisandagain/sentiment/notifications{?since,all,participating}", "labels_url": "https://api.github.com/repos/thisandagain/sentiment/labels{/name}", "releases_url": "https://api.github.com/repos/thisandagain/sentiment/releases{/id}", "deployments_url": "https://api.github.com/repos/thisandagain/sentiment/deployments", "created_at": "2012-10-08T03:45:22Z", "updated_at": "2023-01-20T12:34:09Z", "pushed_at": "2020-05-18T13:55:58Z", "git_url": "git://github.com/thisandagain/sentiment.git", "ssh_url": "[email protected]:thisandagain/sentiment.git", "clone_url": "https://github.com/thisandagain/sentiment.git", "svn_url": "https://github.com/thisandagain/sentiment", "homepage": "", "size": 416, "stargazers_count": 2560, "watchers_count": 2560, "language": "JavaScript", "has_issues": true, "has_projects": true, "has_downloads": true, "has_wiki": false, "has_pages": false, "has_discussions": false, "forks_count": 319, "mirror_url": null, "archived": false, "disabled": false, "open_issues_count": 16, "license": { "key": "mit", "name": "MIT License", "spdx_id": "MIT", "url": "https://api.github.com/licenses/mit", "node_id": "MDc6TGljZW5zZTEz" }, "allow_forking": true, "is_template": false, "web_commit_signoff_required": false, "topics": [ "afinn", "analysis", "javascript", "nlp", "sentiment", "sentiment-analysis" ], "visibility": "public", "forks": 319, "open_issues": 16, "watchers": 2560, "default_branch": "develop", "score": 1.0 }
CodePudding user response:
I have saved your_json-element_ as input.json
. With the package RJSONIO
this file is read in as json
. Your example is deeply nested json
.
This list may consist of lists within a list. For such cases the list
is flattened with str_c()
and transformed into tibble
, if its length is 1
it is directly transformed into a tibble
.
With purrr
's list_cbind()
this is all bind into one DF with - in this case - one row and 80 columns.
library(tidyverse)
library(RJSONIO)
json <- fromJSON("input.json", nullValue = NA)
dat <- lapply(json, function(j) {
if (length(j) > 1) {
j |>
stringr::str_c(collapse = ",") |>
as_tibble()
} else {
as_tibble(j)
}
})
purrr::list_cbind(dat)
# A tibble: 1 × 80
id$value node_id…¹ name$…² full_…³ priva…⁴ owner…⁵ html_…⁶ descr…⁷
<dbl> <chr> <chr> <chr> <lgl> <chr> <chr> <chr>
1 6119227 MDEwOlJl… sentim… thisan… FALSE thisan… https:… AFINN-…
# … with 72 more variables: fork <tibble[,1]>, url <tibble[,1]>,
# forks_url <tibble[,1]>, keys_url <tibble[,1]>,
# collaborators_url <tibble[,1]>, teams_url <tibble[,1]>,
# hooks_url <tibble[,1]>, issue_events_url <tibble[,1]>,
# events_url <tibble[,1]>, assignees_url <tibble[,1]>,
# branches_url <tibble[,1]>, tags_url <tibble[,1]>,
# blobs_url <tibble[,1]>, git_tags_url <tibble[,1]>, …
# ℹ Use `colnames()` to see all variable names