Error when exporting to csv using write.csv and write

I have a data frame similar to the one below (only longer). When I try to export it (with different methods/packages), I keep getting the same error:

Error in dimnames(X) <- list(dn[[1L]], unlist(collabs[nc > 0], use.names = FALSE)) : 
  length of 'dimnames' [2] not equal to array extent

I'm unsure of what the error actually means, and googling hasn't been too much help.

This is a segment of the data frame (the below reproducible data is the head of the whole data frame).

> head(tweets2)
  possibly_sensitive                  id  author_id public_metrics.retweet_count public_metrics.reply_count
1              FALSE 1457730445089640453 4568748862                            0                          1
2              FALSE 1456653659811549193 4568748862                            0                          0
3              FALSE 1455688889889435650 4568748862                            0                          0
4              FALSE 1455571797919870980 4568748862                            0                          0
5              FALSE 1455557277369393160 4568748862                            0                          0
6              FALSE 1455288530159157249 4568748862                            0                          6
  public_metrics.like_count public_metrics.quote_count
1                         0                          0
2                         4                          0
3                         1                          0
4                         1                          0
5                         0                          0
6                        60                          0

Reproducible data:

# Reproducible sample: the first six rows of the tweets data (this looks like
# dput() output). Several columns are not plain atomic vectors:
#   - public_metrics, entities, attachments and geo are nested data frames
#     (geo$coordinates is itself a data frame);
#   - entities$urls / annotations / hashtags / mentions,
#     attachments$media_keys, referenced_tweets and
#     geo$coordinates$coordinates are lists.
# These nested and list columns are why the object is not "flat".
test2 <- structure(list(possibly_sensitive = c(FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE), id = c("1457730445089640453", "1456653659811549193", 
"1455688889889435650", "1455571797919870980", "1455557277369393160", 
"1455288530159157249"), author_id = c("4568748862", "4568748862", 
"4568748862", "4568748862", "4568748862", "4568748862"), public_metrics = structure(list(
    retweet_count = c(0L, 0L, 0L, 0L, 0L, 0L), reply_count = c(1L, 
    0L, 0L, 0L, 0L, 6L), like_count = c(0L, 4L, 1L, 1L, 0L, 60L
    ), quote_count = c(0L, 0L, 0L, 0L, 0L, 0L)), row.names = c(NA, 
6L), class = "data.frame"), entities = structure(list(urls = list(
    structure(list(start = 237L, end = 260L, url = "X", 
        expanded_url = "X", 
        display_url = "X"), class = "data.frame", row.names = 1L), 
    structure(list(start = 241L, end = 264L, url = "X", 
        expanded_url = "X", 
        display_url = "X"), class = "data.frame", row.names = 1L), 
    structure(list(), .Names = character(0)), structure(list(
        start = 262L, end = 285L, url = "X", 
        expanded_url = "https://twitter.com/nationalpost/status/1455243316866555908", 
        display_url = "twitter.com/nationalpost/s…"), class = "data.frame", row.names = 1L), 
    structure(list(), .Names = character(0)), structure(list(
        start = c(218L, 218L), end = c(241L, 241L), url = c("X", 
        "X"), expanded_url = c("https://twitter.com/ziad_aboultaif/status/1455288530159157249/photo/1", 
        "https://twitter.com/ziad_aboultaif/status/1455288530159157249/photo/1"
        ), display_url = c("pic.twitter.com/SNxB53sDoE", "pic.twitter.com/SNxB53sDoE"
        )), class = "data.frame", row.names = 1:2)), annotations = list(
    structure(list(start = 0L, end = 5L, probability = 0.9922, 
        type = "Place", normalized_text = "Canada"), class = "data.frame", row.names = 1L), 
    structure(list(), .Names = character(0)), structure(list(), .Names = character(0)), 
    structure(list(start = 26L, end = 31L, probability = 0.9765, 
        type = "Place", normalized_text = "Canada"), class = "data.frame", row.names = 1L), 
    structure(list(), .Names = character(0)), structure(list(
        start = c(43L, 81L), end = c(66L, 96L), probability = c(0.6856, 
        0.5389), type = c("Organization", "Place"), normalized_text = c("44th Canadian Parliament", 
        "Edmonton Manning")), class = "data.frame", row.names = 1:2)), 
    hashtags = list(structure(list(), .Names = character(0)), 
        structure(list(start = 8L, end = 21L, tag = "VeteransWeek"), class = "data.frame", row.names = 1L), 
        structure(list(), .Names = character(0)), structure(list(), .Names = character(0)), 
        structure(list(), .Names = character(0)), structure(list(), .Names = character(0))), 
    mentions = list(structure(list(), .Names = character(0)), 
        structure(list(), .Names = character(0)), structure(list(
            start = 0L, end = 14L, username = "egyptincanada", 
            id = "1044603931496779777"), class = "data.frame", row.names = 1L), 
        structure(list(), .Names = character(0)), structure(list(
            start = 0L, end = 11L, username = "Mukiza2021", id = "715376684501852160"), class = "data.frame", row.names = 1L), 
        structure(list(), .Names = character(0)))), row.names = c(NA, 
6L), class = "data.frame"), conversation_id = c("1457730445089640453", 
"1456653659811549193", "1455288530159157249", "1455571797919870980", 
"1455288530159157249", "1455288530159157249"), source = c("Twitter Web App", 
"Twitter Web App", "Twitter for Android", "Twitter Web App", 
"Twitter for Android", "Twitter Web App"), text = c("Canada can’t afford a cabinet that is bent on radical policies in the midst of an economic crisis. What we need right now is to give workers their paychecks back and ensure a recovery for all. \n\nThis is not the time for a “great reset”. X", 
"On this #VeteransWeek, let us remember the valiant efforts our veterans have made to keep us safe. \n\nToday, there are still many who saw the horrors of the Second World War and beyond. \n\nWe are forever indebted to our veterans. \n\nThank you. X", 
"@egyptincanada Thank you your excellency.", "Five months after placing Canada’s flag on half-mast, it is time for our symbol of unity to start flying again. \nCanadians have shown that we are united towards reconciliation and it’s time to continue this work while our Maple Leaf flies high, strong and free. X", 
"@Mukiza2021 Thank you my friend.", "I am honoured to be sworn-in today for the 44th Canadian Parliament representing Edmonton Manning. \n\nI will continue the hard work I have been doing for the last six years and I look forward to serving on your behalf. X"
), referenced_tweets = list(structure(list(type = "quoted", id = "1456333073621012480"), class = "data.frame", row.names = 1L), 
    structure(list(), .Names = character(0)), structure(list(
        type = "replied_to", id = "1455688445586718722"), class = "data.frame", row.names = 1L), 
    structure(list(type = "quoted", id = "1455243316866555908"), class = "data.frame", row.names = 1L), 
    structure(list(type = "replied_to", id = "1455552008111345664"), class = "data.frame", row.names = 1L), 
    structure(list(), .Names = character(0))), lang = c("en", 
"en", "en", "en", "en", "en"), created_at = c("2021-11-08T15:23:16.000Z", 
"2021-11-05T16:04:30.000Z", "2021-11-03T00:10:51.000Z", "2021-11-02T16:25:34.000Z", 
"2021-11-02T15:27:52.000Z", "2021-11-01T21:39:58.000Z"), attachments = structure(list(
    media_keys = list(structure(list(), .Names = character(0)), 
        "3_1456653607873523714", structure(list(), .Names = character(0)), 
        structure(list(), .Names = character(0)), structure(list(), .Names = character(0)), 
        c("3_1455288520818544641", "3_1455288520994697217"))), row.names = c(NA, 
6L), class = "data.frame"), in_reply_to_user_id = c(NA, NA, "1044603931496779777", 
NA, "715376684501852160", NA), geo = structure(list(place_id = c(NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_
), coordinates = structure(list(coordinates = list(NULL, NULL, 
    NULL, NULL, NULL, NULL), type = c(NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_)), row.names = c(NA, 
6L), class = "data.frame")), row.names = c(NA, 6L), class = "data.frame")), row.names = c(NA, 
6L), class = "data.frame")

CodePudding user response：

If you look at the structure of the data.frame, you'll notice that it's a complex object - a data.frame of data.frames and lists and what-have-you.

> str(test2)
'data.frame':   6 obs. of  14 variables:
 $ possibly_sensitive : logi  FALSE FALSE FALSE FALSE FALSE FALSE
 $ id                 : chr  "1457730445089640453" "1456653659811549193" "1455688889889435650" "1455571797919870980" ...
 $ author_id          : chr  "4568748862" "4568748862" "4568748862" "4568748862" ...
 $ public_metrics     :'data.frame':    6 obs. of  4 variables:
  ..$ retweet_count: int  0 0 0 0 0 0
  ..$ reply_count  : int  1 0 0 0 0 6
  ..$ like_count   : int  0 4 1 1 0 60
  ..$ quote_count  : int  0 0 0 0 0 0
 $ entities           :'data.frame':    6 obs. of  4 variables:
  ..$ urls       :List of 6
  .. ..$ :'data.frame': 1 obs. of  5 variables:
  .. .. ..$ start       : int 237
  .. .. ..$ end         : int 260
  .. .. ..$ url         : chr "X"
  .. .. ..$ expanded_url: chr "X"
  .. .. ..$ display_url : chr "X"
  .. ..$ :'data.frame': 1 obs. of  5 variables:

You will need to make the data.frame "flat" before you can safely save it as a tab/csv file.

Alternatively, you could save this complex object as an .RData binary file. Note, however, that such a file can only be read back by other users through R.

CodePudding user response：

As Roman mentioned, your data frame isn't "flat": some of its columns, e.g. public_metrics, are themselves data frames.

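You can use tidyr::unpack() to flatten these data-frame columns, then write the result to CSV with readr::write_csv(). Note that unpack() only spreads data-frame columns; any list-columns (e.g. urls or referenced_tweets) still have to be converted to plain text or dropped before the data can be stored in a CSV file.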

# Flatten the nested data frame `test2` and export it as "test2.csv".
# tidyr supplies unpack() and re-exports %>%; readr supplies write_csv().
library(tidyr)
library(readr)

# Step 1: spread the data-frame columns (and then the data frame that was
# nested inside `geo`) into ordinary top-level columns.
flat <- test2 %>%
  unpack(cols = c(public_metrics, entities, attachments, geo)) %>%
  unpack(cols = coordinates)

# Step 2: unpack() leaves list-columns untouched (urls, annotations, hashtags,
# mentions, media_keys, referenced_tweets, coordinates), and a CSV cell cannot
# hold a list. Collapse every list cell into one "; "-separated string; empty
# or NULL cells become NA.
collapse_cell <- function(cell) {
  values <- unlist(cell, use.names = FALSE)
  if (length(values) == 0L) {
    return(NA_character_)
  }
  paste(values, collapse = "; ")
}

is_list_col <- vapply(flat, is.list, logical(1))
if (any(is_list_col)) {
  flat[is_list_col] <- lapply(
    flat[is_list_col],
    function(col) vapply(col, collapse_cell, character(1), USE.NAMES = FALSE)
  )
}

# Step 3: every column is now an atomic vector, so the table can be written.
write_csv(flat, "test2.csv")