Home > Software engineering >  Strange output from pivot_wider. Tibble produced is different from the View(df) output. (includes dp
Strange output from pivot_wider. Tibble produced is different from the View(df) output. (includes dp

Time:04-13

I'm trying to turn the following dataset into a wider format where the species names in Full_name become column headers and the values from counts500 go into each of those columns. The ID column should have no repeats.

Year Location Site Depth Transect ID Full_name counts500
2019 Tela AD 10 1 2019_Tela_AD_1_10 Stegastes planifrons 0
2019 Tela AD 10 1 2019_Tela_AD_1_10 Anisotremus virginicus 1
2019 Tela AD 10 1 2019_Tela_AD_1_10 Stegastes adustus 0
2019 Tela AD 10 1 2019_Tela_AD_1_10 Stegastes partitus 0
2019 Tela AD 10 1 2019_Tela_AD_1_10 Stegastes partitus 0
2019 Tela AD 10 1 2019_Tela_AD_1_10 Stegastes partitus 0

The code I used was:

> dfnew <- pivot_wider(df, names_from = Full_name, values_from = counts500, values_fn = list) ###Pivoting dataset wider to give species as columns

This returns a dataset that looks correct (in View(df)) but gives a tibble when I just type 'df' into the console which has incorrect values. Could it be that it's counting the occurrences of each species per ID rather than taking the values from counts500 column?

Here is the dput of a my top table with more rows:

structure(list(Year = c(2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 
2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 
2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 
2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 
2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 
2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 
2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 
2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 
2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 
2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 
2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 
2019L, 2019L, 2019L, 2019L), Location = c("Tela", "Tela", "Tela", 
"Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", 
"Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", 
"Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", 
"Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", 
"Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", 
"Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", 
"Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", 
"Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", 
"Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", 
"Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", 
"Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", 
"Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", "Tela", 
"Tela"), Site = c("AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", 
"AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", 
"AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", 
"AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", 
"AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", 
"AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", 
"AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", 
"AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", 
"AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", "AD", 
"AD", "AD", "AD", "AD"), Depth = c(10L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L, 10L), Transect = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), ID = c("2019_Tela_AD_1_10", 
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10", 
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10", 
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10", 
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10", 
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10", 
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10", 
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10", 
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10", 
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10", 
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10", 
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10", 
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10", 
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10", 
"2019_Tela_AD_1_10", "2019_Tela_AD_2_10", "2019_Tela_AD_2_10", 
"2019_Tela_AD_2_10", "2019_Tela_AD_2_10", "2019_Tela_AD_2_10", 
"2019_Tela_AD_2_10", "2019_Tela_AD_2_10", "2019_Tela_AD_2_10", 
"2019_Tela_AD_2_10", "2019_Tela_AD_2_10", "2019_Tela_AD_2_10", 
"2019_Tela_AD_2_10", "2019_Tela_AD_2_10", "2019_Tela_AD_2_10", 
"2019_Tela_AD_2_10", "2019_Tela_AD_2_10", "2019_Tela_AD_2_10", 
"2019_Tela_AD_2_10", "2019_Tela_AD_2_10", "2019_Tela_AD_2_10", 
"2019_Tela_AD_2_10", "2019_Tela_AD_2_10", "2019_Tela_AD_2_10", 
"2019_Tela_AD_2_10", "2019_Tela_AD_2_10", "2019_Tela_AD_3_10", 
"2019_Tela_AD_3_10", "2019_Tela_AD_3_10", "2019_Tela_AD_3_10", 
"2019_Tela_AD_3_10", "2019_Tela_AD_3_10", "2019_Tela_AD_3_10", 
"2019_Tela_AD_3_10", "2019_Tela_AD_3_10", "2019_Tela_AD_3_10", 
"2019_Tela_AD_3_10", "2019_Tela_AD_3_10", "2019_Tela_AD_3_10", 
"2019_Tela_AD_3_10", "2019_Tela_AD_3_10", "2019_Tela_AD_3_10", 
"2019_Tela_AD_3_10", "2019_Tela_AD_3_10", "2019_Tela_AD_3_10", 
"2019_Tela_AD_3_10", "2019_Tela_AD_3_10", "2019_Tela_AD_3_10", 
"2019_Tela_AD_3_10", "2019_Tela_AD_3_10", "2019_Tela_AD_3_10", 
"2019_Tela_AD_3_10", "2019_Tela_AD_3_10", "2019_Tela_AD_3_10", 
"2019_Tela_AD_3_10", "2019_Tela_AD_3_10", "2019_Tela_AD_3_10", 
"2019_Tela_AD_3_10", "2019_Tela_AD_3_10", "2019_Tela_AD_3_10"
), Full_name = c("Stegastes planifrons", "Anisotremus virginicus", 
"Stegastes adustus", "Stegastes partitus", "Stegastes partitus", 
"Stegastes partitus", "Stegastes partitus", "Stegastes partitus", 
"Stegastes adustus", "Stegastes partitus", "Stegastes adustus", 
"Stegastes adustus", "Stegastes adustus", "Stegastes variabilis", 
"Stegastes partitus", "Stegastes adustus", "Stegastes adustus", 
"Serranus tortugarum", "Serranus tortugarum", "Stegastes adustus", 
"Stegastes partitus", "Stegastes adustus", "Stegastes partitus", 
"Microspathodon chrysurus", "Abudefduf saxatilis", "Microspathodon chrysurus", 
"Haemulon carbonarium", "Microspathodon chrysurus", "Abudefduf saxatilis", 
"Acanthurus coeruleus", "Haemulon chrysargyreum", "Anisotremus virginicus", 
"Anisotremus virginicus", "Sparisoma viride", "Caranx ruber", 
"Scarus vetula", "Halichoeres bivittatus", "Scarus iseri", "Sparisoma viride", 
"Anisotremus virginicus", "Sparisoma viride", "Scarus iseri", 
"Scarus iseri", "Scarus iseri", "Scarus vetula", "Stegastes partitus", 
"Stegastes adustus", "Stegastes adustus", "Stegastes adustus", 
"Scarus iseri", "Microspathodon chrysurus", "Stegastes adustus", 
"Stegastes adustus", "Microspathodon chrysurus", "Microspathodon chrysurus", 
"Stegastes adustus", "Stegastes adustus", "Scarus iseri", "Stegastes diencaeus", 
"Stegastes adustus", "Haemulon sciurus", "Stegastes adustus", 
"Stegastes adustus", "Anisotremus virginicus", "Lutjanus mahogoni", 
"Scarus vetula", "Stegastes partitus", "Stegastes adustus", "Holacanthus tricolor", 
"Stegastes partitus", "Stegastes adustus", "Stegastes adustus", 
"Stegastes partitus", "Stegastes partitus", "Halichoeres maculipinna", 
"Stegastes partitus", "Stegastes variabilis", "Microspathodon chrysurus", 
"Stegastes adustus", "Stegastes variabilis", "Stegastes adustus", 
"Stegastes partitus", "Stegastes adustus", "Stegastes adustus", 
"Stegastes partitus", "Stegastes adustus", "Stegastes adustus", 
"Stegastes adustus", "Stegastes diencaeus", "Stegastes partitus", 
"Thalassoma bifasciatum", "Chaetodon capistratus", "Haemulon carbonarium", 
"Haemulon carbonarium", "Stegastes adustus", "Thalassoma bifasciatum", 
"Stegastes adustus", "Stegastes partitus", "Stegastes partitus", 
"Stegastes partitus"), counts500 = c(0, 1, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 1, 1, 2, 0, 0, 1, 1, 2, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA, 100L), class = "data.frame")))

Thanks!

CodePudding user response:

Presumably, where there are multiple rows which are all the same, you wish to add together the counts, and where there are none of the species observed you want a zero. That being the case, you can do:

tidyr::pivot_wider(exfibi500, names_from = Full_name, values_from = counts500, 
              values_fn = sum, values_fill = 0)
#> # A tibble: 3 x 29
#>    Year Location Site  Depth Transect ID       `Stegastes pla~` `Anisotremus v~`
#>   <int> <chr>    <chr> <int>    <int> <chr>               <dbl>            <dbl>
#> 1  2019 Tela     AD       10        1 2019_Te~                0                4
#> 2  2019 Tela     AD       10        2 2019_Te~                0                0
#> 3  2019 Tela     AD       10        3 2019_Te~                0                0
#> # ... with 21 more variables: `Stegastes adustus` <dbl>,
#> #   `Stegastes partitus` <dbl>, `Stegastes variabilis` <dbl>,
#> #   `Serranus tortugarum` <dbl>, `Microspathodon chrysurus` <dbl>,
#> #   `Abudefduf saxatilis` <dbl>, `Haemulon carbonarium` <dbl>,
#> #   `Acanthurus coeruleus` <dbl>, `Haemulon chrysargyreum` <dbl>,
#> #   `Sparisoma viride` <dbl>, `Caranx ruber` <dbl>, `Scarus vetula` <dbl>,
#> #   `Halichoeres bivittatus` <dbl>, `Scarus iseri` <dbl>, ...

Created on 2022-04-12 by the reprex package (v2.0.1)

CodePudding user response:

We may use distinct and then pivot_wider

library(dplyr)
library(tidyr)
pivot_wider(distinct(df), names_from = Full_name, values_from = counts500)

-output

# A tibble: 3 × 29
   Year Location Site  Depth Transect ID                `Stegastes plan… `Anisotremus vi… `Stegastes adus… `Stegastes part… `Stegastes vari… `Serranus tortu…
  <int> <chr>    <chr> <int>    <int> <chr>                        <dbl>            <dbl>            <dbl>            <dbl>            <dbl>            <dbl>
1  2019 Tela     AD       10        1 2019_Tela_AD_1_10                0                1                0                0                0                0
2  2019 Tela     AD       10        2 2019_Tela_AD_2_10               NA                0                0                0               NA               NA
3  2019 Tela     AD       10        3 2019_Tela_AD_3_10               NA               NA                0                0                0               NA
# … with 17 more variables: Microspathodon chrysurus <dbl>, Abudefduf saxatilis <dbl>, Haemulon carbonarium <dbl>, Acanthurus coeruleus <dbl>,
#   Haemulon chrysargyreum <dbl>, Sparisoma viride <dbl>, Caranx ruber <dbl>, Scarus vetula <dbl>, Halichoeres bivittatus <dbl>, Scarus iseri <dbl>,
#   Stegastes diencaeus <dbl>, Haemulon sciurus <dbl>, Lutjanus mahogoni <dbl>, Holacanthus tricolor <dbl>, Halichoeres maculipinna <dbl>,
#   Thalassoma bifasciatum <dbl>, Chaetodon capistratus <dbl>
  • Related