I ran a merge function to combine some data sets and when I look at them the second row is showing "No data available in the table". Here is my code I'm working on this for a class project so I'm not the most experienced in R. This results in EX1 having data and EX2 and EX3 saying "No data available in the table". The other slim functions I created do have data in them and when i run the sapply(EX3, is.numeric) on EX1 EX2 and EX3 it says all of them are true except geoid which the teacher said is how its supposed to be. Anyone got any idea about this? Let me know if you need anymore information i will do my best to provide it. Thanks
Here are the files I am using https://easyupload.io/lkcd8u
install.packages("dplyr")
library(dplyr)
library(readxl)
S0801 <- read_excel("C:/Users/Ethan/Desktop/School/_Spring 2022/301/D EX 3/Final Sheets/S0801.xlsx")
View(S0801)
library(readxl)
S1501 <- read_excel("C:/Users/Ethan/Desktop/School/_Spring 2022/301/D EX 3/Final Sheets/S1501.xlsx")
View(S1501)
library(readxl)
S1502 <- read_excel("C:/Users/Ethan/Desktop/School/_Spring 2022/301/D EX 3/Final Sheets/S1502.xlsx")
View(S1502)
library(readxl)
S2404 <- read_excel("C:/Users/Ethan/Desktop/School/_Spring 2022/301/D EX 3/Final Sheets/S2404.xlsx")
View(S2404)
S2404_slim <- select(S2404, GEO_ID, S2404_C01_001E, S2404_C01_002E, S2404_C01_005E, S2404_C01_006E, S2404_C01_007E, S2404_C01_008E, S2404_C01_009E, S2404_C01_012E, S2404_C01_013E, S2404_C01_016E, S2404_C01_020E, S2404_C01_023E, S2404_C01_027E)
S1501_slim <- select(S1501, GEO_ID, S1501_C02_009E, S1501_C02_010E, S1501_C02_011E, S1501_C02_012E, S1501_C02_013E, S1501_C02_015E, S1501_C02_018E, S1501_C02_021E, S1501_C02_024E, S1501_C02_027E, S1501_C01_059E)
S1502_slim <- select(S1502, GEO_ID, S1502_C02_002E, S1502_C02_003E, S1502_C02_004E, S1502_C02_005E, S1502_C02_006E)
S0801_slim <- select(S0801, GEO_ID, S0801_C01_001E, S0801_C01_002E, S0801_C01_003E, S0801_C01_004E, S0801_C01_005E, S0801_C01_006E, S0801_C01_007E, S0801_C01_009E, S0801_C01_010E, S0801_C01_011E, S0801_C01_012E, S0801_C01_013E, S0801_C01_046E)
EX1 <- merge(S2404_slim,S1501_slim)
EX2 <- merge(S1502_slim,S0801_slim)
EX3 <- merge(EX1,EX2)
sapply(EX3, is.numeric)
CodePudding user response:
You missed all=TRUE
argument in merge()
. Without it only rows are merged, that are common in all data frames. S0801_slim$GEO_ID
is different from that of the others.
EX1 <- merge(S2404_slim, S1501_slim)
EX2 <- merge(S1502_slim, S0801_slim, all=TRUE)
EX3 <- merge(EX1, EX2, all=TRUE)
Note that you may use this Reduce
approach to merge
multiple data frames at once:
Reduce(\(...) merge(..., all=TRUE), list(S2404_slim, S1501_slim, S1502_slim, S0801_slim))
# GEO_ID S2404_C01_001E S2404_C01_002E S2404_C01_005E S2404_C01_006E S2404_C01_007E
# 1 0100000US NA NA NA NA NA
# 2 0400000US06 10907989 207254 754372 1397968 445030
# S2404_C01_008E S2404_C01_009E S2404_C01_012E S2404_C01_013E S2404_C01_016E S2404_C01_020E
# 1 NA NA NA NA NA NA
# 2 1072689 596688 360822 861556 1404664 1907454
# S2404_C01_023E S2404_C01_027E S1501_C02_009E S1501_C02_010E S1501_C02_011E S1501_C02_012E
# 1 NA NA NA NA NA NA
# 2 755348 642283 20.4 20.9 8 21.6
# S1501_C02_013E S1501_C02_015E S1501_C02_018E S1501_C02_021E S1501_C02_024E S1501_C02_027E
# 1 NA NA NA NA NA NA
# 2 13.1 34.7 37.7 38 32.7 32.1
# S1501_C01_059E S1502_C02_002E S1502_C02_003E S1502_C02_004E S1502_C02_005E S1502_C02_006E
# 1 NA NA NA NA NA NA
# 2 44077 40.7 7.9 18.6 7.2 25.7
# S0801_C01_001E S0801_C01_002E S0801_C01_003E S0801_C01_004E S0801_C01_005E S0801_C01_006E
# 1 139255035 86.4 76 10.4 8 1.4
# 2 NA NA NA NA NA NA
# S0801_C01_007E S0801_C01_009E S0801_C01_010E S0801_C01_011E S0801_C01_012E S0801_C01_013E
# 1 1 4.9 2.8 0.5 1.2 4.1
# 2 NA NA NA NA NA NA
# S0801_C01_046E
# 1 25.2
# 2 NA
Data:
temp <- tempdir() ## create tempdir
temp <- unzip('~/Downloads/Final Sheets.zip') ## unzip download
list2env(setNames(lapply(temp, openxlsx::read.xlsx), ## unpack to workspace
c('S0801', 'S1501', 'S1502', 'S2404')), .GlobalEnv)
unlink(temp) ## delete tempdir
S2404_slim <- subset(S2404,
select=c(GEO_ID, S2404_C01_001E, S2404_C01_002E,
S2404_C01_005E, S2404_C01_006E, S2404_C01_007E,
S2404_C01_008E, S2404_C01_009E, S2404_C01_012E,
S2404_C01_013E, S2404_C01_016E, S2404_C01_020E,
S2404_C01_023E, S2404_C01_027E))
S1501_slim <- subset(S1501, select=c(GEO_ID, S1501_C02_009E, S1501_C02_010E,
S1501_C02_011E, S1501_C02_012E, S1501_C02_013E,
S1501_C02_015E, S1501_C02_018E, S1501_C02_021E,
S1501_C02_024E, S1501_C02_027E, S1501_C01_059E))
S1502_slim <- subset(S1502, select=c(GEO_ID, S1502_C02_002E, S1502_C02_003E,
S1502_C02_004E, S1502_C02_005E, S1502_C02_006E))
S0801_slim <- subset(S0801, select=c(GEO_ID, S0801_C01_001E, S0801_C01_002E,
S0801_C01_003E, S0801_C01_004E, S0801_C01_005E,
S0801_C01_006E, S0801_C01_007E, S0801_C01_009E,
S0801_C01_010E, S0801_C01_011E, S0801_C01_012E,
S0801_C01_013E, S0801_C01_046E))
CodePudding user response:
I'm using you're trying to merge on the GEOID
? If that's the case, you might want to include on = GEOID
in your merge.
merge(EX1,EX2, on="GEOID")
But in regards to your data, it looks like there is only one row in each of the excel sheets you're reading in. Is it supposed to be that way?
The reason EX2 and EX3 don't have any data is because the dataframes you are trying to merge do not share any of the same the data.
Also, just a note: you don't need to import a library more than once.