I want your help in finding median age in a race table I have. example of my data
Asian Black Hispanic White
15 0 0 0 6
17 0 0 0 9
19 0 0 0 8
20 0 0 0 12
20.8388888888889 2 0 0 0
20.8583333333333 2 0 0 0
21 1 7 1 31
21.4888888888889 0 0 2 0
21.5277777777778 0 0 2 0
the first column continues to age 99. I tried to do so using the lapply function as below, but I get the median for the Asian race only.
RaceAge= tapply(age,RACE, median)
RaceAge
Asian Black Hispanic White
43 NA NA NA
Many thanks.
CodePudding user response:
We may use
library(dplyr)
library(spatstat)
as.data.frame.matrix(tbl1) %>%
rownames_to_column('age') %>%
summarise(across(-age, ~ weighted.median(age, w = .x, type = 1)))
-output
Asian Black Hispanic White
1 26 17 19 21.8
Or another option is
df2 <- transform(as.data.frame(tbl1),
AgeNiki = as.numeric(as.character(AgeNiki)))
by(df2, df2$Var2, FUN = function(x) weighted.median(x$AgeNiki,
w = x$Freq, type = 1))
df2$Var2: Asian
[1] 26
---------------------------------------------------------------------------------------------------------------------
df2$Var2: Black
[1] 17
---------------------------------------------------------------------------------------------------------------------
df2$Var2: Hispanic
[1] 19
---------------------------------------------------------------------------------------------------------------------
df2$Var2: White
[1] 21.8
data
tbl1 <- structure(c(0L, 0L, 0L, 0L, 2L, 2L, 1L, 0L, 0L, 0L, 0L, 5L, 0L,
0L, 0L, 5L, 7L, 13L, 11L, 4L, 0L, 0L, 0L, 0L, 0L, 0L, 7L, 0L,
0L, 0L, 0L, 3L, 3L, 0L, 0L, 0L, 8L, 6L, 31L, 8L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 2L, 2L, 2L, 2L, 0L, 0L, 0L, 0L, 1L, 4L, 5L, 26L,
28L, 6L, 9L, 8L, 12L, 0L, 0L, 31L, 0L, 0L, 0L, 0L, 36L), dim = c(18L,
4L), dimnames = list(AgeNiki = c("15", "17", "19", "20", "20.8",
"21", "21.4", "21.5", "21.8", "21.9", "22", "23", "23.1", "23.2",
"24", "25", "26", "27"), c("Asian", "Black", "Hispanic", "White"
)), class = "table")
CodePudding user response:
data <- data.frame(age = c(15, 17, 19, 20, 20.8388888888889, 20.8583333333333,
21, 21.4888888888889, 21.5277777777778), Asian = c(0L, 0L, 0L,
0L, 2L, 2L, 1L, 0L, 0L), Black = c(0L, 0L, 0L, 0L, 0L, 0L, 7L,
0L, 0L), Hispanic = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 2L, 2L), White = c(6L,
9L, 8L, 12L, 0L, 0L, 31L, 0L, 0L))
apply(data[-1], 2, \(x) median(rep(data$age,x)))
#> Asian Black Hispanic White
#> 20.85833 21.00000 21.48889 20.00000
CodePudding user response:
You can use DescTools::Untable()
with tapply()
:
with(DescTools::Untable(tbl1, colnames = c("age", "race")),
tapply(as.numeric(levels(age))[age], race, median))
# Asian Black Hispanic White
# 26.0 17.0 19.0 21.8
or with aggregate()
:
aggregate(age ~ race, DescTools::Untable(tbl1, colnames = c("age", "race")),
\(x) median(as.numeric(levels(x))[x]))
# race age
# 1 Asian 26.0
# 2 Black 17.0
# 3 Hispanic 19.0
# 4 White 21.8
Data tbl1
is credited to @akrun's answer.