Home > Software engineering >  A method to calculate the median age in a race table in R
A method to calculate the median age in a race table in R

Time:12-28

I want your help in finding median age in a race table I have. example of my data

                  
                   Asian Black Hispanic White
  15                   0     0        0     6
  17                   0     0        0     9
  19                   0     0        0     8
  20                   0     0        0    12
  20.8388888888889     2     0        0     0
  20.8583333333333     2     0        0     0
  21                   1     7        1    31
  21.4888888888889     0     0        2     0
  21.5277777777778     0     0        2     0

the first column continues to age 99. I tried to do so using the lapply function as below, but I get the median for the Asian race only.

RaceAge= tapply(age,RACE, median)
RaceAge
   Asian    Black Hispanic    White 
      43       NA       NA       NA 

Many thanks.

CodePudding user response:

We may use

library(dplyr)
library(spatstat)
as.data.frame.matrix(tbl1) %>% 
  rownames_to_column('age') %>%
  summarise(across(-age, ~ weighted.median(age, w = .x, type = 1)))

-output

  Asian Black Hispanic White
1    26    17       19  21.8

Or another option is

df2 <- transform(as.data.frame(tbl1),
    AgeNiki = as.numeric(as.character(AgeNiki)))
by(df2, df2$Var2, FUN = function(x) weighted.median(x$AgeNiki,
     w = x$Freq, type = 1))
 df2$Var2: Asian
[1] 26
--------------------------------------------------------------------------------------------------------------------- 
df2$Var2: Black
[1] 17
--------------------------------------------------------------------------------------------------------------------- 
df2$Var2: Hispanic
[1] 19
--------------------------------------------------------------------------------------------------------------------- 
df2$Var2: White
[1] 21.8

data

tbl1 <- structure(c(0L, 0L, 0L, 0L, 2L, 2L, 1L, 0L, 0L, 0L, 0L, 5L, 0L, 
0L, 0L, 5L, 7L, 13L, 11L, 4L, 0L, 0L, 0L, 0L, 0L, 0L, 7L, 0L, 
0L, 0L, 0L, 3L, 3L, 0L, 0L, 0L, 8L, 6L, 31L, 8L, 0L, 0L, 0L, 
0L, 0L, 0L, 1L, 2L, 2L, 2L, 2L, 0L, 0L, 0L, 0L, 1L, 4L, 5L, 26L, 
28L, 6L, 9L, 8L, 12L, 0L, 0L, 31L, 0L, 0L, 0L, 0L, 36L), dim = c(18L, 
4L), dimnames = list(AgeNiki = c("15", "17", "19", "20", "20.8", 
"21", "21.4", "21.5", "21.8", "21.9", "22", "23", "23.1", "23.2", 
"24", "25", "26", "27"), c("Asian", "Black", "Hispanic", "White"
)), class = "table")

CodePudding user response:

data <- data.frame(age = c(15, 17, 19, 20, 20.8388888888889, 20.8583333333333, 
21, 21.4888888888889, 21.5277777777778), Asian = c(0L, 0L, 0L, 
0L, 2L, 2L, 1L, 0L, 0L), Black = c(0L, 0L, 0L, 0L, 0L, 0L, 7L, 
0L, 0L), Hispanic = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 2L, 2L), White = c(6L, 
9L, 8L, 12L, 0L, 0L, 31L, 0L, 0L))

apply(data[-1], 2, \(x) median(rep(data$age,x)))
#>    Asian    Black Hispanic    White 
#> 20.85833 21.00000 21.48889 20.00000

CodePudding user response:

You can use DescTools::Untable() with tapply():

with(DescTools::Untable(tbl1, colnames = c("age", "race")),
     tapply(as.numeric(levels(age))[age], race, median))

#   Asian    Black Hispanic    White
#    26.0     17.0     19.0     21.8

or with aggregate():

aggregate(age ~ race, DescTools::Untable(tbl1, colnames = c("age", "race")),
          \(x) median(as.numeric(levels(x))[x]))

#       race  age
# 1    Asian 26.0
# 2    Black 17.0
# 3 Hispanic 19.0
# 4    White 21.8

Data tbl1 is credited to @akrun's answer.

  • Related