Suppose I've got this data simulated from the below R code:
# Load the package that (presumably) supplies CmatStarGpois() and GenMVGpois(),
# the two non-base functions called later in this script.
library(RNGforGPD)
# Fix the random seed so the simulated sample is reproducible.
set.seed(1)
# Simulation settings: number of observations to draw and number of
# variables to generate.
sample.size <- 10
no.gpois <- 3
# One entry per variable; both vectors are handed to the RNGforGPD calls
# that follow (presumably the generalized Poisson parameters -- confirm
# against the package documentation).
lambda.vec <- c(-0.2, 0.2, -0.3)
theta.vec <- c(1, 3, 4)
# Build the target matrix TV: start from the 3 x 3 identity, place the values
# of M in its strict lower triangle (filled column by column), then form the
# matrix product N %*% t(N), which is symmetric by construction.
M <- c(0.352, 0.265, 0.342)
N <- diag(3)
N[lower.tri(N)] <- M
TV <- N %*% t(N)
# Force a unit diagonal so TV has the form of a correlation matrix.
diag(TV) <- 1
# Derive the intermediate matrix `cstar` from the target matrix TV and the
# two parameter vectors; `cstar` is what GenMVGpois() receives below.
# verbose = TRUE presumably prints progress information -- confirm in the docs.
cstar = CmatStarGpois(TV, theta.vec, lambda.vec, verbose = TRUE)
# Simulate the sample from sample.size, no.gpois, cstar and the two
# parameter vectors.
# NOTE(review): the rest of the script indexes `data` like a matrix
# (data[, 1], apply(data, 2, ...)), so details = FALSE is assumed to return
# the bare data matrix -- confirm against the RNGforGPD documentation.
data = GenMVGpois(sample.size, no.gpois, cstar, theta.vec, lambda.vec, details = FALSE)
> prop.table(table(data[,1]))
0 1 2
0.3 0.4 0.3
> prop.table(table(data[,2]))
2 3 6 8 10
0.2 0.4 0.1 0.2 0.1
> prop.table(table(data[,3]))
2 3 4 5 6
0.2 0.3 0.1 0.3 0.1
> table(data)
data
0 1 2 3 4 5 6 8 10
3 4 7 7 1 3 2 2 1
I'd like to create a single proportion matrix covering all three categorical variables, with one row per category and one column per variable. If a category does not occur in a given column, its proportion should be reported as 0.
Cat | X1 | X2 | X3 |
---|---|---|---|
0 | 0.3 | 0.0 | 0.0 |
1 | 0.4 | 0.0 | 0.0 |
2 | 0.3 | 0.2 | 0.2 |
3 | 0.0 | 0.4 | 0.3 |
4 | 0.0 | 0.0 | 0.1 |
5 | 0.0 | 0.0 | 0.3 |
6 | 0.0 | 0.1 | 0.1 |
8 | 0.0 | 0.2 | 0.0 |
10 | 0.0 | 0.1 | 0.0 |
CodePudding user response:
# dplyr supplies the pipe, as_tibble(), count(), group_by(), mutate() and
# arrange(); tidyr supplies pivot_longer() and pivot_wider().
library(dplyr)
library(tidyr)

# Name the columns so they become the variable labels after reshaping.
colnames(data) <- c("X1", "X2", "X3")

as_tibble(data) %>%
  # Long format: one row per observation, with the source column in `name`
  # and the observed category in `Cat`.
  pivot_longer(cols = X1:X3, values_to = "Cat") %>%
  # Frequency of every category within every variable.
  count(name, Cat) %>%
  # Convert counts to within-variable proportions. A grouped mutate() keeps
  # one row per (name, Cat) pair without relying on a multi-row summarise(),
  # which dplyr 1.1.0 deprecated.
  group_by(name) %>%
  mutate(proportion = n / sum(n)) %>%
  ungroup() %>%
  # Wide format: one row per category, one column per variable.
  # id_cols = Cat drops the helper count column `n`; values_fill = 0 writes 0
  # for categories that never occur in a variable.
  pivot_wider(
    id_cols = Cat,
    names_from = name,
    values_from = proportion,
    values_fill = 0
  ) %>%
  arrange(Cat)
# A tibble: 9 × 4
Cat X1 X2 X3
<dbl> <dbl> <dbl> <dbl>
1 0 0.3 0 0
2 1 0.4 0 0
3 2 0.3 0.2 0.2
4 3 0 0.4 0.3
5 4 0 0 0.1
6 5 0 0 0.3
7 6 0 0.1 0.1
8 8 0 0.2 0
9 10 0 0.1 0
If you would like it as a matrix, you can use as.matrix()
CodePudding user response:
I have added comments explaining the logic at the relevant points in the code below.
# Skeleton result: a single Cat column holding every distinct value that
# occurs anywhere in `data`, in ascending order.
props <- data.frame(Cat = sort(unique(as.vector(data))))
# The remaining columns will come from per-column frequency tables.
# Display them here: `table()` is applied to each column of `data` in turn.
apply(data, MARGIN = 2, FUN = table)
[[1]]
0 1 2 # these are actually character valued names
3 4 3 # while these are the count values
[[2]]
2 3 6 8 10
2 4 1 2 1
[[3]]
2 3 4 5 6
2 3 1 3 1
Now iterate over that list to fill in values that match the Cat column:
# Build the count table: one row per category in props$Cat, one column per
# column of `data`.
#
# The per-column tables are produced with lapply() over the column indices
# rather than apply(data, 2, table): apply() only returns a list when the
# tables differ in length, and silently simplifies to a matrix when every
# column has the same number of distinct values, which would break the
# name-based fill below. setNames() keeps the column names that apply()
# would have attached (none if `data` has no column names).
props2 <- cbind(
  props, # a data frame as first argument makes cbind() return a data frame
  lapply(
    setNames(
      lapply(seq_len(ncol(data)), function(j) table(data[, j])),
      colnames(data)
    ),
    function(col) {
      # Start from a zero count for every category, named by category ...
      x <- setNames(rep(0, length(props$Cat)), props$Cat)
      # ... then overwrite the entries whose names occur in this column's
      # table (table names are the category values as character strings).
      x[names(col)] <- col
      x
    }
  )
)
props2
#-------------
Cat V1 V2 V3
0 0 3 0 0
1 1 4 0 0
2 2 3 2 2
3 3 0 4 3
4 4 0 0 1
5 5 0 0 3
6 6 0 1 1
8 8 0 2 0
10 10 0 1 0
#---
# now just "proportionalize" those counts
# Rescale the three count columns (columns 2 to 4) so that each column sums
# to 1; the Cat column is left untouched.
props2[, 2:4] <- prop.table(as.matrix(props2[, 2:4]), margin = 2)
props2
#-------------
Cat V1 V2 V3
0 0 0.3 0.0 0.0
1 1 0.4 0.0 0.0
2 2 0.3 0.2 0.2
3 3 0.0 0.4 0.3
4 4 0.0 0.0 0.1
5 5 0.0 0.0 0.3
6 6 0.0 0.1 0.1
8 8 0.0 0.2 0.0
10 10 0.0 0.1 0.0