I have data like this:
# Example data: one identifier column (Accession) followed by three condition
# columns. An empty string means "no value" for that condition in that row.
df <- data.frame(
  Accession = c(
    "Washington", "DC", "NY", "TM", "VA", "UTSAW",
    "UTDFS", "FLOR", "HYTAS", "HUTT", "ITA", "BELI"
  ),
  FirstCon = c(
    "UNS", "UNS", "UNS", "UNS", "UNS", "",
    "UNS", "UNS", "UNS", "UNS", "UNS", "UNS"
  ),
  SeconCon = c(
    "", "", "", "UNS", "", "",
    "", "", "", "UTN", "UTN", "UTN"
  ),
  Upcond = c(
    "", "", "", "UNS", "", "UNS",
    "", "", "", "", "", "UTBTS"
  ),
  stringsAsFactors = FALSE
)
It looks like the following:
    Accession FirstCon SeconCon Upcond
1  Washington      UNS
2          DC      UNS
3          NY      UNS
4          TM      UNS      UNS    UNS
5          VA      UNS
6       UTSAW                      UNS
7       UTDFS      UNS
8        FLOR      UNS
9       HYTAS      UNS
10       HUTT      UNS      UTN
11        ITA      UNS      UTN
12       BELI      UNS      UTN  UTBTS
I want to have an output like this
FirstCon SeconCon Upcond FirstConSeconCon FirstConUpcond SeconConUpcond
11 4 3 4 2 2
FirstConSeconConUpcond
2
Basically, it shows how many rows contain a string in each column, and how many rows contain a string in every column of each combination of columns.
For instance:
Accession FirstCon SeconCon
4 TM **UNS UNS**
10 HUTT **UNS UTN**
11 ITA **UNS UTN**
12 BELI **UNS UTN**
FirstConSeconCon is 4 because there are four rows in which FirstCon has a string
and SeconCon also has a string in the same row.
(The strings don't need to be the same; as long as both columns have a string in the same row, that row counts.)
CodePudding user response:
As I understand it, here is a solution:
# Example data: Accession identifies the row; FirstCon, SeconCon and Upcond
# are the condition columns, where "" marks a missing value.
df <- data.frame(
  Accession = c(
    "Washington", "DC", "NY", "TM",
    "VA", "UTSAW", "UTDFS", "FLOR",
    "HYTAS", "HUTT", "ITA", "BELI"
  ),
  FirstCon = c(
    "UNS", "UNS", "UNS", "UNS",
    "UNS", "", "UNS", "UNS",
    "UNS", "UNS", "UNS", "UNS"
  ),
  SeconCon = c(
    "", "", "", "UNS",
    "", "", "", "",
    "", "UTN", "UTN", "UTN"
  ),
  Upcond = c(
    "", "", "", "UNS",
    "", "UNS", "", "",
    "", "", "", "UTBTS"
  ),
  stringsAsFactors = FALSE
)
# Count the rows of a data frame in which every column holds a string.
#
# df: a data frame whose columns are character (or coercible to character).
#
# Returns a single integer: the number of rows where every column has a
# non-empty, non-NA value. A data frame with zero rows gives 0L.
occurence <- function(df) {
  # Start with every row marked as complete, then knock out rows column by
  # column; this avoids the row-by-row loop and its 1:nrow(df) pitfall on
  # empty input.
  all_filled <- rep(TRUE, nrow(df))
  for (column in df) {
    values <- as.character(column)
    # nzchar(NA) is TRUE by default, so missing values are excluded explicitly.
    all_filled <- all_filled & !is.na(values) & nzchar(values)
  }
  sum(all_filled)
}
# For every non-empty combination of the condition columns (every column of
# df except the first), count the rows in which all selected columns hold a
# string. Each count is named by pasting the selected column names together.
cond_cols <- names(df)[-1]
# combn(..., simplify = FALSE) returns a list of column-name vectors; the
# per-size lists are flattened so singles come first, then pairs, and so on.
# seq_along() keeps this a no-op when there are no condition columns.
combos <- unlist(
  lapply(
    seq_along(cond_cols),
    function(k) combn(cond_cols, k, simplify = FALSE)
  ),
  recursive = FALSE
)
nm <- vapply(combos, paste0, character(1), collapse = "")
# drop = FALSE keeps a single-column selection as a data frame, which is what
# occurence() expects.
res <- vapply(
  combos,
  function(cols) occurence(df[, cols, drop = FALSE]),
  integer(1)
)
names(res) <- nm
res
#> FirstCon SeconCon Upcond
#> 11 4 3
#> FirstConSeconCon FirstConUpcond SeconConUpcond
#> 4 2 2
#> FirstConSeconConUpcond
#> 2
Created on 2022-06-03 by the reprex package (v2.0.1)
CodePudding user response:
Here's one way with base R but without loops:
# Treat empty strings as missing so is.na() identifies unfilled cells.
# Note: this modifies df in place.
df[df == ""] <- NA
# Condition columns: everything except the first (identifier) column.
cols <- names(df)[-1]
# All combinations of two or more condition columns, as a flat list of
# column-name vectors. seq_along(cols)[-1] is empty when there are fewer than
# two columns, whereas 2:length(cols) would count down and make combn() fail.
combins <- do.call(
  "c",
  lapply(seq_along(cols)[-1], function(k) combn(cols, k, simplify = FALSE))
)
# For each combination, count the rows with no NA in any selected column.
combin_vals <- vapply(
  combins,
  function(combo) sum(rowSums(is.na(df[, combo, drop = FALSE])) == 0),
  integer(1)
)
names(combin_vals) <- vapply(combins, paste, character(1), collapse = "")
# Per-column counts first, then the multi-column combinations.
# drop = FALSE keeps colSums() working when there is a single condition column.
c(colSums(!is.na(df[, cols, drop = FALSE])), combin_vals)
FirstCon SeconCon Upcond FirstConSeconCon
11 4 3 4
FirstConUpcond SeconConUpcond FirstConSeconConUpcond
2 2 2