Determine the total value of a frequency table-CodePudding

I have a dataset in which the number of pixels is counted for each value looking like this:

      HISTO_2 HISTO_3 HISTO_4 HISTO_5 HISTO_6 HISTO_7 HISTO_10 HISTO_11 HISTO_14 HISTO_18  HISTO_19 HISTO_23
1        0     390     652     157      32       7       0       0        0        0        0        0
2        0      22      41      27      23      11       8       5        4       11        2        4
3        0     916     671     167      40       7       4       5        2        1        2        2
4        0    2600     810     172      38       0       0       0        0        0        0        0
5        0     110     987     791     248      59      11       5        0        1        0        0
6        0     778     808     182      43       5       0       0        0        0        0        0
7        0    1095     846     199      55      12       8       3        0        0        0        0
8        0    1045     545      60       0       0       0       0        0        0        0        0
9        0     868     422      92       2       0       0       0        0        0        0        0
10       0    1225     597     160      57      27       0       0        0        0        0        0
11       0    1092    1096     635     150      33       0       0        0        0        0        0

HISTO_2 caputres the number of pixels with value 2, HISTO_3 caputres the number of pixels with value 3 and so on. I need to find a way that enables me to efficiently count the total value of all pixels per row. This has to be done for ten datasets. As can be seen in the table, the value of the columns doesn't follow a balanced sequence and for each dataset, the sequence can be different.

Any efficient solutions to my problem?
PS: If you come up better title for my question, feel free to edit :)

CodePudding user response：

Is this what you are looking for:

apply(df,1,sum,na.rm=T)

   1    2    3    4    5    6    7    8    9   10   11 
1238  158 1817 3620 2212 1816 2218 1650 1384 2066 3006

CodePudding user response：

library(dplyr)

dataset1 <- data.frame(HISTO_2 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
                       HISTO_3 = c(390, 22, 916, 2600, 110, 778, 1095, 1045, 868, 1225, 1092),
                       HISTO_4 = c(652, 41, 671, 810, 987, 808, 846, 545, 422, 597, 1096),
                       HISTO_5 = c(157, 27, 167, 172, 791, 182, 199, 60, 92, 160, 635),
                       HISTO_6 = c(32, 23, 40, 38, 248, 43, 55, 0, 2, 57, 150),
                       HISTO_7 = c(7, 11, 7, 0, 59, 5, 12, 0, 0, 27, 33),
                       HISTO_10 = c(0, 8, 4, 0, 11, 0, 8, 0, 0, 0, 0),
                       HISTO_11 = c(0, 5, 5, 0, 5, 0, 3, 0, 0, 0, 0),
                       HISTO_14 = c(0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0),
                       HISTO_18 = c(0, 11, 1, 0, 1, 0, 0, 0, 0, 0, 0),
                       HISTO_19 = c(0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0),
                       HISTO_23 = c(0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0))

dataset1 %>% 
  summarize(total_pixels = rowSums(.[1:11]))

CodePudding user response：

With dplyr

library(dplyr)

df %>% 
  rowwise() %>% 
  mutate(total = sum(c_across(everything()))) %>% 
  ungroup() %>% 
  print(Inf)
# A tibble: 11 × 13
   HISTO_2 HISTO_3 HISTO_4 HISTO_5 HISTO_6 HISTO_7 HISTO_10 HISTO_11 HISTO_14
     <int>   <int>   <int>   <int>   <int>   <int>    <int>    <int>    <int>
 1       0     390     652     157      32       7        0        0        0
 2       0      22      41      27      23      11        8        5        4
 3       0     916     671     167      40       7        4        5        2
 4       0    2600     810     172      38       0        0        0        0
 5       0     110     987     791     248      59       11        5        0
 6       0     778     808     182      43       5        0        0        0
 7       0    1095     846     199      55      12        8        3        0
 8       0    1045     545      60       0       0        0        0        0
 9       0     868     422      92       2       0        0        0        0
10       0    1225     597     160      57      27        0        0        0
11       0    1092    1096     635     150      33        0        0        0
   HISTO_18 HISTO_19 HISTO_23 total
      <int>    <int>    <int> <int>
 1        0        0        0  1238
 2       11        2        4   158
 3        1        2        2  1817
 4        0        0        0  3620
 5        1        0        0  2212
 6        0        0        0  1816
 7        0        0        0  2218
 8        0        0        0  1650
 9        0        0        0  1384
10        0        0        0  2066
11        0        0        0  3006

For all 10 data sets

df_list <- list(df1, df2, df3, df4, df5, df6, df7, df8, df9, df10)

lapply(df_list ,function(x)
  x %>% 
    rowwise() %>% 
    mutate(total = sum(c_across(everything()))) %>% 
    ungroup())
}

Data

df <- structure(list(HISTO_2 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L), HISTO_3 = c(390L, 22L, 916L, 2600L, 110L, 778L, 1095L, 
1045L, 868L, 1225L, 1092L), HISTO_4 = c(652L, 41L, 671L, 810L, 
987L, 808L, 846L, 545L, 422L, 597L, 1096L), HISTO_5 = c(157L, 
27L, 167L, 172L, 791L, 182L, 199L, 60L, 92L, 160L, 635L), HISTO_6 = c(32L, 
23L, 40L, 38L, 248L, 43L, 55L, 0L, 2L, 57L, 150L), HISTO_7 = c(7L, 
11L, 7L, 0L, 59L, 5L, 12L, 0L, 0L, 27L, 33L), HISTO_10 = c(0L, 
8L, 4L, 0L, 11L, 0L, 8L, 0L, 0L, 0L, 0L), HISTO_11 = c(0L, 5L, 
5L, 0L, 5L, 0L, 3L, 0L, 0L, 0L, 0L), HISTO_14 = c(0L, 4L, 2L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), HISTO_18 = c(0L, 11L, 1L, 0L, 
1L, 0L, 0L, 0L, 0L, 0L, 0L), HISTO_19 = c(0L, 2L, 2L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L), HISTO_23 = c(0L, 4L, 2L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L)), class = "data.frame", row.names = c("1", 
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11"))

CodePudding user response：

Since you are interested in the TOTAL VALUE of the row pixels, you could do:

Base R:

colSums(t(df) * as.numeric(gsub('\\D', '',names(df))))
    1     2     3     4     5     6     7     8     9    10    11 
 4804  1099  6781 12128 10317  6769  8191  5615  4764  7394 11966

or even:

as.matrix(df)%*%as.numeric(gsub('\\D', '',names(df)))
    [,1]
1   4804
2   1099
3   6781
4  12128
5  10317
6   6769
7   8191
8   5615
9   4764
10  7394
11 11966

if you are unfamiliar with gsub and \\D ie regex, then use

library(readr)
as.matrix(df) %*% parse_number(names(df))
    [,1]
1   4804
2   1099
3   6781
4  12128
5  10317
6   6769
7   8191
8   5615
9   4764
10  7394
11 11966