Home > OS >  Having trouble with my first for loop which counts the number of times row of X dataframe exists in
Having trouble with my first for loop, which counts the number of times each row of one dataframe exists in another dataframe

Time:11-27

I have these two dataframes:

   # df1: 109 rows (see row.names below) with two character columns,
   # Measures and Format. This is the data frame the rows are looked up in.
   df1<-structure(list(Measures = c("space and shape", "space and shape", 
    "space and shape", "space and shape", "space and shape", "change and relationships", 
    "change and relationships", "change and relationships", "change and relationships", 
    "change and relationships", "space and shape", "space and shape", 
    "space and shape", "space and shape", "uncertainty and data", 
    "quantity", "uncertainty and data", "uncertainty and data", "uncertainty and data", 
    "quantity", "change and relationships", "change and relationships", 
    "space and shape", "space and shape", "space and shape", "quantity", 
    "quantity", "quantity", "quantity", "quantity", "uncertainty and data", 
    "change and relationships", "quantity", "quantity", "uncertainty and data", 
    "change and relationships", "uncertainty and data", "quantity", 
    "change and relationships", "change and relationships", "quantity", 
    "quantity", "quantity", "quantity", "quantity", "quantity", "change and relationships", 
    "uncertainty and data", "change and relationships", "uncertainty and data", 
    "uncertainty and data", "uncertainty and data", "quantity", "quantity", 
    "quantity", "space and shape", "change and relationships", "quantity", 
    "space and shape", "space and shape", "change and relationships", 
    "change and relationships", "uncertainty and data", "uncertainty and data", 
    "quantity", "change and relationships", "quantity", "change and relationships", 
    "space and shape", "quantity", "quantity", "quantity", "space and shape", 
    "space and shape", "space and shape", "uncertainty and data", 
    "uncertainty and data", "uncertainty and data", "change and relationships", 
    "change and relationships", "change and relationships", "uncertainty and data", 
    "uncertainty and data", "uncertainty and data", "change and relationships", 
    "change and relationships", "change and relationships", "change and relationships", 
    "change and relationships", "uncertainty and data", "space and shape", 
    "space and shape", "uncertainty and data", "uncertainty and data", 
    "uncertainty and data", "uncertainty and data", "uncertainty and data", 
    "quantity", "quantity", "space and shape", "space and shape", 
    "space and shape", "space and shape", "change and relationships", 
    "space and shape", "space and shape", "quantity", "change and relationships", 
    "change and relationships"), Format = c("Constructed Response Expert", 
    "Constructed Response Manual", "Constructed Response Expert", 
    "Simple Multiple Choice", "Constructed Response Auto-coded", 
    "Constructed Response Expert", "Constructed Response Expert", 
    "Constructed Response Expert", "Complex Multiple Choice", "Complex Multiple Choice", 
    "Complex Multiple Choice", "Simple Multiple Choice", "Constructed Response Expert", 
    "Constructed Response Expert", "Complex Multiple Choice", "Constructed Response Manual", 
    "Simple Multiple Choice", "Complex Multiple Choice", "Simple Multiple Choice", 
    "Constructed Response Manual", "Constructed Response Manual", 
    "Constructed Response Expert", "Simple Multiple Choice", "Constructed Response Expert", 
    "Constructed Response Auto-coded", "Constructed Response Manual", 
    "Complex Multiple Choice", "Constructed Response Manual", "Simple Multiple Choice", 
    "Simple Multiple Choice", "Simple Multiple Choice", "Simple Multiple Choice", 
    "Complex Multiple Choice", "Simple Multiple Choice", "Constructed Response Auto-coded", 
    "Constructed Response Expert", "Constructed Response Manual", 
    "Constructed Response Manual", "Constructed Response Expert", 
    "Constructed Response Manual", "Complex Multiple Choice", "Constructed Response Expert", 
    "Simple Multiple Choice", "Constructed Response Expert", "Constructed Response Manual", 
    "Simple Multiple Choice", "Constructed Response Expert", "Simple Multiple Choice", 
    "Constructed Response Manual", "Simple Multiple Choice", "Simple Multiple Choice", 
    "Simple Multiple Choice", "Constructed Response Manual", "Constructed Response Manual", 
    "Simple Multiple Choice", "Simple Multiple Choice", "Constructed Response Expert", 
    "Constructed Response Manual", "Constructed Response Manual", 
    "Simple Multiple Choice", "Constructed Response Manual", "Constructed Response Expert", 
    "Simple Multiple Choice", "Simple Multiple Choice", "Simple Multiple Choice", 
    "Constructed Response Expert", "Constructed Response Manual", 
    "Simple Multiple Choice", "Constructed Response Expert", "Simple Multiple Choice", 
    "Constructed Response Manual", "Constructed Response Expert", 
    "Complex Multiple Choice", "Complex Multiple Choice", "Constructed Response Expert", 
    "Constructed Response Expert", "Constructed Response Manual", 
    "Constructed Response Expert", "Constructed Response Manual", 
    "Constructed Response Expert", "Constructed Response Expert", 
    "Constructed Response Manual", "Constructed Response Expert", 
    "Constructed Response Expert", "Simple Multiple Choice", "Simple Multiple Choice", 
    "Constructed Response Manual", "Constructed Response Expert", 
    "Simple Multiple Choice", "Constructed Response Expert", "Constructed Response Manual", 
    "Complex Multiple Choice", "Constructed Response Manual", "Constructed Response Manual", 
    "Complex Multiple Choice", "Simple Multiple Choice", "Simple Multiple Choice", 
    "Simple Multiple Choice", "Constructed Response Manual", "Simple Multiple Choice", 
    "Constructed Response Expert", "Constructed Response Manual", 
    "Constructed Response Manual", "Constructed Response Expert", 
    "Constructed Response Manual", "Constructed Response Expert", 
    "Simple Multiple Choice", "Constructed Response Manual", "Complex Multiple Choice"
    )), row.names = c(NA, -109L), class = "data.frame")
    






   # df2: four "space and shape" rows, one per Format value to look up in
   # df1. Row names are 1, 2, 4, 5 rather than 1..4.
   df2<- structure(list(Measures = c("space and shape", "space and shape", 
"space and shape", "space and shape"), Format = c("Constructed Response Expert", 
"Constructed Response Manual", "Simple Multiple Choice", "Constructed Response Auto-coded"
)), row.names = c(1L, 2L, 4L, 5L), class = "data.frame")

I use this code to subset all the rows of the first dataframe that are also present in the second dataframe.

library(tidyverse)
# Keep only the df1 rows whose (Measures, Format) pair also occurs in df2.
# No `by` is given, so the join uses every column the two frames share.
df1 %>%
  inner_join(df2)

But I also want to count, for each row of df2, the number of matching rows in the first dataframe, and store these counts in a vector. This is a little hard to explain in words, so I made a short video to explain:

https://youtu.be/gxIaNxSXSUM

I tried doing it on my own and wrote a for loop to do this. This is the first time I have ever actually used a loop, and it failed miserably:

# Attempt to count, for each row of df2, how many rows of df1 match it.
# NOTE(review): `num` is not created anywhere in this snippet, so it must
# already exist in the session (possibly holding values from earlier runs).
for(i in 1:nrow(df2)) {  
    # b is rebuilt as a one-row, two-column placeholder on every iteration.
    b<- data.frame('NA','NA')
    # The join result for the i-th row of df2 is assigned into row i of b ...
    b[i,]<- inner_join(df1, df2[i,])
    # ... but the stored value is nrow(b), not the row count of the join result.
    num[i]<- nrow(b)
}

When I print num, I get:

[1] 1 2 3 4 5 6 7 8

CodePudding user response:

Instead of using a loop, one strategy could be to count the groups in df1 prior to your inner_join. One approach might look like this:

library(dplyr)

# Tally each distinct (Measures, Format) combination in df1 first, then
# keep only the tallies for combinations that also appear in df2.
inner_join(
  count(df1, across(everything())),
  df2
)

#> Joining, by = c("Measures", "Format")
#>          Measures                          Format n
#> 1 space and shape Constructed Response Auto-coded 1
#> 2 space and shape     Constructed Response Expert 2
#> 3 space and shape     Constructed Response Manual 1
#> 4 space and shape          Simple Multiple Choice 1

I took a subset of df1 since it's pretty big:

# Six-row excerpt of the question's df1 (columns Measures and Format);
# the counts printed above were produced from this excerpt.
df1 <- structure(list(
    Measures = c(
      "space and shape",
      "space and shape",
      "space and shape",
      "space and shape",
      "space and shape",
      "change and relationships"
    ),
    Format = c(
      "Constructed Response Expert",
      "Constructed Response Manual",
      "Constructed Response Expert",
      "Simple Multiple Choice",
      "Constructed Response Auto-coded",
      "Constructed Response Expert"
    )
  ),
  row.names = c(NA, 6L),
  class = "data.frame"
)

# The four (Measures, Format) combinations whose occurrences in df1 are
# counted; row names 1, 2, 4, 5 are set explicitly.
df2 <- structure(list(
  Measures = c(
    "space and shape",
    "space and shape",
    "space and shape",
    "space and shape"
  ),
  Format = c(
    "Constructed Response Expert",
    "Constructed Response Manual",
    "Simple Multiple Choice",
    "Constructed Response Auto-coded"
  )
),
row.names = c(1L, 2L, 4L, 5L),
class = "data.frame")

CodePudding user response:

Using dplyr and tidyr, is this what you were looking for?

library(dplyr)
library(tidyr)

# Attach the matching df2 row to every df1 row, keeping both sets of key
# columns (.x / .y); df1 rows with no match get NA in the .y columns.
# Then count df1 rows per df2 combination, so the NA group holds the
# unmatched rows.
df <- summarise(
  group_by(
    left_join(df1, df2, keep = TRUE),
    Measures.y, Format.y
  ),
  n = n()
)

``` r

#> # A tibble: 5 x 3
#> # Groups:   Measures.y [2]
#>   Measures.y      Format.y                            n
#>   <chr>           <chr>                           <int>
#> 1 space and shape Constructed Response Auto-coded     2
#> 2 space and shape Constructed Response Expert         9
#> 3 space and shape Constructed Response Manual         6
#> 4 space and shape Simple Multiple Choice              6
#> 5 <NA>            <NA>                               86

Created on 2021-11-26 by the reprex package (v2.0.1)

  •  Tags:  
  • r
  • Related