Home > Blockchain >  Elegantly create a data frame from a larger data frame
Elegantly create a data frame from a larger data frame

Time:01-18

I have a data set from which I need to extract another data set. I am interested in married women and their living conditions, and would like to create a data set for married women only, that would, for each woman, contain the following information:

  • Her age;
  • Her husband’s age;
  • Her level of education;
  • Her husband’s level of education;
  • Number of children (regardless of the age);
  • Number of children under the age of seven;
  • Whether or not she lives with her mother (or her mother lives with her);
  • Whether or not she lives with her father (or vice versa);
  • Whether or not she lives with her father-in-law (or vice versa);
  • Whether or not she lives with her mother-in-law (or vice versa).

I wrote the code below, but I would like someone to help me replace it with a faster and more elegant code using dplyr/tidyverse:

library(tidyverse)
library(dplyr)
library(tidyr)
library(haven)

data <- read_sav("Individus  ENCDM 2014 en format SAV.sav")

married_women <- data %>% filter(Sexe==2 & Etat_matrimonial==2 & Liendeparenté==2)

child <- c() # children
c_six <- c() # children below six
h_age <- c() # husband's age
h_edu <- c() # husband's education 
w_mum <- c() # wife's mum
w_dad <- c() # wife's dad
h_mum <- c() # husband's mum
h_dad <- c() # husband's dad

for (i in married_women$N_ménage) {
  a <- nrow(individual[individual$N_ménage==i & individual$Liendeparenté==3,])
  child <- append(child, a)
  b <- individual[individual$N_ménage==i & individual$Liendeparenté==1,]$Age
  h_age <- append(h_age, b)
  c <- individual[individual$N_ménage==i & individual$Liendeparenté==1,]$Niveau_scolaire_agreg
  h_edu <- append(h_edu, c)
  d <- nrow(individual[individual$N_ménage==i & individual$Liendeparenté==7 & individual$Sexe==2,])
  w_mum <- append(w_mum, d)
  e <- nrow(individual[individual$N_ménage==i & individual$Liendeparenté==6 & individual$Sexe==2,])
  w_dad <- append(w_dad, e)
  f <- nrow(individual[individual$N_ménage==i & individual$Liendeparenté==7 & individual$Sexe==1,])
  h_mum <- append(h_mum, f)
  g <- nrow(individual[individual$N_ménage==i & individual$Liendeparenté==6 & individual$Sexe==1,])
  h_dad <- append(h_dad, g)
  h <- nrow(individual[individual$N_ménage==i & individual$Liendeparenté==3 & individual$Age<=6,])
  c_six <- append(c_six, h)
}

married_women$Number_children <- child
married_women$N_chl_below_six <- c_six
married_women$Husband_age     <- h_age
married_women$Husband_educati <- h_edu
married_women$Mother_of_wife  <- w_mum
married_women$Mother_of_husb  <- h_mum
married_women$Father_of_wife  <- w_dad
married_women$Father_of_husb  <- h_dad

and here is a small chunk of the data set I am using:

> dput(d_head)
structure(list(N_ménage = structure(c(1, 2, 3, 4, 5, 6, 7, 8, 
9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20), label = "Numéro du ménage", format.spss = "F8.2"), 
    Nordre = structure(c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
    2, 2, 2, 2, 2, 2, 2, 2), label = "Numéro d'ordre de l'individu", format.spss = "F2.0", display_width = 4L), 
    Coef_individu = structure(c(168, 178, 183, 178, 157, 183, 
    178, 121, 157, 160, 168, 178, 183, 178, 157, 183, 178, 121, 
    157, 160), label = "Coefficient individu", format.spss = "F8.2", display_width = 4L), 
    Milieu = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1), label = "Milieu de résidence", format.spss = "F1.0", display_width = 4L, labels = c(Urbain = 1, 
    Rural = 2), class = c("haven_labelled", "vctrs_vctr", "double"
    )), Région_12 = structure(c(12, 12, 12, 12, 12, 12, 12, 
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), label = "Régions", format.spss = "F2.0", display_width = 11L, labels = c(`Tanger-Tétouan-Al Hoceïma` = 1, 
    Oriental = 2, `Fès-Meknès` = 3, `Rabat-Salé-Kénitra` = 4, 
    `Béni Mellal-Khénifra` = 5, `Casablanca-Settat` = 6, `Marrakech-Safi` = 7, 
    `Drâa-Tafilalet` = 8, `Souss-Massa` = 9, `Guelmim-Oued Noun` = 10, 
    `Laâyoune-Sakia El Hamra` = 11, `Dakhla-Oued Ed Dahab` = 12
    ), class = c("haven_labelled", "vctrs_vctr", "double")), 
    Sexe = structure(c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
    2, 2, 2, 2, 2, 2, 2), label = "Sexe", format.spss = "F1.0", display_width = 4L, labels = c(Masculin = 1, 
    Féminin = 2), class = c("haven_labelled", "vctrs_vctr", 
    "double")), Age = structure(c(17, 31, 27, 30, 41, 28, 32, 
    50, 40, 39, 17, 31, 27, 30, 41, 28, 32, 50, 40, 39), label = "Age", format.spss = "F2.0", display_width = 4L), 
    Age_quin = structure(c(4, 7, 6, 7, 9, 6, 7, 11, 9, 8, 4, 
    7, 6, 7, 9, 6, 7, 11, 9, 8), label = "Age quinquenal de l'individu", format.spss = "F8.2", display_width = 10L, labels = c(`0-4` = 1, 
    `5-9` = 2, `10-14` = 3, `15-19` = 4, `20-24` = 5, `25-29` = 6, 
    `30-34` = 7, `35-39` = 8, `40-45` = 9, `45-49` = 10, `50-54` = 11, 
    `55-59` = 12, `60-64` = 13, `65-69` = 14, `70-74` = 15, `75 et plus` = 16
    ), class = c("haven_labelled", "vctrs_vctr", "double")), 
    Liendeparenté = structure(c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2), label = "Lien de parenté avec le chef de ménage", format.spss = "F2.0", display_width = 4L, labels = c(`Chef de ménage` = 1, 
    `Epoux (se) du chef de ménage` = 2, `Fils (fille) du chef de ménage` = 3, 
    `Epouse du fils /Epoux de la fille du chef de ménage` = 4, 
    `Petit fils (petite fille) du chef de ménage` = 5, `Père ou mère du chef de ménage` = 6, 
    `Père ou mère de l’époux (épouse) du chef de ménage` = 7, 
    `Frère ou sœur du chef de ménage` = 8, `Autres personne en lien de parenté avec le chef de ménage` = 9, 
    `Autres personne sans lien de parenté avec le chef de ménage` = 10, 
    `Personne travaillant pour le compte du ménage: aude ménagère, chauffeur, jardinier...` = 11
    ), class = c("haven_labelled", "vctrs_vctr", "double")), 
    Etat_matrimonial = structure(c(2, 2, 2, 2, 2, 2, 2, 2, 2, 
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), label = "Etat matrimonial", format.spss = "F1.0", display_width = 15L, labels = c(Célibataire = 1, 
    `Marié(e)` = 2, `Divorcé(e)` = 3, `Veuf(ve)` = 4), class = c("haven_labelled", 
    "vctrs_vctr", "double")), Niveau_scolaire_agreg = structure(c(2, 
    3, 2, 5, 5, 4, 2, 4, 0, 4, 2, 3, 2, 5, 5, 3, 2, 4, 0, 4), label = "Le plus haut niveau scolaire", format.spss = "F8.2", display_width = 7L, labels = c(`Sans niveau` = 0, 
    Préscolaire = 1, Primaire = 2, `Secondaire collégial` = 3, 
    `Secondaire qualifiant` = 4, Supérieur = 5, `Autre niveau` = 6
    ), class = c("haven_labelled", "vctrs_vctr", "double")), 
    Diplôme_agregé = structure(c(1, 2, 1, 3, 3, 2, 1, 2, 1, 
    2, 1, 2, 1, 3, 3, 2, 1, 2, 1, 2), label = "Le plus haut diplôme", format.spss = "F4.0", display_width = 10L, labels = c(`Age moins de 10 ans` = 0, 
    `Sans diplôme` = 1, `Niveau moyen` = 2, `Niveau supérieur` = 3
    ), class = c("haven_labelled", "vctrs_vctr", "double")), 
    Type_activité_dominante = structure(c(4, 4, 4, 3, 1, 4, 
    4, 1, 4, 4, 4, 4, 4, 3, 1, 4, 4, 1, 4, 4), label = "Type d'activité habituelle durant les 12 dérniers mois", format.spss = "F2.0", display_width = 9L, labels = c(`Actif occupé` = 1, 
    `Chômeur ayant déjà travaillé` = 2, `Chômeur n' ayant jamais travaillé` = 3, 
    `Femme au foyer` = 4, `Elève, étudiant` = 5, Enfant = 6, 
    `Personne  âgée` = 7, Retraité = 8, Rentier = 9, `Malade/infirme` = 10, 
    `Autre inactif` = 11, `Non déclaré` = 99), class = c("haven_labelled", 
    "vctrs_vctr", "double")), Profession_agreg = structure(c(NA, 
    NA, NA, 0, 2, NA, NA, 2, NA, NA, NA, NA, NA, 0, 2, NA, NA, 
    2, NA, NA), label = "Catégories professionnelles dans l'activité habituelle durant les 12 dérniers mois", format.spss = "F1.0", display_width = 16L, labels = c(`Chômeur n'ayant jamais travaillés` = 0, 
    `directeurs et cadres de direction, membres des corps législatifs,cadres supérieurs et membres des professions libérales` = 1, 
    `Cadres moyens et employés de bureau` = 2, `Commerçants, intermédiaires commerciaux  et financiers` = 3, 
    `Exploitants et ouvriers agricoles (y compris pêche, chasse et forêt)` = 4, 
    `Artisans et ouvriers qualifies et conducteurs d'installations et de machines et ouvriers  de l'assemblage` = 5, 
    `Manœuvres non agricoles, manutentionnaires et  travailleurs des petits métiers` = 6, 
    `Non déclaré` = 9), class = c("haven_labelled", "vctrs_vctr", 
    "double")), Secteur_activité_agreg = structure(c(NA, NA, 
    NA, 0, 5, NA, NA, 5, NA, NA, NA, NA, NA, 0, 5, NA, NA, 5, 
    NA, NA), label = "Secteur d'activité habituelle durant les 12 dérniers mois", format.spss = "F1.0", display_width = 14L, labels = c(`Chômeur n'ayant jamais travaillé` = 0, 
    `Agriculture, forêt et pêhe` = 1, Industrie = 2, `Batiment et travaux publics` = 3, 
    Commerce = 4, Service = 5, `Non déclaré` = 9), class = c("haven_labelled", 
    "vctrs_vctr", "double")), Situation_profession_agreg = structure(c(1, 
    1, 1, 0, 2, 1, 1, 2, 1, 1, 1, 1, 1, 0, 2, 1, 1, 2, 1, 1), label = "Situation dans la profession habituelle durant les 12 dérniers mois", format.spss = "F1.0", labels = c(`Chômeur n'ayant jamais travaillé` = 0, 
    Inactif = 1, Salarié = 2, Indépendant = 3, Employeur = 4, 
    `Autres situation` = 5, `Non déclaré` = 9), class = c("haven_labelled", 
    "vctrs_vctr", "double")), N_Children = c(0L, 1L, 1L, 1L, 
    2L, 2L, 2L, 4L, 6L, 5L, 0L, 1L, 1L, 1L, 2L, 2L, 2L, 4L, 6L, 
    5L), Husband_age = c(26, 36, 38, 34, 42, 33, 40, 50, 49, 
    53, 26, 36, 38, 34, 42, 33, 40, 50, 49, 53), Husband_edu = structure(c(2, 
    4, 2, 5, 5, 4, 3, 4, 4, 0, 2, 4, 2, 5, 5, 4, 3, 4, 4, 0), label = "Le plus haut niveau scolaire", format.spss = "F8.2", display_width = 7L, labels = c(`Sans niveau` = 0, 
    Préscolaire = 1, Primaire = 2, `Secondaire collégial` = 3, 
    `Secondaire qualifiant` = 4, Supérieur = 5, `Autre niveau` = 6
    ), class = c("haven_labelled", "vctrs_vctr", "double")), 
    wife_mother = c(0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L), Wife_mother = c(0L, 
    0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 
    0L, 0L, 0L, 0L), Husband_mother = c(0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), 
    Wife_father = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Husband_father = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), Act = c(0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 
    0, 0, 1, 1, 0, 0, 1, 0, 0), Below_six = c(0L, 1L, 1L, 1L, 
    1L, 1L, 2L, 1L, 1L, 3L, 0L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 
    3L), Activity = c(0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 
    0, 1, 0, 0, 1, 0, 0)), row.names = c(NA, -20L), class = c("tbl_df", 
"tbl", "data.frame"))

Can someone please help me rewrite the code in a more concise manner. Thank you very much in advance.

PS: this is dput for the raw data (before modification):

> dput(d_head)
structure(list(N_ménage = structure(c(1, 1, 2, 2, 2, 3, 3, 3, 
4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6), label = "Numéro du ménage", format.spss = "F8.2"), 
    Nordre = structure(c(2, 1, 3, 2, 1, 3, 2, 1, 3, 2, 1, 4, 
    3, 2, 1, 4, 3, 5, 2, 1), label = "Numéro d'ordre de l'individu", format.spss = "F2.0", display_width = 4L), 
    Coef_individu = structure(c(168, 392, 191, 178, 307, 187, 
    183, 307, 191, 178, 427, 164, 162, 157, 306, 191, 164, 118, 
    183, 427), label = "Coefficient individu", format.spss = "F8.2", display_width = 4L), 
    Milieu = structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1), label = "Milieu de résidence", format.spss = "F1.0", display_width = 4L, labels = c(Urbain = 1, 
    Rural = 2), class = c("haven_labelled", "vctrs_vctr", "double"
    )), Région_12 = structure(c(12, 12, 12, 12, 12, 12, 12, 
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), label = "Régions", format.spss = "F2.0", display_width = 11L, labels = c(`Tanger-Tétouan-Al Hoceïma` = 1, 
    Oriental = 2, `Fès-Meknès` = 3, `Rabat-Salé-Kénitra` = 4, 
    `Béni Mellal-Khénifra` = 5, `Casablanca-Settat` = 6, `Marrakech-Safi` = 7, 
    `Drâa-Tafilalet` = 8, `Souss-Massa` = 9, `Guelmim-Oued Noun` = 10, 
    `Laâyoune-Sakia El Hamra` = 11, `Dakhla-Oued Ed Dahab` = 12
    ), class = c("haven_labelled", "vctrs_vctr", "double")), 
    Sexe = structure(c(2, 1, 2, 2, 1, 1, 2, 1, 2, 2, 1, 1, 2, 
    2, 1, 2, 1, 2, 2, 1), label = "Sexe", format.spss = "F1.0", display_width = 4L, labels = c(Masculin = 1, 
    Féminin = 2), class = c("haven_labelled", "vctrs_vctr", 
    "double")), Age = structure(c(17, 26, 1, 31, 36, 0, 27, 38, 
    2, 30, 34, 6, 11, 41, 42, 2, 9, 47, 28, 33), label = "Age", format.spss = "F2.0", display_width = 4L), 
    Age_quin = structure(c(4, 6, 1, 7, 8, 1, 6, 8, 1, 7, 7, 2, 
    3, 9, 9, 1, 2, 10, 6, 7), label = "Age quinquenal de l'individu", format.spss = "F8.2", display_width = 10L, labels = c(`0-4` = 1, 
    `5-9` = 2, `10-14` = 3, `15-19` = 4, `20-24` = 5, `25-29` = 6, 
    `30-34` = 7, `35-39` = 8, `40-45` = 9, `45-49` = 10, `50-54` = 11, 
    `55-59` = 12, `60-64` = 13, `65-69` = 14, `70-74` = 15, `75 et plus` = 16
    ), class = c("haven_labelled", "vctrs_vctr", "double")), 
    Liendeparenté = structure(c(2, 1, 3, 2, 1, 3, 2, 1, 3, 2, 
    1, 3, 3, 2, 1, 3, 3, 7, 2, 1), label = "Lien de parenté avec le chef de ménage", format.spss = "F2.0", display_width = 4L, labels = c(`Chef de ménage` = 1, 
    `Epoux (se) du chef de ménage` = 2, `Fils (fille) du chef de ménage` = 3, 
    `Epouse du fils /Epoux de la fille du chef de ménage` = 4, 
    `Petit fils (petite fille) du chef de ménage` = 5, `Père ou mère du chef de ménage` = 6, 
    `Père ou mère de l’époux (épouse) du chef de ménage` = 7, 
    `Frère ou sœur du chef de ménage` = 8, `Autres personne en lien de parenté avec le chef de ménage` = 9, 
    `Autres personne sans lien de parenté avec le chef de ménage` = 10, 
    `Personne travaillant pour le compte du ménage: aude ménagère, chauffeur, jardinier...` = 11
    ), class = c("haven_labelled", "vctrs_vctr", "double")), 
    Etat_matrimonial = structure(c(2, 2, 1, 2, 2, 1, 2, 2, 1, 
    2, 2, 1, 1, 2, 2, 1, 1, 3, 2, 2), label = "Etat matrimonial", format.spss = "F1.0", display_width = 15L, labels = c(Célibataire = 1, 
    `Marié(e)` = 2, `Divorcé(e)` = 3, `Veuf(ve)` = 4), class = c("haven_labelled", 
    "vctrs_vctr", "double")), Niveau_scolaire_agreg = structure(c(2, 
    2, NA, 3, 4, NA, 2, 2, NA, 5, 5, 2, 2, 5, 5, NA, 2, 2, 4, 
    4), label = "Le plus haut niveau scolaire", format.spss = "F8.2", display_width = 7L, labels = c(`Sans niveau` = 0, 
    Préscolaire = 1, Primaire = 2, `Secondaire collégial` = 3, 
    `Secondaire qualifiant` = 4, Supérieur = 5, `Autre niveau` = 6
    ), class = c("haven_labelled", "vctrs_vctr", "double")), 
    Diplôme_agregé = structure(c(1, 2, 0, 2, 2, 0, 1, 1, 0, 
    3, 3, 0, 1, 3, 3, 0, 0, 1, 2, 2), label = "Le plus haut diplôme", format.spss = "F4.0", display_width = 10L, labels = c(`Age moins de 10 ans` = 0, 
    `Sans diplôme` = 1, `Niveau moyen` = 2, `Niveau supérieur` = 3
    ), class = c("haven_labelled", "vctrs_vctr", "double")), 
    Type_activité_dominante = structure(c(4, 1, NA, 4, 1, NA, 
    4, 1, NA, 3, 1, NA, 5, 1, 1, NA, 5, 1, 4, 1), label = "Type d'activité habituelle durant les 12 dérniers mois", format.spss = "F2.0", display_width = 9L, labels = c(`Actif occupé` = 1, 
    `Chômeur ayant déjà travaillé` = 2, `Chômeur n' ayant jamais travaillé` = 3, 
    `Femme au foyer` = 4, `Elève, étudiant` = 5, Enfant = 6, 
    `Personne  âgée` = 7, Retraité = 8, Rentier = 9, `Malade/infirme` = 10, 
    `Autre inactif` = 11, `Non déclaré` = 99), class = c("haven_labelled", 
    "vctrs_vctr", "double")), Profession_agreg = structure(c(NA, 
    6, NA, NA, 4, NA, NA, 5, NA, 0, 2, NA, NA, 2, 4, NA, NA, 
    4, NA, 4), label = "Catégories professionnelles dans l'activité habituelle durant les 12 dérniers mois", format.spss = "F1.0", display_width = 16L, labels = c(`Chômeur n'ayant jamais travaillés` = 0, 
    `directeurs et cadres de direction, membres des corps législatifs,cadres supérieurs et membres des professions libérales` = 1, 
    `Cadres moyens et employés de bureau` = 2, `Commerçants, intermédiaires commerciaux  et financiers` = 3, 
    `Exploitants et ouvriers agricoles (y compris pêche, chasse et forêt)` = 4, 
    `Artisans et ouvriers qualifies et conducteurs d'installations et de machines et ouvriers  de l'assemblage` = 5, 
    `Manœuvres non agricoles, manutentionnaires et  travailleurs des petits métiers` = 6, 
    `Non déclaré` = 9), class = c("haven_labelled", "vctrs_vctr", 
    "double")), Secteur_activité_agreg = structure(c(NA, 4, 
    NA, NA, 1, NA, NA, 3, NA, 0, 5, NA, NA, 5, 2, NA, NA, 2, 
    NA, 2), label = "Secteur d'activité habituelle durant les 12 dérniers mois", format.spss = "F1.0", display_width = 14L, labels = c(`Chômeur n'ayant jamais travaillé` = 0, 
    `Agriculture, forêt et pêhe` = 1, Industrie = 2, `Batiment et travaux publics` = 3, 
    Commerce = 4, Service = 5, `Non déclaré` = 9), class = c("haven_labelled", 
    "vctrs_vctr", "double")), Situation_profession_agreg = structure(c(1, 
    3, NA, 1, 2, NA, 1, 3, NA, 0, 2, NA, 1, 2, 2, NA, 1, 2, 1, 
    2), label = "Situation dans la profession habituelle durant les 12 dérniers mois", format.spss = "F1.0", labels = c(`Chômeur n'ayant jamais travaillé` = 0, 
    Inactif = 1, Salarié = 2, Indépendant = 3, Employeur = 4, 
    `Autres situation` = 5, `Non déclaré` = 9), class = c("haven_labelled", 
    "vctrs_vctr", "double"))), row.names = c(NA, -20L), class = c("tbl_df", 
"tbl", "data.frame"))

CodePudding user response:

Here's an approach where I summarize individuals as a few lookup tables, each with various columns but only one row per N_ménage, which can then be joined to the married_women table using that as a key. Joins should be much faster than a looped lookup, especially as the data gets larger.

(As you tweak your code, check and make sure the lookups only ever have one row per N_ménage, e.g. by running count(child_count, N_ménage) %>% count(n). That will count how many times each N_ménage appears (n), and then count those counts (nn). We should see only one n value, and then a number nn corresponding to the number of N_ménage where this facet of data is available. I wonder if you ever would face the situation of, for example, multiple husbands over the survey period; perhaps you would then want to take the average / highest / lowest education level...)

For the sample data there are many NAs, but I hope the general logic would apply to your larger data, and you'd presumably want to replace all the NA's with zeroes.

library(tidyverse)

child_count <- individuals %>% filter(Liendeparenté == 3) %>%
  group_by(N_ménage) %>%
  summarize(child_count = n(),
            child_under6 = sum(Age <= 6))

husbands <- individuals %>% filter(Liendeparenté == 1) %>% 
  transmute(N_ménage, H_age = Age, H_edu = Niveau_scolaire_agreg)

parents_with <- individuals %>% filter(Liendeparenté %in% 6:7) %>%
  count(N_ménage, type = case_when(
    Liendeparenté == 7 & Sexe == 2 ~ "w_mum",
    Liendeparenté == 6 & Sexe == 2 ~ "w_dad",
    Liendeparenté == 7 & Sexe == 1 ~ "h_mum",
    Liendeparenté == 6 & Sexe == 1 ~ "h_dad")) %>%
  pivot_wider(names_from = type, values_from = n)

married_women %>%
  select(N_ménage, Age, Niveau_scolaire_agreg) %>%
  left_join(child_count) %>%
  left_join(husbands) %>%
  left_join(parents_with)
  • Related