I have the following, working, code:
test_hierarchie <- tribble(~child, ~parent,
"A", "B",
"B", "C",
"D", "E"
)
test_hierarchie_transformed <- test_hierarchie %>%
left_join(test_hierarchie, by = c("parent" = "child"), suffix = c("", "_grant")) %>%
left_join(test_hierarchie, by = c("parent_grant" = "child"), suffix = c("", "_grant")) %>%
left_join(test_hierarchie, by = c("parent_grant_grant" = "child"), suffix = c("", "_grant")) %>%
left_join(test_hierarchie, by = c("parent_grant_grant_grant" = "child"), suffix = c("", "_grant")) %>%
left_join(test_hierarchie, by = c("parent_grant_grant_grant_grant" = "child"), suffix = c("", "_grant")) %>%
pivot_longer(names_to = "relation", cols = contains("parent"), values_to = "parent") %>%
filter(!is.na(parent))
With result:
# A tibble: 4 x 3
child relation parent
<chr> <chr> <chr>
1 A parent B
2 A parent_grant C
3 B parent C
4 D parent E
This is the desired result, the large amount of left_joins are there because I'm for the real data not sure what is the maximum hierarchy.
My question is: is there a way to do this more succinct and dynamic? Thanks!
EDIT 1: Yes, I do mean 'grand' instead of 'grant', haha EDIT 2: Great solution, exactly what I was looking for! Thanks everyone for pitching in, the other day I was thinking about another project and iGraph does seem very helpful for that.
CodePudding user response:
Following the suggestion by @zx8754 one option to achieve your desired result would be to do the left_joins
via a recursive function which stops when there are no more matches:
library(dplyr)
library(tidyr)
test_hierarchie <- tribble(
~child, ~parent,
"A", "B",
"B", "C",
"D", "E"
)
left_join_recursive <- function(x, by) {
x <- left_join(x, test_hierarchie, by = setNames("child", by), suffix = c("", "_grant"))
byby <- paste0(by, "_grant")
if (!all(is.na(x[[byby]]))) {
left_join_recursive(x, byby)
} else {
x
}
}
test_hierarchie_transformed <- left_join_recursive(test_hierarchie, "parent") %>%
pivot_longer(names_to = "relation", cols = contains("parent"), values_to = "parent") %>%
filter(!is.na(parent))
test_hierarchie_transformed
#> # A tibble: 4 × 3
#> child relation parent
#> <chr> <chr> <chr>
#> 1 A parent B
#> 2 A parent_grant C
#> 3 B parent C
#> 4 D parent E
To check wether the approach works in a more general case I added another row to your example data:
test_hierarchie <- add_row(test_hierarchie, child = "C", parent = "D")
test_hierarchie_transformed <- left_join_recursive(test_hierarchie, "parent") %>%
pivot_longer(names_to = "relation", cols = contains("parent"), values_to = "parent") %>%
filter(!is.na(parent))
test_hierarchie_transformed
#> # A tibble: 10 × 3
#> child relation parent
#> <chr> <chr> <chr>
#> 1 A parent B
#> 2 A parent_grant C
#> 3 A parent_grant_grant D
#> 4 A parent_grant_grant_grant E
#> 5 B parent C
#> 6 B parent_grant D
#> 7 B parent_grant_grant E
#> 8 D parent E
#> 9 C parent D
#> 10 C parent_grant E
CodePudding user response:
As was mentioned you can use the igraph package, but it probably only pays off for more complex cases:
library(tidyverse)
library(igraph)
test_hierarchie <- tribble(~child, ~parent,
"A", "B",
"B", "C",
"D", "E"
)
g <- graph_from_data_frame(test_hierarchie)
finals <- V(g)[degree(g, mode = "out") == 0]
starts <- V(g)[!V(g) %in% finals]
#starts <- V(g)[degree(g, mode = "in") == 0] # use this to avoid sub-paths
imap_dfr(starts,
~enframe(all_simple_paths(g, from = starts[[.y]], to = finals)[[1]],
name = "parent") %>%
mutate(child = .y)) %>%
filter(child != parent) %>%
select(-value) %>%
group_by(child) %>%
mutate(nr = row_number() - 1) %>%
ungroup() %>%
mutate(relation = map_chr(nr, ~str_c("parent", str_c(rep("_grant", .x), collapse = "")))) %>%
select(child, relation, parent)
# # A tibble: 4 x 3
# child relation parent
# <chr> <chr> <chr>
# 1 A parent B
# 2 A parent_grant C
# 3 B parent C
# 4 D parent E