I have transportation data between UK cities:
- from
- to
- Transit: if there is transit between these two cities = 1, otherwise =0
- weight: average number of passengers
Here is given sample from my data:
df2 <- data.frame (from = c("London", "London", "London", "London" ,"Liverpool","Liverpool","Liverpool" , "Manchester", "Manchester", "Bristol"),
to = c("Liverpool", "Manchester", "Bristol","Derby", "Manchester", "Bristol","Derby","Bristol","Derby","Derby"),
Transit = c(1,0,1,1,1,1,1,1,0,1),
ave.pas = c(10,0,11,24,40,45,12,34,9,29))
output
from to Transit weight
1 London Liverpool 1 10
2 London Manchester 0 0
3 London Bristol 1 11
4 London Derby 1 24
5 Liverpool Manchester 1 40
6 Liverpool Bristol 1 45
7 Liverpool Derby 1 12
8 Manchester Bristol 1 34
9 Manchester Derby 0 9
10 Bristol Derby 1 29
Now I want to convert it in the list of 2 to get data like this (this is different data but idea is to get the same from my df):
$nodes
# A tibble: 16 x 2
id label
<int> <chr>
1 1 France
2 2 Belgium
3 3 Germany
4 4 Danemark
5 5 Croatia
6 6 Slovenia
7 7 Hungary
8 8 Spain
9 9 Italy
10 10 Netherlands
11 11 UK
12 12 Austria
13 13 Poland
14 14 Switzerland
15 15 Czech republic
16 16 Slovania
$edges
# A tibble: 18 x 3
from to weight
<int> <int> <dbl>
1 1 3 9
2 2 1 4
3 1 8 3
4 1 9 4
5 1 10 2
6 1 11 3
7 3 12 2
8 3 13 2
9 2 3 3
10 3 14 2
11 3 15 2
12 3 10 2
13 4 3 2
14 5 3 2
15 5 16 2
16 5 7 2
17 6 3 2
18 7 16 2.5
CodePudding user response:
In base R:
f2 = c('from', 'to')
nodes = data.frame(label = unique(unlist(df2[f2])))
nodes$id = seq_len(nrow(nodes))
edges = df2[df2$Transit == 1, c(f2, 'ave.pas')]
edges[f2] = lapply(edges[f2], match, nodes$label)
nodes
# label id
# 1 London 1
# 2 Liverpool 2
# 3 Manchester 3
# 4 Bristol 4
# 5 Derby 5
edges
# from to ave.pas
# 1 1 2 10
# 3 1 4 11
# 4 1 5 24
# 5 2 3 40
# 6 2 4 45
# 7 2 5 12
# 8 3 4 34
# 10 4 5 29
CodePudding user response:
Create the dataframe of unique factor levels and create ids using as.numeric
, then use match
to replace the values with the id.
df1 <- data.frame(id = as.numeric(factor(unique(unlist(df2[c(1,2)])), levels = unique(unlist(df2[c(1,2)])))),
label = factor(unique(unlist(df2[c(1,2)])), levels = unique(unlist(df2[c(1,2)]))))
# id label
#1 1 London
#2 2 Liverpool
#3 3 Manchester
#4 4 Bristol
#5 5 Derby
df2$from <- df1$id[match(df2$from, df1$label)]
df2$to <- df1$id[match(df2$to, df1$label)]
# from to Transit ave.pas
#1 1 2 1 10
#2 1 3 0 0
#3 1 4 1 11
#4 1 5 1 24
#5 2 3 1 40
#6 2 4 1 45
#7 2 5 1 12
#8 3 4 1 34
#9 3 5 0 9
#10 4 5 1 29
Edit: you actually don't need to convert to factor (this comes then very close to @sindri_baldur's answer):
un <- unique(unlist(df2[c(1, 2)]))
df1 <- data.frame(id = seq_along(un), label = un)
df2[c(1, 2)] <- sapply(df2[c(1, 2)], match, df1$label)