i have 2 datasets
i=structure(list(State = c("x1", "x2", "x3", "x4", "x5", "x6",
"x7", "x8", "x9", "x10"), centroid_lat = c(25.0567526488506,
24.5506057345197, 25.108046481826, 25.0238523142178, 24.994141005348,
25.0811976922289, 24.9630616206443, 24.9924361621629, 25.2103128046815,
25.2173817704152), centroid_lon = c(76.5385899060845, 76.7828927388028,
76.4028924220128, 76.4542417907297, 76.4114079037918, 76.4363750139148,
76.4794068845473, 76.4328442796457, 76.5591513892293, 76.600503986722
), fin = c(3219L, 1519L, 2184L, 1919L, 3003L, 2040L, 1843L, 3502L,
1085L, 2215L), fin2 = c(3789L, 1452L, 2420L, 1971L, 1719L, 2006L,
1463L, 1278L, 1351L, 3163L), fin3 = c(1513L, 1740L, 2115L, 2431L,
1148L, 2251L, 1370L, 2122L, 2679L, 1999L), fin4 = c(1903L, 1891L,
1687L, 1301L, 3080L, 2713L, 2006L, 3891L, 2537L, 1145L)), class = "data.frame", row.names = c(NA,
-10L))
and
k=structure(list(State = c("x11", "x12", "x13", "x14", "x15", "x16",
"x17", "x18", "x19", "x20"), centroid_lat = c(24.05675265, 23.55060573,
24.10804648, 24.02385231, 23.99414101, 35.08119769, 34.96306162,
34.99243616, 35.2103128, 35.21738177), centroid_lon = c(75.53858991,
75.78289274, 75.40289242, 75.45424179, 75.4114079, 86.43637501,
86.47940688, 86.43284428, 86.55915139, 86.60050399), fin1 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), fin2 = c(NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), fin3 = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), fin4 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), class = "data.frame", row.names = c(NA,
-10L))
lat and lon are longitude and latitude.
How to subtract from each coordinate lat and lon from the dataset i
each coordinate lat and lon from the file from file k
.
I.E. take the first coordinate of file i
and subtract all the coordinates from the file k
Then we take the second coordinate from the file i
and subtract in order the coordinates from the file k.
Using this formula.
L = SQRT((X1-X2)^2 (Y1-Y2)^2)
where x1=is lat of i
file and x2 is lat of k
files
and y1 is lon of i
file and y2 is lon of k
file
In other words, at the output we should get something like
state..dataset.i X X.1 state._k.dataset lat
1 x1 25.05675 76.53859 x11 24.05675
2 x1 NA NA x12 23.55061
3 x1 NA NA x13 24.10805
4 x1 NA NA x14 24.02385
5 x1 NA NA x15 23.99414
6 x1 NA NA x16 35.08120
7 x1 NA NA x17 34.96306
8 x1 NA NA x18 34.99244
9 x1 NA NA x19 35.21031
10 x1 NA NA x20 35.21738
11 NA NA NA
12 x2 24.55061 76.78289 24.05675
13 NA NA 23.55061
14 NA NA 24.10805
15 NA NA 24.02385
16 NA NA 23.99414
17 NA NA 35.08120
18 NA NA 34.96306
19 NA NA 34.99244
20 NA NA 35.21031
21 NA NA 35.21738
lon diff.lat diff.lot
1 75.53859 1.0000000 1.0000000
2 75.78289 1.5061469 0.7556972
3 75.40289 0.9487062 1.1356975
4 75.45424 1.0329003 1.0843481
5 75.41141 1.0626116 1.1271820
6 86.43638 -10.0244450 -9.8977851
7 86.47941 -9.9063090 -9.9408170
8 86.43284 -9.9356835 -9.8942544
9 86.55915 -10.1535602 -10.0205615
10 86.60050 -10.1606291 -10.0619141
11 NA NA NA
12 75.53859 0.4938531 1.2443028
13 75.78289 1.0000000 1.0000000
14 75.40289 0.4425593 1.3800003
15 75.45424 0.5267534 1.3286509
16 75.41141 0.5564647 1.3714848
17 86.43638 -10.5305920 -9.6534823
18 86.47941 -10.4124559 -9.6965141
19 86.43284 -10.4418304 -9.6499515
20 86.55915 -10.6597071 -9.7762587
21 86.60050 -10.6667760 -9.8176112
or dput(
structure(list(state..dataset.i = c("x1", "x1", "x1", "x1", "x1",
"x1", "x1", "x1", "x1", "x1", "", "x2", "", "", "", "", "", "",
"", "", ""), X = c(25.05675265, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 24.55060573, NA, NA, NA, NA, NA, NA, NA, NA, NA), X.1 = c(76.53858991,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 76.78289274, NA, NA,
NA, NA, NA, NA, NA, NA, NA), state._k.dataset = c("x11", "x12",
"x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", "", "",
"", "", "", "", "", "", "", "", ""), lat = c(24.05675265, 23.55060573,
24.10804648, 24.02385231, 23.99414101, 35.08119769, 34.96306162,
34.99243616, 35.2103128, 35.21738177, NA, 24.05675265, 23.55060573,
24.10804648, 24.02385231, 23.99414101, 35.08119769, 34.96306162,
34.99243616, 35.2103128, 35.21738177), lon = c(75.53858991, 75.78289274,
75.40289242, 75.45424179, 75.4114079, 86.43637501, 86.47940688,
86.43284428, 86.55915139, 86.60050399, NA, 75.53858991, 75.78289274,
75.40289242, 75.45424179, 75.4114079, 86.43637501, 86.47940688,
86.43284428, 86.55915139, 86.60050399), diff.lat = c(1, 1.506146914,
0.948706167, 1.032900335, 1.062611644, -10.02444504, -9.906308972,
-9.935683513, -10.15356016, -10.16062912, NA, 0.493853086, 1,
0.442559253, 0.52675342, 0.556464729, -10.53059196, -10.41245589,
-10.44183043, -10.65970707, -10.66677604), diff.lot = c(1, 0.755697167,
1.135697484, 1.084348115, 1.127182002, -9.897785108, -9.940816978,
-9.894254374, -10.02056148, -10.06191408, NA, 1.244302833, 1,
1.380000317, 1.328650948, 1.371484835, -9.653482275, -9.696514146,
-9.649951541, -9.77625865, -9.817611248)), class = "data.frame", row.names = c(NA,
-21L))
)
Thanks for your valuable help.
CodePudding user response:
If you are trying to work out distances between pairs of points given as degrees of latitude and longitude, then Pythagoras won't be accurate (since the Earth isn't flat), and interpreting the result is problematic (what units are the results in, and how do you interpret these?)
Instead, use a tool that is designed to measure distances between pairs of points. The library sf
makes your specific task very straightforward:
i_sf <- sf::st_as_sf(i, coords = c('centroid_lon', 'centroid_lat'), crs = 4326)
k_sf <- sf::st_as_sf(k, coords = c('centroid_lon', 'centroid_lat'), crs = 4326)
sf::st_distance(i_sf, k_sf)
#> Units: [m]
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
#> [1,] 150308.6 184153.8 155934.9 158811.2 164204.2 1464401 1457540 1456991 1482511 1485655
#> [2,] 137535.8 150581.0 148227.8 146849.3 152168.4 1494367 1487074 1486721 1512525 1515587
#> [3,] 145955.2 184220.0 150280.8 154069.7 159364.9 1468413 1461668 1461067 1486506 1489671
#> [4,] 141922.7 177386.0 147221.4 150326.5 155697.6 1472531 1465705 1465140 1490636 1493786
#> [5,] 136603.5 172694.4 141818.0 144977.8 150342.6 1477798 1470983 1470413 1495901 1499054
#> [6,] 145666.9 182621.2 150424.3 153914.9 159246.6 1468648 1461866 1461281 1486746 1489904
#> [7,] 138623.0 172199.9 144556.4 147198.0 152607.2 1476280 1469404 1468862 1494392 1497533
#> [8,] 137872.3 173332.5 143255.6 146296.2 151672.6 1476609 1469780 1469216 1494714 1497864
#> [9,] 164600.5 200597.6 169336.0 172858.1 178189.9 1449719 1442932 1442349 1467818 1470975
#> [10,] 167853.1 202992.0 172803.6 176177.2 181526.0 1446517 1439707 1439135 1464620 1467773
CodePudding user response:
Maybe expand.grid()
and merge()
combined do the job, then just add a column using whichever formula you need use.
out <- expand.grid(i$State,k$State)
out <- merge(out, k, by.x = "Var2", by.y = "State", all.y = TRUE)
out <- merge(out, i, by.x = "Var1", by.y = "State", all.y = TRUE)
colnames(out) <- c("i_State","k_State","k_centroid_lat", "k_centroid_lon", "k_fin1", "k_fin2", "k_fin3", "k_fin4","k_centroid_lat", "i_centroid_lon", "i_fin1", "i_fin2", "i_fin3", "i_fin4")
out$distance <- sqrt((out$k_centroid_lat - out$i_centroid_lat)^2 (out$k_centroid_lon - out$i_centroid_lon)^2)