I have the variables price (dollars) and size (sq. feet). From my dataset, I want to find the 10 houses that best fit the description "price close to 100 and size close to 30". I can see that I have to use the abs() function and find the smallest absolute difference. However, I want both conditions to hold at the same time, and I don't know how to do that with abs(). I thought about doing
which(abs(...))
but I don't know how.
Any ideas?
The data:
# Example data for the question, in dput() form: a data frame with 204 rows
# (see row.names below) and two numeric columns,
#   new_baltimore.SQFT  - house size
#   new_baltimore.PRICE - house price
# The column order (size first, price second) matters: the answers below
# compare the columns positionally against c(size, price) targets.
data_struct <-
structure(
list(
new_baltimore.SQFT = c(
11.25,
28.92,
30.62,
26.12,
22.04,
39.42,
21.88,
25.6,
44.12,
19.88,
12.08,
10.99,
12.8,
29.79,
14.3,
13.72,
11.84,
18.06,
10.72,
8.96,
14.38,
36.75,
20,
22.82,
24.86,
19.2,
11.58,
26,
14.4,
11.62,
23.08,
23.76,
15.6,
10,
22.8,
16.76,
22.1,
14.28,
15.36,
16,
23.04,
24.94,
11.82,
12.88,
11.2,
18.12,
38.25,
17.68,
19.02,
32.8,
15.16,
21.975,
12.6,
23.52,
17.52,
47.61,
20.55,
35.52,
8.4,
13.68,
14.48,
12.8,
12.8,
18,
15.4,
10.08,
8.96,
8.96,
20,
12.88,
12,
18.16,
14.28,
26,
12.02,
20.8,
11.78,
8.68,
17.6,
11.4,
44.55,
46.32,
10.24,
9.6,
31.2,
26.4,
13.6,
27.48,
17.86,
18.04,
14.84,
10.46,
14.56,
6.96,
9.5,
11.86,
12.88,
12.32,
6.72,
10.08,
15.6,
6.72,
11.52,
11.76,
10.24,
11.52,
9.28,
6.72,
15.6,
15.5,
9.84,
15.6,
13.76,
10.24,
5.76,
10.08,
11.52,
12.15,
9.77,
15,
14.4,
14.5,
22.54,
10.24,
7.8,
8.4,
10.92,
42.9,
9,
10.5,
10.08,
12.6,
8.96,
8.58,
7.56,
10.8,
13.44,
10.24,
14.44,
12.24,
13.2,
9.6,
15.22,
24.16,
10.24,
10.24,
9.88,
23.2,
17.68,
24.3,
35.94,
21.6,
11.02,
21,
23.92,
14.4,
28,
11.44,
21.94,
10.24,
16.86,
9.92,
13.44,
12,
14.76,
8.96,
11.52,
8.64,
8.12,
11.12,
11.28,
10.36,
11.52,
17.1,
17.52,
10.73,
11.2,
12.8,
12,
41.07,
12.8,
22.36,
10.56,
13.44,
11.02,
17.98,
18.88,
11.76,
9.36,
11.52,
27.3,
23.04,
17.68,
13.36,
11.6,
11.52,
9.98,
12.96,
11.13,
19.6,
11.52,
12.16,
0, # NOTE(review): a size of 0 may be a placeholder for a missing value - confirm
10.64
),
new_baltimore.PRICE = c(
47,
113,
165,
104.3,
62.5,
70,
127.5,
64.5,
145,
63.5,
58.9,
65,
48,
3.5,
12.8,
17.5,
36,
41.9,
53.5,
24.5,
24.5,
55.5,
60,
51,
46,
46,
44,
54.9,
42.5,
44,
44.9,
37.9,
33,
43.9,
49.6,
52,
37.5,
50,
35.9,
42.9,
107,
112,
44.9,
55,
102,
35.5,
62.9,
39,
110,
8,
62,
85.9,
57,
110,
67.7,
89.5,
70,
74,
13,
48,
24,
53.5,
34.5,
53,
87.5,
33.5,
24,
9.6,
30,
41,
30,
38.9,
20.7,
49.9,
18.6,
39,
34,
16,
18.9,
15.2,
41.5,
53,
22,
24.9,
6.7,
32.5,
30,
59,
29.5,
26,
16.5,
39,
48.9,
33.5,
46,
54,
57.9,
37.9,
32,
31,
34,
29,
32.5,
51.9,
31,
41.8,
48,
28,
35,
46.5,
51.9,
35.4,
16,
35,
35,
36.5,
35.9,
45,
40,
35,
38,
37,
23,
25.5,
39.5,
21.5,
9,
67.5,
13.4,
12.5,
28.5,
23,
33.5,
9,
11,
30.9,
31.65,
33,
33.4,
47,
40,
46,
45.5,
57,
29.9,
30,
34,
51,
64.5,
57.5,
85.5,
61,
38,
56.5,
60.4,
51.5,
54,
69,
56,
27.9,
37.5,
32.9,
22,
29.9,
39.9,
32.6,
38.5,
21.5,
25.9,
27.5,
22.9,
31.5,
8.5,
5.5,
33,
57,
47,
43.5,
43.9,
68.5,
44.25,
61,
40,
44.5,
57,
35,
35.1,
64.5,
40,
42.6,
50,
58,
58,
55,
43,
54,
39,
45,
42,
38.9,
43.215,
26.5,
30,
29.5
)
),
class = "data.frame",
# Compact row-name form: c(NA, -n) stands for automatic row names 1..n.
row.names = c(NA,-204L)
)
CodePudding user response:
I think you're right about using absolute differences (`abs`). Subtract the target size and price from the data to get a zero point, similar to de-meaning; `replicate` builds a matrix of the target values with the same shape as the data, so it can be subtracted directly. Then `rank` the absolute differences within each column, add the ranks per row with `rowSums`, and `rank` those sums again. The first `k` values of `order` applied to the result give you the desired apartments!
# Return the k rows of `data` that are jointly closest to a target size and
# price.
#
# Closeness is rank-based: the absolute deviation from the target is ranked
# within each column, the ranks are summed per row, and the rows with the
# smallest rank sums win. Ranking first keeps the column with the larger
# numeric scale from dominating the result.
#
# Arguments:
#   price, size  Target values. `size` is matched against the first column of
#                `data` and `price` against the second.
#   k            Number of rows to return. If `k` exceeds nrow(data), all
#                rows are returned (best match first); `k = 0` returns no
#                rows.
#   data         Data frame whose columns are ordered (size, price).
#
# Value: a data frame holding the selected rows of `data`, best match first.
f <- function(price, size, k, data) {
  # Nothing to rank. This also avoids t() failing on the empty list that
  # replicate(0, ...) returns.
  if (nrow(data) == 0L) {
    return(data)
  }
  # Absolute deviation of every row from the target, column by column.
  aux <- abs(data - t(replicate(nrow(data), c(size, price))))
  # Rank within each column, sum the ranks per row, then rank the sums.
  score <- rank(rowSums(as.data.frame(lapply(aux, rank))))
  # head() rather than [1:k]: 1:k yields NA rows when k > nrow(data) and
  # expands to c(1, 0) when k is 0, which would return one row.
  data[head(order(score), k), ]
}
# Ten best matches for a size of 30 and a price of 100.
f(data = data_struct[1:2], k = 10, size = 30, price = 100)
# new_baltimore.SQFT new_baltimore.PRICE
# 2 28.920 113.0
# 4 26.120 104.3
# 42 24.940 112.0
# 54 23.520 110.0
# 58 35.520 74.0
# 41 23.040 107.0
# 151 35.940 85.5
# 8 25.600 64.5
# 88 27.480 59.0
# 52 21.975 85.9
CodePudding user response:
You could do:
# Distance of every row from the target point (size 30, price 100) as
# computed by pdist::pdist(); keep the 10 rows with the smallest distance.
data_struct[
  pdist::pdist(data_struct, c(30, 100))@dist |>
    order() |>
    head(10),
]
new_baltimore.SQFT new_baltimore.PRICE
4 26.120 104.3
41 23.040 107.0
54 23.520 110.0
42 24.940 112.0
2 28.920 113.0
49 19.020 110.0
151 35.940 85.5
52 21.975 85.9
45 11.200 102.0
65 15.400 87.5
Using the tidyverse, you could do:
# Keep the 10 rows with the smallest squared Euclidean distance from the
# target (price 100, size 30); the helper columns are dropped at the end.
data_struct %>%
  mutate(
    price_diff = (new_baltimore.PRICE - 100)^2,
    size_diff = (new_baltimore.SQFT - 30)^2
  ) %>%
  # Order by the SUM of the two squared differences; without the `+` the
  # expression is not valid R.
  slice_min(price_diff + size_diff, n = 10) %>%
  select(-ends_with('diff'))
new_baltimore.SQFT new_baltimore.PRICE
1 26.120 104.3
2 23.040 107.0
3 23.520 110.0
4 24.940 112.0
5 28.920 113.0
6 19.020 110.0
7 35.940 85.5
8 21.975 85.9
9 11.200 102.0
10 15.400 87.5
in base R:
# Squared Euclidean distance of every row from the target (size 30,
# price 100): subtract the target from each column, square, and add up per
# row. The 10 rows with the smallest distance are returned, closest first.
data_struct[
  head(order(rowSums(sweep(as.matrix(data_struct), 2L, c(30, 100))^2)), 10),
]
new_baltimore.SQFT new_baltimore.PRICE
4 26.120 104.3
41 23.040 107.0
54 23.520 110.0
42 24.940 112.0
2 28.920 113.0
49 19.020 110.0
151 35.940 85.5
52 21.975 85.9
45 11.200 102.0
65 15.400 87.5