Home > front end >  Extracting column values based on condition from another data table in R
Extracting column values based on condition from another data table in R

Time:06-23

I have 2 data table (participant and region) and I want to extract the region based on the participant buildingId. Could anyone advise me on how I can do this? I will basically have a new column under the participant data table called region to map the region based on his apartment.

dput(Participant)

dput(participants_1)
structure(list(participantId = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 
26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 
58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 
74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 
90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 
105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 
118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 
131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 
144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 
157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 
170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 
183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 
196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 
209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 
222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 
235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 
248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 
261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 
274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 
287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 
300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 
313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 
326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 
339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 
352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 
365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 
378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 
391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 
404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 
417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 
430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 
443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 
456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 
469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 
482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 
495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 
508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 
521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 
534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 
547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 
560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 
573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 
586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 
599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 
612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 
625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 
638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 
651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 
664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 
677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 
690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 
703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 
716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 
729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 
742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 
755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 
768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 
781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 
794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 
807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 
820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 
833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 
846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 
859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 
872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 
885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 
898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 
911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 
924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 
937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 
950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 
963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 
976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 
989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 
1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010), 
    Apartmentvisits = c(786L, 1093L, 949L, 827L, 963L, 997L, 
    1220L, 1118L, 1174L, 860L, 871L, 993L, 849L, 1173L, 1096L, 
    823L, 876L, 754L, 738L, 985L, 982L, 1316L, 1098L, 939L, 1041L, 
    750L, 1036L, 768L, 1006L, 825L, 1176L, 660L, 648L, 614L, 
    1185L, 1153L, 873L, 901L, 767L, 1231L, 1236L, 1075L, 709L, 
    848L, 8L, 708L, 821L, 941L, 1072L, 685L, 1106L, 988L, 1135L, 
    984L, 1150L, 1004L, 1015L, 810L, 910L, 984L, 1051L, 842L, 
    759L, 944L, 1016L, 998L, 1187L, 934L, 959L, 1069L, 999L, 
    1059L, 805L, 1012L, 1187L, 1057L, 816L, 1107L, 761L, 531L, 
    844L, 1050L, 1126L, 488L, 1017L, 1019L, 953L, 1060L, 1175L, 
    725L, 849L, 1073L, 813L, 994L, 947L, 941L, 966L, 1179L, 779L, 
    941L, 766L, 441L, 817L, 646L, 987L, 1041L, 1046L, 912L, 618L, 
    964L, 904L, 1103L, 894L, 1111L, 845L, 900L, 690L, 1127L, 
    769L, 1064L, 1190L, 975L, 777L, 734L, 730L, 1163L, 1007L, 
    8L, 1090L, 1200L, 1008L, 1202L, 1046L, 796L, 537L, 1245L, 
    1053L, 1018L, 1064L, 1075L, 726L, 1120L, 7L, 1101L, 1054L, 
    1018L, 1028L, 1066L, 664L, 1139L, 663L, 757L, 980L, 1152L, 
    7L, 851L, 1024L, 565L, 996L, 626L, 655L, 5L, 1152L, 1063L, 
    1158L, 1083L, 1032L, 987L, 907L, 932L, 933L, 666L, 1033L, 
    972L, 1175L, 1039L, 1149L, 1018L, 618L, 1271L, 456L, 1084L, 
    942L, 464L, 1156L, 462L, 677L, 451L, 698L, 449L, 985L, 766L, 
    442L, 683L, 454L, 452L, 922L, 1112L, 466L, 924L, 447L, 457L, 
    923L, 1141L, 450L, 933L, 1038L, 644L, 1020L, 612L, 1056L, 
    453L, 1260L, 652L, 452L, 444L, 460L, 1174L, 1209L, 682L, 
    1051L, 917L, 1002L, 1087L, 468L, 452L, 971L, 987L, 438L, 
    608L, 444L, 1051L, 942L, 460L, 1014L, 616L, 590L, 458L, 447L, 
    1244L, 1065L, 452L, 812L, 1018L, 958L, 730L, 1109L, 1220L, 
    455L, 795L, 1006L, 874L, 1012L, 896L, 735L, 1046L, 8L, 1018L, 
    1138L, 521L, 944L, 1057L, 9L, 688L, 1018L, 488L, 1124L, 8L, 
    741L, 1050L, 986L, 1008L, 1097L, 904L, 1024L, 624L, 753L, 
    1067L, 584L, 8L, 599L, 932L, 612L, 1050L, 1017L, 8L, 953L, 
    1131L, 8L, 1117L, 783L, 954L, 1027L, 810L, 1098L, 847L, 979L, 
    507L, 9L, 1018L, 1025L, 7L, 614L, 981L, 627L, 1150L, 957L, 
    1152L, 776L, 860L, 1073L, 890L, 522L, 475L, 1025L, 734L, 
    790L, 974L, 817L, 707L, 1003L, 1205L, 1047L, 1084L, 691L, 
    737L, 996L, 538L, 830L, 491L, 1026L, 866L, 712L, 554L, 981L, 
    836L, 1060L, 723L, 1088L, 563L, 699L, 744L, 958L, 912L, 892L, 
    991L, 8L, 698L, 1004L, 955L, 1005L, 811L, 9L, 903L, 661L, 
    1032L, 8L, 967L, 1213L, 1180L, 947L, 766L, 1092L, 688L, 587L, 
    959L, 654L, 924L, 690L, 782L, 1097L, 973L, 1091L, 917L, 927L, 
    923L, 818L, 464L, 1095L, 476L, 8L, 941L, 6L, 8L, 9L, 991L, 
    841L, 973L, 1118L, 992L, 1079L, 529L, 10L, 1031L, 972L, 1111L, 
    1028L, 907L, 1090L, 783L, 1000L, 724L, 1071L, 1041L, 687L, 
    889L, 8L, 7L, 973L, 1039L, 856L, 1012L, 1032L, 801L, 820L, 
    616L, 972L, 1094L, 1126L, 1107L, 460L, 945L, 933L, 466L, 
    1085L, 948L, 898L, 1061L, 452L, 833L, 1190L, 896L, 951L, 
    439L, 1071L, 1285L, 1090L, 972L, 806L, 1247L, 740L, 1315L, 
    999L, 1113L, 565L, 1023L, 756L, 1029L, 870L, 981L, 859L, 
    635L, 1088L, 1085L, 880L, 748L, 996L, 548L, 1017L, 1018L, 
    1057L, 479L, 1119L, 926L, 978L, 466L, 860L, 1026L, 800L, 
    801L, 644L, 457L, 1125L, 1055L, 446L, 848L, 943L, 986L, 825L, 
    965L, 707L, 1047L, 960L, 715L, 1134L, 759L, 852L, 946L, 979L, 
    546L, 654L, 678L, 790L, 958L, 445L, 1040L, 637L, 1123L, 1046L, 
    1045L, 807L, 1137L, 1047L, 1113L, 742L, 1218L, 866L, 922L, 
    1127L, 9L, 7L, 804L, 8L, 730L, 11L, 983L, 1142L, 987L, 957L, 
    1236L, 745L, 797L, 706L, 7L, 722L, 475L, 11L, 1081L, 450L, 
    952L, 1072L, 767L, 805L, 1083L, 822L, 1016L, 934L, 1023L, 
    1250L, 8L, 1073L, 8L, 890L, 1111L, 877L, 1073L, 877L, 731L, 
    814L, 714L, 1093L, 1089L, 507L, 7L, 1020L, 1087L, 1021L, 
    1055L, 9L, 1064L, 765L, 1038L, 689L, 1108L, 954L, 1148L, 
    921L, 8L, 8L, 1171L, 1090L, 875L, 8L, 988L, 9L, 8L, 1095L, 
    8L, 1138L, 1114L, 8L, 1131L, 1026L, 956L, 597L, 570L, 449L, 
    1115L, 1126L, 8L, 824L, 581L, 662L, 1047L, 913L, 12L, 1142L, 
    922L, 775L, 7L, 1151L, 1044L, 8L, 11L, 5L, 8L, 1020L, 967L, 
    1054L, 745L, 976L, 9L, 735L, 1050L, 802L, 8L, 1089L, 9L, 
    1002L, 739L, 1105L, 12L, 1124L, 949L, 1072L, 1066L, 1152L, 
    633L, 8L, 7L, 1059L, 1024L, 894L, 807L, 9L, 1023L, 747L, 
    881L, 708L, 8L, 793L, 9L, 646L, 8L, 809L, 851L, 1147L, 11L, 
    997L, 705L, 1129L, 770L, 1032L, 7L, 1302L, 5L, 675L, 8L, 
    6L, 1105L, 991L, 819L, 628L, 9L, 701L, 932L, 728L, 805L, 
    7L, 640L, 8L, 1002L, 984L, 905L, 844L, 1082L, 526L, 694L, 
    1131L, 1087L, 1059L, 1041L, 1079L, 788L, 1001L, 1004L, 1222L, 
    992L, 920L, 826L, 1068L, 1055L, 1081L, 864L, 979L, 795L, 
    1054L, 1026L, 733L, 457L, 1052L, 914L, 473L, 1100L, 1132L, 
    1150L, 1006L, 448L, 1023L, 688L, 711L, 609L, 450L, 617L, 
    1136L, 1129L, 473L, 455L, 760L, 1123L, 1100L, 459L, 957L, 
    639L, 781L, 980L, 989L, 1141L, 775L, 888L, 1104L, 1127L, 
    1052L, 872L, 892L, 584L, 736L, 1045L, 1104L, 1042L, 1087L, 
    447L, 916L, 712L, 966L, 838L, 776L, 862L, 710L, 606L, 453L, 
    1151L, 698L, 853L, 453L, 914L, 8L, 7L, 827L, 1077L, 8L, 8L, 
    7L, 931L, 796L, 1131L, 1030L, 785L, 8L, 791L, 1113L, 4L, 
    1057L, 9L, 7L, 1093L, 829L, 771L, 1145L, 775L, 4L, 857L, 
    929L, 900L, 945L, 8L, 454L, 853L, 906L, 8L, 7L, 8L, 7L, 8L, 
    8L, 1024L, 1025L, 1005L, 1162L, 8L, 640L, 1031L, 7L, 663L, 
    1004L, 973L, 8L, 638L, 7L, 768L, 625L, 748L, 1006L, 941L, 
    1011L, 1056L, 5L, 8L, 10L, 733L, 477L, 652L, 1153L, 665L, 
    6L, 8L, 790L, 9L, 7L, 571L, 916L, 8L, 5L, 578L, 8L, 1167L, 
    813L, 891L, 817L, 8L, 1064L, 1135L, 8L, 847L, 1039L, 533L, 
    7L, 8L, 1001L, 1161L, 1120L, 1094L, 897L, 8L, 804L, 8L, 8L, 
    1123L, 7L, 5L, 9L, 816L, 8L, 1016L, 8L, 678L, 1298L, 8L, 
    1041L, 931L, 1146L, 772L, 8L, 740L, 950L, 8L, 4L, 1021L, 
    995L, 979L, 862L, 1214L, 717L, 4L, 8L, 5L, 8L, 8L, 1070L, 
    509L, 1158L, 904L, 7L, 958L, 996L, 840L, 6L, 7L, 977L, 1005L, 
    8L, 8L, 4L, 649L, 717L, 967L, 1123L, 7L, 866L, 4L, 1104L, 
    4L, 1220L, 640L, 852L, 1187L, 1064L, 1031L, 989L, 4L, 6L, 
    1150L, 841L, 5L, 5L, 5L, 460L, 458L, 955L, 1017L, 625L, 1082L, 
    997L, 834L, 993L, 1127L, 764L, 742L, 1005L, 1264L, 879L, 
    1140L, 1124L, 1044L, 1062L, 1084L, 445L, 823L, 1134L, 1016L, 
    518L, 1118L, 768L, 1064L, 990L, 637L, 574L, 952L, 625L, 915L, 
    946L, 488L, 1118L, 457L, 1073L, 456L, 451L, 1080L, 1231L, 
    966L, 1043L, 843L, 896L, 855L, 1053L, 699L, 603L, 911L, 865L, 
    710L, 1008L, 746L, 740L, 540L, 985L, 789L, 950L, 517L, 648L, 
    1049L, 1075L, 1143L, 757L, 1210L, 1102L, 969L, 988L, 914L, 
    880L, 838L, 1151L, 463L, 703L, 933L, 635L, 947L, 976L, 1262L, 
    574L, 1057L, 811L)), row.names = c(NA, -1011L), class = c("tbl_df", 
"tbl", "data.frame"))

dput(head(Apartment,100))

structure(list(units = c(NA, "[481,498,534,652,818]", "[382]", 
NA, "[231]", NA, NA, "[1339]", NA, "[1050,1233]", "[1390,1459,1499,1563,1650]", 
NA, "[18,172]", "[214]", NA, NA, "[837,883]", "[145,227,247]", 
NA, "[94]", "[1388,1404,1426,1484,1534,1555,1632]", "[451,661,733]", 
NA, "[326,327,331]", NA, "[1761]", "[1346]", "[1775]", "[443]", 
"[463,513,728,743]", "[267,329]", "[831]", "[20,209,362]", "[435]", 
NA, NA, "[1747]", "[1402,1567,1596,1606]", NA, NA, NA, NA, "[36,320]", 
NA, NA, NA, "[1326]", "[607,646,668,716,758,816]", "[1294,1308]", 
"[1073,1180,1208,1238]", NA, "[634,658,707,759]", NA, NA, NA, 
"[1027,1093,1144]", "[1500,1549]", "[447]", "[508,691,725,765,824]", 
"[546,639,683,689]", "[7,63,104,119,219]", "[942,995,1057,1091,1231]", 
NA, "[1305]", "[230,252]", NA, NA, NA, NA, NA, "[601,635,734]", 
NA, "[205]", NA, "[945,1155,1229]", "[1302]", "[962,1055,1105,1132,1200,1264]", 
"[493,505,549,588]", "[1379,1483,1661,1728]", NA, "[647,766]", 
NA, "[169,375]", "[1505]", NA, "[844,899]", NA, "[1309,1323]", 
"[1372,1506,1571,1579,1642]", "[124,207]", NA, "[1352,1444,1580,1615]", 
"[833,863]", "[21,302,316]", "[1360,1386,1581,1651,1704,1724]", 
NA, "[49,337]", NA, "[1418,1612,1623,1677,1713]", NA), region = c("Central", 
"Central", "South", "Central", "East", "South", "South", "North-west", 
"Central", "Central", "North-west", "North-west", "South", "South", 
"Central", "South", "Central", "East", "East", "South", "North-west", 
"Central", "North-west", "East", "South", "North-west", "North-west", 
"North-west", "East", "Central", "South", "Central", "East", 
"East", "South", "South", "North-west", "North-west", "South", 
"Central", "Central", "South", "South", "North-west", "Central", 
"East", "North-west", "Central", "Central", "Central", "Central", 
"Central", "Central", "North-west", "South", "North-west", "North-west", 
"South", "Central", "Central", "South", "Central", "Central", 
"Central", "South", "Central", "Central", "Central", "South", 
"Central", "Central", "Central", "South", "North-west", "North-west", 
"North-west", "Central", "Central", "North-west", "Central", 
"Central", "East", "East", "North-west", "North-west", "Central", 
"Central", "North-west", "North-west", "South", "Central", "North-west", 
"Central", "East", "North-west", "Central", "East", "North-west", 
"North-west", "East")), row.names = c(NA, 100L), class = "data.frame")

CodePudding user response:

We could use tidyr::separate_rows's defaults to split the units sequences into separate lines for each, and then join the two tables. Since the apartment version of the variable is an integer, I use convert = TRUE to let tidyr guess the intended output format, which it does correctly in this case. If it hadn't, we could add a mutate(units = as.integer(units)) %>% line to do so.

library(dplyr)
apartment_long <- apartment %>% tidyr::separate_rows(units, convert = TRUE)
participant %>% left_join(apartment_long, by = c("Apartmentvisits" = "units"))

Result

 A tibble: 1,011 × 3
   participantId Apartmentvisits region    
           <dbl>           <int> <chr>     
 1             0             786 NA        
 2             1            1093 North-west
 3             2             949 NA        
 4             3             827 NA        
 5             4             963 NA        
 6             5             997 NA        
 7             6            1220 NA        
 8             7            1118 NA        
 9             8            1174 NA        
10             9             860 NA        
# … with 1,001 more rows
  • Related