Home > front end >  Extract specific values of string and make new column in R
Extract specific values of string and make new column in R

Time:09-08

Im want to extract the numbers and add some - (minus) for new variables. Are 1800 rows like that.

test-> structure(list(GNUMBER = c("G12879D", "G13004A", "G51770", "G51771", 
"G51772", "G10017", "G12862", "G19898A", "G51361", "G51664B", 
"G51665", "G51665A", "G51666", "G51663", "G51663A", "G51664", 
"G51664A", "G51653C", "G51652B"), LATITUD = c("18.35N", NA, "20.62N", 
"20.62N", "20.62N", "19.00N", "19.78N", "27.34S", "27.39S", "9.90N", 
"9.90N", "9.90N", "9.90N", "9.90N", "9.90N", "9.90N", "9.90N", 
"9.90N", "9.90N"), LONGITUD = c("99.99W", NA, "101.72W", "101.72W", 
"101.72W", "99.11W", "103.11W", "65.96W", "65.98W", "83.93W", 
"83.93W", "83.93W", "83.93W", "83.93W", "83.93W", "83.93W", "83.93W", 
"83.93W", "83.93W")), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -19L))


library(tidyverse)
library(stringr)

regexp <- "[^1:9] $"

test %>%
  mutate(LATITUD_NEW = str_extract(test$LATITUD, regexp))  

enter image description here

Im want this. Add (-) when the column have a S and W, but no N. Because are the coordinates and need make a map.

enter image description here

CodePudding user response:

It is easier with parse_number as it converts to numeric as well. For the condition, use case_when with str_detect

library(dplyr)
library(stringr)
test %>% 
  mutate(across(LATITUD:LONGITUD, 
   ~ case_when(str_detect(.x, 'S|W')~ -1 * readr::parse_number(.x),
     TRUE ~ readr::parse_number(.x)), .names = "{.col}_NEW"))

-output

# A tibble: 19 × 5
   GNUMBER LATITUD LONGITUD LATITUD_NEW LONGITUD_NEW
   <chr>   <chr>   <chr>          <dbl>        <dbl>
 1 G12879D 18.35N  99.99W          18.4       -100. 
 2 G13004A <NA>    <NA>            NA           NA  
 3 G51770  20.62N  101.72W         20.6       -102. 
 4 G51771  20.62N  101.72W         20.6       -102. 
 5 G51772  20.62N  101.72W         20.6       -102. 
 6 G10017  19.00N  99.11W          19          -99.1
 7 G12862  19.78N  103.11W         19.8       -103. 
 8 G19898A 27.34S  65.96W         -27.3        -66.0
 9 G51361  27.39S  65.98W         -27.4        -66.0
10 G51664B 9.90N   83.93W           9.9        -83.9
11 G51665  9.90N   83.93W           9.9        -83.9
12 G51665A 9.90N   83.93W           9.9        -83.9
13 G51666  9.90N   83.93W           9.9        -83.9
14 G51663  9.90N   83.93W           9.9        -83.9
15 G51663A 9.90N   83.93W           9.9        -83.9
16 G51664  9.90N   83.93W           9.9        -83.9
17 G51664A 9.90N   83.93W           9.9        -83.9
18 G51653C 9.90N   83.93W           9.9        -83.9
19 G51652B 9.90N   83.93W           9.9        -83.9

Or a slighly compact option

test %>% 
  mutate(across(LATITUD:LONGITUD, 
    ~ c(1, -1)[1   str_detect(.x, "S|W")] * readr::parse_number(.x), 
      .names = "{.col}_NEW"))

-output

# A tibble: 19 × 5
   GNUMBER LATITUD LONGITUD LATITUD_NEW LONGITUD_NEW
   <chr>   <chr>   <chr>          <dbl>        <dbl>
 1 G12879D 18.35N  99.99W          18.4       -100. 
 2 G13004A <NA>    <NA>            NA           NA  
 3 G51770  20.62N  101.72W         20.6       -102. 
 4 G51771  20.62N  101.72W         20.6       -102. 
 5 G51772  20.62N  101.72W         20.6       -102. 
 6 G10017  19.00N  99.11W          19          -99.1
 7 G12862  19.78N  103.11W         19.8       -103. 
 8 G19898A 27.34S  65.96W         -27.3        -66.0
 9 G51361  27.39S  65.98W         -27.4        -66.0
10 G51664B 9.90N   83.93W           9.9        -83.9
11 G51665  9.90N   83.93W           9.9        -83.9
12 G51665A 9.90N   83.93W           9.9        -83.9
13 G51666  9.90N   83.93W           9.9        -83.9
14 G51663  9.90N   83.93W           9.9        -83.9
15 G51663A 9.90N   83.93W           9.9        -83.9
16 G51664  9.90N   83.93W           9.9        -83.9
17 G51664A 9.90N   83.93W           9.9        -83.9
18 G51653C 9.90N   83.93W           9.9        -83.9
19 G51652B 9.90N   83.93W           9.9        -83.9
  • Related