Home > Blockchain >  Clean coordiates data from same colum in R
Clean coordiates data from same colum in R

Time:11-02

Im want to obtain the each coordinate, long and lat, but all data are in the same cell. The idea is put each coordeante in a diferent colums.

Im create number variable but dont get all number of Store Location.

liquor2  <- structure(list(`Store Location` = c(" -93.619455 42.022848", 
" -93.669896 42.02160500000001", " -93.669896 42.02160500000001", 
NA, NA, " -93.618911 42.022854", " -93.669896 42.02160500000001", 
" -93.619455 42.022848", " -93.669896 42.02160500000001", NA, 
" -93.669896 42.02160500000001", NA, " -93.618911 42.022854", 
NA, " -93.618911 42.022854", NA, " -93.669896 42.02160500000001", 
" -93.669896 42.02160500000001", " -93.610343 42.017115", " -93.618911 42.022854"
)), row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame"
))

### Run this code but only obtained long data

liquor2 %>% 
  mutate(number= as.numeric(parse_number(`Store Location`)),
    long=str_sub(number,1,10),
    lat=str_sub(number, 11,13)) %>% View()

# `Store Location`                   number long       lat  
# <chr>                               <dbl> <chr>      <chr>
#   1 " -93.619455 42.022848"          -93.6 -93.619455 ""   
# 2 " -93.669896 42.02160500000001"  -93.7 -93.669896 ""   
# 3 " -93.669896 42.02160500000001"  -93.7 -93.669896 ""   
# 4  NA                               NA   NA         NA   
# 5  NA                               NA   NA         NA   
# 6 " -93.618911 42.022854"          -93.6 -93.618911 ""   
# 7 " -93.669896 42.02160500000001"  -93.7 -93.669896 ""   
# 8 " -93.619455 42.022848"          -93.6 -93.619455 ""   
# 9 " -93.669896 42.02160500000001"  -93.7 -93.669896 ""   
# 10  NA                               NA   NA         NA   
# 11 " -93.669896 42.02160500000001"  -93.7 -93.669896 ""   
# 12  NA                               NA   NA         NA   
# 13 " -93.618911 42.022854"          -93.6 -93.618911 ""   
# 14  NA                               NA   NA         NA   
# 15 " -93.618911 42.022854"          -93.6 -93.618911 ""   
# 16  NA                               NA   NA         NA   
# 17 " -93.669896 42.02160500000001"  -93.7 -93.669896 ""   
# 18 " -93.669896 42.02160500000001"  -93.7 -93.669896 ""   
# 19 " -93.610343 42.017115"          -93.6 -93.610343 ""   
# 20 " -93.618911 42.022854"          -93.6 -93.618911 "" 
# 

CodePudding user response:

Using tidyr::separate:

library(tidyverse)

liquor2 %>%
  mutate(`Store Location` = trimws(`Store Location`)) %>%
  separate(`Store Location`, sep = " ", into = c("Lat", "Lon")) %>%
  mutate(across(Lat:Lon, as.numeric))
#> # A tibble: 20 x 2
#>      Lat   Lon
#>    <dbl> <dbl>
#>  1 -93.6  42.0
#>  2 -93.7  42.0
#>  3 -93.7  42.0
#>  4  NA    NA  
#>  5  NA    NA  
#>  6 -93.6  42.0
#>  7 -93.7  42.0
#>  8 -93.6  42.0
#>  9 -93.7  42.0
#> 10  NA    NA  
#> 11 -93.7  42.0
#> 12  NA    NA  
#> 13 -93.6  42.0
#> 14  NA    NA  
#> 15 -93.6  42.0
#> 16  NA    NA  
#> 17 -93.7  42.0
#> 18 -93.7  42.0
#> 19 -93.6  42.0
#> 20 -93.6  42.0

Edit

To see more decimal points in the output, we can change the number of significant figures printed:

options(pillar.sigfig = 8)

liquor2 %>%
  mutate(`Store Location` = trimws(`Store Location`)) %>%
  separate(`Store Location`, sep = " ", into = c("Lat", "Lon")) %>%
  mutate(across(Lat:Lon, as.numeric))
#> # A tibble: 20 x 2
#>           Lat       Lon
#>         <dbl>     <dbl>
#>  1 -93.619455 42.022848
#>  2 -93.669896 42.021605
#>  3 -93.669896 42.021605
#>  4  NA        NA       
#>  5  NA        NA       
#>  6 -93.618911 42.022854
#>  7 -93.669896 42.021605
#>  8 -93.619455 42.022848
#>  9 -93.669896 42.021605
#> 10  NA        NA       
#> 11 -93.669896 42.021605
#> 12  NA        NA       
#> 13 -93.618911 42.022854
#> 14  NA        NA       
#> 15 -93.618911 42.022854
#> 16  NA        NA       
#> 17 -93.669896 42.021605
#> 18 -93.669896 42.021605
#> 19 -93.610343 42.017115
#> 20 -93.618911 42.022854
  • Related