Home > database >  SPLIT Long complicated string
SPLIT Long complicated string

Time:03-16

The problem I am having is that the string may start with a quote. The pattern is <name>,<true|false>,<integer>,<decimal>,<decimal>,<decimal>,<decimal>,<decimal>

Some of the numbers with decimals can be negative or positive. The final array I need looks like this...

  • "Cloaca",false,0,0.0,0.0,0.0,0.0,0.0

  • "Transverse septum",true,71,89.87341772151899,0.08377172,0.02481389578163773,0.8709677419354839,0.8461538461538461

  • Vitelline duct,false,0,0.0,0.0,0.0,0.0,0.0

  • Allantois,false,1,1.2658227848101267,0.36915635,-0.038461538461538464,0.0,0.038461538461538464

  • Ventral mesentery,true,7,8.860759493670885,0.05629368,0.013647642679900734,0.12903225806451613,0.11538461538461539

Original String "Cloaca",false,0,0.0,0.0,0.0,0.0,0.0,"Transverse septum",true,71,89.87341772151899,0.08377172,0.02481389578163773,0.8709677419354839,0.8461538461538461,Vitelline duct,false,0,0.0,0.0,0.0,0.0,0.0,Allantois,false,1,1.2658227848101267,0.36915635,-0.038461538461538464,0.0,0.038461538461538464,Ventral mesentery,true,7,8.860759493670885,0.05629368,0.013647642679900734,0.12903225806451613,0.11538461538461539

Patterns Tried $pattern = '/("?[\sa-zA-Z]+"?,(true|false),(\d+,-?\d+.\d+,-?\d+.\d+,-?\d+.\d+,-?\d+.\d+,-?\d+.\d+,?))/';

$pattern = '/("?[\sa-zA-Z]+"?,(false|true),\d+,-?\d+\.\d+,-?\d+\.\d+,-?\d\.\d+,-?\d\.\d+,-?\d\.\d+)/';

$pattern = '/(.+,(false|true),[0-9]{1,},[0-9]{1,}\.[0-9]{1,},-?[0-9]{1,}\.[0-9]{1,},-?[0-9]{1,}\.[0-9]{1,},-?[0-9]{1,}\.[0-9]{1,},-?[0-9]{1,}\.[0-9]{1,})/';

I have too many patterns to show. Any help will help save a head of hair. Still learning RegEx.

CodePudding user response:

You can use str_getcsv to split that string into an array, then use array_chunk to get the chunks you need:

<?php
$string = '"Cloaca",false,0,0.0,0.0,0.0,0.0,0.0,"Transverse septum",true,71,89.87341772151899,0.08377172,0.02481389578163773,0.8709677419354839,0.8461538461538461,Vitelline duct,false,0,0.0,0.0,0.0,0.0,0.0,Allantois,false,1,1.2658227848101267,0.36915635,-0.038461538461538464,0.0,0.038461538461538464,Ventral mesentery,true,7,8.860759493670885,0.05629368,0.013647642679900734,0.12903225806451613,0.11538461538461539';

// Parse the flat comma-separated string into a list of individual fields.
// Separator, enclosure and escape are passed explicitly (these are the
// historical defaults) because relying on the implicit $escape default is
// deprecated as of PHP 8.4. Enclosing quotes are stripped by the parser.
$array = str_getcsv($string, ',', '"', '\\');

// Each record is made of 8 consecutive fields (name, boolean, count and
// five decimals), so chunking by 8 yields one sub-array per record.
print_r(array_chunk($array, 8));

will output

Array
(
    [0] => Array
        (
            [0] => Cloaca
            [1] => false
            [2] => 0
            [3] => 0.0
            [4] => 0.0
            [5] => 0.0
            [6] => 0.0
            [7] => 0.0
        )

    [1] => Array
        (
            [0] => Transverse septum
            [1] => true
            [2] => 71
            [3] => 89.87341772151899
            [4] => 0.08377172
            [5] => 0.02481389578163773
            [6] => 0.8709677419354839
            [7] => 0.8461538461538461
        )

    [2] => Array
        (
            [0] => Vitelline duct
            [1] => false
            [2] => 0
            [3] => 0.0
            [4] => 0.0
            [5] => 0.0
            [6] => 0.0
            [7] => 0.0
        )

    [3] => Array
        (
            [0] => Allantois
            [1] => false
            [2] => 1
            [3] => 1.2658227848101267
            [4] => 0.36915635
            [5] => -0.038461538461538464
            [6] => 0.0
            [7] => 0.038461538461538464
        )

    [4] => Array
        (
            [0] => Ventral mesentery
            [1] => true
            [2] => 7
            [3] => 8.860759493670885
            [4] => 0.05629368
            [5] => 0.013647642679900734
            [6] => 0.12903225806451613
            [7] => 0.11538461538461539
        )
)

Edit: this will not keep the quotes though ("Cloaca"...)

CodePudding user response:

You can omit the capture groups, and match instead of splitting.

// Match every record as a whole instead of splitting the string.
// A record is: an optionally quoted name (letters and whitespace), a boolean,
// an integer count, and five decimals that may each be negative.
// The `+` quantifiers are essential: without them each token would match a
// single character only and no record would be found.
$re = '/"?[\sa-zA-Z]+"?,(?:false|true),\d+,-?\d+\.\d+,-?\d+\.\d+,-?\d+\.\d+,-?\d+\.\d+,-?\d+\.\d+/';
$str = '"Cloaca",false,0,0.0,0.0,0.0,0.0,0.0,"Transverse septum",true,71,89.87341772151899,0.08377172,0.02481389578163773,0.8709677419354839,0.8461538461538461,Vitelline duct,false,0,0.0,0.0,0.0,0.0,0.0,Allantois,false,1,1.2658227848101267,0.36915635,-0.038461538461538464,0.0,0.038461538461538464,Ventral mesentery,true,7,8.860759493670885,0.05629368,0.013647642679900734,0.12903225806451613,0.11538461538461539';

preg_match_all($re, $str, $matches);

// $matches[0] holds the full text of every match, i.e. one string per record.
print_r($matches[0]);

Output

Array
(
    [0] => "Cloaca",false,0,0.0,0.0,0.0,0.0,0.0
    [1] => "Transverse septum",true,71,89.87341772151899,0.08377172,0.02481389578163773,0.8709677419354839,0.8461538461538461
    [2] => Vitelline duct,false,0,0.0,0.0,0.0,0.0,0.0
    [3] => Allantois,false,1,1.2658227848101267,0.36915635,-0.038461538461538464,0.0,0.038461538461538464
    [4] => Ventral mesentery,true,7,8.860759493670885,0.05629368,0.013647642679900734,0.12903225806451613,0.11538461538461539
)

If you want all the values in an array itself and only keep the group values, you can use all capture groups and remove the first entry which is the full match:

// Same record pattern, but with capture groups so every match is returned as
// an array of its parts. The `+` quantifiers are required for the tokens to
// span more than one character.
// Note: group 3 captures the integer count together with the first decimal
// (e.g. "0,0.0"), so each record yields 7 groups rather than 8.
$re = '/("?[\sa-zA-Z]+"?),(false|true),(\d+,-?\d+\.\d+),(-?\d+\.\d+),(-?\d+\.\d+),(-?\d+\.\d+),(-?\d+\.\d+)/';
$str = '"Cloaca",false,0,0.0,0.0,0.0,0.0,0.0,"Transverse septum",true,71,89.87341772151899,0.08377172,0.02481389578163773,0.8709677419354839,0.8461538461538461,Vitelline duct,false,0,0.0,0.0,0.0,0.0,0.0,Allantois,false,1,1.2658227848101267,0.36915635,-0.038461538461538464,0.0,0.038461538461538464,Ventral mesentery,true,7,8.860759493670885,0.05629368,0.013647642679900734,0.12903225806451613,0.11538461538461539';

// PREG_SET_ORDER groups the results per match: $matches[i] is one record.
preg_match_all($re, $str, $matches, PREG_SET_ORDER, 0);

// Index 0 of every match is the full matched text; drop it so only the
// captured group values (keys 1..7) remain.
$matches = array_map(function ($x) {
    unset($x[0]);
    return $x;
}, $matches);
print_r($matches);

That will give an output like

Array
(
    [0] => Array
        (
            [1] => "Cloaca"
            [2] => false
            [3] => 0,0.0
            [4] => 0.0
            [5] => 0.0
            [6] => 0.0
            [7] => 0.0
        )
...etc.
  • Related