Home > Net >  javascript: parse a string and split it between tags and text
javascript: parse a string and split it between tags and text

Time:01-16

I am building a script that takes a string and separates its left and right parts based on certain characters.

However, I am having some difficulties handling edge cases.

The string will be of the following format: @tag1@tag2@tag3:lorem ipsum quare id... and I would like to get something like:

{
 tags:["tag1", "tag2", "tag3"],
 text:"lorem ipsum quare id..."
}

However, there are some edge cases that I need to consider:

  • The number of initial "@tag" could vary from 0 to n
  • If there are 0 "@tag", there won't be any ":" to separate the tag part from the "text" and we should return a default tag "default"

Examples

  • @mario:lorem ipsum should return {tags:['mario'], text:"lorem ipsum"}
  • @mario@luigi:lorem ipsum should return {tags:['mario','luigi'], text:"lorem ipsum"}
  • lorem ipsum should return {tags:['default'], text:"lorem ipsum"}

I initially thought about using something like this, but it surely doesn't take into consideration these edge cases and it doesn't work very cleanly

/**
 * Cuts a string at its first colon.
 *
 * @param {string} text - Raw input, e.g. "@tag1@tag2:some text".
 * @returns {string[]} `[text]` when there is no colon; otherwise
 *   `[beforeColon, afterColon, '']` (the trailing empty string is part of
 *   the returned shape).
 */
function splitTagText(text) {
  const colonAt = text.indexOf(':');
  if (colonAt === -1) {
    return [text];
  }

  const head = text.slice(0, colonAt);
  const tail = text.slice(colonAt + 1);
  return [head, tail, ''];
}

/**
 * Lists the tag names found in a "@tag1@tag2" style prefix.
 *
 * @param {string} text - The tag portion of the input.
 * @returns {string[]} Every piece that follows an "@"; whatever precedes
 *   the first "@" is discarded.
 */
function extractTags(text) {
  const [, ...tagNames] = text.split('@');
  return tagNames;
}

/**
 * Parses "@tag1@tag2:some text" into its tags and its text.
 *
 * Input that does not start with "@", or that has no ":" separator, carries
 * no tag prefix: the whole string is returned as the text under the single
 * tag "default".
 *
 * @param {string} text - Raw input string.
 * @returns {{tags: string[], todo: string}} Parsed tags and remaining text.
 */
function processInput(text) {
  // Guard first: without a leading "@tag…:" prefix, splitting on ":" would
  // either leave `todo` undefined or cut ordinary text such as "note: x".
  if (!text.startsWith('@') || !text.includes(':')) {
    return { tags: ['default'], todo: text };
  }

  const [tagPart, todo] = splitTagText(text);
  const tags = extractTags(tagPart);

  return { tags, todo };
}


// Sample inputs: one tag, two tags, and no tag prefix at all.
const tmp1 = '@mario:lorem ipsum';
const tmp2 = '@mario@luigi:lorem ipsum';
const tmp3 = 'lorem ipsum';

// Print the parsed result for each sample, in order.
// The last sample (no tags) is the edge case the question is about.
for (const sample of [tmp1, tmp2, tmp3]) {
  console.log(processInput(sample));
}



CodePudding user response:

/^(@.+?:)?(.+)/ appears to work fine:

// Sample inputs: one tag, two tags, and no tag prefix.
const [tmp1, tmp2, tmp3] = [
    '@mario:lorem ipsum',
    '@mario@luigi:lorem ipsum',
    'lorem ipsum',
];


/**
 * Parses "@tag1@tag2:some text" into its tags and its text.
 *
 * @param {string} s - Raw input string.
 * @returns {{tags: string[], text: string}} The tags found in the leading
 *   "@tag…:" prefix (or ['default'] when there is no such prefix) and the
 *   remaining text.
 */
function processInput(s) {
    // Group 1: optional "@…:" prefix, lazy so it stops at the first colon.
    // Group 2: everything else. `.*` plus the `s` flag means the pattern
    // matches every string (including '' and multi-line text), so the
    // match result is never null.
    const m = s.match(/^(@.+?:)?(.*)/s);
    const prefix = m[1];

    return {
        // Drop the leading "@" and trailing ":" before splitting into names.
        tags: prefix ? prefix.slice(1, -1).split('@') : ['default'],
        text: m[2],
    };
}



// Print the parsed result for each sample input, in order.
for (const sample of [tmp1, tmp2, tmp3]) {
    console.log(processInput(sample));
}

CodePudding user response:

Just another approach using no regex.

Unlike the regex solution, it correctly handles cases in which the string is empty or contains invalid encoded data such as @tagwithnotext.

Sometimes using the coolest tool doesn't mean the most consistent approach.

/*
@mario:lorem ipsum should return {tags:['mario'], text:"lorem ipsum"}
@mario@luigi:lorem ipsum should return {tags:['mario','luigi'], text:"lorem ipsum"}
lorem ipsum should return {tags:['default'], text:"lorem ipsum"}
*/
/**
 * Decodes "@tag1@tag2:some text" into its tags and its text, without regex.
 *
 * @param {string} encoded - Raw input string.
 * @returns {{tags: string[], text: string} | null}
 *   - `{tags: ['default'], text: encoded}` when the string does not start
 *     with "@" (this includes the empty string);
 *   - `null` when it starts with "@" but has no ":" separator (invalid);
 *   - otherwise the tag names and the text after the first ":".
 */
function decode(encoded) {
  // No leading "@" means there are no tags, only text.
  if (!encoded.startsWith('@')) {
    return { tags: ['default'], text: encoded };
  }

  // A tag prefix must be closed by a colon; without one the input is invalid.
  const p = encoded.indexOf(':');
  if (p === -1) {
    return null;
  }

  // Tags sit between the leading "@" and the first colon; the text follows it.
  const tags = encoded.substring(1, p);
  const text = encoded.substring(p + 1);
  return { tags: tags.split('@'), text };
}

// Print the decoded result for each sample; the last one has tags but no
// ":" separator.
const samples = [
  '@mario:lorem ipsum',
  '@mario@luigi:lorem ipsum',
  'lorem ipsum',
  '@invalidbecausemissingtextaftertags',
];
for (const sample of samples) {
  console.log( decode(sample) );
}

  • Related