How can I use regex to get an array of all the individual characters not contained within anchor tags? So for example, with this text:
DOWNLOAD <a href="https://this.com/" target="_blank">THIS</a> OR <a href="https://that.io/" target="_blank">THAT</a>
I want an array of the indices for the characters D,O,W,N,L,O,A,D, ,T,H,I,S, , ... etc.
I managed to figure out how to get everything I don't want selected, using this: /(?:<.*?>)
But I don't know how to use that to get all the characters outside of that group.
CodePudding user response:
As already pointed out by @Cid, don't do this with regular expressions. Instead, use something like below and read the input character by character:
function reader(el) {
let i = 0;
let src = el.innerHTML;
const r = {
done() {
return i >= src.length;
},
advance() {
i = 1;
},
char() {
let c = !r.done() ? src[i] : '';
r.advance();
return c;
},
peek() {
return !r.done() ? src[i] : '';
}
};
return r;
}
function collector(el) {
const r = reader(el);
const skipUntil = char => {
while (r.peek() !== char) {
r.advance();
}
r.advance();
};
return {
collect() {
const v = [];
while (!r.done()) {
if (r.peek() === '<') {
skipUntil('>');
} else if (r.peek() === '\n') {
r.advance();
} else {
v.push(r.char());
}
}
return v;
}
};
}
/* --- */
const el = document.querySelector('#source');
const cl = collector(el);
console.log(cl.collect());
<div id="source">
DOWNLOAD <a href="#noop">THIS</a> OR <a href="#noop2">THAT</a>
</div>