I'm trying to write some JavaScript that traverses an HTML document and picks out words listed in a JSON array. When a word matches, the script should wrap it as <a href='glossary/#[matched text]'>[matched text]</a> and render the result to the screen.
I seem to have that part semi-down; the bit where I'm falling over is how best to tell the system to ignore certain elements (i.e. text already inside an <a> link, buttons, inputs, element attributes, etc.). I've tried to resolve this with a regex and managed to fumble along and get the following:
/(?<!<(a|button|submit|pre|img|svg|path|h[0-9]|.*data-ignore.*>|input\/>|textarea|pre|code))((?<!(="|data-))\btext\b(?!"))(?!<\/(a|button|submit|pre|img|svg|path|h[0-9])>)/gi
(text is the word I'm trying to auto-link) - https://regex101.com/r/u7cLPR/1
If you follow the Regex101 link you'll see I "think" I've managed to cover all bases bar one, which is when the word occurs inside a class='' attribute (and therefore in other attributes such as style).
Any help would be greatly appreciated. As always with regex, I seem to either miss the mark or over-complicate the solution (is regex even the right tool for the job here?).
CodePudding user response:
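I would walk the DOM and process only its text nodes rather than matching the HTML source with a regex. It would be recursive and quite fast. Check out my answer about changing the size of English letters in all text nodes - it's the same idea.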
// Glossary terms that the linker searches for in the page's text nodes.
var words = ["text", "one"];

// Upper-case tag names (the form `tagName` reports for HTML elements) whose
// subtrees are not descended into when collecting text nodes.
//   A                        - a link inside a link is invalid HTML
//   BUTTON, TEXTAREA,
//   SELECT, OPTION           - form controls must keep their plain text
//   PRE, CODE                - preformatted / source text is shown verbatim
//   SCRIPT, STYLE, NOSCRIPT  - their text is code, not prose; rewriting it
//                              would corrupt the page
var skip_elements = [
  "A",
  "BUTTON",
  "TEXTAREA",
  "SELECT",
  "OPTION",
  "PRE",
  "CODE",
  "SCRIPT",
  "STYLE",
  "NOSCRIPT"
];
/**
 * Glossary auto-linker.
 *
 * Walks the DOM below a root element, collects its text nodes (skipping the
 * subtrees of elements listed in the global `skip_elements`) and replaces
 * every text node that contains a term from the global `words` list with a
 * <span> in which each term is wrapped in
 * <a href="glossary/#term">term</a>.
 *
 * Only text nodes are touched, so attribute values (class, title, style...)
 * can never be altered.
 */
var EnglishCharFixer = {
  /** Last compiled search pattern and the word-list key it was built from. */
  pattern_cache: { key: null, pattern: null },

  /**
   * Link every glossary term found in the text below `elem`.
   * @param {Element} elem - root of the subtree to process; ignored if falsy.
   */
  do_elem: function(elem) {
    if (!elem) {
      return;
    }
    this.process_text_nodes(this.textNodesUnder(elem));
  },

  /**
   * Collect, in document order, all text nodes below `node`, not descending
   * into elements whose tag name is listed in `skip_elements`.
   * @param {Node} node - root of the subtree.
   * @param {Node[]} [collected] - accumulator used by the recursion; callers
   *   normally omit it.
   * @returns {Node[]} the text nodes found.
   */
  textNodesUnder: function(node, collected) {
    var all = collected || [];
    for (var child = node.firstChild; child; child = child.nextSibling) {
      if (child.nodeType === 3) {
        all.push(child);
      } else if (child.nodeType === 1 && !this.is_skipped(child)) {
        // Push into the shared accumulator instead of concat-ing a new
        // array at every level of the tree.
        this.textNodesUnder(child, all);
      }
    }
    return all;
  },

  /**
   * Tell whether an element's subtree must be left untouched.
   * Tag names are compared case-insensitively.
   * @param {Element} elem
   * @returns {boolean}
   */
  is_skipped: function(elem) {
    var tag = String(elem.tagName).toUpperCase();
    for (var i = 0; i < skip_elements.length; i++) {
      if (String(skip_elements[i]).toUpperCase() === tag) {
        return true;
      }
    }
    return false;
  },

  /**
   * Replace a text node with a <span> whose content is the given markup.
   * @param {Node} node - the text node to replace; left alone if detached.
   * @param {string} str - HTML markup; it is assigned to innerHTML, so it
   *   must already be escaped (as the output of do_text is).
   */
  replace_node: function(node, str) {
    if (!node.parentNode) {
      return;
    }
    var replacementNode = document.createElement('span');
    replacementNode.innerHTML = str;
    node.parentNode.replaceChild(replacementNode, node);
  },

  /**
   * Escape text so it can be embedded in HTML markup verbatim.
   * @param {string} text
   * @returns {string}
   */
  escape_html: function(text) {
    return String(text)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  },

  /**
   * Build (or reuse) one global, case-sensitive regular expression that
   * matches any entry of `words` as a whole word.
   * @returns {RegExp|null} the pattern, or null when there is nothing to
   *   search for.
   */
  word_pattern: function() {
    var list = [];
    for (var i = 0; i < words.length; i++) {
      if (typeof words[i] === 'string' && words[i].length > 0) {
        list.push(words[i]);
      }
    }
    if (list.length === 0) {
      return null;
    }

    var key = list.join('\u0000');
    if (this.pattern_cache.key !== key) {
      // Longest terms first so a multi-word term wins over a shorter term
      // that is its prefix.
      var sorted = list.slice().sort(function(a, b) {
        return b.length - a.length;
      });
      var parts = [];
      for (var j = 0; j < sorted.length; j++) {
        var term = sorted[j];
        var escaped = term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        // \b only makes sense next to a word character; a term such as
        // "C++" gets a boundary on its left side only.
        var lead = /^\w/.test(term) ? '\\b' : '';
        var trail = /\w$/.test(term) ? '\\b' : '';
        parts.push(lead + escaped + trail);
      }
      this.pattern_cache = {
        key: key,
        pattern: new RegExp(parts.join('|'), 'g')
      };
    }
    return this.pattern_cache.pattern;
  },

  /**
   * Turn the raw content of a text node into markup with glossary links.
   *
   * All terms are matched in a single pass over the original text, so a
   * term can never match inside markup produced for another term.
   *
   * @param {string} str - raw (unescaped) text.
   * @returns {string} `str` itself, unchanged, when it contains no term;
   *   otherwise HTML markup in which every whole-word occurrence of a term
   *   is wrapped in a link and all other text is HTML-escaped.
   */
  do_text: function(str) {
    if (typeof str !== 'string' || str === '' || !words || !words.length) {
      return str;
    }
    // Referenced by name (not `this`) so do_text also works when passed
    // around as a bare callback.
    var pattern = EnglishCharFixer.word_pattern();
    if (!pattern) {
      return str;
    }

    var html = '';
    var last = 0;
    var match;
    pattern.lastIndex = 0; // the cached /g pattern is stateful
    while ((match = pattern.exec(str)) !== null) {
      html += EnglishCharFixer.escape_html(str.slice(last, match.index));
      html += '<a href="glossary/#' + encodeURIComponent(match[0]) + '">' +
        EnglishCharFixer.escape_html(match[0]) + '</a>';
      last = match.index + match[0].length;
    }
    if (last === 0) {
      // No term found: hand back the untouched text so the caller's
      // "did anything change?" comparison stays false.
      return str;
    }
    return html + EnglishCharFixer.escape_html(str.slice(last));
  },

  /**
   * Run do_text over each text node and replace the nodes whose text
   * contained at least one glossary term.
   * @param {Node[]} nodes - text nodes to process.
   */
  process_text_nodes: function(nodes) {
    for (var index = 0; index < nodes.length; index++) {
      var node = nodes[index];
      var value = node.nodeValue;
      var str = this.do_text(value);
      if (str !== value) {
        this.replace_node(node, str);
      }
    }
  }
};
// Entry point: collect the text nodes under <body> and link the glossary
// words found in them.
// NOTE(review): presumably this script runs after <body> has been parsed,
// so that document.body is non-null here - confirm where it is included.
EnglishCharFixer.do_elem(document.body);
<!--
  Demo markup for the glossary linker.
  The copy inside <button> and <textarea> and the <img> title attribute says
  it should stay unchanged; the heading, the paragraph and the loose text
  nodes contain the words meant to be linked.
-->
<body>
<h1>some title with text</h1>
<button>text shouldn't change</button> just a text node
<div style="padding:30px">
<p>paragraph with text</p>
another one
<br>
<img src="https://picsum.photos/100" title="this text shouldn't change as well">
</div>
<textarea>hello text</textarea>
</body>