I have a piece of code that tries to find URLs and wrap them in <a>
tags. It works fine for shorter strings but on longer strings it doesn't work at all. Does anyone know why?
/**
 * Wraps every URL-like token in `text` in an `<a>` tag.
 *
 * A token is recognised when it consists of an optional `scheme://`, one or
 * more dot-separated host labels, a top-level domain (any two letters or one
 * of the listed generic names), and then optionally a port, a path, a query
 * string and a fragment. The token must be followed by whitespace or the end
 * of the string.
 *
 * @param {string} text - Plain text that may contain URLs.
 * @returns {string} `text` with each recognised URL replaced by
 *   `<a href="URL">URL</a>`; everything else is left untouched.
 */
function urlify(text) {
  // The pattern must be a RegExp literal: String.prototype.replace treats a
  // string pattern as literal text to search for, not as a regular expression.
  //
  // The trailing boundary is a lookahead rather than a consuming group so the
  // whitespace after a URL stays outside the generated link.
  //
  // Every hyphen sits at the end of its character class so it is a literal
  // "-" and never forms a range (an unescaped "=-_" would also admit ">").
  // None of the classes allow '"', "<" or ">", so the match can be
  // interpolated into the attribute and element without breaking the markup.
  const urlRegex = /(?:[a-z]+:\/\/)?(?:[a-z0-9-]+\.)+(?:[a-z]{2}|aero|arpa|biz|com|coop|edu|gov|info|int|jobs|mil|museum|name|nato|net|org|pro|travel|local|internal)(?::[0-9]{1,5})?(?:\/[a-z0-9_.~-]+)*(?:\/[a-z0-9_.-]*(?:\?[a-z0-9+_.%=&-]*)?)?(?:#[a-z0-9!$&'()*+.=_~:@\/?-]*)?(?=\s|$)/gi;

  return text.replace(urlRegex, (url) => `<a href="${url}">${url}</a>`);
}
If I run urlify('www.example.com is a cool website')
it returns <a href="www.example.com">www.example.com</a> is a cool website
but if I have a string that has 5000 characters that has links it doesn't change the original string at all.
CodePudding user response:
Here is a more efficient version of the same regex:
/**
 * Wraps every URL-like token in `text` in an `<a>` tag.
 *
 * A token is an optional `scheme://`, one or more dot-separated host labels,
 * a top-level domain (one of the listed generic names or any two letters),
 * and then optionally a port, a path, a query string and a fragment. The
 * token must not be followed by a non-whitespace character.
 *
 * @param {string} text - Plain text that may contain URLs.
 * @returns {string} `text` with each recognised URL replaced by
 *   `<a href="URL">URL</a>`; everything else is left untouched.
 */
function urlify(text) {
  // All groups are non-capturing and the TLD alternatives share prefixes to
  // reduce backtracking. `(?!\S)` asserts whitespace or end of input without
  // consuming it, so the whitespace stays outside the link.
  const urlRegex = /(?:[a-z]+:\/\/)?(?:[a-z\d-]+\.)+(?:a(?:ero|rpa)|biz|co(?:m|op)|edu|gov|in(?:ternal|fo|t)|jobs|m(?:il|useum)|n(?:a(?:me|to)|et)|org|pro|travel|local|[a-z]{2})(?::\d{1,5})?(?:\/[\w.~-]+)*(?:\/[\w.-]*(?:\?[\w+.%=&;-]*)?)?(?:#[\w!$&'()*+.=~:@\/?-]*)?(?!\S)/gi;

  // `$&` in the replacement string inserts the whole matched URL.
  return text.replace(urlRegex, '<a href="$&">$&</a>');
}
See the regex demo.
The part that is difficult to optimize at this moment is the starting (?:[a-z]+:\/\/)?(?:[a-z\d-]+\.)+
, as it allows matching a pattern of unknown length anywhere in the string, which involves quite a bit of overhead. If you only want to start matching at whitespace or at the start of the string, adding a (?<!\S)
at the start would greatly speed up matching.