The problem is that \b doesn't work with Russian and Ukrainian letters.
Here I try to find all matches of the word 'февраля' in the text, replace each one with a temporary placeholder (tempword), turn the placeholder into a link, and then change it back to 'февраля'.
/**
 * Links occurrences of `word` in a Google Doc to `siteurl`.
 *
 * Each match is temporarily replaced with a Latin placeholder so that the
 * next `findText` call (which always searches from the top of the body here)
 * does not return the same occurrence again. The placeholder is swapped back
 * to `word` once the loop is done.
 *
 * NOTE(review): the `\b` anchors are what the surrounding question is about -
 * they reportedly work for Latin words such as 'february' but never match
 * around Cyrillic words such as 'февраля'.
 * NOTE(review): `replaceText` is applied to the whole text element while only
 * the first match gets a link - verify behavior when one element contains
 * the word more than once.
 *
 * @param {string} word - Word to link; inserted into the pattern unescaped.
 * @param {string} siteurl - URL assigned to each linked occurrence.
 */
function addLinks(word, siteurl) {
  const id = 'doc\'s ID';
  const doc = DocumentApp.openById(id);
  const body = doc.getBody();
  const tempword = 'ASDFDSGDDKDSL2';
  const searchText = `\\b${word}\\b`;

  let element = body.findText(searchText);
  console.log(element);
  while (element) {
    const start = element.getStartOffset();
    const text = element.getElement().asText();
    text.replaceText(searchText, tempword);
    // Both offsets are inclusive, hence the "- 1" on the end offset.
    text.setLinkUrl(start, start + tempword.length - 1, siteurl);
    element = body.findText(searchText);
  }
  body.replaceText(tempword, word);
}
// Run the linker for the Russian word 'февраля'.
addLinks('февраля', 'example.com');
It works as expected if I replace the Russian word 'февраля' with the English word 'february':
// Same call with a Latin-script word instead of the Cyrillic one.
addLinks('february', 'example.com');
I need a regular expression because if I simply search for 'февраля', the script will also match longer words such as 'февралям', 'февралями', etc. So the question is how to make this work. The error "Exception: Invalid regular expression pattern" occurs with this code:
// Attempt: emulate word boundaries with lookbehind/lookahead around `word`.
// NOTE(review): this is the pattern reported to raise "Invalid regular
// expression pattern"; lookaround assertions are presumably not supported by
// the Docs regex engine - confirm against the RE2 syntax reference.
var searchText = "(?<=[\\s,.:;\"']|^)" + word + "(?=[\\s,.:;\"']|$)";
or this:
// Attempt: leading whitespace/start group plus a trailing lookahead.
// `\s` must be written `\\s` inside a string literal, otherwise the pattern
// receives a plain "s".
// NOTE(review): the lookahead is presumably still rejected by the Docs regex
// engine - confirm against the RE2 syntax reference.
var searchText = "(^|\\s)" + word + "(?=\\s|$)";
and some other.
CodePudding user response:
I think the following code does what is needed, at least in this situation.
/**
 * Links occurrences of `word` that are not glued to other Cyrillic letters.
 *
 * Strategy: every occurrence of `word` (including ones inside longer words)
 * is first replaced with a Latin placeholder. The placeholder is then searched
 * for with a non-Cyrillic character required on each side, so placeholders
 * that came from longer words (e.g. 'февралям') are skipped. Finally all
 * placeholders are turned back into `word`.
 *
 * Limitation visible from the pattern: a character is required on BOTH sides,
 * so an occurrence at the very start or very end of a text element is not
 * linked.
 *
 * @param {string} word - Word to link; used as a pattern without escaping.
 * @param {string} siteurl - URL assigned to each linked occurrence.
 */
function addLinks(word, siteurl) {
  const id = 'doc\'s ID';
  const doc = DocumentApp.openById(id);
  const body = doc.getBody();
  const tempword = 'ASDFGFDSA';
  const tempwordRegex = `[^А-Яа-я]${tempword}[^А-Яа-я]`;

  // Replace all matches of the Cyrillic word with the Latin placeholder,
  // without any boundaries.
  body.replaceText(word, tempword);

  // Now find only placeholders that are not surrounded by Cyrillic letters.
  let element = body.findText(tempwordRegex);
  console.log(element);
  while (element) {
    // The match begins on the leading boundary character, so the placeholder
    // itself starts one position later.
    const start = element.getStartOffset() + 1;
    const text = element.getElement().asText();
    // Both offsets are inclusive: link exactly the placeholder.
    text.setLinkUrl(start, start + tempword.length - 1, siteurl);
    element = body.findText(tempwordRegex, element); // find next
  }

  // Change every placeholder back to the original word.
  body.replaceText(tempword, word);
}
// Run the linker for the Russian word 'февраля'.
addLinks('февраля', 'example.com');
CodePudding user response:
Here is my solution:
/** Entry point: links the word 'февралями' to example.com. */
function main() {
  const targetWord = 'февралями';
  const targetUrl = 'example.com';
  addLinks(targetWord, targetUrl);
}
/**
 * Links whole-word occurrences of `word` in the active document to `url`.
 *
 * A "word boundary" is emulated with a character class that excludes Russian
 * letters. Because a match may sit at the start and/or end of a paragraph,
 * four patterns are tried per paragraph; `start` / `end` record how many
 * boundary characters each pattern includes before / after the word so they
 * can be trimmed from the linked range.
 *
 * @param {string} word - Word to link; used in the patterns without escaping.
 * @param {string} url - URL assigned to each linked occurrence.
 */
function addLinks(word, url) {
  const doc = DocumentApp.getActiveDocument();
  const pgfs = doc.getParagraphs();
  const bound = '[^А-яЁё]'; // any character except a Russian letter
  const patterns = [
    { regex: `${bound}${word}${bound}`, start: 1, end: 1 },
    { regex: `^${word}${bound}`, start: 0, end: 1 },
    { regex: `${bound}${word}$`, start: 1, end: 0 },
    { regex: `^${word}$`, start: 0, end: 0 },
  ];

  for (const pgf of pgfs) {
    for (const pattern of patterns) {
      let hit = pgf.findText(pattern.regex);
      while (hit) {
        // Trim the boundary characters that are part of the match.
        const start = hit.getStartOffset() + pattern.start;
        const end = hit.getEndOffsetInclusive() - pattern.end;
        pgf.editAsText().setLinkUrl(start, end, url);
        hit = pgf.findText(pattern.regex, hit);
      }
    }
  }
}
Test output: