I need a little help with regex extraction. When the body of the email is retrieved from Gmail into Google Sheets, it looks like the sample below: there is an asterisk before and after the name, the phone number, and the email address, and the email address is hyperlinked.
Body content after being retrieved from Gmail:
Email: *[email protected] `<[email protected]>`*
First Name: *John Doe*
Phone Number: *123456789*
My current regex code retrieves the data like this:
*[email protected] `<[email protected]>`*
*John Doe*
*123456789*
What changes should be made to the code so that it ignores the asterisks before and after each of these values, and so that the email is retrieved as [email protected], ignoring the second part of the hyperlink format? Like this:
[email protected]
John Doe
123456789
My code is:
/**
 * Extracts the name, email address and phone number from a message body.
 *
 * Each value is whatever follows its label ("First Name:", "Email:",
 * "Phone Number:") on the same line of the plain-text body, with surrounding
 * whitespace trimmed. Markup characters such as asterisks are NOT removed.
 *
 * @param {Object} message - Object exposing getDate() and getPlainBody()
 *   (presumably a GmailMessage; confirm against the caller).
 * @returns {Object} The populated emailData object (date, body, fullName,
 *   emailAddr, phoneNum). NOTE(review): the original snippet is cut off before
 *   any return statement, so this return value is assumed.
 * @throws {TypeError} If a label is absent from the body, because match()
 *   then returns null and toString() is called on it.
 */
function extractDetails(message){
  var emailData = {
    date: "Null",
    fullName: "Null",
    emailAddr: "Null",
    phoneNum: "Null",
  };
  var emailKeywords = {
    fullName: "First Name:",
    emailAddr: "Email:",
    phoneNum: "Phone Number:",
  };
  emailData.date = message.getDate();
  emailData.body = message.getPlainBody();
  // Each pattern is a lookbehind on the label followed by ".*"; since "." does
  // not match line breaks, only the remainder of the label's line is captured.
  var regExp;
  regExp = new RegExp("(?<=" + emailKeywords.fullName + ").*");
  emailData.fullName = emailData.body.match(regExp).toString().trim();
  regExp = new RegExp("(?<=" + emailKeywords.phoneNum + ").*");
  emailData.phoneNum = emailData.body.match(regExp).toString().trim();
  regExp = new RegExp("(?<=" + emailKeywords.emailAddr + ").*");
  emailData.emailAddr = emailData.body.match(regExp).toString().trim();
  // NOTE(review): the snippet ended at the line above; returning the result
  // and closing the function here is an assumption.
  return emailData;
}
CodePudding user response:
Replace the last six lines of your code (the three `regExp = ...` / `.match(...)` pairs) with:
// Replacement for the three regExp/match pairs inside extractDetails; it uses
// that function's existing `regExp`, `emailKeywords` and `emailData` variables.
// The lookbehind now also consumes optional whitespace and the opening "*",
// and the lazy ".*?" stops just before the closing "*".
regExp = new RegExp("(?<=" + emailKeywords.fullName + "\\s*\\*).*?(?=\\*)");
emailData.fullName = emailData.body.match(regExp).toString();
regExp = new RegExp("(?<=" + emailKeywords.phoneNum + "\\s*\\*).*?(?=\\*)");
emailData.phoneNum = emailData.body.match(regExp).toString();
// For the email, stop at the first whitespace instead of the closing "*",
// which leaves out the trailing "<address>" part of the hyperlink format.
regExp = new RegExp("(?<=" + emailKeywords.emailAddr + "\\s*\\*).*?(?=\\s)");
emailData.emailAddr = emailData.body.match(regExp).toString();
(?<=Email:\s*\*).*?(?=\s)
(?<=Email:\s*\*)
Go to the point that is preceded by `Email:`, followed by zero or more whitespace characters (`\s*`), followed by a literal `*`.
.*?
Then match any character except newlines, as few times as possible, until a whitespace character appears: `(?=\s)`.
See regex demo.
(?<=First Name:\s*\*).*?(?=\*)
(?<=First Name:\s*\*)
Go to the point that is preceded by `First Name:`, followed by zero or more whitespace characters (`\s*`), followed by a literal `*`.
.*?
Then match any character except newlines, as few times as possible, until a literal `*` character appears: `(?=\*)`.
See regex demo.