Home > Software design >  Extract data from gmail into a spreadsheet ignoring the first same field
Extract data from gmail into a spreadsheet ignoring the first same field

Time:12-01

With this script I am trying to parse the content from my Gmail to Google Spreadsheet and it works well with Google Apps Script.

The problem is that in my mail there are two identical fields called "Indirizzo mail:" that I need to be parsed, but I would to parse only the second.

How could I ignore the first one into the body of the mail?

--- Mail Body Example:

Indirizzo mail: [email protected]
Phone: 00000000
Website: www.test.it


Lorem Ipsum

Cognome: XXX
Nome: XXX
Codice fiscale: XXX
Indirizzo mail: [email protected]


function parseEmailMessages(start) {

start = start || 0;

var label = GmailApp.getUserLabelByName("testparser");
var threads = label.getThreads();
var sheet = SpreadsheetApp.getActiveSheet();

for (var i = 0; i < threads.length; i  ) {

var tmp,
  message = threads[i].getMessages()[0],
  content = message.getPlainBody();

if (content) {

  tmp = content.match(/Cognome:\s*([A-Za-z0-9!"?`?|õüö’çëÅíšÃÉÁÇÃáéñãóú#&;()-,'@./\s\-] )(\r?\n)/);
  var Cognome = (tmp && tmp[1]) ? tmp[1].trim() : 'Null';

  tmp = content.match(/Nome:\s*([A-Za-z0-9!"?`?|õüö’çëÅíšÃÉÁÇÃáéñãóú#&;()-,'@./\s\-] )(\r?\n)/);
  var Nome = (tmp && tmp[1]) ? tmp[1].trim() : 'Null';

  tmp = content.match(/Codice fiscale:\s*([A-Za-z0-9!"?`?|õüö’çëÅíšÃÉÁÇÃáéñãóú#&;()-,'@./\s\-] )(\r?\n)/);
  var CF = (tmp && tmp[1]) ? tmp[1].trim() : 'Null';

  tmp = content.match(/Indirizzo mail:\s*([A-Za-z0-9!"?`?|õüö’çëÅíšÃÉÁÇÃáéñãóú#&;()-,'@./\s\-] )(\r?\n)/);
  var Mail = (tmp && tmp[1]) ? tmp[1].trim() : 'Null';

  sheet.appendRow([Cognome,Nome,CF,Mail]);

} // End if

  } // End for loop
}

CodePudding user response:

I'm not that great at regex but this should work based on your current code:

function parseEmailMessages(start) {

start = start || 0;

var label = GmailApp.getUserLabelByName("testparser");
var threads = label.getThreads();
var sheet = SpreadsheetApp.getActiveSheet();

for (var i = 0; i < threads.length; i  ) {

var tmp,
  message = threads[i].getMessages()[0],
  content = message.getPlainBody();

if (content) {

  tmp = content.match(/Cognome:\s*([A-Za-z0-9!"?`?|õüö’çëÅíšÃÉÁÇÃáéñãóú#&;()-,'@./\s\-] )(\r?\n)/);
  var Cognome = (tmp && tmp[1]) ? tmp[1].trim() : 'Null';

  tmp = content.match(/Nome:\s*([A-Za-z0-9!"?`?|õüö’çëÅíšÃÉÁÇÃáéñãóú#&;()-,'@./\s\-] )(\r?\n)/);
  var Nome = (tmp && tmp[1]) ? tmp[1].trim() : 'Null';

  tmp = content.match(/Codice fiscale:\s*([A-Za-z0-9!"?`?|õüö’çëÅíšÃÉÁÇÃáéñãóú#&;()-,'@./\s\-] )(\r?\n)/);
  var CF = (tmp && tmp[1]) ? tmp[1].trim() : 'Null';

  tmp = [...content.matchAll(/Indirizzo mail:\s*([A-Za-z0-9!"?`?|õüö’çëÅíšÃÉÁÇÃáéñãóú#&;()-,'@./\s\-] )(\r?\n)/g)];
  var Mail = (tmp && tmp[1][1]) ? tmp[1][1].trim() : 'Null';

  sheet.appendRow([Cognome,Nome,CF,Mail]);

} // End if

  } // End for loop
}

Explanation: I modified your regex to match all instances of the email and used matchAll() instead of match() to get multiple results. This results in a multidimensional array so tmp[0][1] holds the "bad" email and tmp[1][1] holds the "good" one.

CodePudding user response:

I'd try something like this:

function parseEmailMessages(start = 0) {
  const ss = SpreadsheetApp.getActive();
  const sh = ss.getActiveSheet();
  const label = GmailApp.getUserLabelByName("testparser");
  const threads = label.getThreads();
  for (let i = 0; i < threads.length; i  ) {
    let message = threads[i].getMessages()[0];
    let content = message.getPlainBody();
    if (content) {
      let obj = { pA: [] };
      content.split('\n').filter(l => ~l.indexOf(':')).forEach(l => {
        let lt = l.trim();
        let t = lt.split(':');
        if (t.length == 2) {
          let a = t[0].trim();
          let b = t[1].trim();
          if (!obj.hasOwnProperty(a)) {
            obj[a] = [b];
            obj.pA.push(a);
          } else {
            obj[a].push(b);
          }
        }
      });
      const Cognome = obj['Cognome'];
      const Nome = obj['Nome'];
      const CF = obj['Codice Fiscale']
      const Mail = obj['Indirizzo mail'][1]
      if (Cogname && Nome && CF && Mail) {
        sh.appendRow([Cognome, Nome, CF, Mail]);
      }
    }
  }
}
  • Related