Home > front end >  Remove Duplicate based on 2 columns in Google sheets
Remove Duplicate based on 2 columns in Google sheets

Time:09-22

This code removes the duplicate in the column name called "Receipt Number"

How to modify this code if the conditions matches to two column as duplicate. (i.e. Receipt Number Column and Mobile Number Column should be duplicate at the same time)

  function removeDuplicates() {
  var ss=SpreadsheetApp.getActive();
  var sh=ss.getSheetByName("Records");
  var vA=sh.getDataRange().getValues();
  var hA=vA[0];
  var hObj={};
  hA.forEach(function(e,i){hObj[e]=i;});//header title to index
  var uA=[];
  var d=0;
  for(var i=0;i<vA.length;i  ) {
    if(uA.indexOf(vA[i][hObj['Receipt Number']])==-1) {
      uA.push(vA[i][hObj['Receipt Number']]);
    }else{
      sh.deleteRow(i 1-d  );
    }  
  }
}

Thanks in advance.

CodePudding user response:

I believe your goal is as follows.

  • You want to remove the duplicated rows by using the condition of 2 columns.

  • For example, when the columns of Receipt Number and Mobile Number are as follows, you want to delete the following 2 rows (4 and 5).

      1  Receipt Number  Mobile Number
      2  a2              b2
      3  a3              b3
      4  a2              b2 <--- delete
      5  a3              b3 <--- delete
      6  a2              b3
      7  a3              b2
    

If my understanding is correct, how about the following modification? In this modification, removeDuplicates is used.

Modified script:

function removeDuplicates() {
  var checkHeadeTitles = ["Receipt Number", "Mobile Number"]; // This header titles are from your question.

  var ss = SpreadsheetApp.getActive();
  var sh = ss.getSheetByName("Records");
  var header = sh.getRange(1, 1, 1, sh.getLastColumn()).getValues()[0];
  var columnNumber = checkHeadeTitles.map(e => {
    var idx = header.indexOf(e);
    if (idx == -1) {
      throw new Error(`No header tilte of '${e}'.`);
    }
    return idx   1;
  });
  sh.getRange(2, 1, sh.getLastRow() - 1, sh.getLastColumn()).removeDuplicates(columnNumber);

  var row = sh.getDataRange().getValues().findIndex(r => r.join("") == "");
  if (row == -1) return;
  sh.deleteRow(row   1);
}
  • When this script is run to the above sample sheet, rows 4 and 5 are deleted.

References:

CodePudding user response:

function removeDuplicates() {
  var ss = SpreadsheetApp.getActive();
  var sh = ss.getSheetByName("Records");
  var [hA, ...vA] = sh.getDataRange().getValues();
  var idx = {};
  hA.forEach(function (e, i) { idx[e] = i; });
  var uA = [];
  var d = 0;
  for (var i = 0; i < vA.length; i  ) {
    let x = `${vA[i][idx['Receipt Number']]}-${vA[i][idx['Receipt Number']]}`
    if (!~uA.indexOf(x)) {
      uA.push(x);
    } else {
      sh.deleteRow(i   2 - d  );
    }
  }
}
  • Related