Home > database >  Combine duplicate tokens inside huge JSON file into nested array of objects using React
Combine duplicate tokens inside huge JSON file into nested array of objects using React

Time:02-19

I looked at several of the suggested solutions but none seemed to rise to this confounding data formatting challenge.

I have a huge JSON file (over 100k rows) in which the same values are duplicated massively, with every record stored as a top-level object. Here's an example:

[
   {
      "manufacturer":"Samsung",
      "device":"Galaxy A32 5G",
      "model":"SM-A326B",
      "chipset":"Mediatek MT6853V/NZA",
      "date":"2022-01-01",
      "fw_id":"A326BXXS4AVA1",
      "android":"R(Android 11)",
      "known_passcode":false,
      "afu":false,
      "bfu":false,
      "bruteforce":false
   },
   {
      "manufacturer":"Samsung",
      "device":"Galaxy A32 5G",
      "model":"SM-A326U",
      "chipset":"Mediatek MT6853V/NZA",
      "date":"2021-03-01",
      "fw_id":"A326USQU1AUD4",
      "android":"R(Android 11)",
      "known_passcode":true,
      "afu":false,
      "bfu":true,
      "bruteforce":true
   },
   {
      "manufacturer":"Samsung",
      "device":"Galaxy A32 5G",
      "model":"SM-A326U1",
      "chipset":"Mediatek MT6853V/NZA",
      "date":"2021-09-01",
      "fw_id":"A326U1UEU5AUJ2",
      "android":"R(Android 11)",
      "known_passcode":true,
      "afu":false,
      "bfu":true,
      "bruteforce":true
   },
   {
      "manufacturer":"LGE",
      "device":"LG K31",
      "model":"LGL355DL",
      "chipset":"Mediatek MT6762",
      "date":"unknown",
      "fw_id":"L355DL10l",
      "android":"unknown",
      "known_passcode":false,
      "afu":false,
      "bfu":false,
      "bruteforce":false
   }
]

This needs to be organized so that data points like manufacturer, device, model are not duplicated hundreds of times.

Btw, here's a JSFiddle to play with: https://jsfiddle.net/xpancom/Lq7duahv/

Ideally, the JSON format would be the following:

[
  {
    "manufacturers": [
      {
        "manufacturer": "Samsung",
        "devices": [
          {
            "device": "Galaxy A32 5G",
            "models": [
              {
                "model": "SM-A326B",
                "data": [
                  {
                    "chipset": "Mediatek MT6853V/NZA",
                    "date": "2022-01-01",
                    "fw_id": "A326BXXS4AVA1",
                    "android": "R(Android 11)",
                    "known_passcode": false,
                    "afu": false,
                    "bfu": false,
                    "bruteforce": false
                  },
                  {
                    "chipset": "Mediatek MT6853V/NZA",
                    "date": "2021-09-01",
                    "fw_id": "A326BXXU3AUH7",
                    "android": "R(Android 11)",
                    "known_passcode": true,
                    "afu": false,
                    "bfu": true,
                    "bruteforce": true
                  }
                ]
              },
              {
                "model": "SM-A326U1",
                "data": [
                  {
                    "chipset": "Mediatek MT6853V/NZA",
                    "date": "2021-09-01",
                    "fw_id": "A326U1UEU5AUJ2",
                    "android": "R(Android 11)",
                    "known_passcode": true,
                    "afu": false,
                    "bfu": true,
                    "bruteforce": true
                  }
                ]
              }
            ]
          }
        ]
      },
      {
        "manufacturer": "LGE",
        "devices": [
          {
            "device": "LG K31",
            "models": [
              {
                "model": "LGL355DL",
                "data": [
                  {
                    "chipset": "Mediatek MT6762",
                    "date": "unknown",
                    "fw_id": "L355DL10l",
                    "android": "unknown",
                    "known_passcode": false,
                    "afu": false,
                    "bfu": false,
                    "bruteforce": false
                  }
                ]
              }
            ]
          }
        ]
      }
    ]
  }
]

Working in React, here's what I've got so far in trying to massage this data:

  // Flat input rows; `data` is defined outside this snippet (presumably the
  // parsed JSON array shown above — confirm against the caller).
  const source = data;
  // Output accumulator: the grouped entries are pushed onto this array.
  const destination = [];
  const classifiedTokens = []; // device names that have already been classified
  const classifiedTokensModel = []; // model names that have already been classified

  /**
   * Collects every row belonging to the next device that has not been
   * classified yet, and records that device in `classifiedTokens`.
   *
   * The first row whose `device` is not in `classifiedTokens` picks the
   * device for this call; all rows from that point on with the same
   * `device` are returned (earlier rows cannot match, because their device
   * was already classified).
   *
   * @param {Array<Object>} source - Flat rows, each with a `device` property.
   * @returns {Array<Object>|null} Rows of the next unclassified device, or
   *   `null` once every device has been classified.
   */
  const getNextTokenArray = (source) => {
    // "Found" is tracked separately from the token itself so that a falsy
    // device value ('' or undefined) still forms a group instead of being
    // marked as classified and then silently dropped.
    let hasToken = false;
    let unusedToken;
    const nextTokenArray = source.filter((element) => {
      const { device } = element;
      if (!hasToken && !classifiedTokens.includes(device)) {
        hasToken = true;
        unusedToken = device;
        classifiedTokens.push(device);
      }
      return hasToken && device === unusedToken;
    });
    return hasToken ? nextTokenArray : null;
  };

  /**
   * Groups one device's rows by model and pushes one entry per model onto
   * `destination`, shaped as `{ device, model, data: [...] }`.
   *
   * Every model found in `tokenObject.data` is emitted in first-seen order.
   * Grouping is local to the device, so a model name that also occurs under
   * another device is not skipped. Model names are still recorded in
   * `classifiedTokensModel` (once each) for any other reader of that list.
   *
   * @param {{device: *, data: Array<Object>}} tokenObject - A device and its
   *   rows, as built by `addToDestination`.
   * @returns {null|undefined} `null` when there were no rows to group,
   *   otherwise `undefined` (results are delivered via `destination`).
   */
  const getNextTokenArrayModel = (tokenObject) => {
    const { device, data } = tokenObject;

    // Bucket rows by model; a Map keeps first-seen order of the models.
    const rowsByModel = new Map();
    data.forEach((element) => {
      const bucket = rowsByModel.get(element.model);
      if (bucket) {
        bucket.push(element);
      } else {
        rowsByModel.set(element.model, [element]);
      }
    });

    if (rowsByModel.size === 0) {
      return null;
    }

    rowsByModel.forEach((rows, model) => {
      if (!classifiedTokensModel.includes(model)) {
        classifiedTokensModel.push(model);
      }
      destination.push({
        device,
        model,
        data: rows.map((element) => ({
          manufacturer: element.manufacturer,
          chipset: element.chipset,
          date: element.date,
          fw_id: element.fw_id,
          android: element.android,
          // Raw rows spell this key `known_passcode`; accept either spelling
          // so the flag is not lost as `undefined`.
          knownPasscode:
            element.knownPasscode !== undefined
              ? element.knownPasscode
              : element.known_passcode,
          afu: element.afu,
          bfu: element.bfu,
          bruteforce: element.bruteforce,
        })),
      });
    });

    return undefined;
  };

  /**
   * Builds a `{ device, data }` object from the rows of a single device and
   * hands it to `getNextTokenArrayModel` for grouping by model.
   *
   * @param {Array<Object>} tokenArray - Rows that all share one `device`;
   *   an empty array is ignored.
   * @returns {undefined}
   */
  const addToDestination = (tokenArray) => {
    if (tokenArray.length === 0) return;

    const res = {
      device: tokenArray[0].device,
      data: tokenArray.map((element) => ({
        manufacturer: element.manufacturer,
        model: element.model,
        chipset: element.chipset,
        date: element.date,
        fw_id: element.fw_id,
        android: element.android,
        // The source rows name this key `known_passcode`; reading
        // `knownPasscode` alone always produced `undefined`.
        knownPasscode:
          element.known_passcode !== undefined
            ? element.known_passcode
            : element.knownPasscode,
        afu: element.afu,
        bfu: element.bfu,
        bruteforce: element.bruteforce,
      })),
    };

    // Group this device's rows by model.
    getNextTokenArrayModel(res);
  };

  // Drain the source one device group at a time until getNextTokenArray
  // reports that nothing is left.
  let nextTokenArray;
  for (;;) {
    nextTokenArray = getNextTokenArray(source);
    if (!nextTokenArray) {
      break;
    }
    addToDestination(nextTokenArray);
  }

  // Show the grouped result as pretty-printed JSON inside #root after a
  // one-second delay.
  setTimeout(() => {
    const pre = document.createElement('pre');
    // textContent rather than innerHTML: the JSON comes from external data,
    // so any markup characters in it must be displayed literally.
    pre.textContent = JSON.stringify(destination, null, 2);

    const root = document.getElementById('root');
    root.textContent = '';
    root.appendChild(pre);
  }, 1000);

};


And here's the JSFiddle again: https://jsfiddle.net/xpancom/Lq7duahv/

Who can smash this data formatting dilemma?

CodePudding user response:

This answer is not React specific, but one approach would be to use array.reduce() to transform each level/node of the structure as shown in the code snippet below.

// Sample input: a flat list of device records in which manufacturer, device
// and model are repeated on every row. The first two records are identical.
const source = [
  {
    manufacturer: 'Samsung',
    device: 'Galaxy A32 5G',
    model: 'SM-A326B',
    chipset: 'Mediatek MT6853V/NZA',
    date: '2022-01-01',
    fw_id: 'A326BXXS4AVA1',
    android: 'R(Android 11)',
    known_passcode: false,
    afu: false,
    bfu: false,
    bruteforce: false,
  },
  {
    manufacturer: 'Samsung',
    device: 'Galaxy A32 5G',
    model: 'SM-A326B',
    chipset: 'Mediatek MT6853V/NZA',
    date: '2022-01-01',
    fw_id: 'A326BXXS4AVA1',
    android: 'R(Android 11)',
    known_passcode: false,
    afu: false,
    bfu: false,
    bruteforce: false,
  },
  {
    manufacturer: 'Samsung',
    device: 'Galaxy A32 5G',
    model: 'SM-A326U',
    chipset: 'Mediatek MT6853V/NZA',
    date: '2021-03-01',
    fw_id: 'A326USQU1AUD4',
    android: 'R(Android 11)',
    known_passcode: true,
    afu: false,
    bfu: true,
    bruteforce: true,
  },
  {
    manufacturer: 'Samsung',
    device: 'Galaxy A32 5G',
    model: 'SM-A326U1',
    chipset: 'Mediatek MT6853V/NZA',
    date: '2021-09-01',
    fw_id: 'A326U1UEU5AUJ2',
    android: 'R(Android 11)',
    known_passcode: true,
    afu: false,
    bfu: true,
    bruteforce: true,
  },
  {
    manufacturer: 'LGE',
    device: 'LG K31',
    model: 'LGL355DL',
    chipset: 'Mediatek MT6762',
    date: 'unknown',
    fw_id: 'L355DL10l',
    android: 'unknown',
    known_passcode: false,
    afu: false,
    bfu: false,
    bruteforce: false,
  },
];

/**
 * Groups flat rows by one property, producing one node per distinct value.
 *
 * Each node has the shape `{ [key.name]: value, [key.child]: [...] }`, where
 * the child array holds the grouped rows with the grouping property removed.
 * Nodes appear in first-seen order and the input rows are not modified.
 *
 * A Map indexes the nodes by grouping value, so each row is placed with a
 * constant-time lookup instead of a linear scan over the nodes built so far.
 *
 * @param {Array<Object>} data - Rows to group.
 * @param {{name: string, child: string}} key - `name` is the property to
 *   group by; `child` is the property under which grouped rows are stored.
 * @returns {Array<Object>} The grouped nodes.
 */
function generateTree(data, key) {
  const { name, child } = key;
  const nodesByValue = new Map();
  const tree = [];

  for (const row of data) {
    // Split the grouping value from the rest of the row.
    const { [name]: groupValue, ...childData } = row;

    let node = nodesByValue.get(groupValue);
    if (node === undefined) {
      // First row for this value: create its node.
      node = { [name]: groupValue, [child]: [] };
      nodesByValue.set(groupValue, node);
      tree.push(node);
    }
    node[child].push(childData);
  }

  return tree;
}

// Level 1: group the flat rows by manufacturer; the remaining fields of each
// row land under `devices`.
const manufacturers = generateTree(source, { name: 'manufacturer', child: 'devices' });

for (const manufacturer of manufacturers) {
  // Level 2: regroup each manufacturer's rows by device.
  manufacturer.devices = generateTree(manufacturer.devices, {
    name: 'device',
    child: 'models',
  });

  // Level 3: regroup each device's rows by model; what is left is the data.
  for (const device of manufacturer.devices) {
    device.models = generateTree(device.models, {
      name: 'model',
      child: 'data',
    });
  }
}

// Wrap the tree in the requested outer shape.
const destination = [{ manufacturers }];

console.log(destination);

  • Related