I looked at several of the suggested solutions but none seemed to rise to this confounding data formatting challenge.
I have a huge JSON file (over 100k rows) containing massive amounts of duplicated data, with every record stored as a top-level object. Here's an example:
[
{
"manufacturer":"Samsung",
"device":"Galaxy A32 5G",
"model":"SM-A326B",
"chipset":"Mediatek MT6853V/NZA",
"date":"2022-01-01",
"fw_id":"A326BXXS4AVA1",
"android":"R(Android 11)",
"known_passcode":false,
"afu":false,
"bfu":false,
"bruteforce":false
},
{
"manufacturer":"Samsung",
"device":"Galaxy A32 5G",
"model":"SM-A326U",
"chipset":"Mediatek MT6853V/NZA",
"date":"2021-03-01",
"fw_id":"A326USQU1AUD4",
"android":"R(Android 11)",
"known_passcode":true,
"afu":false,
"bfu":true,
"bruteforce":true
},
{
"manufacturer":"Samsung",
"device":"Galaxy A32 5G",
"model":"SM-A326U1",
"chipset":"Mediatek MT6853V/NZA",
"date":"2021-09-01",
"fw_id":"A326U1UEU5AUJ2",
"android":"R(Android 11)",
"known_passcode":true,
"afu":false,
"bfu":true,
"bruteforce":true
},
{
"manufacturer":"LGE",
"device":"LG K31",
"model":"LGL355DL",
"chipset":"Mediatek MT6762",
"date":"unknown",
"fw_id":"L355DL10l",
"android":"unknown",
"known_passcode":false,
"afu":false,
"bfu":false,
"bruteforce":false
}
]
This needs to be organized so that data points like manufacturer, device, model are not duplicated hundreds of times.
Btw, here's a JSFiddle to play with: https://jsfiddle.net/xpancom/Lq7duahv/
Ideally, the JSON format would be the following:
[
{
"manufacturers": [
{
"manufacturer": "Samsung",
"devices": [
{
"device": "Galaxy A32 5G",
"models": [
{
"model": "SM-A326B",
"data": [
{
"chipset": "Mediatek MT6853V/NZA",
"date": "2022-01-01",
"fw_id": "A326BXXS4AVA1",
"android": "R(Android 11)",
"known_passcode": false,
"afu": false,
"bfu": false,
"bruteforce": false
},
{
"chipset": "Mediatek MT6853V/NZA",
"date": "2021-09-01",
"fw_id": "A326BXXU3AUH7",
"android": "R(Android 11)",
"known_passcode": true,
"afu": false,
"bfu": true,
"bruteforce": true
}
]
},
{
"model": "SM-A326U1",
"data": [
{
"chipset": "Mediatek MT6853V/NZA",
"date": "2021-09-01",
"fw_id": "A326U1UEU5AUJ2",
"android": "R(Android 11)",
"known_passcode": true,
"afu": false,
"bfu": true,
"bruteforce": true
}
]
}
]
}
]
},
{
"manufacturer": "LGE",
"devices": [
{
"device": "LG K31",
"models": [
{
"model": "LGL355DL",
"data": [
{
"chipset": "Mediatek MT6762",
"date": "unknown",
"fw_id": "L355DL10l",
"android": "unknown",
"known_passcode": false,
"afu": false,
"bfu": false,
"bruteforce": false
}
]
}
]
}
]
}
]
}
]
Working in React, here's what I've got so far in trying to massage this data:
// Flat input rows. NOTE(review): `data` is not defined in this snippet; it must already be in scope.
const source = data;
// Receives the regrouped entries built by the helpers below.
const destination = [];
const classifiedTokens = []; // device names that have already been grouped
const classifiedTokensModel = []; // model names that have already been grouped
/**
 * Returns every row belonging to the next device that has not been handled yet.
 *
 * The chosen device name is appended to `classifiedTokens`, so repeated calls
 * walk through the distinct devices in first-seen order.
 *
 * @param {Array<Object>} source - Flat rows, each carrying a `device` property.
 * @returns {Array<Object>|null} Rows of the newly chosen device, or `null`
 *   once no unhandled device is left.
 */
const getNextTokenArray = (source) => {
  let currentDevice = null;
  const matchingRows = [];

  source.forEach((row) => {
    const device = row['device'];

    // Claim the first device that has not been recorded yet.
    if (!currentDevice && !classifiedTokens.includes(device)) {
      currentDevice = device;
      classifiedTokens.push(currentDevice);
    }

    if (currentDevice && currentDevice === device) {
      matchingRows.push(row);
    }
  });

  if (!currentDevice) {
    return null;
  }
  return matchingRows;
};
/**
 * Splits one device group into per-model groups and appends each of them to
 * `destination`.
 *
 * Every model found in the group is emitted. Previously only the first
 * unrecorded model was emitted per call, which dropped the remaining models
 * of the device.
 *
 * @param {{device: string, data: Array<Object>}} tokenObject - Device group as
 *   built by addToDestination: the device name plus its rows.
 * @returns {null|undefined} `null` when nothing was appended (no rows, or every
 *   model was already listed in `classifiedTokensModel`), otherwise `undefined`.
 */
const getNextTokenArrayModel = (tokenObject) => {
  const tokenObjectDevice = tokenObject['device'];
  const tokenObjectData = tokenObject['data'];
  // A Map keeps insertion order, so models are emitted in first-seen order.
  const groupsByModel = new Map();

  tokenObjectData.forEach((element) => {
    const model = element['model'];
    let group = groupsByModel.get(model);

    if (!group) {
      // A model already emitted by an earlier call stays skipped.
      if (classifiedTokensModel.includes(model)) return;
      classifiedTokensModel.push(model);
      group = { device: tokenObjectDevice, model, data: [] };
      groupsByModel.set(model, group);
    }

    group.data.push({
      manufacturer: element.manufacturer,
      chipset: element.chipset,
      date: element.date,
      fw_id: element.fw_id,
      android: element.android,
      // Raw rows use `known_passcode`; rows relayed by addToDestination carry
      // the value under `knownPasscode`. Accept either spelling.
      known_passcode:
        element.known_passcode !== undefined
          ? element.known_passcode
          : element.knownPasscode,
      afu: element.afu,
      bfu: element.bfu,
      bruteforce: element.bruteforce,
    });
  });

  if (groupsByModel.size === 0) return null;

  groupsByModel.forEach((group) => {
    destination.push(group);
  });
};
/**
 * Bundles the rows of one device into a single group and passes that group to
 * getNextTokenArrayModel, which splits it by model.
 *
 * @param {Array<Object>} tokenArray - Rows that share the same `device`.
 *   An empty array is ignored.
 * @returns {undefined}
 */
const addToDestination = (tokenArray) => {
  if (tokenArray.length === 0) return;

  const res = {
    device: tokenArray[0]['device'],
    data: tokenArray.map((element) => ({
      manufacturer: element.manufacturer,
      model: element.model,
      chipset: element.chipset,
      date: element.date,
      fw_id: element.fw_id,
      android: element.android,
      // The raw rows spell this field `known_passcode`; reading `knownPasscode`
      // alone always produced undefined. The output key is kept unchanged.
      knownPasscode:
        element.known_passcode !== undefined
          ? element.known_passcode
          : element.knownPasscode,
      afu: element.afu,
      bfu: element.bfu,
      bruteforce: element.bruteforce,
    })),
  };

  // Group the collected rows by model within this device.
  getNextTokenArrayModel(res);
};
// Drain the source one device at a time; getNextTokenArray returns null once
// every device has been handled.
let nextTokenArray = getNextTokenArray(source);
while (nextTokenArray) {
  addToDestination(nextTokenArray);
  nextTokenArray = getNextTokenArray(source);
}

// Render the regrouped data as pretty-printed JSON inside #root.
setTimeout(() => {
  const root = document.getElementById('root');
  const pre = document.createElement('pre');
  // textContent keeps any markup characters in the data from being parsed as HTML.
  pre.textContent = JSON.stringify(destination, null, 2);
  root.innerHTML = '';
  root.appendChild(pre);
}, 1000);
};
And here's the JSFiddle again: https://jsfiddle.net/xpancom/Lq7duahv/
Who can smash this data formatting dilemma?
CodePudding user response:
This answer is not React specific, but one approach would be to use array.reduce() to transform each level/node of the structure as shown in the code snippet below.
// Sample input in the flat source shape: every row repeats manufacturer, device
// and model. The first two rows are identical.
const source = [
{
manufacturer: 'Samsung',
device: 'Galaxy A32 5G',
model: 'SM-A326B',
chipset: 'Mediatek MT6853V/NZA',
date: '2022-01-01',
fw_id: 'A326BXXS4AVA1',
android: 'R(Android 11)',
known_passcode: false,
afu: false,
bfu: false,
bruteforce: false,
},
{
manufacturer: 'Samsung',
device: 'Galaxy A32 5G',
model: 'SM-A326B',
chipset: 'Mediatek MT6853V/NZA',
date: '2022-01-01',
fw_id: 'A326BXXS4AVA1',
android: 'R(Android 11)',
known_passcode: false,
afu: false,
bfu: false,
bruteforce: false,
},
{
manufacturer: 'Samsung',
device: 'Galaxy A32 5G',
model: 'SM-A326U',
chipset: 'Mediatek MT6853V/NZA',
date: '2021-03-01',
fw_id: 'A326USQU1AUD4',
android: 'R(Android 11)',
known_passcode: true,
afu: false,
bfu: true,
bruteforce: true,
},
{
manufacturer: 'Samsung',
device: 'Galaxy A32 5G',
model: 'SM-A326U1',
chipset: 'Mediatek MT6853V/NZA',
date: '2021-09-01',
fw_id: 'A326U1UEU5AUJ2',
android: 'R(Android 11)',
known_passcode: true,
afu: false,
bfu: true,
bruteforce: true,
},
{
manufacturer: 'LGE',
device: 'LG K31',
model: 'LGL355DL',
chipset: 'Mediatek MT6762',
date: 'unknown',
fw_id: 'L355DL10l',
android: 'unknown',
known_passcode: false,
afu: false,
bfu: false,
bruteforce: false,
},
];
/**
 * Groups flat records by one property and nests the rest of each record under
 * its group.
 *
 * Groups appear in the order their key value is first seen. The input records
 * are not modified; each child is a copy of the record without the grouping
 * property.
 *
 * @param {Array<Object>} data - Flat records to group.
 * @param {{name: string, child: string}} key - `name` is the property to group
 *   by; `child` is the property under which the grouped remainder is stored.
 * @returns {Array<Object>} One `{ [key.name]: value, [key.child]: [...] }`
 *   object per distinct value of `key.name`.
 */
function generateTree(data, key) {
  // Index groups by key value so each record is placed with a constant-time
  // lookup instead of a linear scan over the groups built so far.
  const groupsByKey = data.reduce((groups, record) => {
    // Split the grouping value from the child data.
    const { [key.name]: keyValue, ...childData } = record;
    const group = groups.get(keyValue);
    if (group) {
      group[key.child].push(childData);
    } else {
      groups.set(keyValue, { [key.name]: keyValue, [key.child]: [childData] });
    }
    return groups;
  }, new Map());

  // Map iteration follows insertion order, i.e. first-seen order of the keys.
  return [...groupsByKey.values()];
}
// Level 1: bucket the flat rows under their manufacturer.
const manufacturers = generateTree(source, { name: 'manufacturer', child: 'devices' });

// Levels 2 and 3: regroup each manufacturer's rows by device, then each
// device's rows by model, replacing the flat child lists in place.
for (const maker of manufacturers) {
  maker.devices = generateTree(maker.devices, { name: 'device', child: 'models' });

  for (const deviceEntry of maker.devices) {
    deviceEntry.models = generateTree(deviceEntry.models, { name: 'model', child: 'data' });
  }
}

// Wrap the tree in the requested top-level shape and print it.
const destination = [{ manufacturers }];
console.log(destination);