I have a CSV file I am trying to parse; however, some of the entries contain "text,seperated,by,commas". This is causing an issue when parsing, so I need to work out a way to replace only the commas inside quotation marks, and not the actual delimiters. I'm guessing the best way to do this is a regex with replaceAll, replacing each of the selected commas with another character? Thanks
CodePudding user response:
The approach I like to use here is a regex replacement with a callback function. Here we target only double-quoted terms, each of which then undergoes a second replacement that swaps its commas for spaces.
// Swap every comma for a space; applied only to the double-quoted segments matched below.
const commasToSpaces = (quoted) => quoted.replace(/,/g, " ");

// Demo input: one double-quoted segment that contains commas.
const input = "Hello world \"text,seperated,by,commas\" goodbye world.";

// The lazy `.*?` keeps each match to the shortest quoted run, so text outside quotes is untouched.
const output = input.replace(/".*?"/g, commasToSpaces);
console.log(output);
CodePudding user response:
Data in CSV format needs to be parsed properly. A cell value may contain special chars: commas, quotes, newlines. Cells containing these special chars need to be escaped, and enclosed in quotes. Read the details at https://en.wikipedia.org/wiki/Comma-separated_values
There are many packages to parse CSV data. Here is one I wrote: https://github.com/peterthoeny/parse-csv-js
Here is a simplified version of that package that properly parses CSV containing the above-mentioned special chars. It assumes `,` to separate cells and a `\n` newline to separate rows (newlines inside escaped cells are not treated as row separators). The output is an array of arrays, i.e. a grid, which is easy to traverse and use.
/**
 * Parse CSV text into a grid: an array of rows, each an array of cell strings.
 *
 * Cells are separated by `,` and rows by `\n` (any `\r` is dropped first, and
 * trailing newlines are ignored). A cell wrapped in double quotes may contain
 * commas, newlines, and doubled quotes (`""`), which decode to a single `"`.
 *
 * Approach: inside every quoted cell the special characters are temporarily
 * swapped for placeholder control characters, so the text can then be split
 * on plain `\n` and `,`; the placeholders are restored per cell afterwards.
 *
 * Limitations: input that itself contains the characters \x1D, \x1E or \x1F
 * will have them rewritten to newline, quote and comma respectively. An empty
 * string yields a single row holding one empty cell (`[['']]`).
 *
 * @param {string} csv - Raw CSV text.
 * @returns {string[][]} Parsed rows of cell values.
 */
function parseCsv(csv) {
  // Placeholders for newline, quote and comma occurring inside quoted cells.
  const nSep = '\x1D';
  const qSep = '\x1E';
  const cSep = '\x1F';
  const nSepRe = new RegExp(nSep, 'g');
  const qSepRe = new RegExp(qSep, 'g');
  const cSepRe = new RegExp(cSep, 'g');

  // A quoted cell: an opening quote at the start of input or right after a
  // comma/newline, then content (empty, or one-or-more characters matched
  // lazily), then a closing quote followed by a comma, newline or end of input.
  // The inner negative lookbehind rejects content ending in a lone quote, so
  // the first half of an escaped `""` is never mistaken for the closing quote.
  const fieldRe = /(?<=(^|[,\n]))"(|[\s\S]+?(?<![^"]"))"(?=($|[,\n]))/g;

  const masked = csv
    .replace(/\r/g, '')
    .replace(/\n+$/, '') // drop trailing newlines so they do not create empty rows
    .replace(fieldRe, (match, lead, content) =>
      content.replace(/\n/g, nSep).replace(/""/g, qSep).replace(/,/g, cSep)
    );

  // No structural newline or comma is left inside a cell, so plain splitting is safe.
  return masked.split('\n').map((line) =>
    line.split(',').map((cell) =>
      cell.replace(nSepRe, '\n').replace(qSepRe, '"').replace(cSepRe, ',')
    )
  );
}
// Sample input: the second row exercises escaped quotes, an embedded comma,
// and an embedded newline inside quoted cells.
const csv = 'A1,B1,C1\n"A ""2""","B, 2","C\n2"';
const grid = parseCsv(csv);
console.log(grid);
// expected: [ [ 'A1', 'B1', 'C1' ], [ 'A "2"', 'B, 2', 'C\n2' ] ]
Output:
[
[
"A1",
"B1",
"C1"
],
[
"A \"2\"",
"B, 2",
"C\n2"
]
]