I am trying to convert an xlsx file to csv by doing the following:
public static bool saveAsCsv(string excelFilePath, string destinationCsvPath)
{
Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
using (var stream = new FileStream(excelFilePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
{
IExcelDataReader? reader = null;
if (excelFilePath.EndsWith(".xls"))
{
reader = ExcelReaderFactory.CreateBinaryReader(stream);
}
else if (excelFilePath.EndsWith(".xlsx"))
{
reader = ExcelReaderFactory.CreateOpenXmlReader(stream);
}
if (reader == null)
return false;
var ds = reader.AsDataSet(new ExcelDataSetConfiguration()
{
ConfigureDataTable = (tableReader) => new ExcelDataTableConfiguration()
{
UseHeaderRow = false
}
});
var csvContent = string.Empty;
int row_no = 0;
while (row_no < ds.Tables[0].Rows.Count)
{
var arr = new List<string>();
for (int i = 0; i < ds.Tables[0].Columns.Count; i )
{
#pragma warning disable CS8604 // Possible null reference argument.
arr.Add(ds.Tables[0].Rows[row_no][i].ToString());
#pragma warning restore CS8604 // Possible null reference argument.
}
row_no ;
csvContent = string.Join(",", arr) "\n";
}
StreamWriter csv = new StreamWriter(destinationCsvPath, false);
csv.Write(csvContent);
csv.Close();
return true;
}
}
This does work, however, the issue I am having is that when it reaches a field that has 2 values separated by commas, it counts them as 2 separate fields.
So for example:
Instead of having:
Test A, test B in 1 field it has them in separate fields, what do I need to change so that this doesn't happen?
CodePudding user response:
Wrap the values in quotes and escape any quotes in the values with another quote:
csvContent = string.Join(",", arr.Select(s => $"\"{s.Replace("\"", "\"\"")}\"") "\n";
Alternatively, leave that line as it was and make the modifications when adding the values to arr
:
arr.Add($"\"{ds.Tables[0].Rows[row_no][i].ToString().Replace("\"", "\"\"")}\"");
You could streamline that code quite a bit, once you have your DataSet
:
var table = ds.Tables[0];
var lines = table.Rows
.Cast<DataRow>()
.Select(dr => string.Join(",",
dr.ItemArray
.Select(o => $"\"{o.ToString().Replace("\"", "\"\"")}\"")));
File.WriteAllLines(destinationCsvPath, lines);
Note that you can replace table.Rows.Cast<DataRow>
with table.AsEnumerable()
.