Home > other >  Write UTF-16 encoded CSV using Golang
Write UTF-16 encoded CSV using Golang

Time:08-30

I need to write a UTF-16 encoded CSV and I can't figure out how to. I found a lot of questions and answers about reading UTF-16 CSVs but nothing about writing them.

This is what I've tried so far:

package main

import (
    "encoding/csv"
    "fmt"
    "os"

    "golang.org/x/text/encoding/unicode"

)

// main is the question's reproduction: it creates test.csv, converts a sample
// message with convertUtf8ToUtf16LE, prints the converted value, and writes it
// twice as a single-field CSV record.
//
// NOTE(review): only the field value is converted; whatever csv.Writer adds
// around it (record terminators, quoting) is emitted by csv.Writer itself and
// is presumably not UTF-16 encoded, which would explain the garbled output.
func main() {
    csvFile, err := os.Create("test.csv")
    if err != nil {
        panic(err)
    }
    defer csvFile.Close()

    message := "weird characters: дгодг"

    // Replace the message with the encoder's output; it is still a Go string.
    message, err = convertUtf8ToUtf16LE(message)
    if err != nil {
        panic(err)
    }
    fmt.Println(message)

    // The CSV writer targets the file directly, so it sees the converted
    // string as an ordinary field value.
    csvWriter := csv.NewWriter(csvFile)
    // Deferred after csvFile.Close, so the flush runs before the file is closed.
    defer csvWriter.Flush()

    // The errors returned by Write are not checked here.
    csvWriter.Write([]string{message})
    csvWriter.Write([]string{message})
}

// convertUtf8ToUtf16LE runs message through a little-endian UTF-16 encoder
// created with the unicode.UseBOM policy and returns the encoder's result
// together with any encoding error.
//
// NOTE(review): the result is still typed as a Go string but presumably holds
// raw UTF-16 bytes (including a byte order mark per call) rather than UTF-8
// text — confirm against the golang.org/x/text/encoding/unicode documentation.
func convertUtf8ToUtf16LE(message string) (string, error) {
    encoder := unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewEncoder()
    return encoder.String(message)
}

But I get the following output in the csv:

weird characters: дгодг*矾攀椀爀搀 挀栀愀爀愀挀琀攀爀猀㨀 㐀㌄㸄㐄㌄਄

What am I doing wrong?

CodePudding user response:

What I ended up doing is I created a struct that implements io.Writer for a file but converts the input to UTF-16LE before writing:

// UTF16LEWriter pairs a destination file with a text encoder. Its Write method
// passes incoming bytes through the encoder before writing them to the file.
type UTF16LEWriter struct {
    // file receives the encoded bytes.
    file    *os.File
    // encoder converts each chunk handed to Write.
    encoder *encoding.Encoder
}

// NewUTF16LEWriter writes the UTF-16LE byte order mark (0xFF 0xFE) to file and
// returns a writer that encodes everything subsequently written to it with a
// little-endian UTF-16 encoder.
//
// The encoder is created with unicode.IgnoreBOM, presumably so that it does not
// emit a second BOM after the one written here.
//
// If the BOM cannot be written, it returns a nil writer and the write error, so
// a failed construction can never be mistaken for a usable writer.
func NewUTF16LEWriter(file *os.File) (*UTF16LEWriter, error) {
    if _, err := file.Write([]byte{0xFF, 0xFE}); err != nil { // UTF-16LE BOM
        return nil, err
    }

    return &UTF16LEWriter{
        file:    file,
        encoder: unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewEncoder(),
    }, nil
}

// Write encodes b with the writer's encoder and writes the encoded bytes to the
// underlying file, implementing io.Writer.
//
// On success it returns len(b): the io.Writer contract requires the count of
// input bytes consumed (0 <= n <= len(b)), not the larger count of encoded
// bytes that reached the file. On an encoding or file error it returns 0 and
// that error, because a partial encoded write cannot be mapped back to a
// meaningful number of input bytes.
//
// NOTE(review): each call is encoded independently, so a multi-byte UTF-8
// sequence split across two Write calls (for example at a caller's buffer
// boundary) is presumably not reassembled — confirm against the encoder's
// documentation before using this with large outputs.
func (w *UTF16LEWriter) Write(b []byte) (int, error) {
    encoded, err := w.encoder.Bytes(b)
    if err != nil {
        return 0, err
    }
    if _, err := w.file.Write(encoded); err != nil {
        return 0, err
    }
    return len(b), nil
}

Then I only need to replace the io.Writer provided by os.Create with mine and pass that to the csvWriter:

package main

import (
    "encoding/csv"
    "fmt"
    "os"

    "golang.org/x/text/encoding/unicode"

)

// main writes two identical single-field records to test.csv, routing the CSV
// output through a UTF16LEWriter so the file content is UTF-16LE encoded.
//
// Every failure panics, matching the error handling already used for file
// creation: this is a demo program with no caller to report errors to.
func main() {
    csvFile, err := os.Create("test.csv")
    if err != nil {
        panic(err)
    }
    defer csvFile.Close()

    utf16Writer, err := NewUTF16LEWriter(csvFile)
    if err != nil {
        panic(err)
    }

    csvWriter := csv.NewWriter(utf16Writer)

    message := "weird characters: дгодг"
    for i := 0; i < 2; i++ {
        if err := csvWriter.Write([]string{message}); err != nil {
            panic(fmt.Errorf("writing record %d: %w", i, err))
        }
    }

    // csv.Writer buffers its output, so errors from the underlying writer only
    // become visible after Flush; check Error explicitly instead of deferring
    // Flush and silently losing a failed write.
    csvWriter.Flush()
    if err := csvWriter.Error(); err != nil {
        panic(fmt.Errorf("flushing test.csv: %w", err))
    }
}
  • Related