Home > OS >  How to overwrite file with sequential chunks in Golang
How to overwrite file with sequential chunks in Golang

Time:04-01

How can I read a large file in chunks, process each chunk sequentially, and then write each resulting chunk back to exactly where it came from (the same position/offset in the file)?

E.g., I want to read a 1 GB file in 4096-byte chunks, do something with each chunk (such as removing special characters like !@#$...), replace the original content with the result, and then move on to the next 4096-byte chunk until the end of the file is reached.

I don't want to load the whole file into memory. The order and offset of the chunks matter a lot, and the main problem is reading sequentially from a file while overwriting chunks in that same file.


What I've done so far:

// main is the asker's incomplete attempt: it reads the file chunk by chunk and
// passes each chunk to process, but the step that writes the result back is
// still missing (see the open question at the end of the loop).
func main(){
    // NOTE(review): a failed open is only logged here; execution continues.
    file,err := os.Open("file.xt")
    if err != nil {
        log.Println(err)
    }
    chunkSize := 4096
    current := make([]byte, chunkSize)

    for {
        // Read the next chunk (up to chunkSize bytes) of the file.
        // NOTE(review): the byte count returned by Read is discarded, so the
        // whole buffer is handed to process below even after a short read.
        _, err := file.Read(current)
        if err != nil{
            if err == io.EOF {
                break
            }
            log.Fatal(err)
        }
        
        // Transform the chunk that was just read.
        processedChunk := process(current)
        
        // Asker's question: we open the same file again with O_APPEND for
        // overwriting the content, right?
        // NOTE(review): this path is "file.txt" while the read handle above
        // uses "file.xt" — confirm which one is intended. file2 is opened on
        // every iteration and is not closed anywhere in this snippet.
        file2, err := os.OpenFile("file.txt", os.O_WRONLY|os.O_APPEND, os.ModePerm)
        if err != nil {
            log.Println(err)
        }
        
        // Asker's question: how to go ahead here and overwrite the current
        // chunk in the file with processedChunk?
    }
}

// process is the per-chunk transformation hook. For now it is a placeholder
// that hands the chunk back unchanged (the very same slice, not a copy).
func process(data []byte) []byte {
    result := data
    return result
}

CodePudding user response:

Simply open the file in read-write mode, and use File.WriteAt() to write back the modified slice.

Note that File.Read() might not fill the full slice, especially if you're at the end of the file (and there's no more data), so store and use the number of read bytes it returns:

// n is the number of bytes Read reports having read into current.
n, err := file.Read(current)
// ... (code elided in this excerpt)
// Pass only the bytes that were actually read, not the whole buffer.
processedChunk := process(current[:n])

And don't forget to close the file!

Here's the complete solution:

// Open the file once in read-write mode: the same handle is used for the
// sequential reads and for writing each processed chunk back.
file, err := os.OpenFile("file.txt", os.O_RDWR, 0755)
if err != nil {
    // Nothing below can work without a valid handle, so stop right away.
    log.Fatal(err)
}
defer file.Close()

current := make([]byte, 4096)

// pos is the file offset at which the chunk currently held in current starts.
for pos := int64(0); ; {
    n, err := file.Read(current)
    if err != nil {
        if err == io.EOF {
            break
        }
        log.Fatal(err)
    }

    // Only the first n bytes are valid; Read might not fill the full slice.
    processedChunk := process(current[:n])
    // WriteAt takes an explicit offset, so the result lands where the chunk
    // was read from.
    if _, err := file.WriteAt(processedChunk, pos); err != nil {
        log.Fatal(err)
    }

    // Accumulate the offset (+=, not =) so that the next write-back targets
    // the start of the next chunk.
    pos += int64(n)
}

CodePudding user response:

package main

import (
    "io"
    "log"
    "os"
)

func main() {
    file, err := os.Open("file.xt")
    if err != nil {
        log.Println(err)
    }
    chunkSize := 4
    current := make([]byte, chunkSize)
    file2, err := os.OpenFile("file.xt", os.O_WRONLY|os.O_CREATE, os.ModePerm)
    if err != nil {
        log.Println(err)
    }
    defer func() {
        file.Close()
        file2.Close()
    }()

    var seeker int64
    for {
        // read the file in 4096 bytes of chunk
        readByteCount, err := file.ReadAt(current, seeker)
        if err != nil {
            if err == io.EOF {
                break
            }
            log.Fatal(err)
        }

        //
        processedChunk := process(current)

        // we open the same file again with O_APPEND for overwriting the content, right?
        _, err = file2.WriteAt(processedChunk, seeker)
        if err != nil {
            log.Println(err)
        }
        seeker = seeker   int64(readByteCount)

        // How to go ahead here with overwriting the processedChunk with currentChunk?
    }
}

// process returns a new slice holding the bytes of data with every ';' byte
// dropped. The result is nil when no byte is kept.
func process(data []byte) []byte {
    var kept []byte
    for i := range data {
        if data[i] == ';' {
            continue
        }
        kept = append(kept, data[i])
    }
    return kept
}

File.WriteAt() lets you write at an explicit offset, which here is the value of seeker. After each chunk, advance seeker by the number of bytes read so that the next read and write target the correct location.

  • Related