Home > database >  How to download a zip file from a GitHub repo and read its content in memory?
How to download a zip file from a GitHub repo and read its content in memory?

Time:09-13

I have uploaded a zip file to this GitHub repo. It was created with 7-Zip's "Add to .zip" option and contains only a single file named text.txt. How can I read the content of text.txt without writing it to disk?

I'm downloading the zip to memory using curl:

    #include <curl/curl.h>

    // libcurl write callback. `userp` must point at the std::string that
    // collects the response body; every delivered chunk of `size * nmemb`
    // bytes is appended to it. Returns the number of bytes consumed, which
    // is always the full chunk.
    static size_t WriteMemoryCallback(void* contents, size_t size, size_t nmemb,
                                      void* userp) {
        const size_t byte_count = size * nmemb;
        std::string* sink = static_cast<std::string*>(userp);
        const char* bytes = static_cast<const char*>(contents);
        sink->append(bytes, byte_count);
        return byte_count;
    }
    
    std::string Download(const std::string& url) 
    {
        CURL* curl_handle;
        CURLcode res;
    
        std::string chunk;
    
        curl_global_init(CURL_GLOBAL_ALL);
    
        curl_handle = curl_easy_init();
        curl_easy_setopt(curl_handle, CURLOPT_URL, url.c_str());
        curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
        curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, &chunk);
        curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "libcurl-agent/1.0");
    
        // added options that may be required
        curl_easy_setopt(curl_handle, CURLOPT_FOLLOWLOCATION, 1L);  // redirects
        curl_easy_setopt(curl_handle, CURLOPT_HTTPPROXYTUNNEL, 1L); // corp. proxies etc.
        curl_easy_setopt(curl_handle, CURLOPT_VERBOSE, 1L); // we want it all
        // curl_easy_setopt(curl_handle, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
    
        res = curl_easy_perform(curl_handle);
    
        if(res != CURLE_OK) {
            std::cerr << "curl_easy_perform() failed: " << curl_easy_strerror(res) << '\n';
        } else {
            std::cout << chunk.size() << " bytes retrieved\n";
        }
    
        curl_easy_cleanup(curl_handle);
        curl_global_cleanup();
    
        return chunk;
    }

// Entry point of the example: fetches the test archive into memory.
int _tmain(int argc, _TCHAR* argv[])
{
    // The downloaded archive is kept as raw bytes in `data`.
    const std::string zip_url{"https://github.com/R3uan3/test/raw/main/text.zip"};
    auto data = Download(zip_url);
}

While searching for a library capable of uncompressing the zip in memory, I found this one: libzip (any library is welcome).

Searching for examples, I found this answer; however, it loads a zip from disk into memory and then reads its content.

How I could read the zip downloaded with curl that is on the string data?

When I inspect the content of data in the debugger it starts with PK. I tried passing it to zip_open, but the returned zip *z is null.

        // Asker's attempt, kept as posted. It does not work: zip_open() takes the
        // path of an archive on disk as its first argument, but `data` holds the
        // archive's bytes, so `z` comes back null.
        // Open the ZIP archive
        int err = 0;
        zip *z = zip_open(data.c_str(), 0, &err);
    
        // Look up the entry with the given name and fetch its metadata into `st`
        // (z is not checked for null and the zip_stat() result is ignored)
        const char *name = "text.txt";
        struct zip_stat st;
        zip_stat_init(&st);
        zip_stat(z, name, 0, &st);
    
        // Allocate a buffer for the uncompressed contents
        // (no matching delete[] appears in this snippet)
        char *contents = new char[st.size];
    
        // Open the entry and read st.size bytes of its contents into the buffer
        zip_file *f = zip_fopen(z, name, 0);
        zip_fread(f, contents, st.size);
        zip_fclose(f);

CodePudding user response:

I'm ignoring everything about curl in the question because we've verified that you've got the zip file stored in memory correctly.

How I could read the zip ... that is on the string data?

Since you have the whole file stored in memory, you need to create a zip_source from chunk.data() and open the archive using that zip_source - and then open the individual files in the archive.

Here's how (without error checking - you need to add that):

{
    // ...
    // Reads a zip archive whose bytes are held in `chunk` and prints the name
    // of every entry plus the size of each piece read from it.
    // Error checking is intentionally omitted here - add it before real use.
    zip_error_t ze; // for errors

    // create a zip_source from the data you've stored in memory
    // (the 0 tells libzip not to free the buffer, so `chunk` must stay alive
    // and unmodified for as long as the archive is in use)
    zip_source_t* zs = zip_source_buffer_create(chunk.data(), chunk.size(), 0, &ze);

    // open the archive from the zip_source
    zip_t* zip = zip_open_from_source(zs, ZIP_CHECKCONS | ZIP_RDONLY, &ze);

    // read how many files you've got in there
    zip_int64_t entries = zip_get_num_entries(zip, 0);

    std::cout << entries << '\n';

    // loop over the entries in the archive
    // (the increment must be `++idx`; without it the loop never advances)
    for(zip_int64_t idx = 0; idx < entries; ++idx) {
        std::cout << zip_get_name(zip, idx, ZIP_FL_ENC_STRICT) << '\n';

        // open the file at this index
        zip_file_t* fp = zip_fopen_index(zip, idx, 0);

        // process the file in pieces of at most sizeof buf bytes;
        // the loop ends when zip_fread() returns 0 or a negative value
        zip_int64_t len;
        char buf[1024];
        while((len = zip_fread(fp, buf, sizeof buf)) > 0) {
            std::cout << "read " << len << " bytes\n";
            // do something with the `len` bytes you have in `buf`
        }
        zip_fclose(fp); // close this file
    }
    zip_close(zip); // close the whole archive
}
  • Related