Home > Software design >  Sending n number of lines of a large file via Flask?
Sending n number of lines of a large file via Flask?

Time:03-14

So I have this really big file (3GB+) on a server that contains a list of SMILES that users would need to access.

Example:

smile id
0 CC(C)C(=O)Nc1nc2c(ncn2CC(=O)N2[C@@H]3CC[C@H]2C[C@@H](NC(=O)c2cnc[nH]2)C3)c(O)n1 ZINC001801458702
1 O=C(c1ccc(O)c([N+](=O)[O-])c1)N1CC[C@@]2(C1)CN(C(=O)[C@@H]1CC(=O)N(C3CCCC3)C1)CCO2 ZINC001781539777
2 C[C@@H]1CCc2c(C(=O)Nc3cc([C@@H]4CCCCN4C(=O)c4ccc5c(n4)NC(=O)CC5)[nH]n3)n[nH]c21 ZINC001818636963
3 O=C(CN1C(=O)C=CC1=O)N1CCC2(CCCN(C(=O)[C@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092425
4 NC(=O)c1nccnc1C(=O)N1CCC2(CCCN(C(=O)[C@@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092030

But often users will only want a small sample of the file and not the entire thing so I need a way to send just the first n lines of the file in a fast and efficient manner (i.e. not having to create a new file to send).

The way I am doing it now is to create a new file containing just those n lines and then sending it:

# Open our current file (3GB )
with open(old_file_path, "r") as old_file:
    # Open our new file and write the n lines to it
    with open(new_file_path, "w") as reduced_smile_file:
        # Write lines to new file
        for _ in range(quantity):
            # Read and store the line
            line = smile_file.readline()

            # Check if the line is empty
            # This happend when lines in the smile file are less than requested
            if len(line) == 0:
                break

            # Write to the file
            reduced_smile_file.write(line)

# Open the newly created file with the n lines
f = open(new_file_path, 'rb')

# Send the new file
response = flask.send_file(f, as_attachment=True, download_name="smile.csv")

# Delete the new file
os.remove(new_file_path)
return response

So basically what I want to know is:

Is it possible to send only the first n lines of a file via Flask?

Thank you in advance!

CodePudding user response:

I think you could use a generator that yields only the first few lines in a for-loop; once the loop finishes, the response ends and the connection is closed automatically.

Doc: Streaming Contents


Minimal working example

I use io.StringIO to simulate the file, but with a real file you could use open() and close() instead.

If you open http://127.0.0.1:5000/3 then you should get only the first 3 lines.

from flask import Flask
import io

# Sample rows standing in for the real (much larger) SMILES file.
data = '''smile id
0 CC(C)C(=O)Nc1nc2c(ncn2CC(=O)N2[C@@H]3CC[C@H]2C[C@@H](NC(=O)c2cnc[nH]2)C3)c(O)n1 ZINC001801458702
1 O=C(c1ccc(O)c([N+](=O)[O-])c1)N1CC[C@@]2(C1)CN(C(=O)[C@@H]1CC(=O)N(C3CCCC3)C1)CCO2 ZINC001781539777
2 C[C@@H]1CCc2c(C(=O)Nc3cc([C@@H]4CCCCN4C(=O)c4ccc5c(n4)NC(=O)CC5)[nH]n3)n[nH]c21 ZINC001818636963
3 O=C(CN1C(=O)C=CC1=O)N1CCC2(CCCN(C(=O)[C@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092425
4 NC(=O)c1nccnc1C(=O)N1CCC2(CCCN(C(=O)[C@@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092030'''

# Flask application object; the route decorators below register on it.
app = Flask(__name__)

def generate(quantity):
    """Yield up to `quantity` lines from the start of the data, one at a time.

    Stops as soon as the data runs out, so a `quantity` larger than the
    number of available lines does not produce a run of empty chunks.
    """
    # io.StringIO stands in for the real file here;
    # with a real file use: with open(filename, 'r') as f:
    with io.StringIO(data) as f:
        for _ in range(quantity):
            line = f.readline()
            if not line:
                # readline() returns '' only at end of data
                break
            yield line

@app.route('/')
@app.route('/<int:number>')
def index(number=1):
    """Send the first `number` lines as a CSV attachment named smile.csv."""
    # The body is a generator, so lines are produced as the response is sent.
    lines = generate(number)
    resp = app.response_class(lines, mimetype='text/csv')
    resp.headers['Content-Disposition'] = 'attachment; filename="smile.csv"'
    return resp

if __name__ == '__main__':
    # Uncomment the next line to turn on Flask's debug mode.
    #app.debug = True 
    # Start the server only when this file is run as a script.
    app.run()  

EDIT:

A version in which you can request either /end or /start/end:

from flask import Flask, send_file
import io

# Sample rows standing in for the real (much larger) SMILES file.
data = '''smile id
0 CC(C)C(=O)Nc1nc2c(ncn2CC(=O)N2[C@@H]3CC[C@H]2C[C@@H](NC(=O)c2cnc[nH]2)C3)c(O)n1 ZINC001801458702
1 O=C(c1ccc(O)c([N+](=O)[O-])c1)N1CC[C@@]2(C1)CN(C(=O)[C@@H]1CC(=O)N(C3CCCC3)C1)CCO2 ZINC001781539777
2 C[C@@H]1CCc2c(C(=O)Nc3cc([C@@H]4CCCCN4C(=O)c4ccc5c(n4)NC(=O)CC5)[nH]n3)n[nH]c21 ZINC001818636963
3 O=C(CN1C(=O)C=CC1=O)N1CCC2(CCCN(C(=O)[C@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092425
4 NC(=O)c1nccnc1C(=O)N1CCC2(CCCN(C(=O)[C@@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092030'''

# Flask application object; the route decorators below register on it.
app = Flask(__name__)

def generate(start, end):
    """Yield the lines from index `start` up to (not including) `end`.

    Indices count lines from the beginning of the data, starting at 0.
    Nothing is yielded when `end <= start` or when `start` lies past the
    end of the data; iteration stops as soon as the data runs out.
    """
    # io.StringIO stands in for the real file here;
    # with a real file use: with open(filename, 'r') as f:
    with io.StringIO(data) as f:
        # Skip the lines before `start`; bail out if the data ends first.
        for _ in range(start):
            if not f.readline():
                return

        for _ in range(end - start):
            line = f.readline()
            if not line:
                # readline() returns '' only at end of data
                break
            yield line

@app.route('/')
@app.route('/<int:end>')
@app.route('/<int:start>/<int:end>')
def index(start=0, end=1):
    """Send lines `start`..`end` as a CSV attachment named smile.csv."""
    # The body is a generator, so lines are produced as the response is sent.
    lines = generate(start, end)
    resp = app.response_class(lines, mimetype='text/csv')
    resp.headers['Content-Disposition'] = 'attachment; filename="smile.csv"'
    return resp

if __name__ == '__main__':
    # Uncomment the next line to turn on Flask's debug mode.
    #app.debug = True 
    # Start the server only when this file is run as a script.
    app.run()  

You can also pass the filename header directly to response_class via its headers argument:

def index(start=0, end=1):
    """Send lines `start`..`end` as a CSV attachment named smile2.csv.

    The Content-Disposition header is handed to the response constructor
    instead of being set on the response afterwards.
    """
    return app.response_class(
        generate(start, end),
        headers={'Content-Disposition': 'attachment; filename="smile2.csv"'},
        mimetype='text/csv',
    )

EDIT:

If you knew the positions of the lines (their byte offsets in the file), you could use read(position) instead of a for-loop with readline() - it would send the bytes from the beginning of the file up to that position.

  • Related