I have a really big file (3GB+) on a server that contains a list of SMILES strings that users need to access.
Example:
smile id
0 CC(C)C(=O)Nc1nc2c(ncn2CC(=O)N2[C@@H]3CC[C@H]2C[C@@H](NC(=O)c2cnc[nH]2)C3)c(O)n1 ZINC001801458702
1 O=C(c1ccc(O)c([N+](=O)[O-])c1)N1CC[C@@]2(C1)CN(C(=O)[C@@H]1CC(=O)N(C3CCCC3)C1)CCO2 ZINC001781539777
2 C[C@@H]1CCc2c(C(=O)Nc3cc([C@@H]4CCCCN4C(=O)c4ccc5c(n4)NC(=O)CC5)[nH]n3)n[nH]c21 ZINC001818636963
3 O=C(CN1C(=O)C=CC1=O)N1CCC2(CCCN(C(=O)[C@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092425
4 NC(=O)c1nccnc1C(=O)N1CCC2(CCCN(C(=O)[C@@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092030
But users will often want only a small sample of the file rather than the entire thing, so I need a way to send just the first n lines of the file quickly and efficiently (i.e., without having to create a new file to send).
The way I am doing it now is to create a new file containing just those n lines and then send it:
# Open our current file (3GB )
with open(old_file_path, "r") as old_file:
# Open our new file and write the n lines to it
with open(new_file_path, "w") as reduced_smile_file:
# Write lines to new file
for _ in range(quantity):
# Read and store the line
line = smile_file.readline()
# Check if the line is empty
# This happend when lines in the smile file are less than requested
if len(line) == 0:
break
# Write to the file
reduced_smile_file.write(line)
# Open the newly created file with the n lines
f = open(new_file_path, 'rb')
# Send the new file
response = flask.send_file(f, as_attachment=True, download_name="smile.csv")
# Delete the new file
os.remove(new_file_path)
return response
So basically what I want to know is:
Is it possible to send only the first n lines of a file via Flask?
Thank you in advance!
CodePudding user response:
I think you could use a generator that uses a `for` loop to `yield` only the first few lines; once the loop finishes, the response ends and the connection is closed automatically.
Doc: Streaming Contents
Minimal working example
I use `io.StringIO` to simulate a file, but you could use `open()` and `close()` instead.
If you open http://127.0.0.1:5000/3 you should get only the first 3 lines.
from flask import Flask
import io
# Sample rows standing in for the real SMILES file: a header line followed by
# "index smile id" records. Row 1 uses the charged nitro group "[N+](=O)[O-]".
data = '''smile id
0 CC(C)C(=O)Nc1nc2c(ncn2CC(=O)N2[C@@H]3CC[C@H]2C[C@@H](NC(=O)c2cnc[nH]2)C3)c(O)n1 ZINC001801458702
1 O=C(c1ccc(O)c([N+](=O)[O-])c1)N1CC[C@@]2(C1)CN(C(=O)[C@@H]1CC(=O)N(C3CCCC3)C1)CCO2 ZINC001781539777
2 C[C@@H]1CCc2c(C(=O)Nc3cc([C@@H]4CCCCN4C(=O)c4ccc5c(n4)NC(=O)CC5)[nH]n3)n[nH]c21 ZINC001818636963
3 O=C(CN1C(=O)C=CC1=O)N1CCC2(CCCN(C(=O)[C@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092425
4 NC(=O)c1nccnc1C(=O)N1CCC2(CCCN(C(=O)[C@@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092030'''
# Flask application object; the routes below are registered on it.
app = Flask(__name__)
def generate(quantity, source=None):
    """Yield at most `quantity` lines from the start of a text source.

    Args:
        quantity: Maximum number of lines to yield.
        source: Optional open text file-like object to read from. When
            omitted, an in-memory `io.StringIO` over the module-level `data`
            is used; for a real file pass e.g. `open(filename, 'r')`.

    Yields:
        Each line exactly as returned by `readline()` (including its trailing
        newline when present).

    The source is closed once the generator is exhausted or closed.
    """
    f = io.StringIO(data) if source is None else source
    # The context manager replaces the manual close() so the handle is
    # released when iteration ends, however it ends.
    with f:
        for _ in range(quantity):
            line = f.readline()
            # readline() returns '' at end of file; stop instead of yielding
            # empty chunks when fewer lines exist than were requested.
            if not line:
                break
            yield line
@app.route('/')
@app.route('/<int:number>')
def index(number=1):
    """Return the first `number` lines as a CSV attachment named smile.csv."""
    lines = generate(number)
    resp = app.response_class(lines, mimetype='text/csv')
    # Content-Disposition makes the browser download the body as a file.
    resp.headers['Content-Disposition'] = 'attachment; filename="smile.csv"'
    return resp
if __name__ == '__main__':
    # Uncomment the next line to turn on Flask debug mode.
    # app.debug = True
    app.run()
EDIT:
Version in which you can request `/end` or `/start/end`:
from flask import Flask, send_file
import io
# Sample rows standing in for the real SMILES file: a header line followed by
# "index smile id" records. Row 1 uses the charged nitro group "[N+](=O)[O-]".
data = '''smile id
0 CC(C)C(=O)Nc1nc2c(ncn2CC(=O)N2[C@@H]3CC[C@H]2C[C@@H](NC(=O)c2cnc[nH]2)C3)c(O)n1 ZINC001801458702
1 O=C(c1ccc(O)c([N+](=O)[O-])c1)N1CC[C@@]2(C1)CN(C(=O)[C@@H]1CC(=O)N(C3CCCC3)C1)CCO2 ZINC001781539777
2 C[C@@H]1CCc2c(C(=O)Nc3cc([C@@H]4CCCCN4C(=O)c4ccc5c(n4)NC(=O)CC5)[nH]n3)n[nH]c21 ZINC001818636963
3 O=C(CN1C(=O)C=CC1=O)N1CCC2(CCCN(C(=O)[C@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092425
4 NC(=O)c1nccnc1C(=O)N1CCC2(CCCN(C(=O)[C@@H]3CCc4nc(O)nc(O)c4C3)C2)CC1 ZINC001807092030'''
# Flask application object; the routes below are registered on it.
app = Flask(__name__)
def generate(start, end, source=None):
    """Yield the lines with zero-based indices `start` up to (not including) `end`.

    Args:
        start: Number of leading lines to skip.
        end: Index one past the last line to yield; nothing is yielded when
            `end <= start`.
        source: Optional open text file-like object to read from. When
            omitted, an in-memory `io.StringIO` over the module-level `data`
            is used; for a real file pass e.g. `open(filename, 'r')`.

    Yields:
        Each selected line exactly as returned by `readline()`.

    The source is closed once the generator is exhausted or closed.
    """
    f = io.StringIO(data) if source is None else source
    # The context manager releases the handle when iteration ends.
    with f:
        # Skip the first `start` lines; give up early if the source is shorter.
        for _ in range(start):
            if not f.readline():
                return
        for _ in range(end - start):
            line = f.readline()
            # readline() returns '' at end of file; stop instead of yielding
            # empty chunks when the requested range runs past the last line.
            if not line:
                break
            yield line
@app.route('/')
@app.route('/<int:end>')
@app.route('/<int:start>/<int:end>')
def index(start=0, end=1):
    """Return lines `start`..`end` as a CSV attachment named smile.csv."""
    body = generate(start, end)
    resp = app.response_class(body, mimetype='text/csv')
    # Content-Disposition makes the browser download the body as a file.
    resp.headers['Content-Disposition'] = 'attachment; filename="smile.csv"'
    return resp
if __name__ == '__main__':
    # Uncomment the next line to turn on Flask debug mode.
    # app.debug = True
    app.run()
You can also set the filename by passing `headers` to `response_class`:
def index(start=0, end=1):
    """Return lines `start`..`end` as a CSV attachment named smile2.csv.

    The Content-Disposition header is handed to the response constructor
    rather than being set on the response afterwards.
    """
    return app.response_class(
        generate(start, end),
        headers={'Content-Disposition': 'attachment; filename="smile2.csv"'},
        mimetype='text/csv',
    )
EDIT:
If you knew the positions of the lines (their offsets in the file), you could use `read(position)` instead of a `for` loop with `readline()` - it would send the bytes from the beginning of the file up to `position`.