I am trying to implement a way to send and receive files using the socket library, but when I run the code, I keep getting the error "UnicodeDecodeError: 'utf-8' codec can't decode byte 0xff in position 0: invalid start byte".
Sender code:
import os, socket
# Sender (the asker's original, failing version): connects to localhost:9999 and
# transmits a file name, a file size, the file contents and a b"<DATA_END>" marker.
# NOTE(review): the indentation of this listing was lost; presumably the lines
# following `with open(...)` form its body -- confirm against the original post.
client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
client.connect(("localhost", 9999))
with open("image.jpg", "rb") as file:
# NOTE(review): the size is taken from "send_file.txt", not from the "image.jpg"
# that was opened above.
file_size = os.path.getsize("send_file.txt")
# The name and the size are sent back to back; no separator is written between
# them, or between the size and the file contents that follow.
client.send("image.jpg".encode())
client.send(str(file_size).encode())
# The whole file is read into memory and sent, then the end marker.
data = file.read()
client.sendall(data)
client.send(b"<DATA_END>")
client.close()
Receiver code:
import socket, tqdm
# Receiver (the asker's original, failing version): accepts one connection on
# localhost:9999, reads a file name and a size, then loops over recv() until it
# sees the b"<DATA_END>" marker and writes to the named file.
# NOTE(review): the indentation of this listing was lost, so which statements
# sit inside the `with`, `while` and `else` bodies cannot be told from here.
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.bind(("localhost", 9999))
server.listen()
client, addr = server.accept()
# Whatever the first recv(1024) returns is decoded as the file name.
file_name = client.recv(1024).decode()
print(file_name)
# NOTE(review): recv(1024) returns whatever bytes are available, so this read may
# already contain file contents; decoding those as UTF-8 is presumably what
# raises the reported UnicodeDecodeError.
file_size = client.recv(1024).decode()
print(file_size)
with open(file_name, "wb") as file:
file_bytes = b""
done = False
progress = tqdm.tqdm(unit="B", unit_scale=True, unit_divisor=1000, total=int(file_size))
while not done:
data = client.recv(1024)
# The marker test looks at file_bytes (what was stored on an earlier pass),
# not at the chunk that was just received.
if file_bytes[-10:] == b"<DATA_END>":
done = True
else:
# NOTE(review): plain assignment replaces the stored bytes with the newest
# chunk instead of appending to them.
file_bytes = data
# The bar is advanced by a fixed 1024, not by len(data).
progress.update(1024)
file.write(file_bytes)
client.close()
server.close()
CodePudding user response:
You assume that sockets preserve `send` boundaries on the receiving side, but that is not how a stream-oriented protocol like TCP works. `send` may buffer data before sending it, or split a single send across multiple low-level data link packets, and `recv` just takes whatever data is currently available. You need some other mechanism to decide when useful chunks of data have arrived.
A protocol that would work well in your case is to send a header in which each field is terminated by a newline. Neither the filename nor the size contains a newline, so that makes an easy demarcation. The receiver reads up to each newline boundary until it has the filename and the size, and can then read exactly `size` bytes for the payload.
client.py
import os, socket
# client.py -- send one file to the receiver listening on localhost:9999.
#
# Wire format: <filename>\n<size>\n<data>
# The file name and the decimal size are newline-terminated UTF-8 text, which
# gives the receiver unambiguous header boundaries inside the TCP byte stream.
# The raw file contents follow the second newline.
filename = "image.jpg"
chunk_size = 64 * 1024

# The `with` blocks close the socket and the file even if connecting or
# sending raises.
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as client:
    client.connect(("localhost", 9999))
    with open(filename, "rb") as file:
        file_size = os.path.getsize(filename)
        # protocol <filename>\n<size>\n<data>
        client.sendall(filename.encode("utf-8") + b"\n")
        client.sendall(str(file_size).encode("utf-8") + b"\n")
        # Stream the payload chunk by chunk instead of loading the whole file
        # into memory first.
        while True:
            data = file.read(chunk_size)
            if not data:
                break
            client.sendall(data)
server.py
def recv_to_newline(s):
    """Read bytes from socket *s* up to the next newline.

    The socket is read one byte at a time so that nothing beyond the newline
    is consumed; whatever follows stays in the stream for the next reader.

    Args:
        s: a connected socket-like object providing ``recv(n)``.

    Returns:
        The bytes received before the newline (the newline itself is not
        included), or ``None`` if the peer closed the connection before a
        newline arrived. Bytes read before the close are discarded.
    """
    buf = bytearray()
    while True:
        c = s.recv(1)
        if not c:
            # socket closed
            return None
        if c == b"\n":
            return bytes(buf)
        buf += c
# server.py (main script) -- accept one connection on localhost:9999 and store
# the file it sends using the <filename>\n<size>\n<data> protocol.
#
# The statements below use os, socket and tqdm, which this listing never
# imported.
import os
import socket

import tqdm

# The `with` blocks close both sockets even if receiving fails part-way.
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server:
    server.bind(("localhost", 9999))
    server.listen()
    client, addr = server.accept()
    with client:
        name_field = recv_to_newline(client)
        if name_field is None:
            raise ConnectionError("connection closed before the file name arrived")
        # The name is supplied by the peer: keep only its last path component
        # so it cannot point outside the current directory.
        file_name = os.path.basename(name_field.decode("utf-8"))
        print(file_name)

        # The size is newline-terminated as well, so it must be read up to the
        # newline. A plain recv(1024) may also return payload bytes, which
        # then cannot be decoded or parsed as a number.
        size_field = recv_to_newline(client)
        if size_field is None:
            raise ConnectionError("connection closed before the file size arrived")
        file_size = int(size_field.decode("utf-8"))
        if file_size < 0:
            raise ValueError("negative file size in header")
        print(file_size)

        error = False
        with open(file_name, "wb") as file, tqdm.tqdm(
            unit="B", unit_scale=True, unit_divisor=1000, total=file_size
        ) as progress:
            # file_size counts down the bytes still expected; never ask for
            # more than that, so nothing past this file's payload is consumed.
            while file_size:
                data = client.recv(min(1024, file_size))
                if not data:
                    # The peer closed the connection before sending everything.
                    print("Error: Truncated recieve")
                    error = True
                    break
                file.write(data)
                progress.update(len(data))
                file_size -= len(data)
        # Remove the incomplete file only after it has been closed.
        if error:
            os.remove(file_name)