For the past few months I've been running a Socket.IO server for my multiplayer game.
It seems to work all right, but after a day or two it crashes with these errors:
[2024-06-18 14:55:02 +0300] [250979] [INFO] Handling signal: int
[2024-06-18 14:55:02 +0300] [250981] [ERROR] Exception in worker process
Traceback (most recent call last):
  File "/usr/lib/python3.8/ssl.py", line 1019, in _create
    self.getpeername()
OSError: [Errno 107] Transport endpoint is not connected

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/gunicorn/arbiter.py", line 609, in spawn_worker
    worker.init_process()
  File "/usr/local/lib/python3.8/dist-packages/gunicorn/workers/geventlet.py", line 143, in init_process
    super().init_process()
  File "/usr/local/lib/python3.8/dist-packages/gunicorn/workers/base.py", line 142, in init_process
    self.run()
  File "/usr/local/lib/python3.8/dist-packages/gunicorn/workers/geventlet.py", line 182, in run
    a.wait()
  File "/usr/local/lib/python3.8/dist-packages/eventlet/greenthread.py", line 225, in wait
    return self._exit_event.wait()
  File "/usr/local/lib/python3.8/dist-packages/eventlet/event.py", line 131, in wait
    current.throw(*self._exc)
  File "/usr/local/lib/python3.8/dist-packages/eventlet/greenthread.py", line 265, in main
    result = function(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/gunicorn/workers/geventlet.py", line 83, in _eventlet_serve
    conn, addr = sock.accept()
  File "/usr/local/lib/python3.8/dist-packages/eventlet/greenio/base.py", line 228, in accept
    self._trampoline(fd, read=True, timeout=self.gettimeout(), timeout_exc=_timeout_exc)
  File "/usr/local/lib/python3.8/dist-packages/eventlet/greenio/base.py", line 206, in _trampoline
    return trampoline(fd, read=read, write=write, timeout=timeout,
  File "/usr/local/lib/python3.8/dist-packages/eventlet/hubs/__init__.py", line 157, in trampoline
    return hub.switch()
  File "/usr/local/lib/python3.8/dist-packages/eventlet/hubs/hub.py", line 310, in switch
    return self.greenlet.switch()
  File "/usr/local/lib/python3.8/dist-packages/gunicorn/workers/geventlet.py", line 102, in _eventlet_stop
    client.wait()
  File "/usr/local/lib/python3.8/dist-packages/eventlet/greenthread.py", line 225, in wait
    return self._exit_event.wait()
  File "/usr/local/lib/python3.8/dist-packages/eventlet/event.py", line 131, in wait
    current.throw(*self._exc)
  File "/usr/local/lib/python3.8/dist-packages/eventlet/greenthread.py", line 265, in main
    result = function(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/gunicorn/workers/geventlet.py", line 156, in handle
    client = ssl_wrap_socket(client, self.cfg)
  File "/usr/local/lib/python3.8/dist-packages/gunicorn/sock.py", line 229, in ssl_wrap_socket
    return ssl_context(conf).wrap_socket(sock,
  File "/usr/local/lib/python3.8/dist-packages/eventlet/green/ssl.py", line 436, in wrap_socket
    return GreenSSLSocket(sock, *a, _context=self, **kw)
  File "/usr/local/lib/python3.8/dist-packages/eventlet/green/ssl.py", line 65, in __new__
    ret = _original_sslsocket._create(
  File "/usr/lib/python3.8/ssl.py", line 1031, in _create
    notconn_pre_handshake_data = self.recv(1)
  File "/usr/lib/python3.8/ssl.py", line 1257, in recv
    return super().recv(buflen, flags)
ConnectionResetError: [Errno 104] Connection reset by peer
And here is the server code:
"""Socket.IO signalling relay and HTTP lobby registry for a multiplayer game.

Socket.IO events handled here:

* ``MakeRoom``   -- register the calling connection as host or peer of a lobby.
* ``SendOffer``  -- forward an offer string to a registered user.
* ``SendICE``    -- forward an ICE string to a registered user.
* ``SendAnswer`` -- forward an answer string to the lobby's host.
* ``disconnect`` -- forget the connection and tell peers when a host leaves.

The HTTP route ``/`` manages the lobby list through a numeric ``Action``
form field (0 = create, 1 = update, 2 = delete, 3 = list, 4 = keep-alive).

All state lives in module-level dictionaries, so it is per process and is
lost on restart.
"""
import logging
import ssl
import time
import uuid
from threading import Lock

from flask import Flask, request, jsonify, render_template
from flask_socketio import SocketIO

app = Flask(__name__)
# cors_allowed_origins='*' accepts Socket.IO connections from any origin.
# NOTE(review): host/port are normally arguments of socketio.run(); confirm
# they have any effect when passed to the constructor.
socketio = SocketIO(app, host='0.0.0.0', port=5000, cors_allowed_origins='*')

# application_id -> list of {"lobby_id", "lobby_info", "last_update"} dicts.
applications = {}

#app.logger.setLevel(logging.DEBUG)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# NOTE(review): `context` is not referenced anywhere else in this listing.
# When the app is started through gunicorn with --certfile/--keyfile, TLS is
# presumably handled by gunicorn itself -- confirm before removing.
context = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
context.load_cert_chain('/etc/ssl/chained.pem', '/etc/ssl/private/private-unencrypted.key')

# (application_id, lobby_id, hosted, user_id) -> Socket.IO session id.
connected_clients = {}
clients_lock = Lock()

# Separator between the fields of SendOffer / SendICE / SendAnswer payloads.
FIELD_SEPARATOR = ';/;/;/'
# A lobby whose last keep-alive is older than this many seconds is dropped.
LOBBY_TIMEOUT_SECONDS = 15


def _split_fields(data, separator, count):
    """Return the first *count* fields of *data* split on *separator*.

    Returns ``None`` when *data* is not a string or holds fewer than *count*
    fields, so handlers can reject malformed messages instead of raising.
    Fields beyond *count* are ignored.
    """
    if not isinstance(data, str):
        return None
    parts = data.split(separator)
    if len(parts) < count:
        return None
    return parts[:count]


@socketio.on('connect')
def handle_connect():
    """Log every new Socket.IO connection."""
    print("Connection established!")


@socketio.on('MakeRoom')
def make_room(data):
    """Register the caller as host or peer of a lobby.

    *data* is ``"<application_id>/<lobby_id>/<hosted>"``; ``hosted`` is ``0``
    for a peer and anything else for a host (only ``1`` is found by
    ``find_host``).  A peer is registered only if the lobby already has a
    host; otherwise it receives ``HostNotFound``.  The host is told about a
    new peer through ``PeerConnected`` carrying the peer's generated user id.
    """
    fields = _split_fields(data, '/', 3)
    if fields is None:
        logger.warning("MakeRoom: malformed payload from %s: %r", request.sid, data)
        return
    aplication_id, loby_id, hosted_raw = fields
    try:
        hosted = int(hosted_raw)
    except ValueError:
        logger.warning("MakeRoom: non-numeric host flag from %s: %r", request.sid, hosted_raw)
        return

    user_id = uuid.uuid4().hex
    client_id = request.sid
    key = (aplication_id, loby_id, hosted, user_id)

    if hosted == 0:
        print("peer connecting")
        check_lobbies_periodically()
        host_id, host_client_id = find_host(aplication_id, loby_id)
        if host_client_id is None:
            socketio.emit("HostNotFound", "", room=client_id)
            print("Host not found for user")
            return
        # Register before notifying the host, so a message the host sends in
        # response can already be routed to this peer.
        connected_clients[key] = client_id
        #print(f"host_client_id : {host_client_id}" )
        socketio.emit('PeerConnected', user_id, room=host_client_id)
    else:
        print(f"Created host : {client_id}")
        connected_clients[key] = client_id
    print(f"got data : {aplication_id} + {loby_id} + {client_id}")


def _relay_signal(data, event, label, handler_name):
    """Forward one signalling string to the user named in *data*.

    *data* is ``application_id;/;/;/lobby_id;/;/;/target_user_id;/;/;/payload``.
    The target receives *event* with ``payload + ";;//" + sender_user_id``.
    Returns the status string that the calling handler hands back to the
    client.
    """
    fields = _split_fields(data, FIELD_SEPARATOR, 4)
    if fields is None:
        logger.warning("%s: malformed payload from %s", handler_name, request.sid)
        return f'Malformed payload in {handler_name}'
    aplication_id, loby_id, guid, payload = fields

    client_id = get_client_id(aplication_id, loby_id, guid)
    myguid = get_guid(aplication_id, loby_id, request.sid)
    print(f"sending {label} from {request.sid}|{myguid} to {client_id}|{guid}")
    if not client_id:
        return f'Client not found for the given IDs in {handler_name}'
    if myguid is None:
        # The sender never completed MakeRoom for this lobby; concatenating
        # None below would raise TypeError.
        return f'Sender not registered in {handler_name}'

    message = payload + ";;//" + myguid
    socketio.emit(event, message, room=client_id)
    return f'Message sent to Unity client with Application ID: {aplication_id} and Lobby ID: {loby_id}'


@socketio.on('SendOffer')
def send_offer(data):
    """Forward an offer to the target user as ``OfferReceived``."""
    return _relay_signal(data, 'OfferReceived', 'Offer', 'send_offer')


@socketio.on('SendICE')
def send_ice(data):
    """Forward an ICE string to the target user as ``IceGot``."""
    #print("send_ice : " + data)
    return _relay_signal(data, 'IceGot', 'ice', 'send_ice')


@socketio.on('disconnect')
def handle_disconnect():
    """Forget every registration of the disconnecting session.

    All entries belonging to the session id are removed (a session that
    called ``MakeRoom`` more than once has several).  For each lobby the
    session was hosting, the remaining peers receive ``HostDisconnected``.
    """
    print("Received disconnect")
    sid = request.sid
    with clients_lock:
        stale_keys = [key for key, client_id in connected_clients.items()
                      if client_id == sid]
        for key in stale_keys:
            connected_clients.pop(key, None)

    hosted_lobbies = {(app_id, lobby_id)
                      for app_id, lobby_id, host, _user_id in stale_keys
                      if host == 1}
    for app_id, lobby_id in hosted_lobbies:
        host_disconnected(app_id, lobby_id)


def host_disconnected(appId, lobbyId):
    """Emit ``HostDisconnected`` to every peer registered in the lobby."""
    clients_to_disconnect = [
        client_id
        for (app_id, lobby_id, hosted, _user_id), client_id
        in connected_clients.copy().items()
        if app_id == appId and lobby_id == lobbyId and hosted == 0
    ]
    for client_id in clients_to_disconnect:
        socketio.emit('HostDisconnected', "", room=client_id)
        #socketio.disconnect(client_id)


@socketio.on('SendAnswer')
def handle_post_request(data):
    """Forward an answer to the lobby's host as ``AnswerCreated``.

    *data* is ``application_id;/;/;/lobby_id;/;/;/guid;/;/;/sdp``.  The host
    receives ``sdp + ";/;/;/" + sender_user_id``.  When the lobby has no
    host, the sender (and only the sender) receives ``HostNotFound``.
    """
    #print("Sending answer:" + data)
    fields = _split_fields(data, FIELD_SEPARATOR, 4)
    if fields is None:
        logger.warning("SendAnswer: malformed payload from %s", request.sid)
        return
    aplication_id, loby_id, guid, sdp = fields

    host_id, client_id = find_host(aplication_id, loby_id)
    if client_id is None:
        # Reply to the sender: room=None would broadcast to every client.
        socketio.emit("HostNotFound", "", room=request.sid)
        print("Host not found for user on answer creation")
        return

    myguid = get_guid(aplication_id, loby_id, request.sid)
    if myguid is None:
        logger.warning("SendAnswer: sender %s is not registered in lobby %s", request.sid, loby_id)
        return
    print(f"sending answer from {request.sid}|{myguid} to {client_id}|{guid}")
    message = sdp + ";/;/;/" + myguid
    socketio.emit('AnswerCreated', message, room=client_id)


def get_guid(application_id, lobby_id, client_id):
    """Return the user id registered for a session id in a lobby, or None."""
    # Iterate a snapshot so a concurrent (dis)connect cannot resize the dict
    # under the loop.
    for key, client_ids in tuple(connected_clients.items()):
        if key[0] == application_id and key[1] == lobby_id and client_ids == client_id:
            return key[3]
    return None


def get_client_id(application_id, lobby_id, user_id):
    """Return the session id registered for a user id in a lobby, or None."""
    for key, client_id in tuple(connected_clients.items()):
        if key[0] == application_id and key[1] == lobby_id and key[3] == user_id:
            return client_id
    return None


def find_host(application_id, lobby_id):
    """Return ``(user_id, session_id)`` of the lobby's host.

    Returns ``(None, None)`` when no entry with ``hosted == 1`` exists for
    the lobby.
    """
    for key, client_id in tuple(connected_clients.items()):
        print(f"searching for host keys : {key[0]} == {application_id} {key[1]} == {lobby_id} and host == {key[2]}")
        if key[0] == application_id and key[1] == lobby_id and key[2] == 1:
            user_id = key[3]
            return user_id, client_id
    return None, None


# Function to add a new application
def add_application(application_id):
    """Create an empty lobby list for the application if it has none."""
    if application_id not in applications:
        applications[application_id] = []


# Function to add a lobby to an application
def add_lobby(application_id, lobby_id, lobby_info):
    """Append a lobby stamped with the current time.

    Returns 1 on success and 2 when the application is unknown.
    """
    if application_id in applications:
        applications[application_id].append(
            {"lobby_id": lobby_id, "lobby_info": lobby_info, "last_update": int(time.time())}
        )
        return 1
    else:
        return 2


# Function to delete a lobby from an application
def delete_lobby(application_id, lobby_id):
    """Remove the first lobby with the given id.

    Returns 0 on success, 1 when the lobby is not found and 2 when the
    application is unknown.
    """
    if application_id in applications:
        lobbies = applications[application_id]
        for lobby in lobbies:
            if lobby["lobby_id"] == lobby_id:
                # Safe although we are iterating: we return immediately.
                lobbies.remove(lobby)
                return 0
        return 1
    else:
        return 2


# Function to update lobby information
def update_lobby(application_id, lobby_id, new_lobby_info):
    """Replace the ``lobby_info`` of the first lobby with the given id.

    Returns 0 on success, 1 when the lobby is not found and 2 when the
    application is unknown.
    """
    if application_id in applications:
        lobbies = applications[application_id]
        for lobby in lobbies:
            if lobby["lobby_id"] == lobby_id:
                lobby["lobby_info"] = new_lobby_info
                return 0
        return 1
    else:
        return 2


def _lobby_result(retValue, application_id, lobby_id, success_message):
    """Translate a 0/1/2 code from update_lobby/delete_lobby into a response."""
    if retValue == 2:
        return "Application not found", 500
    if retValue == 1:
        return f"Lobby {lobby_id} not found in application {application_id}.", 500
    return success_message, 200


@app.route('/', methods=['GET', 'POST'])
def create_or_update_lobby():
    """Dispatch a lobby request according to the ``Action`` form field.

    Actions: 0 create (returns the new ``lobby_id`` as JSON), 1 update,
    2 delete, 3 list the application's lobbies as JSON, 4 refresh a lobby's
    ``last_update`` time.  A missing or non-numeric ``Action``, ``-1``, or an
    unknown value yields HTTP 400.
    """
    try:
        Action = int(request.form.get('Action'))
    except (TypeError, ValueError):
        # int(None) raises TypeError, int("abc") raises ValueError.
        form_data = request.form.to_dict()
        print(f"Received request form without Action key : {form_data}")
        Action = -1
    if Action == -1:
        return "Action key not found in arguments", 400

    application_id = request.form.get('application_id')
    add_application(application_id)
    lobby_data = request.form.get('lobby_data')

    if Action == 0:
        lobby_id = uuid.uuid4().hex
        retValue = add_lobby(application_id, lobby_id, lobby_data)
        if retValue == 2:
            return "Application not found", 500
        return jsonify({'lobby_id': lobby_id}), 200

    lobby_id = request.form.get('lobby_id')
    if Action == 1:
        retValue = update_lobby(application_id, lobby_id, lobby_data)
        return _lobby_result(retValue, application_id, lobby_id, f"Lobby {lobby_id} updated.")
    if Action == 2:
        retValue = delete_lobby(application_id, lobby_id)
        return _lobby_result(retValue, application_id, lobby_id, f"Lobby {lobby_id} deleted.")
    if Action == 3:
        return jsonify(get_lobbies(application_id)), 200
    if Action == 4:
        for lobby in applications.get(application_id, []):
            if lobby['lobby_id'] == lobby_id:
                lobby["last_update"] = int(time.time())
                return f"Lobby {lobby_id} update time updated", 200
        # NOTE(review): 580 is not a registered HTTP status code; it is kept
        # because existing clients may test for it -- confirm before changing.
        return f"Lobby with id {lobby_id} not found", 580

    # Without this, an unknown Action made the view return None, which Flask
    # reports as an internal server error.
    return f"Unknown Action {Action}", 400


def get_lobbies(application_id):
    """Return ``lobby_id``/``lobby_info`` dicts for an application's lobbies.

    Returns an empty list when the application is unknown.
    """
    if application_id in applications:
        lobbies = applications[application_id]
        simplified_lobbies = [
            {"lobby_id": lobby["lobby_id"], "lobby_info": lobby["lobby_info"]}
            for lobby in lobbies
        ]
        return simplified_lobbies
    else:
        return []


def print_lobbies_periodically():
    """Print every lobby every five seconds, forever.

    This never returns; nothing in this file calls it.
    """
    while True:
        print("All Lobbies:")
        for application_id, lobbies in list(applications.items()):
            print(f"Application ID: {application_id}")
            for lobby in list(lobbies):
                print(f"Lobby ID: {lobby['lobby_id']}, Lobby Info: {lobby['lobby_info']}, Last Update: {lobby['last_update']}")
        time.sleep(5)


def check_lobbies_periodically():
    """Delete every lobby whose last update is older than the timeout.

    Despite the name this performs a single pass; in this file it is called
    each time a peer sends ``MakeRoom``.
    """
    currentTime = int(time.time())
    for application_id, lobbies in list(applications.items()):
        # Iterate a copy: delete_lobby() removes from `lobbies`, and removing
        # from a list while iterating it skips the following element.
        for lobby in list(lobbies):
            if currentTime - int(lobby['last_update']) > LOBBY_TIMEOUT_SECONDS:
                print(f"Lobby in application : {application_id} with id : {lobby['lobby_id']} timed out")
                delete_lobby(application_id, lobby['lobby_id'])
I'm running it with gunicorn, but the same thing used to happen when I ran it as a normal Flask app. This is on Ubuntu.
Once the process is stopped and restarted, it works fine again, only for the crash to recur a few days later.
I've tried running the whole thing with gunicorn using this command:
gunicorn --workers 1 --worker-class eventlet --bind 0.0.0.0:5000 --certfile=/etc/ssl/chained.pem --keyfile=/etc/ssl/private/private-unencrypted.key --error-logfile ./error.txt --access-logfile ./access.txt RequestManager:app
I've tried logging when the crash happens; it seems to be on disconnection, but I'm really not sure, as I can't pinpoint it exactly.
I've tried a few other things and have basically run out of ideas; nothing seems to fix it.