misc/rohttptls.py
2025-09-08 09:42:29 -04:00

378 lines
12 KiB
Python

#!/usr/bin/env python3
## This blog post is executable Python code; it requires the gevent module to run.
# Install it from pip as gevent, or on Debian as python3-gevent.
import os
import sys
import gevent
import gevent.socket as socket
import gevent.ssl as ssl # we must use gevent's ssl module here, see ln 186
import signal
from io import StringIO
from email.utils import formatdate
# Debug switch read by the start-up code at the bottom of the file: while it
# is False, daemon_main() is called directly, before the fork step.
Fork = False
# Welcome to Part 5.5 of this blog post series that might actually be a
# book, wherein we shall attempt to implement TLS correctly for this, our
# bespoke HTTP implementation. If I did my research correctly, it should
# look easy, but figuring out how not to shoot ourselves in the foot took
# almost five hours of research.
# A reminder: please don't use this code in production.
# If you want to fork it and make your own mistakes after Part VI comes
# out, be my guest; the misfortune will be your own.
# You should also have seen the prologue to the code, which advises you
# to install mkcert and tells you how to use it to get a valid cert/key
# pair for use in this example.
# If you didn't read it, **READ IT NOW**.
# Line terminators. CRLF ends the start line and each header line when the
# request/response classes serialise themselves; LF is the bare newline.
CRLF = "\r\n"
LF = "\n"
class AccessDict(dict):
    """A dict whose keys can also be read, set and deleted as attributes.

    Plain ``dict`` values passed to the constructor, or assigned through
    attribute syntax, are converted to ``AccessDict`` so chained access such
    as ``obj.a.b`` works.  Item assignment (``obj["k"] = {...}``) stores the
    value unchanged.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Convert nested dicts to AccessDict.  Walk a snapshot of the items
        # so the mapping is never reassigned while its live view is iterated.
        for key, value in list(self.items()):
            if isinstance(value, dict) and not isinstance(value, AccessDict):
                self[key] = AccessDict(value)

    def __getattr__(self, key):
        # Only reached when normal attribute lookup fails, so real dict
        # methods keep priority over keys of the same name.
        try:
            return self[key]
        except KeyError:
            # ``from None`` keeps the internal KeyError out of the traceback;
            # callers asked for an attribute, not a key.
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{key}'"
            ) from None

    def __setattr__(self, key, value):
        # Convert nested dicts to AccessDict
        if isinstance(value, dict) and not isinstance(value, AccessDict):
            value = AccessDict(value)
        self[key] = value

    def __delattr__(self, key):
        try:
            del self[key]
        except KeyError:
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{key}'"
            ) from None
# These are request and response objects similar to those in Go's standard
# net/http package. Again, this is still a toy implementation.
# In a future part I will go over how to make a more full-featured
# implementation. This is just making us SSL-ready and laying the groundwork.
class HttpRequest(AccessDict):
    """Minimal HTTP/1.1 request with ``method``, ``path``, ``headers``, ``body``.

    Args:
        method: Request method written at the start of the request line.
        path: Request target written after the method.
        headers: Dict of header names to values.  When omitted, a fresh dict
            is created per instance; a ``Host: localhost`` entry is added if
            the dict has no ``Host`` key.
        **kwargs: Stored as extra keys.  ``body`` (a string) is wrapped in a
            ``StringIO``; without it the body is an empty ``StringIO``.
    """

    def __init__(self, method="GET", path="/", headers=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self["method"] = method
        # A ``{}`` default would be one dict shared (and mutated below) by
        # every instance built without explicit headers.
        self["headers"] = {} if headers is None else headers
        if "body" in kwargs:
            self["body"] = StringIO(kwargs["body"])
        else:
            # Guarantee the key exists so read() and str() work without a body.
            self.setdefault("body", StringIO())
        self["path"] = path
        if "Host" not in self["headers"]:
            self["headers"]["Host"] = "localhost"

    def read(self, seek):
        """Read up to ``seek`` characters from the body buffer."""
        return self["body"].read(seek)

    def __str__(self):
        """Serialise as request line, header lines, blank line, then body.

        The body is followed by two CRLF sequences.
        """
        # The request line needs its own CRLF so it does not run into the
        # first header line.
        parts = [f"{self.method} {self.path} HTTP/1.1" + CRLF]
        parts.extend(f"{k}: {v}" + CRLF for k, v in self["headers"].items())
        parts.append(CRLF)
        parts.append(self["body"].getvalue() + CRLF)
        return "".join(parts) + CRLF
class HttpResponse(AccessDict):
    """Minimal HTTP/1.1 response with a writable text body.

    ``status``, ``headers`` and ``body`` are stored as keys and are reachable
    as attributes through ``AccessDict``.

    Args:
        status: Status code written after ``HTTP/1.1`` in the status line.
        headers: Dict of header names to values.  When omitted, a fresh dict
            is created per instance.
        body: Accepted for interface compatibility.  NOTE(review): it is not
            used to seed the buffer, which always starts empty; fill it with
            ``write()``.
        **kwargs: Stored as extra keys; ``host`` also sets the ``Host``
            header (default ``localhost``).
    """

    def __init__(self, status="400", headers=None, body="goodbye\r\n", *args, **kwargs):
        super().__init__(*args, **kwargs)
        self["status"] = status
        # A ``{}`` default would be a single dict shared by every response
        # built without explicit headers, so a header set on one response
        # would show up on all of them.
        self["headers"] = {} if headers is None else headers
        self["body"] = StringIO()
        # We Must have date and host headers set correctly to use tls
        # so we unconditionally set them here
        self["headers"]["Host"] = kwargs.get("host", "localhost")
        self["headers"]["Date"] = formatdate(timeval=None, localtime=False, usegmt=True)
        self["headers"]["Content-Type"] = "text/plain; charset=UTF-8"

    def write(self, stuff):
        """Append ``stuff`` to the body buffer; returns what ``StringIO.write`` returns."""
        return self.body.write(stuff)

    # Foreshadowing (n): A literary device in which an author ...
    def __str__(self):
        """Serialise as status line, header lines, blank line, then body.

        ``Content-Length`` is added when the headers do not already carry
        one, and nothing is emitted after the body.
        """
        # Debug trace of the headers on every serialisation.
        print(self.headers)
        body = self["body"].getvalue()
        lines = [f"HTTP/1.1 {self.status}"]
        lines.extend(f"{k}: {v}" for k, v in self["headers"].items())
        if "Content-Length" not in self["headers"]:
            # Content-Length counts octets on the wire.  The Content-Type set
            # in __init__ declares UTF-8, so measure the UTF-8 encoding
            # rather than the number of characters.
            length = len(body.encode("utf-8"))
            lines.append(f"Content-Length: {length}")
        # A blank line ends the header section (per RFC 9112); the body
        # follows with no trailing bytes so it matches the declared length.
        return CRLF.join(lines) + CRLF + CRLF + body
# Text written into good_response below, i.e. the body sent for GET requests.
RICKROLL_LYRICS = """
We're no strangers to love
You know the rules and so do I
A full commitment's what I'm thinkin' of
You wouldn't get this from any other guy
I just wanna tell you how I'm feeling
Gotta make you understand
Never gonna give you up, never gonna let you down
Never gonna run around and desert you
Never gonna make you cry, never gonna say goodbye
Never gonna tell a lie and hurt you
We've known each other for so long
Your heart's been aching, but you're too shy to say it
Inside, we both know what's been going on
We know the game and we're gonna play it
And if you ask me how I'm feeling
Don't tell me you're too blind to see
Never gonna give you up, never gonna let you down
Never gonna run around and desert you
Never gonna make you cry, never gonna say goodbye
Never gonna tell a lie and hurt you
Never gonna give you up, never gonna let you down
Never gonna run around and desert you
Never gonna make you cry, never gonna say goodbye
Never gonna tell a lie and hurt you
"""
# Canned responses, built once at import time and serialised per request.
# Each one is given its own headers dict so that a header set on one
# response (Content-Length, Last-Modified) cannot appear on the others.
head_response = HttpResponse(headers={})
head_response.status = 200
# A HEAD reply advertises the length of the body the matching GET would send,
# so derive it from that body instead of hard-coding a number.
head_response.headers["Content-Length"] = len(RICKROLL_LYRICS.encode("utf-8"))
head_response.write("")
good_response = HttpResponse(headers={})
good_response.status = 200
# HTTP-date in IMF-fixdate form: three-letter month and hh:mm:ss.
good_response.headers["Last-Modified"] = "Mon, 27 Jul 1987 00:00:00 GMT"
good_response.write(RICKROLL_LYRICS)
error_response = HttpResponse(headers={})
error_response.status = 405  # a 405 here is closer to RFC compliant
error_response.write("Together forever and never to part Together forever we two")
# Greenlets spawned by the server handlers and by daemon_main().
client_procs = []
# Module-level placeholders for the two listener greenlets.
ct_svr_proc = None
ssl_svr_proc = None
# You've Seen all this before, in the last Part 1. I will shorten commentary
class NullDevice:
    """Write-only sink that discards everything written to it.

    Installed as ``sys.stdout`` / ``sys.stderr`` by the start-up code once
    the process has forked.
    """

    def write(self, s):
        """Discard ``s``."""
        pass

    def flush(self):
        """Do nothing; there is no buffer.

        Present because a stream replacement is expected to be flushable:
        ``print(..., flush=True)`` calls ``flush()`` on its target.
        """
        pass
def hup_handle(sig, fr):
    """Signal handler: end the process by raising ``SystemExit``.

    Both arguments are ignored; the handler takes two positional arguments
    because it is registered through ``signal.signal``.
    """
    raise SystemExit
# We've been using gevent all along, but now it's time to say the quiet part
# out loud. Gevent is an alternative concurrency module for Python.
# Trying to use gevent and standard Python's stuff side by side
# is a quick road to madness. I'll explain more in the Prequel series.
# For now it's enough to know the APIs are identical.
# With that out of the way, we come to our first real decision
# that has security implications.
# We could implement TLS in two ways. In method 1 we have the server
# listen on two different ports, as HTTP does on 80/443.
# The other approach involves encrypting traffic on the port we
# already use. The one-port method may seem safer;
# this was the route chosen by the Gemini project.
# But for teaching purposes the two-port method works better,
# so that's what we'll do.
# This requires a couple of changes in the server handler.
# First we change its name and make the corresponding change
# in main; then we copy it almost verbatim and make changes for TLS.
def cleartext_server_handler():
    """Accept plain-HTTP connections on port 1337, one greenlet per client.

    Loops forever.  The listening socket is closed when the loop is left by
    an exception (including the greenlet being killed or the process exiting
    through ``SystemExit``).
    """
    serversock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
    try:
        # Let a restarted server rebind the port while connections from the
        # previous run are still in TIME_WAIT.
        serversock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        serversock.bind(("", 1337))
        serversock.listen(10)
        while True:
            client, addr = serversock.accept()
            print(addr)
            client_procs.append(gevent.spawn(client_handler, client))
            # Yield so the freshly spawned handler gets to run.
            gevent.sleep(0.25)
    finally:
        # A close placed after ``while True`` can never run; ``finally`` does.
        serversock.close()
# TLS looks easy, but in practice almost every library for handling it
# in any language you can name is... HOT GARBAGE. Python is not the
# exception, but I find it does have fewer security foot-guns.
# If anything screws up, it will crash out with an SSLError, and
# its error messages are cryptic.
# But once you've done it properly, it looks easy.
# This took about four hours to debug, but I've finally got it.
def tls_server_handler():
    """Accept TLS connections on port 1972, one greenlet per client.

    Each accepted socket is wrapped with the server-side TLS context before
    being handed to ``client_handler``.  A failed handshake is logged and the
    loop moves on to the next connection.  The listening socket is closed
    when the loop is left by an exception.

    Raises:
        OSError: If the certificate/key files cannot be loaded or the port
            cannot be bound (``ssl.SSLError`` is a subclass of ``OSError``).
    """
    # The context is a container for cryptographic settings.  The default
    # context carries the defaults chosen by the Python security people,
    # which avoids a lot of foot-guns.  Note that the naming looks reversed:
    # because we are a server, we ask for the context that authenticates
    # clients (Purpose.CLIENT_AUTH).
    ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
    # Pin the protocol range explicitly to TLS 1.2-1.3 instead of relying on
    # whatever the peer offers.  (The author reports that Firefox advertises
    # SSLv3, considered broken since 2014, during the handshake.)
    ctx.minimum_version = ssl.TLSVersion.TLSv1_2
    ctx.maximum_version = ssl.TLSVersion.TLSv1_3
    # Load the key and cert (the mkcert method is assumed; with a self-signed
    # pair, place your cert/key under these names).  This happens before the
    # socket is created, so a missing file does not leave a bound socket.
    ctx.load_cert_chain(certfile="cert.pem", keyfile="key.pem")
    serversock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
    try:
        # Let a restarted server rebind the port while connections from the
        # previous run are still in TIME_WAIT.
        serversock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # Another port number; 1972 foreshadows a bit.
        serversock.bind(("", 1972))
        serversock.listen(10)
        while True:
            client, addr = serversock.accept()
            print(addr)
            try:
                # Last step: wrap the client socket in TLS.  The SSLContext
                # does this for us, but server_side=True must be passed to
                # avoid silly defaults.
                secure = ctx.wrap_socket(client, server_side=True)
            except OSError as e:
                # Browser behaviour on PKI errors ends up here as
                # ssl.SSLError, and a peer that resets the connection in
                # mid-handshake raises a plain OSError.  Neither may take the
                # listener down, so log the error and continue.
                print(e)
            else:
                client_procs.append(gevent.spawn(client_handler, secure))
            finally:
                # Half-second pause per connection attempt, success or not;
                # it also yields so a spawned handler gets to run.
                gevent.sleep(0.5)
    finally:
        # A close placed after ``while True`` can never run; ``finally`` does.
        serversock.close()
# One change here: see the comment on sock.shutdown() inside the function.
def client_handler(sock):
    """Serve one client connection, then close it.

    Reads up to 4096 bytes at a time and looks only at how the data starts:
    ``GET`` is answered with ``good_response``, ``HEAD`` with
    ``head_response``, and anything else with ``error_response``.  After a
    GET or HEAD reply the connection is shut down.  After more than three
    junk requests, or when the peer closes its side, the socket is closed
    without a reply.

    Args:
        sock: A connected socket (plain or TLS-wrapped).
    """
    print("Client handler spawn")
    junk_counter = 0
    try:
        while junk_counter <= 3:
            data = sock.recv(4096)
            if not data:
                # Empty read: the peer has closed, nobody is left to answer.
                return
            # Undecodable bytes (e.g. a TLS hello sent to the cleartext port)
            # must fall through to the error reply, not raise.
            dstring = data.decode("UTF-8", errors="replace")
            if dstring.startswith("GET"):
                reply = str(good_response)
            elif dstring.startswith("HEAD"):
                reply = str(head_response)
            else:
                # sendall() keeps writing until the whole reply is out;
                # send() may stop after a partial write.
                sock.sendall(str(error_response).encode("utf-8"))
                junk_counter += 1
                gevent.sleep(0.25)  # this is a somewhat magical value, see Part II
                continue
            sock.sendall(reply.encode("utf-8"))
            # We do a more graceful exit here by shutting down the socket
            # before closing it.  The author notes this makes things faster
            # for TLS; any effect on client response time went unnoticed.
            sock.shutdown(socket.SHUT_RDWR)
            return
    except OSError as exc:
        # Resets, broken pipes and TLS errors (ssl.SSLError is an OSError)
        # end this connection only; log and fall through to the close.
        print(exc)
    finally:
        sock.close()
def daemon_main():
    """Start both listeners, wait on the tracked greenlets, then exit 0.

    The cleartext listener is spawned first, then the TLS one; both are
    appended to ``client_procs`` before ``gevent.joinall`` is called on it.

    NOTE(review): the greenlet handles are kept in local names only, so the
    module-level ``ct_svr_proc`` / ``ssl_svr_proc`` placeholders stay ``None``.
    """
    listeners = [
        gevent.spawn(cleartext_server_handler),
        gevent.spawn(tls_server_handler),
    ]
    client_procs.extend(listeners)
    gevent.joinall(client_procs)
    sys.exit(0)
# Foreground mode, so things will not fork while debugging: with Fork off,
# the servers are started right here.
if not Fork:
    daemon_main()

# Background mode: fork, let the parent leave immediately, and carry on in
# the child.
pid = os.fork()
if pid:
    os._exit(0)

# Child only from here on (the parent never gets past os._exit above).
os.setpgrp()
os.umask(0)
print(os.getpid())  # last visible output before the streams are silenced
sys.stdout = NullDevice()
sys.stderr = NullDevice()
for signum in (signal.SIGHUP, signal.SIGTERM):
    signal.signal(signum, hup_handle)
daemon_main()
# To recap, we just did a bunch of work for no user-visible change.
# This is not a bad thing: often the first draft of a program
# will fit the requirements of the moment, but when the requirements
# change, the program must be adapted to fit.
# This process of iteration and redesign
# is called "paying down technical debt", and it should be done whenever
# possible.
#
# And we've just moved up to the second level of the 7 story mountain
# Yay us.
#
# References
# Robustness Principle (Devopedia): https://devopedia.org/postel-s-law
# IETF RFC 9112 HTTP/1.1 https://datatracker.ietf.org/doc/html/rfc9112
#