From 3ed9fc8e92048b7f43d65e1b4e114f0b1c9376de Mon Sep 17 00:00:00 2001 From: Matt Arnold Date: Tue, 9 Sep 2025 13:45:59 -0400 Subject: [PATCH] Final free part done code wise --- filesystem.py | 498 ++++++++++++++++++++++++++++++++++++++++++++++ htdocs/index.html | 25 +++ htdocs/style.css | 74 +++++++ 3 files changed, 597 insertions(+) create mode 100644 filesystem.py create mode 100644 htdocs/index.html create mode 100644 htdocs/style.css diff --git a/filesystem.py b/filesystem.py new file mode 100644 index 0000000..4e6b172 --- /dev/null +++ b/filesystem.py @@ -0,0 +1,498 @@ +#!/usr/bin/env python3 +## This Blog post is executable python code, it requires the gevent modules to run +# pip as gevent, Debian as python3-gevent. + +import os +import sys +import gevent +import gevent.socket as socket +import gevent.ssl as ssl # we must use gevent's ssl module here, see ln 186 +import signal +from io import StringIO, BytesIO +from email.utils import formatdate +import mimetypes +import pathlib + +Fork = False +# Welcome to Part 5.5 of this blog post series that might actually be a +# book. Wherein we shall attempt to implement TLS correctly for this +# our bespoke http implementation. Which if I did my research correctly. + +# Should look easy, but figureing out how not to shoot ourselves in +# the foot took almost five hours of research. +# A reminder please don't use this code in production. +# If you want to fork it and make your own mistakes +# after Part VI comes out be my guest, your own misfortune. + +# You should have also seen the prologue to the code. +# which advises you to install mkcert, and tells you how to use it +# to get a valid cert/keypair for use in this example. 
# If you didn't read it **READ IT NOW**


CRLF = "\r\n"
LF = "\n"


class AccessDict(dict):
    """A dict whose keys can also be read, set and deleted as attributes.

    Plain ``dict`` values given to the constructor, or assigned with
    attribute syntax, are wrapped in ``AccessDict`` so lookups can be
    chained (``d.a.b``).  Values stored with item syntax (``d["k"] = {}``)
    are kept as they are.  Keys that collide with real ``dict`` attributes
    (``items``, ``keys``, ...) are only reachable with item syntax.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Convert nested dicts to AccessDict.  Only existing keys are
        # reassigned, so the dict never changes size during iteration.
        for key, value in self.items():
            if isinstance(value, dict) and not isinstance(value, AccessDict):
                self[key] = AccessDict(value)

    def __getattr__(self, key):
        # Only called when normal attribute lookup fails, so methods win
        # over keys of the same name.
        try:
            return self[key]
        except KeyError:
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{key}'"
            ) from None

    def __setattr__(self, key, value):
        # Convert nested dicts to AccessDict
        if isinstance(value, dict) and not isinstance(value, AccessDict):
            value = AccessDict(value)
        self[key] = value

    def __delattr__(self, key):
        try:
            del self[key]
        except KeyError:
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{key}'"
            ) from None


# These are request and response objects similar to Go's standard
# net/http package. Again this is still a toy implementation.
# In a future part. I will go over how to make a more full featured
# implementation. This is just making us ssl ready and laying the ground work
class HttpRequest(AccessDict):
    """An HTTP/1.1 request: ``method``, ``path``, ``headers`` and optional ``body``.

    Args:
        method: request method, e.g. ``"GET"``.
        path: request target, e.g. ``"/index.html"``.
        headers: initial header mapping.  It is copied, so neither the
            caller's dict nor any other request is affected by later header
            changes.  ``None`` means "no headers".
        **kwargs: extra keys stored on the request.  A ``body`` string is
            wrapped in a ``StringIO`` so it can be consumed with ``read``.

    A ``Host: localhost`` header is added when no ``Host`` header is given.
    """

    def __init__(self, method="GET", path="/", headers=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self["method"] = method
        # A fresh dict per request: a shared ``{}`` default would leak
        # headers from one request into every later one.
        self["headers"] = {} if headers is None else dict(headers)
        if "body" in kwargs:
            self["body"] = StringIO(kwargs["body"])
        self["path"] = path
        if "Host" not in self["headers"]:
            self["headers"]["Host"] = "localhost"

    def read(self, seek):
        """Read up to *seek* characters from the request body."""
        return self["body"].read(seek)

    def __str__(self):
        buf = StringIO()
        buf.write(f"{self.method} {self.path} HTTP/1.1\r\n")
        for k, v in self["headers"].items():
            if k == "":
                continue
            buf.write(f"{k}: {v}" + CRLF)
        buf.write(CRLF)
        if "body" in self:
            buf.write(self["body"].getvalue() + CRLF)
        else:
            buf.write("" + CRLF)
        return buf.getvalue() + CRLF


class HttpResponse(AccessDict):
    """An HTTP/1.1 response with a writable body buffer.

    Args:
        status: status code placed on the status line.
        headers: initial header mapping (copied; ``None`` means empty).
            ``Host``, ``Date`` and ``Content-Type`` are always overwritten
            by the constructor; set ``Content-Type`` afterwards to change it.
        body: accepted for interface compatibility but not written to the
            buffer; use ``write`` to add content.
        **kwargs: ``host`` overrides the ``Host`` header; a truthy
            ``binary`` makes the body a ``BytesIO`` instead of a ``StringIO``.

    ``str(response)`` gives a text rendering (lossy for binary bodies that
    are not valid UTF-8); ``bytes(response)`` gives the exact wire bytes.
    """

    def __init__(self, status="400", headers=None, body="goodbye\r\n", *args, **kwargs):
        super().__init__(*args, **kwargs)
        self["status"] = status
        # A fresh dict per response: with a shared ``{}`` default a header
        # set on one response (Last-Modified, Content-Type, ...) showed up
        # on every other response as well.
        self["headers"] = {} if headers is None else dict(headers)
        self["body"] = StringIO()
        # We Must have date and host headers set correctly to use tls
        # so we unconditionally set them here
        if "host" not in kwargs:
            self["headers"]["Host"] = "localhost"
        else:
            self["headers"]["Host"] = kwargs["host"]
        if "binary" in kwargs and kwargs["binary"]:
            self.binmode = True
            self["body"] = BytesIO()  # hopefully this will be enough
        else:
            self.binmode = False

        self["headers"]["Date"] = formatdate(timeval=None, localtime=False, usegmt=True)
        self["headers"]["Content-Type"] = "text/plain; charset=UTF-8"

    def write(self, stuff):
        """Append *stuff* to the body; str is UTF-8 encoded in binary mode."""
        if self.binmode and isinstance(stuff, str):
            return self.body.write(bytes(stuff, "utf-8"))
        return self.body.write(stuff)

    def _head(self, length):
        """Return the status line and header section, ending in a blank line.

        *length* is the body size in bytes; it is emitted as Content-Length
        unless that header was set explicitly.
        """
        lines = [f"HTTP/1.1 {self.status}"]
        lines.extend(f"{k}: {v}" for k, v in self["headers"].items())
        if "Content-Length" not in self["headers"]:
            lines.append(f"Content-Length: {length}")
        return CRLF.join(lines) + CRLF + CRLF  # blank line per RFC 9112

    # Foreshadowing (n): A literary device in which an author ...
    def __str__(self):
        payload = self["body"].getvalue()
        if self.binmode:
            # Text cannot carry arbitrary bytes; undecodable ones are
            # replaced instead of raising.  Use bytes(response) on the wire.
            return self._head(len(payload)) + payload.decode("utf-8", errors="replace")
        # Content-Length counts bytes, not characters.
        return self._head(len(payload.encode("utf-8"))) + payload + CRLF

    def __bytes__(self):
        """Return the exact bytes to send: header section plus raw body."""
        payload = self["body"].getvalue()
        if not self.binmode:
            payload = payload.encode("utf-8")
        return self._head(len(payload)).encode("utf-8") + payload


WEBROOT = "htdocs/"


def _resolve_inside(root, candidate):
    """Resolve *candidate* and return it only if it stays inside *root*."""
    try:
        resolved = candidate.resolve()
        resolved.relative_to(root)  # ValueError when outside root
    except (ValueError, OSError, RuntimeError):
        return None
    return resolved


def _not_found(response, message):
    """Turn *response* into a 404 carrying *message* and return it."""
    response.status = 404
    response.write(message)
    return response


def fs_handler(request):
    """Serve the file named by ``request.path`` from WEBROOT.

    WEBROOT is looked up next to this source file.  A directory is answered
    with its ``index.html``.  Anything that is missing, unreadable, or that
    resolves outside WEBROOT (``..`` segments, extra leading slashes,
    symlinks pointing elsewhere) gets a 404.

    Returns:
        A binary-mode HttpResponse: 200 with the file contents and a guessed
        Content-Type, or 404 with a short text body.
    """
    wr = HttpResponse(binary=True, body=b"")
    root = (pathlib.Path(__file__).parent / WEBROOT).resolve()

    target = None
    if "\x00" not in request.path:
        # Leading slashes are dropped so the path can never be absolute:
        # joining an absolute path would discard the web root entirely.
        target = _resolve_inside(root, root / request.path.lstrip("/"))
    print("AFTER PATHLIB: " + str(target))
    print(str(pathlib.Path.cwd()))

    if target is None or not os.access(str(target), os.R_OK):
        return _not_found(wr, "these are not the droids your looking for\r\n")

    if target.is_dir():
        target = _resolve_inside(root, target / "index.html")
        print("GETINDEX: " + str(target))
        if target is None or not target.exists():
            return _not_found(wr, "May the force be with you\r\n")

    try:
        with open(target, "rb") as fh:
            membuf = fh.read()
    except OSError:
        return _not_found(wr, "these are not the droids your looking for\r\n")

    wr.write(membuf)
    wr.status = 200
    # guess_type returns None for unknown extensions; never send "None".
    wr.headers["Content-Type"] = (
        mimetypes.guess_type(str(target))[0] or "application/octet-stream"
    )
    return wr


RICKROLL_LYRICS = """

We're no strangers to love
You know the rules and so do I
A full commitment's what I'm thinkin' of
You wouldn't get this from any other guy

I just wanna tell you how I'm feeling
Gotta make you understand

Never gonna give you up, never gonna let you down
Never gonna run around and desert you
Never gonna make you cry, never gonna say goodbye
Never gonna tell a lie and hurt you

We've known each other for so long
Your heart's been aching, but you're too shy to say it
Inside, we both know what's been going on
We know the game and we're gonna play it

And if you ask me how I'm feeling
Don't tell me you're too blind to see

Never gonna give you up, never gonna let you down
Never gonna run around and desert you
Never gonna make you cry, never gonna say goodbye
Never gonna tell a lie and hurt you

Never gonna give you up, never gonna let you down
Never gonna run around and desert you
Never gonna make you cry, never gonna say goodbye
Never gonna tell a lie and hurt you

"""
head_response = HttpResponse()
head_response.status = 200
# head_response.headers["Content-Length"] = 0
head_response.write("")
good_response = HttpResponse()
good_response.status = 200
# HTTP-date (IMF-fixdate) needs a three letter month and hh:mm:ss.
good_response.headers["Last-Modified"] = "Mon, 27 Jul 1987 00:00:00 GMT"
good_response.write(RICKROLL_LYRICS)

error_response = HttpResponse()
error_response.status = 404  # we fudge the rfc a bit here
error_response.write("Together forever and never to part Together forever we two")


client_procs = []

# commentary on ln CHANGEME
ct_svr_proc = None
ssl_svr_proc = None


# One change here
class BadParseError(Exception):
    """Raised by the request parser when an invalid request comes in.

    Attributes:
        message: human readable description, also sent to the client.
        error_code: HTTP status code to answer with.
    """

    def __init__(self, message, error_code):
        super().__init__(message)
        self.error_code = error_code
        self.message = message

    def __str__(self):
        return f"{self.message} (Error Code: {self.error_code})"


class NullDevice:
    """Write-only sink used to silence stdout/stderr once daemonized."""

    def write(self, s):
        pass

    def flush(self):
        # print(..., flush=True) and interpreter shutdown call flush() on
        # whatever object replaces sys.stdout / sys.stderr.
        pass


def hup_handle(sig, fr):
    """Signal handler (SIGHUP/SIGTERM): exit the process."""
    sys.exit()


# We've been using gevent all along, but now it's time to say the quiet part
# outloud. Gevent is an alternative concurrency module for python.
# Trying to use gevent and standard python's stuff side by side
# is a quick road to madness.
# I'll explain more in the Prequel series
# For now it's enough to know the APIs are identical.

# With that out of the way, we come to our first real decision,
# that has security implications.
# We could implement TLS in two ways. Method 1 we have the server.
# listen on two different ports. As http does on 80/443
# the other approach involves encrypting traffic on the port we
# already use. The one port method may seem safer.
# This was the route chosen by the gemini project.


# But for teaching purposes the two port method works better.
# so that's what we'll do.
# This requires a couple of changes, in server handler.
# first we change it's name, and make the corresponding change
# in main, and we'll copy it almost verbatim, and make changes for TLS
def _listen(port):
    """Return an IPv6 TCP socket bound to *port* on all interfaces, listening."""
    serversock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
    try:
        # Allow an immediate restart while old connections sit in TIME_WAIT.
        serversock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        serversock.bind(("", port))
        serversock.listen(10)
    except OSError:
        serversock.close()
        raise
    return serversock


def cleartext_server_handler():
    """Accept plain HTTP connections on port 1337, one greenlet per client.

    Runs until the greenlet is killed; the listening socket is closed on
    the way out.
    """
    serversock = _listen(1337)
    try:
        while True:
            client, _addr = serversock.accept()
            # NOTE(review): client_procs only ever grows; finished greenlets
            # are never removed from it.
            client_procs.append(gevent.spawn(client_handler, client))
            gevent.sleep(0.25)
    finally:
        serversock.close()


# TLS looks easy, but in practice almost every library for handling it
# in any language you can name is... HOT GARBAGE. Python is not the
# exception, but i find it does have less security foot guns.


def _tls_client_handler(ctx, client):
    """Do the TLS handshake for *client*, then serve it with client_handler.

    Runs in its own greenlet so a slow or stalled handshake only holds up
    this one connection and never the accept loop.
    """
    try:
        # last step in the process is to wrap the client socket in
        # TLS. The SSLContext does this for us.
        # but you must pass server_side=True to avoid silly defaults
        secure = ctx.wrap_socket(client, server_side=True)
    except OSError as e:
        # ssl.SSLError is an OSError.  Browser behavior on pki errors
        # lands here, and so does a peer that resets the connection
        # mid-handshake.  Log the error and drop the connection.
        print(e)
        client.close()
        return
    client_handler(secure)


# if anything screws up it will crash out with an SSLError
# it's error messages are cryptic.
# But once you've done it properly it looks easy.
# This took about four hours to debug, but i've got it finally
def tls_server_handler():
    """Accept TLS connections on port 1972, one greenlet per client.

    Expects ``cert.pem`` and ``key.pem`` in the working directory; a missing
    or invalid pair raises before the port is opened.
    """
    # the context is sort of like a container for cryptographic settings
    # we load the default context, which contains the best default
    # settings as reviewed, by the python security people.
    # this avoids a lot of foot guns

    # Note here that the server/client is reversed
    # Because we are a server we need the context
    # for clients.
    ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)

    # Remember what i said about best default and not shooting
    # ourselves in the foot. Well Firefox/Gevent/Python, does a stupid thing
    # during TLS handshake one and declares it supports. SSLv3
    # Which has been considered hopelessly broken since 2014.
    # Mozilla's own security people even said so at the time.
    # See References. Anyway Setting the minimum and maximum
    # explicitly to TLSv1.2/1.3 avoids this wrongness.
    # so we do it
    ctx.minimum_version = ssl.TLSVersion.TLSv1_2
    ctx.maximum_version = ssl.TLSVersion.TLSv1_3

    # Next we load our key and cert
    # I'm assuming you used the mkcert method
    # if you used the self signed method just place your cert/key
    # See comment on ln 277
    ctx.load_cert_chain(certfile="cert.pem", keyfile="key.pem")

    # we choose another port number, 1972 foreshadows a bit
    serversock = _listen(1972)
    try:
        while True:
            client, _addr = serversock.accept()
            client_procs.append(gevent.spawn(_tls_client_handler, ctx, client))
            gevent.sleep(0.25)
    finally:
        serversock.close()


def routing_table(request):
    """Pick the response for *request*.

    ``None`` gets a 500.  Every HEAD gets the empty 200.  ``GET /rick`` gets
    the lyrics.  Everything else is handed to the filesystem handler.
    """
    if request is None:
        custom_response = HttpResponse()
        custom_response.status = 500
        custom_response.write("Oh bother" + CRLF)
        return custom_response
    if request.method == "HEAD":
        return head_response
    if request.method == "GET" and request.path == "/rick":
        return good_response
    return fs_handler(request)


class HttpParser:
    """Parse the raw bytes of one HTTP/1.1 request head into an HttpRequest."""

    def __init__(self, raw_request):
        """Decode *raw_request* as UTF-8.

        Raises:
            BadParseError: (400) when the bytes are not valid UTF-8.
        """
        try:
            self.request_text = raw_request.decode("utf-8")
        except UnicodeDecodeError as exc:
            raise BadParseError("UTF-8 decode failed", 400) from exc

    def parse(self):
        """Return an HttpRequest with method, path and headers filled in.

        Raises:
            BadParseError: 418 for a request line with fewer than three
                parts or a header line without a name and colon; 400 for
                any version other than HTTP/1.1.
        """
        # An explicit dict so header state is never shared between requests.
        result = HttpRequest(headers={})
        atoms = self.request_text.split(CRLF)
        reqinfo = atoms[0].strip(CRLF).split(" ")
        if len(reqinfo) < 3:
            raise BadParseError("I'm a little teapot short and stout", 418)
        if reqinfo[2] != "HTTP/1.1":
            raise BadParseError("Wrong HTTP Version", 400)
        # Now we parse the headers
        for h in atoms[1:]:
            if h == "":
                # Blank line: end of the header section.  Whatever follows
                # is body and must not be read as headers.
                break
            name, colon, value = h.partition(":")
            if not colon or not name:
                raise BadParseError("here is my handle", 418)
            # Optional whitespace around the value is not part of it.
            result.headers[name] = value.strip()
        result.method = reqinfo[0]
        result.path = reqinfo[1]
        return result


def _wire_bytes(response, trailer=""):
    """Serialize *response* for the socket.

    An exact ``__bytes__`` rendering is used when the response class defines
    one, because text cannot carry arbitrary binary bodies.  Otherwise the
    text rendering plus *trailer* is UTF-8 encoded.
    """
    serialize = getattr(type(response), "__bytes__", None)
    if serialize is not None:
        return serialize(response)
    return (str(response) + trailer).encode("utf-8")


def client_handler(sock):
    """Serve one request on *sock*, then close it.

    A request that fails to parse is answered with its error status and the
    client may try again on the same connection; after four bad attempts it
    gets a final 420.  A peer that disconnects gets no answer.  The socket
    is closed whatever happens.

    NOTE(review): a single recv(4096) is treated as one whole request;
    larger or fragmented request heads are not reassembled.
    """
    print("Client handler spawn")
    junk_counter = 0
    try:
        while True:
            if junk_counter > 3:
                server_response = HttpResponse()
                server_response.status = 420
                server_response.body.write("Try Some Indica, it may help")
                break
            data = sock.recv(4096)
            if not data:
                # Peer closed the connection: nobody left to answer.
                return
            try:
                server_response = routing_table(HttpParser(data).parse())
                break
            except BadParseError as e:
                server_response = HttpResponse()
                server_response.status = e.error_code
                server_response.body.write(e.message + CRLF)
                junk_counter += 1
                sock.sendall(_wire_bytes(server_response))

        # gevent.sleep(0.25) # this is a somewhat magical value, see Part II
        # sendall, not send: send may stop after a partial write and
        # truncate anything larger than the socket buffer.
        sock.sendall(_wire_bytes(server_response, trailer=CRLF))
    except (OSError, UnicodeError) as e:
        # Connection trouble or an unserializable body: log and drop.
        print(e)
    finally:
        # sock.shutdown(socket.SHUT_RDWR) # we do a more graceful exit here by
        # shutting down the socket, makes things faster for TLS
        # may have an effect on client response time to but i didn't notice it.
        sock.close()


def daemon_main():
    """Start both listeners, wait for them to finish, then exit the process."""
    global ct_svr_proc, ssl_svr_proc
    ct_svr_proc = gevent.spawn(cleartext_server_handler)
    ssl_svr_proc = gevent.spawn(tls_server_handler)
    client_procs.append(ct_svr_proc)
    client_procs.append(ssl_svr_proc)
    # Join a private list: client_procs keeps growing while we wait.
    gevent.joinall([ct_svr_proc, ssl_svr_proc])
    sys.exit(0)


# so things will not fork while i'm debugging
if __name__ == "__main__":
    if Fork:
        if os.fork():
            # Parent: leave at once, the child carries on as the daemon.
            os._exit(0)

        os.setpgrp()
        os.umask(0)

        print(os.getpid())
        sys.stdout = NullDevice()
        sys.stderr = NullDevice()

        signal.signal(signal.SIGHUP, hup_handle)
        signal.signal(signal.SIGTERM, hup_handle)

    daemon_main()


# Now for the recap, today we learned how TLS works from the server's
# Perspective. We learned that python has secure defaults and you
# should use them unless there's a problem.

# We also learned that firefox may have a problem, and being explicit
# about the TLS versions we support fixed that issue.

# This problem is more likely a bug in gevent, if I had to guess

# Stay tuned for Part 6 where we will, finally break up
# with Rick Astley.
# Cheers

# References
# Python's SSL Module docs: https://docs.python.org/3/library/ssl.html#ssl.create_default_context
# End of SSLv3: https://blog.mozilla.org/security/2014/10/14/the-poodle-attack-and-the-end-of-ssl-3-0/

# Patch residue kept verbatim from the end of the original line (it is the
# start of the htdocs/index.html diff, not part of filesystem.py):
# diff --git a/htdocs/index.html b/htdocs/index.html new file mode 100644 index 0000000..d794bb7 --- /dev/null +++ b/htdocs/index.html @@ -0,0 +1,25 @@ + + + + Page Title + + + + +
+

Page title

+ +

Hello! this is an example page using Xess

+ +
When in doubt, create another framework
+ + + +

Select this text to see what happens!

+ + +
/* Patch residue kept verbatim from the start of the original line (end of
   the index.html diff and the style.css diff header):
   + + diff --git a/htdocs/style.css b/htdocs/style.css new file mode 100644 index 0000000..618eb3b --- /dev/null +++ b/htdocs/style.css @@ -0,0 +1,74 @@ */

/* Dark theme is the default; the light palette is applied at the bottom
   when the user agent reports prefers-color-scheme: light. */

main {
  font-family: monospace, monospace;
  max-width: 38rem;
  padding: 2rem;
  margin: auto;
}

@media only screen and (max-device-width: 736px) {
  main {
    padding: 0rem;
  }
}

::selection {
  background: #d3869b;
}

body {
  background: #282828;
  color: #ebdbb2;
}

pre {
  background-color: #3c3836;
  padding: 1em;
  border: 0;
}

a, a:active, a:visited {
  color: #b16286;
  background-color: #1d2021;
}

h1, h2, h3, h4, h5 {
  margin-bottom: .1rem;
}

blockquote {
  border-left: 1px solid #bdae93;
  margin: 0.5em 10px;
  padding: 0.5em 10px;
}

footer {
  /* "align" is not a CSS property and is dropped by browsers;
     text-align is what centers the footer's inline content. */
  text-align: center;
}

@media (prefers-color-scheme: light) {
  body {
    background: #fbf1c7;
    color: #3c3836;
  }

  pre {
    background-color: #ebdbb2;
    padding: 1em;
    border: 0;
  }

  a, a:active, a:visited {
    color: #b16286;
    background-color: #f9f5d7;
  }

  h1, h2, h3, h4, h5 {
    margin-bottom: .1rem;
  }

  blockquote {
    border-left: 1px solid #655c54;
    margin: 0.5em 10px;
    padding: 0.5em 10px;
  }
}