commit d2898c58887876902d86df71013d58fdc187b9ee from: Manuel Kuklinski date: Tue May 5 14:21:54 2026 UTC initial commit commit - /dev/null commit + d2898c58887876902d86df71013d58fdc187b9ee blob - /dev/null blob + 74bb1492d7c4e50ea3def3a724b49552221920ba (mode 644) --- /dev/null +++ README.md @@ -0,0 +1,8 @@ +# auth-gate + +Inspired by +https://her.esy.fun/posts/0031-how-i-protect-my-forgejo-instance-from-ai-web-crawlers/index.html + +Needs `{gcc,llvm}` and `{fast,slow}cgi` + +**DISCLAIMER**: Vibe-coded with an LLM. I'm not much of a programmer... blob - /dev/null blob + 8b09380d9eb2cd78abcc817ee69a162e10525727 (mode 644) --- /dev/null +++ auth-gate.c @@ -0,0 +1,486 @@ +/* + * Copyright (c) 2025 Manuel Kuklinski + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * SPDX-License-Identifier: ISC + */ + +/* + * auth-gate.c -- CGI access control for OpenBSD/slowcgi + * + * WHAT DOES THIS PROGRAM DO? + * -------------------------- + * This program runs as a CGI script behind a web server (slowcgi on OpenBSD). + * When a browser requests a page, the web server starts this program and passes + * information about the HTTP request via environment variables (e.g. which URL + * was requested, which cookies were sent, etc.). + * + * The program checks whether the browser sends a specific cookie. 
/*
 * Depending on the result:
 *   - Cookie present -> serve the file from the document root directory.
 *   - Cookie missing -> send a JavaScript snippet that sets the cookie and
 *                       reloads the page (the browser does this automatically).
 *
 * SECURITY MEASURES (OpenBSD-specific):
 *   - unveil():     restricts filesystem access to DOCROOT (read-only).
 *   - pledge():     restricts all permitted syscalls to stdio + file access.
 *   - realpath() + prefix check: prevents path traversal attacks.
 *   - O_NOFOLLOW:   prevents symbolic links from being exploited.
 *
 * BUILD (OpenBSD):
 *   cc -O2 -Wall -Wextra -o auth-gate auth-gate.c
 */

#include <stdio.h>     /* printf(), snprintf(), fdopen(), fread(), fwrite(), fclose() */
#include <stdlib.h>    /* getenv(), realpath() */
#include <string.h>    /* strlen(), strncmp(), strchr(), strrchr(), strstr(), strcmp() */
#include <strings.h>   /* strcasecmp() -- case-insensitive comparison */
#include <sys/stat.h>  /* fstat(), struct stat, S_ISREG() */
#include <unistd.h>    /* unveil(), pledge(), close() */
#include <fcntl.h>     /* open(), O_RDONLY, O_NOFOLLOW */
#include <limits.h>    /* PATH_MAX -- maximum path length of the OS */

/* --- Configuration constants --------------------------------------------- */

/* Directory from which files may be served.
 * All requests must remain within this path. */
#define DOCROOT "/htdocs/asdfghasdfgh.de"

/* Name and expected value of the access cookie. */
#define COOKIE_NAME  "Yogsototh_opens_the_door"
#define COOKIE_VALUE "1"

/* Buffer size for reading and sending files (8 KiB is sufficient). */
#define BUF_SIZE 8192

/* --- Helper functions ----------------------------------------------------- */

/*
 * hex_digit_value -- numeric value of one hexadecimal digit
 *
 * Returns 0..15 for '0'-'9', 'a'-'f', 'A'-'F' and -1 for anything else.
 * Unlike strtol(), this accepts neither whitespace nor a sign, so "%+1" or
 * "% 5" can never be mistaken for a valid escape. The letter ranges assume
 * an ASCII-compatible character set (which URLs are defined in).
 */
static int hex_digit_value(char c) {
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
    }
    return -1;
}

/*
 * url_decode -- decodes a percent-encoded URL string
 *
 * Background: many characters are not allowed directly in URLs.
 * Instead they are encoded as %XX (XX = hexadecimal value).
 * Example: %2F stands for '/', %20 for a space character.
 *
 * Parameters:
 *   dst     -- destination buffer where the result is written
 *   src     -- input URL (e.g. "/page%2Ftest"), NUL-terminated
 *   dstsize -- size of the destination buffer in bytes; at most
 *              dstsize - 1 bytes plus the terminating '\0' are written.
 *              With dstsize == 0 nothing is written at all.
 *
 * Security:
 *   - NUL bytes (0x00), control characters (< 0x20) and DEL (0x7F) are
 *     dropped from the output, whether they arrive percent-encoded or as
 *     literal bytes. They would otherwise terminate C strings early or
 *     smuggle unexpected bytes into later checks.
 *   - A '%' followed by two characters that are not both hex digits is
 *     dropped together with those two characters.
 *   - A '%' with fewer than two characters after it is copied literally.
 *   - Slashes (%2F) are passed through; callers are expected to validate
 *     the decoded path afterwards.
 */
static void url_decode(char *dst, const char *src, size_t dstsize) {
    if (dstsize == 0) {
        return;
    }

    size_t out = 0; /* write position in the destination buffer */

    /* dstsize - 1: always keep one byte for the terminating '\0' */
    while (*src != '\0' && out < dstsize - 1) {
        int byte;

        if (src[0] == '%' && src[1] != '\0' && src[2] != '\0') {
            int hi = hex_digit_value(src[1]);
            int lo = hex_digit_value(src[2]);

            /* -1 marks a malformed escape; it fails the range test below */
            byte = (hi < 0 || lo < 0) ? -1 : hi * 16 + lo;
            src += 3; /* skip '%' and both following characters */
        } else {
            /* unsigned char: bytes >= 0x80 must not turn negative */
            byte = (unsigned char)*src++;
        }

        /* One filter for both encoded and literal bytes */
        if (byte > 0x1f && byte != 0x7f) {
            dst[out++] = (char)byte;
        }
    }
    dst[out] = '\0';
}

/*
 * sanitize_uri -- basic plausibility check of the (decoded) request path
 *
 * This function catches obvious attack patterns early; it is a first
 * filter, not a substitute for canonicalising the final path.
 *
 * Rejected:
 *   - NULL
 *   - Paths that do not start with '/' (this also covers "" and a bare "..")
 *   - Paths containing "/../" or ending with "/.." (directory traversal)
 *
 * Returns 0 if the path looks clean, -1 if suspicious.
 */
static int sanitize_uri(const char *uri) {
    /* HTTP request paths must start with '/' */
    if (uri == NULL || uri[0] != '/') {
        return -1;
    }

    /* A ".." component somewhere in the middle of the path */
    if (strstr(uri, "/../") != NULL) {
        return -1;
    }

    /* A ".." component as the very last path element */
    size_t len = strlen(uri);
    if (len >= 3 && strcmp(uri + len - 3, "/..") == 0) {
        return -1;
    }

    return 0;
}

/*
 * check_bypass -- checks whether the browser sends the access cookie
 *
 * CGI programs receive HTTP headers as environment variables.
 * The "Cookie:" header is available in the HTTP_COOKIE variable, e.g.:
 *   "session=abc; Yogsototh_opens_the_door=1; theme=dark"
 *
 * The function walks the semicolon-separated cookie pairs and looks for
 * exactly COOKIE_NAME=COOKIE_VALUE. The name must start a pair (after
 * optional spaces) and the value must be followed by '\0', ';' or ' ',
 * so that "...door=10" or "x...door=1" do not match.
 *
 * Parameters:
 *   cookie_hdr -- raw Cookie header value, or NULL if none was sent
 *
 * Returns 1 if the cookie is present and correct, 0 otherwise
 * (including for NULL and for headers longer than 4096 bytes).
 */
static int check_bypass(const char *cookie_hdr) {
    const size_t max_hdr_len = 4096;               /* longer headers are refused */
    const size_t namelen = sizeof(COOKIE_NAME) - 1;  /* lengths without '\0' */
    const size_t vallen = sizeof(COOKIE_VALUE) - 1;

    if (cookie_hdr == NULL || strlen(cookie_hdr) > max_hdr_len) {
        return 0;
    }

    const char *pair = cookie_hdr;
    while (*pair != '\0') {
        /* Skip the spaces that follow a ';' separator */
        while (*pair == ' ') {
            pair++;
        }

        if (strncmp(pair, COOKIE_NAME, namelen) == 0 && pair[namelen] == '=') {
            const char *value = pair + namelen + 1;

            if (strncmp(value, COOKIE_VALUE, vallen) == 0) {
                char after = value[vallen];

                if (after == '\0' || after == ';' || after == ' ') {
                    return 1;
                }
            }
        }

        /* Continue behind the next ';' -- or stop if this was the last pair */
        const char *sep = strchr(pair, ';');
        if (sep == NULL) {
            break;
        }
        pair = sep + 1;
    }

    return 0;
}

/*
 * get_mime_type -- determines the MIME type from the file extension
 *
 * Browsers need the MIME type to know how to handle a file
 * (render HTML, display an image, open a PDF, etc.).
 *
 * The extension is everything after the last '.' in the path and is
 * compared case-insensitively (".PNG" is treated like ".png"). If the last
 * '.' belongs to a directory name, the "extension" contains a '/' and
 * therefore matches nothing in the table.
 *
 * Returns a constant string, e.g. "text/html; charset=utf-8".
 * Falls back to "application/octet-stream" for NULL, for paths without a
 * '.' and for unknown extensions (browsers treat this as an unknown
 * binary file).
 */
static const char *get_mime_type(const char *path) {
    static const struct {
        const char *ext;
        const char *mime;
    } known_types[] = {
        { "html",  "text/html; charset=utf-8" },
        { "htm",   "text/html; charset=utf-8" },
        { "css",   "text/css" },
        { "js",    "application/javascript" },
        { "json",  "application/json" },
        { "png",   "image/png" },
        { "jpg",   "image/jpeg" },
        { "jpeg",  "image/jpeg" },
        { "gif",   "image/gif" },
        { "svg",   "image/svg+xml" },
        { "ico",   "image/x-icon" },
        { "txt",   "text/plain; charset=utf-8" },
        { "pdf",   "application/pdf" },
        { "xml",   "application/xml" },
        { "webp",  "image/webp" },
        { "woff",  "font/woff" },
        { "woff2", "font/woff2" },
    };
    static const char fallback[] = "application/octet-stream";

    if (path == NULL) {
        return fallback;
    }

    const char *dot = strrchr(path, '.');
    if (dot == NULL) {
        return fallback;
    }

    const char *ext = dot + 1; /* e.g. "html" instead of ".html" */
    for (size_t k = 0; k < sizeof(known_types) / sizeof(known_types[0]); k++) {
        if (strcasecmp(ext, known_types[k].ext) == 0) {
            return known_types[k].mime;
        }
    }
    return fallback;
}

/*
 * send_error -- sends an HTTP error response to the browser
 *
 * CGI programs communicate with the web server via stdout.
 * The web server forwards the output to the browser.
 *
 * An HTTP response consists of:
 *   1. Header lines (Status, Content-Type, ...)
 *   2. A blank line (\r\n\r\n) as separator
 *   3. The body (the actual content)
 */
The body (the actual content) + * + * Example for code=404, reason="Not Found": + * Status: 404 Not Found\r\n + * Content-Type: text/plain\r\n + * X-Content-Type-Options: nosniff\r\n + * Content-Length: 13\r\n + * \r\n + * 404 Not Found + */ +static void send_error(int code, const char *reason) { + char body[64]; + snprintf(body, sizeof(body), "%d %s", code, reason); + + printf("Status: %d %s\r\n", code, reason); + printf("Content-Type: text/plain\r\n"); + /* Prevents the browser from interpreting the content differently */ + printf("X-Content-Type-Options: nosniff\r\n"); + printf("Content-Length: %zu\r\n\r\n%s", strlen(body), body); +} + +/* --- Main program --------------------------------------------------------- */ + +int main(void) { + /* + * Read CGI environment variables. + * The web server populates these automatically before starting the CGI + * program. + * + * HTTP_COOKIE -- the browser's "Cookie:" header, e.g. "name=value" + * REQUEST_URI -- the requested path, e.g. "/page/index.html" + */ + const char *cookie_hdr = getenv("HTTP_COOKIE"); + const char *uri = getenv("REQUEST_URI"); + + /* + * OpenBSD hardening: unveil() -- restrict filesystem access + * + * After this call the program may ONLY access DOCROOT. + * Attempts to open other directories (/etc, /home, ...) will fail + * with ENOENT, as if the files did not exist. + * + * "r" = read-only; no writing, no execution. + * unveil(NULL, NULL) finalises the configuration (no further paths). + */ + if (unveil(DOCROOT, "r") == -1 || + unveil(NULL, NULL) == -1) { + send_error(500, "Internal Server Error"); + return 1; + } + + /* + * OpenBSD hardening: pledge() -- restrict permitted syscalls + * + * After this call the program is only allowed: + * stdio -- standard I/O (printf, fread, fwrite, ...) + * rpath -- read files (open, fstat, ...) + * + * All other syscalls (network, fork, exec, ...) are immediately + * terminated by the kernel with SIGABRT. 
This limits the damage + * if an attacker manages to inject code. + */ + if (pledge("stdio rpath", NULL) == -1) { + send_error(500, "Internal Server Error"); + return 1; + } + + /* Fallback: if no URI was provided, assume the root page */ + if (!uri) uri = "/"; + + /* + * Strip the query string. + * A URL may look like: /page/index.html?search=test&page=2 + * The part starting at '?' is not needed for the file path. + */ + char raw_uri[PATH_MAX]; + snprintf(raw_uri, sizeof(raw_uri), "%s", uri); + char *qs = strchr(raw_uri, '?'); + if (qs) *qs = '\0'; /* truncate the string at '?' */ + + /* + * Decode percent-encoding. + * "/page%2Ftest%20foo" becomes "/page/test foo". + */ + char decoded_uri[PATH_MAX]; + url_decode(decoded_uri, raw_uri, sizeof(decoded_uri)); + + /* First plausibility check of the path */ + if (sanitize_uri(decoded_uri) != 0) { + send_error(400, "Bad Request"); + return 0; + } + + /* + * Cookie check -- the core of the access control. + * + * If the cookie is missing or incorrect, we send the browser a small + * JavaScript snippet. It sets the cookie and reloads the page. + * On the second request the cookie will be present. + * + * Cache-Control: no-store -- prevents the browser from caching this + * response (otherwise it might skip the reload). + */ + if (!check_bypass(cookie_hdr)) { + const char *body = + ""; + + printf("Status: 418 I'm a teapot\r\n"); + printf("Cache-Control: no-store\r\n"); + printf("Content-Type: text/html\r\n"); + printf("X-Content-Type-Options: nosniff\r\n"); + printf("Content-Length: %zu\r\n\r\n%s", strlen(body), body); + return 0; + } + + /* + * Build the file path. + * DOCROOT + URI, e.g.: /htdocs/asdfghasdfgh.de + /page/index.html + * = /htdocs/asdfghasdfgh.de/page/index.html + */ + char path[PATH_MAX]; + int r = snprintf(path, sizeof(path), "%s%s", DOCROOT, decoded_uri); + if (r < 0 || r >= (int)sizeof(path)) { + send_error(400, "Bad Request"); + return 0; + } + + /* + * Redirect directory requests to index.html. 
+ * If the path ends with '/', append index.html. + * e.g.: /htdocs/.../page/ -> /htdocs/.../page/index.html + */ + if (path[strlen(path) - 1] == '/') { + r = snprintf(path, sizeof(path), "%s%sindex.html", DOCROOT, decoded_uri); + if (r < 0 || r >= (int)sizeof(path)) { + send_error(400, "Bad Request"); + return 0; + } + } + + /* + * Canonicalise the path -- the central defence against path traversal. + * + * realpath() resolves all "..", ".", symlinks, etc. and returns the + * true absolute path. "/htdocs/site/../../etc/passwd" becomes + * "/etc/passwd" -- which we catch in the next step. + * + * If realpath() fails (file does not exist) we return 404. + */ + char resolved[PATH_MAX]; + if (realpath(path, resolved) == NULL) { + send_error(404, "Not Found"); + return 0; + } + + /* + * Prefix check: the resolved path must lie within DOCROOT. + * + * Example of an attack being blocked: + * Attacker requests: /../../../etc/passwd + * After realpath(): /etc/passwd + * Prefix check: does NOT start with "/htdocs/asdfghasdfgh.de/" -> 403 + * + * The second condition (resolved[docroot_len] != '/') prevents a + * directory with a similar name (e.g. /htdocs/asdfghasdfgh.de-evil) + * from being falsely accepted as inside DOCROOT. + */ + size_t docroot_len = strlen(DOCROOT); + if (strncmp(resolved, DOCROOT, docroot_len) != 0 || + (resolved[docroot_len] != '/' && resolved[docroot_len] != '\0')) { + send_error(403, "Forbidden"); + return 0; + } + + /* + * Open the file. + * + * O_RDONLY -- open for reading only, not writing + * O_NOFOLLOW -- if the resolved path itself is a symlink, open() fails. + * This prevents a TOCTOU race condition: between realpath() + * and open() an attacker could replace the file with a + * symlink. O_NOFOLLOW closes that window. + */ + int fd = open(resolved, O_RDONLY | O_NOFOLLOW); + if (fd == -1) { + send_error(403, "Forbidden"); + return 0; + } + + /* + * Read file metadata via the open file descriptor (not the path). 
+ * + * Using fd instead of the path avoids another race condition. + * + * S_ISREG checks that it is a regular file (not a directory, device, + * or pipe). We only want to serve actual files. + */ + struct stat st; + if (fstat(fd, &st) != 0 || !S_ISREG(st.st_mode)) { + close(fd); + send_error(404, "Not Found"); + return 0; + } + + /* + * Wrap the file descriptor in a FILE stream. + * fdopen() takes ownership of fd -- fclose() will also close fd. + */ + FILE *f = fdopen(fd, "rb"); + if (!f) { + close(fd); + send_error(500, "Internal Server Error"); + return 0; + } + + /* + * Send the HTTP response headers. + * + * Content-Length: tells the browser how many bytes follow. + * st.st_size is of type off_t (may be 64-bit), so cast to long long + * for a portable printf format specifier. + * + * X-Frame-Options: DENY -- prevents the page from being embedded in an + *