aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author kj_sh604 2026-03-11 22:47:32 -0400
committer kj_sh604 2026-03-11 22:47:32 -0400
commit fe9d6171de63ce839b01eade15ddcb6bb14e06f8 (patch)
tree b975b438194c93d9df98d33f4085ad16f193e45e
parent d151ec91488f10134babae4d4a879d823b221b62 (diff)
refactor: prod-level changes
-rw-r--r-- Dockerfile 18
-rw-r--r-- README.md 24
-rw-r--r-- requirements.txt 1
-rw-r--r-- src/__legacy_src/app.nim 525
-rw-r--r-- src/__legacy_src/backend_compat.nim 525
-rw-r--r-- src/__legacy_src/server.nim 525
-rw-r--r-- src/app.py 280
7 files changed, 1774 insertions, 124 deletions
diff --git a/Dockerfile b/Dockerfile
index e87147d..8ff9c84 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,13 +1,18 @@
FROM python:3.12-slim
-ENV DEBIAN_FRONTEND=noninteractive
+ENV DEBIAN_FRONTEND=noninteractive \
+ PYTHONDONTWRITEBYTECODE=1 \
+ PYTHONUNBUFFERED=1 \
+ PORT=5001 \
+ HOST=0.0.0.0 \
+ LOG_LEVEL=INFO \
+ TRUST_PROXY=1
RUN apt-get update && apt-get install -y --no-install-recommends \
libcairo2 \
libpango-1.0-0 \
libpangocairo-1.0-0 \
libgdk-pixbuf-2.0-0 \
- libffi-dev \
shared-mime-info \
fonts-noto \
fonts-noto-color-emoji \
@@ -15,13 +20,18 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
WORKDIR /app
+RUN addgroup --system app && adduser --system --ingroup app app
+
COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install --no-cache-dir --disable-pip-version-check -r requirements.txt
COPY src/ .
RUN mkdir -p generated uploads
+RUN chown -R app:app /app
+USER app
+
EXPOSE 5001
-CMD ["python3", "app.py"]
+CMD ["gunicorn", "--bind", "0.0.0.0:5001", "--workers", "2", "--threads", "4", "--timeout", "180", "--graceful-timeout", "30", "--access-logfile", "-", "--error-logfile", "-", "app:app"]
diff --git a/README.md b/README.md
index 7d7a087..0b52be4 100644
--- a/README.md
+++ b/README.md
@@ -16,6 +16,7 @@ a simple web app that converts markdown to pdf.
- python 3.10+
- system packages: `libcairo2 libpango-1.0-0 libpangocairo-1.0-0 libgdk-pixbuf2.0-0 shared-mime-info`
+- gunicorn (installed from `requirements.txt`)
## image usage
@@ -29,11 +30,34 @@ a simple web app that converts markdown to pdf.
### local
```bash
+python -m venv .venv
+source .venv/bin/activate
pip install -r requirements.txt
cd src/
python3 app.py
```
+### production (vps + nginx)
+
+```bash
+cd src/
+../.venv/bin/gunicorn \
+ --bind 127.0.0.1:5001 \
+ --workers 2 \
+ --threads 4 \
+ --timeout 180 \
+ --graceful-timeout 30 \
+ --access-logfile - \
+ --error-logfile - \
+ app:app
+```
+
+nginx should reverse proxy to `127.0.0.1:5001` and pass:
+
+- `X-Forwarded-For`
+- `X-Forwarded-Proto`
+- `X-Forwarded-Host`
+
### docker
```bash
diff --git a/requirements.txt b/requirements.txt
index e081602..e50d931 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ markdown==3.7.*
pygments==2.19.*
weasyprint==63.*
reportlab==4.3.*
+gunicorn==23.0.*
\ No newline at end of file
diff --git a/src/__legacy_src/app.nim b/src/__legacy_src/app.nim
new file mode 100644
index 0000000..8b64793
--- /dev/null
+++ b/src/__legacy_src/app.nim
@@ -0,0 +1,525 @@
+import
+ std/[
+ asynchttpserver, asyncdispatch, os, osproc, streams, strutils, tables, times, uri,
+ random,
+ ]
+
+# tiny backend in nimlang, may be stupid, but this was fun
+
+const
+ # Lowercase file extensions (without the dot) accepted for image uploads.
+ AllowedImageExtensions = ["png", "jpg", "jpeg", "gif", "webp", "svg"]
+ # Values accepted for the `paper_size` form field.
+ ValidPaperSizes = [
+ "a0paper", "a1paper", "a2paper", "a3paper", "a4paper", "a5paper", "a6paper",
+ "b0paper", "b1paper", "b2paper", "b3paper", "b4paper", "b5paper", "b6paper",
+ "c4paper", "c5paper", "c6paper", "letterpaper", "legalpaper", "executivepaper",
+ "ledgerpaper", "tabloid", "statement", "flsa",
+ ]
+ # Values accepted for the `margin` form field.
+ ValidMargins = ["0.25in", "0.5in", "0.75in", "1in", "1.25in", "1.5in", "1.75in"]
+ # Values accepted for the `line_spacing` form field.
+ ValidLineSpacings = ["1", "1.5", "2"]
+ # (name, width, height) entries; paper sizes listed here are handed to pandoc
+ # as explicit paperwidth/paperheight values instead of a papersize name.
+ CustomPaperDimensions = [
+ ("tabloid", "11in", "17in"),
+ ("statement", "5.5in", "8.5in"),
+ ("flsa", "8.5in", "13in"),
+ ]
+
+# Prefix used for temporary directory names and generated PDF file names.
+const AppName = "likha-pdf"
+
+proc lookupCustomPaper(name: string): tuple[width: string, height: string] =
+  ## Returns the dimensions registered for `name` in `CustomPaperDimensions`,
+  ## or a pair of empty strings when `name` has no entry there.
+  result = (width: "", height: "")
+  for entry in CustomPaperDimensions:
+    if entry[0] == name:
+      result = (width: entry[1], height: entry[2])
+      break
+
+proc baseDir(): string {.inline.} =
+  ## Directory returned by `getAppDir()`; every other path is built from it.
+  result = getAppDir()
+
+proc generatedDir(): string {.inline.} =
+  ## `generated` directory under the base directory.
+  result = joinPath(baseDir(), "generated")
+
+proc uploadsDir(): string {.inline.} =
+  ## `uploads` directory under the base directory.
+  result = joinPath(baseDir(), "uploads")
+
+proc latexTemplatePath(): string {.inline.} =
+  ## Path of `latex/template.tex` under the base directory.
+  result = joinPath(baseDir(), "latex", "template.tex")
+
+proc templatesDir(): string {.inline.} =
+  ## `templates` directory under the base directory.
+  result = joinPath(baseDir(), "templates")
+
+proc partialsDir(): string {.inline.} =
+  ## `partials` directory inside the templates directory.
+  result = joinPath(templatesDir(), "partials")
+
+proc staticDir(): string {.inline.} =
+  ## `static` directory under the base directory.
+  result = joinPath(baseDir(), "static")
+
+type MultipartPart = object
+ ## One named part extracted from a multipart request body.
+ name: string # `name` parameter of the part's Content-Disposition header
+ filename: string # `filename` parameter of Content-Disposition; empty when absent
+ contentType: string # part's Content-Type; "application/octet-stream" when the part has none
+ content: string # body of the part
+
+# helpers
+proc htmlEscape(value: string): string =
+  ## Returns `value` with the five HTML-significant characters
+  ## (`&`, `<`, `>`, `"`, `'`) replaced by their entities.
+  ##
+  ## `&` must be handled first so the ampersands introduced by the later
+  ## replacements are not escaped a second time.
+  result = value
+  result = result.replace("&", "&amp;")
+  result = result.replace("<", "&lt;")
+  result = result.replace(">", "&gt;")
+  result = result.replace("\"", "&quot;")
+  result = result.replace("'", "&#39;")
+
+proc randomHex(length: int): string =
+  ## Builds a string of `length` lowercase hexadecimal digits using the
+  ## `std/random` default generator.
+  const digits = "0123456789abcdef"
+  result = newStringOfCap(length)
+  var remaining = length
+  while remaining > 0:
+    result.add(digits[rand(15)])
+    dec remaining
+
+proc renderTemplate(
+    filePath: string, replacements: openArray[(string, string)]
+): string =
+  ## Reads the file at `filePath` and applies each `(token, replacement)` pair
+  ## in order, replacing every occurrence of the token.
+  var page = readFile(filePath)
+  for pair in replacements:
+    page = page.replace(pair[0], pair[1])
+  page
+
+proc decodeFormComponent(value: string): string =
+  ## Decodes one form-encoded key or value: `+` becomes a space, then the
+  ## string is passed through `decodeUrl`.
+  let spaced = value.replace("+", " ")
+  result = decodeUrl(spaced)
+
+proc parseUrlEncoded(body: string): Table[string, string] =
+  ## Parses a form-encoded body into a key/value table.
+  ##
+  ## Empty pairs are skipped, a pair without `=` maps its key to the empty
+  ## string, and a repeated key keeps the value that appears last.
+  result = initTable[string, string]()
+  for field in body.split("&"):
+    if field.len == 0:
+      continue
+    let eq = field.find('=')
+    let rawKey = if eq < 0: field else: field[0 ..< eq]
+    let rawValue = if eq < 0: "" else: field[eq + 1 .. ^1]
+    result[decodeFormComponent(rawKey)] = decodeFormComponent(rawValue)
+
+# "options" are optional, defaults are forever.
+proc pickOption(value: string, fallback: string, options: openArray[string]): string =
+  ## Returns `value` when it is one of `options`, otherwise `fallback`.
+  if value in options: value else: fallback
+
+proc sanitizeFilename(filename: string): string =
+  ## Keeps ASCII letters, digits, `-`, `_` and `.`, turns each space into `_`,
+  ## and drops every other byte.
+  result = newStringOfCap(filename.len)
+  for ch in filename:
+    case ch
+    of 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '-', '_', '.':
+      result.add(ch)
+    of ' ':
+      result.add('_')
+    else:
+      discard
+
+proc baseFilename(value: string): string =
+  ## Returns the part of `value` after the last path separator, treating both
+  ## `/` and `\` as separators. A trailing separator yields an empty string.
+  let unified = value.replace("\\", "/")
+  let cut = unified.rfind('/')
+  if cut < 0:
+    return unified
+  # When the separator is the last character this slice is empty.
+  unified[cut + 1 .. ^1]
+
+proc isAllowedImage(filename: string): bool =
+  ## True when `filename` has a non-empty stem and an extension that,
+  ## lowercased, appears in `AllowedImageExtensions`.
+  let dot = filename.rfind('.')
+  # Reject names with no dot, a leading dot only, or a trailing dot.
+  if dot < 1 or dot == filename.high:
+    return false
+  result = (filename[dot + 1 .. ^1].toLowerAscii() in AllowedImageExtensions)
+
+proc tailText(value: string, maxLen: int = 1200): string =
+  ## Returns the last `maxLen` characters of `value`, or all of `value` when
+  ## it is no longer than `maxLen`.
+  let excess = value.len - maxLen
+  if excess > 0: value[excess .. ^1] else: value
+
+proc extractBoundary(contentType: string): string =
+  ## Extracts the `boundary` parameter from a Content-Type header value,
+  ## with surrounding quotes removed. Returns "" when no such parameter exists.
+  const prefix = "boundary="
+  result = ""
+  for segment in contentType.split(';'):
+    let param = segment.strip()
+    if param.toLowerAscii().startsWith(prefix):
+      result = param[prefix.len .. ^1].strip(chars = {'"', '\''})
+      break
+
+proc stripTrailingCrlf(value: string): string =
+  ## Removes a single trailing CRLF from `value`, if present.
+  if value.endsWith("\r\n"): value[0 ..< value.len - 2] else: value
+
+# hand-rolled multipart parsing, yes i am aware that this is "eh"
+proc parseMultipart(body: string, boundary: string): seq[MultipartPart] =
+  ## Splits a multipart body on `"--" & boundary` and returns every part that
+  ## carries a `name` in its Content-Disposition header.
+  ##
+  ## Chunks without a blank line separating headers from content are ignored.
+  ## A part without a Content-Type header is reported as
+  ## "application/octet-stream".
+  let delimiter = "--" & boundary
+  for rawChunk in body.split(delimiter):
+    # Skip the empty preamble and the closing "--" marker.
+    if rawChunk.len == 0 or rawChunk == "--" or rawChunk == "--\r\n":
+      continue
+
+    var chunk = rawChunk
+    if chunk.startsWith("\r\n"):
+      chunk = chunk[2 .. ^1]
+
+    # The CRLF right before the next delimiter belongs to the delimiter, not
+    # to the part, so it is removed exactly once here.
+    chunk = stripTrailingCrlf(chunk)
+
+    if chunk == "--":
+      continue
+
+    let splitIndex = chunk.find("\r\n\r\n")
+    if splitIndex < 0:
+      continue
+
+    let headerBlock = chunk[0 ..< splitIndex]
+    # The content is taken verbatim: stripping a second CRLF here would cut
+    # the last two bytes off any uploaded file that really ends in CRLF.
+    let content = chunk[splitIndex + 4 .. ^1]
+
+    var name = ""
+    var filename = ""
+    var contentType = "application/octet-stream"
+
+    for line in headerBlock.split("\r\n"):
+      let separator = line.find(':')
+      if separator <= 0:
+        continue
+      let headerName = line[0 ..< separator].strip().toLowerAscii()
+      let headerValue = line[separator + 1 .. ^1].strip()
+
+      if headerName == "content-disposition":
+        for part in headerValue.split(';'):
+          let token = part.strip()
+          if token.startsWith("name="):
+            name = token[5 .. ^1].strip(chars = {'\"', '\''})
+          elif token.startsWith("filename="):
+            filename = token[9 .. ^1].strip(chars = {'\"', '\''})
+      elif headerName == "content-type":
+        contentType = headerValue
+
+    if name.len > 0:
+      result.add(
+        MultipartPart(
+          name: name, filename: filename, contentType: contentType, content: content
+        )
+      )
+
+proc isSafeRelativePath(pathPart: string): bool =
+  ## True for a non-empty path that contains no "..", no backslash, and does
+  ## not start with "/".
+  if pathPart.len == 0:
+    return false
+  if ".." in pathPart or '\\' in pathPart:
+    return false
+  not pathPart.startsWith("/")
+
+proc fileContentType(filePath: string): string =
+  ## Maps the (case-insensitive) file name suffix of `filePath` to a
+  ## Content-Type value; unknown suffixes yield "application/octet-stream".
+  const knownTypes = [
+    (".js", "application/javascript; charset=utf-8"),
+    (".css", "text/css; charset=utf-8"),
+    (".html", "text/html; charset=utf-8"),
+    (".png", "image/png"),
+    (".jpg", "image/jpeg"),
+    (".jpeg", "image/jpeg"),
+    (".gif", "image/gif"),
+    (".webp", "image/webp"),
+    (".svg", "image/svg+xml"),
+    (".pdf", "application/pdf"),
+  ]
+  let lowered = filePath.toLowerAscii()
+  for (suffix, mime) in knownTypes:
+    if lowered.endsWith(suffix):
+      return mime
+  "application/octet-stream"
+
+# response wrappers
+proc respondHtml(req: Request, code: HttpCode, content: string) {.async.} =
+  ## Sends `content` with status `code` and a UTF-8 HTML Content-Type.
+  await req.respond(
+    code, content, newHttpHeaders({"Content-Type": "text/html; charset=utf-8"})
+  )
+
+proc respondText(req: Request, code: HttpCode, content: string) {.async.} =
+  ## Sends `content` with status `code` and a UTF-8 plain-text Content-Type.
+  await req.respond(
+    code, content, newHttpHeaders({"Content-Type": "text/plain; charset=utf-8"})
+  )
+
+proc respondFile(
+    req: Request,
+    filePath: string,
+    asAttachment: bool = false,
+    attachmentName: string = "",
+) {.async.} =
+  ## Sends the file at `filePath` with a Content-Type chosen by
+  ## `fileContentType`, or a plain-text 404 when the file does not exist.
+  ##
+  ## A Content-Disposition attachment header is added only when
+  ## `asAttachment` is set and `attachmentName` is non-empty.
+  if fileExists(filePath):
+    var headers = newHttpHeaders()
+    headers["Content-Type"] = fileContentType(filePath)
+    if asAttachment and attachmentName.len > 0:
+      headers["Content-Disposition"] = "attachment; filename=\"" & attachmentName & "\""
+    await req.respond(Http200, readFile(filePath), headers)
+  else:
+    await respondText(req, Http404, "Not found")
+
+# pandoc does the heavy lifting
+proc runPandoc(
+    sourceMarkdown: string,
+    outputPath: string,
+    paperSize: string,
+    margin: string,
+    mainFont: string,
+    lineSpacing: string,
+    showPageNumbers: bool,
+): tuple[ok: bool, output: string, missingPandoc: bool] =
+  ## Converts `sourceMarkdown` to a PDF at `outputPath` by invoking `pandoc`
+  ## with the lualatex engine and the project LaTeX template.
+  ##
+  ## Returns `ok = true` when pandoc exits with status 0. On failure `output`
+  ## holds pandoc's combined stdout/stderr; `missingPandoc` is set when the
+  ## process could not be started at all. The temporary working directory is
+  ## removed on a best-effort basis before returning.
+  let tempDir = getTempDir() / (AppName & "-" & randomHex(10))
+  createDir(tempDir)
+  let tempMarkdownPath = tempDir / "source.md"
+  let tempRawPath = tempDir / "raw.md"
+
+  try:
+    # Write the raw markdown first so the preprocessing pipeline can read it.
+    writeFile(tempRawPath, sourceMarkdown)
+
+    # Preprocess: transliterate to ASCII and normalise quotes.
+    # NOTE(review): the sed quoting looks unbalanced when parsed by sh;
+    # confirm this pipeline can succeed rather than always taking the
+    # fallback below.
+    let iconvCmd =
+      "iconv -c -t ASCII//TRANSLIT " & quoteShell(tempRawPath) &
+      " | sed 's/'\\''/'/g; s/\"\"/\"/g' > " & quoteShell(tempMarkdownPath)
+    let (_, iconvExitCode) = execCmdEx(iconvCmd)
+
+    if iconvExitCode != 0:
+      # Preprocessing failed: fall back to the original content.
+      writeFile(tempMarkdownPath, sourceMarkdown)
+
+    var args = @[
+      tempMarkdownPath,
+      "--from",
+      "markdown+emoji+hard_line_breaks",
+      "--pdf-engine=lualatex",
+      "--template",
+      latexTemplatePath(),
+      "-V",
+      "margin=" & margin,
+      "-V",
+      "mainfont=" & mainFont,
+      "-V",
+      "linespacing=" & lineSpacing,
+      "--resource-path",
+      baseDir() & ":" & uploadsDir() & ":" & tempDir,
+      "-o",
+      outputPath,
+    ]
+
+    # Sizes listed in CustomPaperDimensions are passed as explicit dimensions.
+    let dims = lookupCustomPaper(paperSize)
+    if dims.width.len > 0:
+      args.add("-V")
+      args.add("paperwidth=" & dims.width)
+      args.add("-V")
+      args.add("paperheight=" & dims.height)
+    else:
+      args.add("-V")
+      args.add("papersize=" & paperSize)
+
+    if not showPageNumbers:
+      args.add("-V")
+      args.add("hidepages=true")
+
+    var process: Process
+    try:
+      process =
+        startProcess("pandoc", args = args, options = {poUsePath, poStdErrToStdOut})
+    except OSError:
+      return (
+        ok: false,
+        output: "Pandoc is not installed or not in PATH.",
+        missingPandoc: true,
+      )
+
+    var output = ""
+    var exitCode = -1
+    try:
+      output = process.outputStream.readAll()
+      exitCode = process.waitForExit()
+    finally:
+      # Release the process handle even when reading or waiting raises.
+      process.close()
+
+    if exitCode == 0:
+      return (ok: true, output: "", missingPandoc: false)
+    return (ok: false, output: output, missingPandoc: false)
+  finally:
+    # Best-effort cleanup; a failure here must not mask the conversion result.
+    try:
+      if fileExists(tempRawPath):
+        removeFile(tempRawPath)
+      if fileExists(tempMarkdownPath):
+        removeFile(tempMarkdownPath)
+      if dirExists(tempDir):
+        removeDir(tempDir)
+    except OSError:
+      discard
+
+# app endpoint: strict inputs, loud errors.
+proc handleConvert(req: Request) {.async.} =
+  ## Handles the convert request: validates the form fields, runs pandoc and
+  ## answers with either the result partial or the error partial.
+  ##
+  ## Responds 400 for empty markdown or a failed conversion, and 500 when
+  ## pandoc could not be started.
+  let form = parseUrlEncoded(req.body)
+  let markdown = form.getOrDefault("markdown", "").strip()
+
+  if markdown.len == 0:
+    let html = renderTemplate(
+      partialsDir() / "error.html", [("{{ message }}", "Markdown content is required.")]
+    )
+    await respondHtml(req, Http400, html)
+    return
+
+  let paperSize =
+    pickOption(form.getOrDefault("paper_size", ""), "a4paper", ValidPaperSizes)
+  let margin = pickOption(form.getOrDefault("margin", ""), "1in", ValidMargins)
+
+  # Anything other than "sans", including unknown values, gets the serif face.
+  let mainFont =
+    if form.getOrDefault("main_font", "serif") == "sans": "TeX Gyre Heros"
+    else: "TeX Gyre Pagella"
+  let lineSpacing =
+    pickOption(form.getOrDefault("line_spacing", ""), "1", ValidLineSpacings)
+  let showPageNumbers = form.getOrDefault("page_numbers", "") == "on"
+
+  let stamp = int(getTime().toUnix())
+  let outputName = AppName & "_" & $stamp & "_" & randomHex(32) & ".pdf"
+
+  let conversion = runPandoc(
+    markdown,
+    generatedDir() / outputName,
+    paperSize,
+    margin,
+    mainFont,
+    lineSpacing,
+    showPageNumbers,
+  )
+
+  if conversion.ok:
+    let html = renderTemplate(
+      partialsDir() / "result.html",
+      [
+        ("{{ filename }}", htmlEscape(outputName)),
+        ("{{ download_url }}", "/download/" & encodeUrl(outputName)),
+      ],
+    )
+    await respondHtml(req, Http200, html)
+    return
+
+  # Failure: show the missing-pandoc notice as is, otherwise the tail of the
+  # tool output, falling back to a generic message when there is none.
+  var message = conversion.output
+  if not conversion.missingPandoc:
+    message = tailText(conversion.output.strip())
+    if message.len == 0:
+      message = "PDF conversion failed."
+
+  let html = renderTemplate(
+    partialsDir() / "error.html", [("{{ message }}", htmlEscape(message))]
+  )
+  let code = if conversion.missingPandoc: Http500 else: Http400
+  await respondHtml(req, code, html)
+
+# upload endpoint. accepts image, returns markdown snippet
+proc handleUploadImage(req: Request) {.async.} =
+  ## Handles the image upload request: takes the `image` part of a multipart
+  ## body, stores it under a generated name in the uploads directory and
+  ## answers with the upload result partial.
+  ##
+  ## Responds 400 with the upload error partial when the image part is
+  ## missing or its file type is not allowed.
+  var problem = ""
+  var imagePart: MultipartPart
+  var originalName = ""
+
+  let boundary = extractBoundary(req.headers.getOrDefault("Content-Type"))
+  if boundary.len == 0:
+    problem = "image file is required."
+  else:
+    var foundImage = false
+    for part in parseMultipart(req.body, boundary):
+      if part.name == "image":
+        imagePart = part
+        foundImage = true
+        break
+
+    if not foundImage or imagePart.filename.strip().len == 0:
+      problem = "image file is required."
+    else:
+      originalName = sanitizeFilename(baseFilename(imagePart.filename))
+      if originalName.len == 0 or not isAllowedImage(originalName):
+        problem = "unsupported image type."
+
+  if problem.len > 0:
+    let html = renderTemplate(
+      partialsDir() / "upload_error.html", [("{{ message }}", problem)]
+    )
+    await respondHtml(req, Http400, html)
+    return
+
+  # Only the extension of the client-supplied name survives; the stored name
+  # is generated from the current time and random hex digits.
+  let extension = originalName[originalName.rfind('.') + 1 .. ^1].toLowerAscii()
+  let stamp = int(getTime().toUnix())
+  let storedName = "img_" & $stamp & "_" & randomHex(32) & "." & extension
+
+  writeFile(uploadsDir() / storedName, imagePart.content)
+
+  let html = renderTemplate(
+    partialsDir() / "upload_result.html",
+    [
+      ("{{ filename }}", htmlEscape(storedName)),
+      ("{{ markdown_snippet }}", htmlEscape("![](uploads/" & storedName & ")")),
+      ("{{ preview_url }}", "/uploads/" & encodeUrl(storedName)),
+    ],
+  )
+  await respondHtml(req, Http200, html)
+
+# router table
+proc route(req: Request) {.async.} =
+  ## Dispatches a request by method and path: the index page, files under
+  ## `/static/`, `/uploads/` and `/download/`, and the two POST endpoints.
+  ## Anything else gets a plain-text 404.
+  let path = req.url.path
+  let isGet = req.reqMethod == HttpGet
+  let isPost = req.reqMethod == HttpPost
+
+  if isGet and path == "/":
+    await respondFile(req, templatesDir() / "index.html")
+  elif isGet and path.startsWith("/static/"):
+    let relativePath = decodeUrl(path["/static/".len .. ^1])
+    if isSafeRelativePath(relativePath):
+      await respondFile(req, staticDir() / relativePath)
+    else:
+      await respondText(req, Http400, "Invalid path")
+  elif isGet and path.startsWith("/uploads/"):
+    let relativePath = decodeUrl(path["/uploads/".len .. ^1])
+    if isSafeRelativePath(relativePath):
+      await respondFile(req, uploadsDir() / relativePath)
+    else:
+      await respondText(req, Http400, "Invalid path")
+  elif isGet and path.startsWith("/download/"):
+    let relativePath = decodeUrl(path["/download/".len .. ^1])
+    if isSafeRelativePath(relativePath):
+      await respondFile(
+        req,
+        generatedDir() / relativePath,
+        asAttachment = true,
+        attachmentName = relativePath,
+      )
+    else:
+      await respondText(req, Http400, "Invalid path")
+  elif isPost and path == "/convert":
+    await handleConvert(req)
+  elif isPost and path == "/upload-image":
+    await handleUploadImage(req)
+  else:
+    await respondText(req, Http404, "Not found")
+
+# server boot, then we let htmx do htmx things.
+when isMainModule:
+  # Seed the default generator used by randomHex.
+  randomize()
+
+  # createDir does not fail when the directory already exists, so no
+  # existence check is needed first.
+  createDir(generatedDir())
+  createDir(uploadsDir())
+
+  let server = newAsyncHttpServer()
+  echo "listening on http://localhost:5001"
+  waitFor server.serve(Port(5001), route)
\ No newline at end of file
diff --git a/src/__legacy_src/backend_compat.nim b/src/__legacy_src/backend_compat.nim
new file mode 100644
index 0000000..8b64793
--- /dev/null
+++ b/src/__legacy_src/backend_compat.nim
@@ -0,0 +1,525 @@
+import
+ std/[
+ asynchttpserver, asyncdispatch, os, osproc, streams, strutils, tables, times, uri,
+ random,
+ ]
+
+# tiny backend in nimlang, may be stupid, but this was fun
+
+const
+ # Lowercase file extensions (without the dot) accepted for image uploads.
+ AllowedImageExtensions = ["png", "jpg", "jpeg", "gif", "webp", "svg"]
+ # Values accepted for the `paper_size` form field.
+ ValidPaperSizes = [
+ "a0paper", "a1paper", "a2paper", "a3paper", "a4paper", "a5paper", "a6paper",
+ "b0paper", "b1paper", "b2paper", "b3paper", "b4paper", "b5paper", "b6paper",
+ "c4paper", "c5paper", "c6paper", "letterpaper", "legalpaper", "executivepaper",
+ "ledgerpaper", "tabloid", "statement", "flsa",
+ ]
+ # Values accepted for the `margin` form field.
+ ValidMargins = ["0.25in", "0.5in", "0.75in", "1in", "1.25in", "1.5in", "1.75in"]
+ # Values accepted for the `line_spacing` form field.
+ ValidLineSpacings = ["1", "1.5", "2"]
+ # (name, width, height) entries; paper sizes listed here are handed to pandoc
+ # as explicit paperwidth/paperheight values instead of a papersize name.
+ CustomPaperDimensions = [
+ ("tabloid", "11in", "17in"),
+ ("statement", "5.5in", "8.5in"),
+ ("flsa", "8.5in", "13in"),
+ ]
+
+# Prefix used for temporary directory names and generated PDF file names.
+const AppName = "likha-pdf"
+
+proc lookupCustomPaper(name: string): tuple[width: string, height: string] =
+  ## Returns the dimensions registered for `name` in `CustomPaperDimensions`,
+  ## or a pair of empty strings when `name` has no entry there.
+  result = (width: "", height: "")
+  for entry in CustomPaperDimensions:
+    if entry[0] == name:
+      result = (width: entry[1], height: entry[2])
+      break
+
+proc baseDir(): string {.inline.} =
+  ## Directory returned by `getAppDir()`; every other path is built from it.
+  result = getAppDir()
+
+proc generatedDir(): string {.inline.} =
+  ## `generated` directory under the base directory.
+  result = joinPath(baseDir(), "generated")
+
+proc uploadsDir(): string {.inline.} =
+  ## `uploads` directory under the base directory.
+  result = joinPath(baseDir(), "uploads")
+
+proc latexTemplatePath(): string {.inline.} =
+  ## Path of `latex/template.tex` under the base directory.
+  result = joinPath(baseDir(), "latex", "template.tex")
+
+proc templatesDir(): string {.inline.} =
+  ## `templates` directory under the base directory.
+  result = joinPath(baseDir(), "templates")
+
+proc partialsDir(): string {.inline.} =
+  ## `partials` directory inside the templates directory.
+  result = joinPath(templatesDir(), "partials")
+
+proc staticDir(): string {.inline.} =
+  ## `static` directory under the base directory.
+  result = joinPath(baseDir(), "static")
+
+type MultipartPart = object
+ ## One named part extracted from a multipart request body.
+ name: string # `name` parameter of the part's Content-Disposition header
+ filename: string # `filename` parameter of Content-Disposition; empty when absent
+ contentType: string # part's Content-Type; "application/octet-stream" when the part has none
+ content: string # body of the part
+
+# helpers
+proc htmlEscape(value: string): string =
+  ## Returns `value` with `&`, `<`, `>`, `"` and `'` replaced by their HTML
+  ## entities. The replacement is done in one pass, so ampersands that belong
+  ## to freshly written entities are never escaped again.
+  value.multiReplace(
+    ("&", "&amp;"),
+    ("<", "&lt;"),
+    (">", "&gt;"),
+    ("\"", "&quot;"),
+    ("'", "&#39;"),
+  )
+
+proc randomHex(length: int): string =
+  ## Builds a string of `length` lowercase hexadecimal digits using the
+  ## `std/random` default generator.
+  const digits = "0123456789abcdef"
+  result = newStringOfCap(length)
+  var remaining = length
+  while remaining > 0:
+    result.add(digits[rand(15)])
+    dec remaining
+
+proc renderTemplate(
+    filePath: string, replacements: openArray[(string, string)]
+): string =
+  ## Reads the file at `filePath` and applies each `(token, replacement)` pair
+  ## in order, replacing every occurrence of the token.
+  var page = readFile(filePath)
+  for pair in replacements:
+    page = page.replace(pair[0], pair[1])
+  page
+
+proc decodeFormComponent(value: string): string =
+  ## Decodes one form-encoded key or value: `+` becomes a space, then the
+  ## string is passed through `decodeUrl`.
+  let spaced = value.replace("+", " ")
+  result = decodeUrl(spaced)
+
+proc parseUrlEncoded(body: string): Table[string, string] =
+  ## Parses a form-encoded body into a key/value table.
+  ##
+  ## Empty pairs are skipped, a pair without `=` maps its key to the empty
+  ## string, and a repeated key keeps the value that appears last.
+  result = initTable[string, string]()
+  for field in body.split("&"):
+    if field.len == 0:
+      continue
+    let eq = field.find('=')
+    let rawKey = if eq < 0: field else: field[0 ..< eq]
+    let rawValue = if eq < 0: "" else: field[eq + 1 .. ^1]
+    result[decodeFormComponent(rawKey)] = decodeFormComponent(rawValue)
+
+# "options" are optional, defaults are forever.
+proc pickOption(value: string, fallback: string, options: openArray[string]): string =
+  ## Returns `value` when it is one of `options`, otherwise `fallback`.
+  if value in options: value else: fallback
+
+proc sanitizeFilename(filename: string): string =
+  ## Keeps ASCII letters, digits, `-`, `_` and `.`, turns each space into `_`,
+  ## and drops every other byte.
+  result = newStringOfCap(filename.len)
+  for ch in filename:
+    case ch
+    of 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '-', '_', '.':
+      result.add(ch)
+    of ' ':
+      result.add('_')
+    else:
+      discard
+
+proc baseFilename(value: string): string =
+  ## Returns the part of `value` after the last path separator, treating both
+  ## `/` and `\` as separators. A trailing separator yields an empty string.
+  let unified = value.replace("\\", "/")
+  let cut = unified.rfind('/')
+  if cut < 0:
+    return unified
+  # When the separator is the last character this slice is empty.
+  unified[cut + 1 .. ^1]
+
+proc isAllowedImage(filename: string): bool =
+  ## True when `filename` has a non-empty stem and an extension that,
+  ## lowercased, appears in `AllowedImageExtensions`.
+  let dot = filename.rfind('.')
+  # Reject names with no dot, a leading dot only, or a trailing dot.
+  if dot < 1 or dot == filename.high:
+    return false
+  result = (filename[dot + 1 .. ^1].toLowerAscii() in AllowedImageExtensions)
+
+proc tailText(value: string, maxLen: int = 1200): string =
+  ## Returns the last `maxLen` characters of `value`, or all of `value` when
+  ## it is no longer than `maxLen`.
+  let excess = value.len - maxLen
+  if excess > 0: value[excess .. ^1] else: value
+
+proc extractBoundary(contentType: string): string =
+  ## Extracts the `boundary` parameter from a Content-Type header value,
+  ## with surrounding quotes removed. Returns "" when no such parameter exists.
+  const prefix = "boundary="
+  result = ""
+  for segment in contentType.split(';'):
+    let param = segment.strip()
+    if param.toLowerAscii().startsWith(prefix):
+      result = param[prefix.len .. ^1].strip(chars = {'"', '\''})
+      break
+
+proc stripTrailingCrlf(value: string): string =
+  ## Removes a single trailing CRLF from `value`, if present.
+  if value.endsWith("\r\n"): value[0 ..< value.len - 2] else: value
+
+# hand-rolled multipart parsing, yes i am aware that this is "eh"
+proc parseMultipart(body: string, boundary: string): seq[MultipartPart] =
+  ## Splits a multipart body on `"--" & boundary` and returns every part that
+  ## carries a `name` in its Content-Disposition header.
+  ##
+  ## Chunks without a blank line separating headers from content are ignored.
+  ## A part without a Content-Type header is reported as
+  ## "application/octet-stream".
+  let delimiter = "--" & boundary
+  for rawChunk in body.split(delimiter):
+    # Skip the empty preamble and the closing "--" marker.
+    if rawChunk.len == 0 or rawChunk == "--" or rawChunk == "--\r\n":
+      continue
+
+    var chunk = rawChunk
+    if chunk.startsWith("\r\n"):
+      chunk = chunk[2 .. ^1]
+
+    # The CRLF right before the next delimiter belongs to the delimiter, not
+    # to the part, so it is removed exactly once here.
+    chunk = stripTrailingCrlf(chunk)
+
+    if chunk == "--":
+      continue
+
+    let splitIndex = chunk.find("\r\n\r\n")
+    if splitIndex < 0:
+      continue
+
+    let headerBlock = chunk[0 ..< splitIndex]
+    # The content is taken verbatim: stripping a second CRLF here would cut
+    # the last two bytes off any uploaded file that really ends in CRLF.
+    let content = chunk[splitIndex + 4 .. ^1]
+
+    var name = ""
+    var filename = ""
+    var contentType = "application/octet-stream"
+
+    for line in headerBlock.split("\r\n"):
+      let separator = line.find(':')
+      if separator <= 0:
+        continue
+      let headerName = line[0 ..< separator].strip().toLowerAscii()
+      let headerValue = line[separator + 1 .. ^1].strip()
+
+      if headerName == "content-disposition":
+        for part in headerValue.split(';'):
+          let token = part.strip()
+          if token.startsWith("name="):
+            name = token[5 .. ^1].strip(chars = {'\"', '\''})
+          elif token.startsWith("filename="):
+            filename = token[9 .. ^1].strip(chars = {'\"', '\''})
+      elif headerName == "content-type":
+        contentType = headerValue
+
+    if name.len > 0:
+      result.add(
+        MultipartPart(
+          name: name, filename: filename, contentType: contentType, content: content
+        )
+      )
+
+proc isSafeRelativePath(pathPart: string): bool =
+  ## True for a non-empty path that contains no "..", no backslash, and does
+  ## not start with "/".
+  if pathPart.len == 0:
+    return false
+  if ".." in pathPart or '\\' in pathPart:
+    return false
+  not pathPart.startsWith("/")
+
+proc fileContentType(filePath: string): string =
+  ## Maps the (case-insensitive) file name suffix of `filePath` to a
+  ## Content-Type value; unknown suffixes yield "application/octet-stream".
+  const knownTypes = [
+    (".js", "application/javascript; charset=utf-8"),
+    (".css", "text/css; charset=utf-8"),
+    (".html", "text/html; charset=utf-8"),
+    (".png", "image/png"),
+    (".jpg", "image/jpeg"),
+    (".jpeg", "image/jpeg"),
+    (".gif", "image/gif"),
+    (".webp", "image/webp"),
+    (".svg", "image/svg+xml"),
+    (".pdf", "application/pdf"),
+  ]
+  let lowered = filePath.toLowerAscii()
+  for (suffix, mime) in knownTypes:
+    if lowered.endsWith(suffix):
+      return mime
+  "application/octet-stream"
+
+# response wrappers
+proc respondHtml(req: Request, code: HttpCode, content: string) {.async.} =
+  ## Sends `content` with status `code` and a UTF-8 HTML Content-Type.
+  await req.respond(
+    code, content, newHttpHeaders({"Content-Type": "text/html; charset=utf-8"})
+  )
+
+proc respondText(req: Request, code: HttpCode, content: string) {.async.} =
+  ## Sends `content` with status `code` and a UTF-8 plain-text Content-Type.
+  await req.respond(
+    code, content, newHttpHeaders({"Content-Type": "text/plain; charset=utf-8"})
+  )
+
+proc respondFile(
+    req: Request,
+    filePath: string,
+    asAttachment: bool = false,
+    attachmentName: string = "",
+) {.async.} =
+  ## Sends the file at `filePath` with a Content-Type chosen by
+  ## `fileContentType`, or a plain-text 404 when the file does not exist.
+  ##
+  ## A Content-Disposition attachment header is added only when
+  ## `asAttachment` is set and `attachmentName` is non-empty.
+  if fileExists(filePath):
+    var headers = newHttpHeaders()
+    headers["Content-Type"] = fileContentType(filePath)
+    if asAttachment and attachmentName.len > 0:
+      headers["Content-Disposition"] = "attachment; filename=\"" & attachmentName & "\""
+    await req.respond(Http200, readFile(filePath), headers)
+  else:
+    await respondText(req, Http404, "Not found")
+
+# pandoc does the heavy lifting
+proc runPandoc(
+    sourceMarkdown: string,
+    outputPath: string,
+    paperSize: string,
+    margin: string,
+    mainFont: string,
+    lineSpacing: string,
+    showPageNumbers: bool,
+): tuple[ok: bool, output: string, missingPandoc: bool] =
+  ## Converts `sourceMarkdown` to a PDF at `outputPath` by invoking `pandoc`
+  ## with the lualatex engine and the project LaTeX template.
+  ##
+  ## Returns `ok = true` when pandoc exits with status 0. On failure `output`
+  ## holds pandoc's combined stdout/stderr; `missingPandoc` is set when the
+  ## process could not be started at all. The temporary working directory is
+  ## removed on a best-effort basis before returning.
+  let tempDir = getTempDir() / (AppName & "-" & randomHex(10))
+  createDir(tempDir)
+  let tempMarkdownPath = tempDir / "source.md"
+  let tempRawPath = tempDir / "raw.md"
+
+  try:
+    # Write the raw markdown first so the preprocessing pipeline can read it.
+    writeFile(tempRawPath, sourceMarkdown)
+
+    # Preprocess: transliterate to ASCII and normalise quotes.
+    # NOTE(review): the sed quoting looks unbalanced when parsed by sh;
+    # confirm this pipeline can succeed rather than always taking the
+    # fallback below.
+    let iconvCmd =
+      "iconv -c -t ASCII//TRANSLIT " & quoteShell(tempRawPath) &
+      " | sed 's/'\\''/'/g; s/\"\"/\"/g' > " & quoteShell(tempMarkdownPath)
+    let (_, iconvExitCode) = execCmdEx(iconvCmd)
+
+    if iconvExitCode != 0:
+      # Preprocessing failed: fall back to the original content.
+      writeFile(tempMarkdownPath, sourceMarkdown)
+
+    var args = @[
+      tempMarkdownPath,
+      "--from",
+      "markdown+emoji+hard_line_breaks",
+      "--pdf-engine=lualatex",
+      "--template",
+      latexTemplatePath(),
+      "-V",
+      "margin=" & margin,
+      "-V",
+      "mainfont=" & mainFont,
+      "-V",
+      "linespacing=" & lineSpacing,
+      "--resource-path",
+      baseDir() & ":" & uploadsDir() & ":" & tempDir,
+      "-o",
+      outputPath,
+    ]
+
+    # Sizes listed in CustomPaperDimensions are passed as explicit dimensions.
+    let dims = lookupCustomPaper(paperSize)
+    if dims.width.len > 0:
+      args.add("-V")
+      args.add("paperwidth=" & dims.width)
+      args.add("-V")
+      args.add("paperheight=" & dims.height)
+    else:
+      args.add("-V")
+      args.add("papersize=" & paperSize)
+
+    if not showPageNumbers:
+      args.add("-V")
+      args.add("hidepages=true")
+
+    var process: Process
+    try:
+      process =
+        startProcess("pandoc", args = args, options = {poUsePath, poStdErrToStdOut})
+    except OSError:
+      return (
+        ok: false,
+        output: "Pandoc is not installed or not in PATH.",
+        missingPandoc: true,
+      )
+
+    var output = ""
+    var exitCode = -1
+    try:
+      output = process.outputStream.readAll()
+      exitCode = process.waitForExit()
+    finally:
+      # Release the process handle even when reading or waiting raises.
+      process.close()
+
+    if exitCode == 0:
+      return (ok: true, output: "", missingPandoc: false)
+    return (ok: false, output: output, missingPandoc: false)
+  finally:
+    # Best-effort cleanup; a failure here must not mask the conversion result.
+    try:
+      if fileExists(tempRawPath):
+        removeFile(tempRawPath)
+      if fileExists(tempMarkdownPath):
+        removeFile(tempMarkdownPath)
+      if dirExists(tempDir):
+        removeDir(tempDir)
+    except OSError:
+      discard
+
+# app endpoint: strict inputs, loud errors.
+proc handleConvert(req: Request) {.async.} =
+  ## Handles the convert request: validates the form fields, runs pandoc and
+  ## answers with either the result partial or the error partial.
+  ##
+  ## Responds 400 for empty markdown or a failed conversion, and 500 when
+  ## pandoc could not be started.
+  let form = parseUrlEncoded(req.body)
+  let markdown = form.getOrDefault("markdown", "").strip()
+
+  if markdown.len == 0:
+    let html = renderTemplate(
+      partialsDir() / "error.html", [("{{ message }}", "Markdown content is required.")]
+    )
+    await respondHtml(req, Http400, html)
+    return
+
+  let paperSize =
+    pickOption(form.getOrDefault("paper_size", ""), "a4paper", ValidPaperSizes)
+  let margin = pickOption(form.getOrDefault("margin", ""), "1in", ValidMargins)
+
+  # Anything other than "sans", including unknown values, gets the serif face.
+  let mainFont =
+    if form.getOrDefault("main_font", "serif") == "sans": "TeX Gyre Heros"
+    else: "TeX Gyre Pagella"
+  let lineSpacing =
+    pickOption(form.getOrDefault("line_spacing", ""), "1", ValidLineSpacings)
+  let showPageNumbers = form.getOrDefault("page_numbers", "") == "on"
+
+  let stamp = int(getTime().toUnix())
+  let outputName = AppName & "_" & $stamp & "_" & randomHex(32) & ".pdf"
+
+  let conversion = runPandoc(
+    markdown,
+    generatedDir() / outputName,
+    paperSize,
+    margin,
+    mainFont,
+    lineSpacing,
+    showPageNumbers,
+  )
+
+  if conversion.ok:
+    let html = renderTemplate(
+      partialsDir() / "result.html",
+      [
+        ("{{ filename }}", htmlEscape(outputName)),
+        ("{{ download_url }}", "/download/" & encodeUrl(outputName)),
+      ],
+    )
+    await respondHtml(req, Http200, html)
+    return
+
+  # Failure: show the missing-pandoc notice as is, otherwise the tail of the
+  # tool output, falling back to a generic message when there is none.
+  var message = conversion.output
+  if not conversion.missingPandoc:
+    message = tailText(conversion.output.strip())
+    if message.len == 0:
+      message = "PDF conversion failed."
+
+  let html = renderTemplate(
+    partialsDir() / "error.html", [("{{ message }}", htmlEscape(message))]
+  )
+  let code = if conversion.missingPandoc: Http500 else: Http400
+  await respondHtml(req, code, html)
+
+# upload endpoint. accepts image, returns markdown snippet
+proc handleUploadImage(req: Request) {.async.} =
+  ## Handles the image upload request: takes the `image` part of a multipart
+  ## body, stores it under a generated name in the uploads directory and
+  ## answers with the upload result partial.
+  ##
+  ## Responds 400 with the upload error partial when the image part is
+  ## missing or its file type is not allowed.
+  var problem = ""
+  var imagePart: MultipartPart
+  var originalName = ""
+
+  let boundary = extractBoundary(req.headers.getOrDefault("Content-Type"))
+  if boundary.len == 0:
+    problem = "image file is required."
+  else:
+    var foundImage = false
+    for part in parseMultipart(req.body, boundary):
+      if part.name == "image":
+        imagePart = part
+        foundImage = true
+        break
+
+    if not foundImage or imagePart.filename.strip().len == 0:
+      problem = "image file is required."
+    else:
+      originalName = sanitizeFilename(baseFilename(imagePart.filename))
+      if originalName.len == 0 or not isAllowedImage(originalName):
+        problem = "unsupported image type."
+
+  if problem.len > 0:
+    let html = renderTemplate(
+      partialsDir() / "upload_error.html", [("{{ message }}", problem)]
+    )
+    await respondHtml(req, Http400, html)
+    return
+
+  # Only the extension of the client-supplied name survives; the stored name
+  # is generated from the current time and random hex digits.
+  let extension = originalName[originalName.rfind('.') + 1 .. ^1].toLowerAscii()
+  let stamp = int(getTime().toUnix())
+  let storedName = "img_" & $stamp & "_" & randomHex(32) & "." & extension
+
+  writeFile(uploadsDir() / storedName, imagePart.content)
+
+  let html = renderTemplate(
+    partialsDir() / "upload_result.html",
+    [
+      ("{{ filename }}", htmlEscape(storedName)),
+      ("{{ markdown_snippet }}", htmlEscape("![](uploads/" & storedName & ")")),
+      ("{{ preview_url }}", "/uploads/" & encodeUrl(storedName)),
+    ],
+  )
+  await respondHtml(req, Http200, html)
+
+# router table
+proc route(req: Request) {.async.} =
+  ## Dispatches a request by method and path: the index page, files under
+  ## `/static/`, `/uploads/` and `/download/`, and the two POST endpoints.
+  ## Anything else gets a plain-text 404.
+  let path = req.url.path
+  let isGet = req.reqMethod == HttpGet
+  let isPost = req.reqMethod == HttpPost
+
+  if isGet and path == "/":
+    await respondFile(req, templatesDir() / "index.html")
+  elif isGet and path.startsWith("/static/"):
+    let relativePath = decodeUrl(path["/static/".len .. ^1])
+    if isSafeRelativePath(relativePath):
+      await respondFile(req, staticDir() / relativePath)
+    else:
+      await respondText(req, Http400, "Invalid path")
+  elif isGet and path.startsWith("/uploads/"):
+    let relativePath = decodeUrl(path["/uploads/".len .. ^1])
+    if isSafeRelativePath(relativePath):
+      await respondFile(req, uploadsDir() / relativePath)
+    else:
+      await respondText(req, Http400, "Invalid path")
+  elif isGet and path.startsWith("/download/"):
+    let relativePath = decodeUrl(path["/download/".len .. ^1])
+    if isSafeRelativePath(relativePath):
+      await respondFile(
+        req,
+        generatedDir() / relativePath,
+        asAttachment = true,
+        attachmentName = relativePath,
+      )
+    else:
+      await respondText(req, Http400, "Invalid path")
+  elif isPost and path == "/convert":
+    await handleConvert(req)
+  elif isPost and path == "/upload-image":
+    await handleUploadImage(req)
+  else:
+    await respondText(req, Http404, "Not found")
+
+# server boot, then we let htmx do htmx things.
+when isMainModule:
+  # Seed the default generator used by randomHex.
+  randomize()
+
+  # createDir does not fail when the directory already exists, so no
+  # existence check is needed first.
+  createDir(generatedDir())
+  createDir(uploadsDir())
+
+  let server = newAsyncHttpServer()
+  echo "listening on http://localhost:5001"
+  waitFor server.serve(Port(5001), route)
\ No newline at end of file
diff --git a/src/__legacy_src/server.nim b/src/__legacy_src/server.nim
new file mode 100644
index 0000000..8b64793
--- /dev/null
+++ b/src/__legacy_src/server.nim
@@ -0,0 +1,525 @@
+import
+ std/[
+ asynchttpserver, asyncdispatch, os, osproc, streams, strutils, tables, times, uri,
+ random,
+ ]
+
+# tiny backend in nimlang, may be stupid, but this was fun
+
+const
+ # lower-case file extensions isAllowedImage accepts for uploads
+ AllowedImageExtensions = ["png", "jpg", "jpeg", "gif", "webp", "svg"]
+ # values handleConvert accepts for the "paper_size" form field
+ ValidPaperSizes = [
+ "a0paper", "a1paper", "a2paper", "a3paper", "a4paper", "a5paper", "a6paper",
+ "b0paper", "b1paper", "b2paper", "b3paper", "b4paper", "b5paper", "b6paper",
+ "c4paper", "c5paper", "c6paper", "letterpaper", "legalpaper", "executivepaper",
+ "ledgerpaper", "tabloid", "statement", "flsa",
+ ]
+ # values handleConvert accepts for the "margin" form field
+ ValidMargins = ["0.25in", "0.5in", "0.75in", "1in", "1.25in", "1.5in", "1.75in"]
+ # values handleConvert accepts for the "line_spacing" form field
+ ValidLineSpacings = ["1", "1.5", "2"]
+ # (name, width, height) triples; runPandoc passes these as explicit
+ # paperwidth/paperheight instead of a papersize name
+ CustomPaperDimensions = [
+ ("tabloid", "11in", "17in"),
+ ("statement", "5.5in", "8.5in"),
+ ("flsa", "8.5in", "13in"),
+ ]
+
+# prefix for the temp directory and for generated PDF file names
+const AppName = "likha-pdf"
+
+proc lookupCustomPaper(name: string): tuple[width: string, height: string] =
+  ## Returns the dimensions listed for `name` in `CustomPaperDimensions`, or a
+  ## pair of empty strings when `name` has no entry there.
+  result = (width: "", height: "")
+  for entry in CustomPaperDimensions:
+    if entry[0] == name:
+      result = (width: entry[1], height: entry[2])
+      break
+
+proc baseDir(): string {.inline.} =
+  ## Directory of the running executable; all other paths are built from it.
+  result = getAppDir()
+
+proc generatedDir(): string {.inline.} =
+  ## `generated` directory under `baseDir()`.
+  result = baseDir() / "generated"
+
+proc uploadsDir(): string {.inline.} =
+  ## `uploads` directory under `baseDir()`.
+  result = baseDir() / "uploads"
+
+proc latexTemplatePath(): string {.inline.} =
+  ## Path of `latex/template.tex` under `baseDir()`.
+  result = baseDir() / "latex" / "template.tex"
+
+proc templatesDir(): string {.inline.} =
+  ## `templates` directory under `baseDir()`.
+  result = baseDir() / "templates"
+
+proc partialsDir(): string {.inline.} =
+  ## `partials` directory under `templatesDir()`.
+  result = templatesDir() / "partials"
+
+proc staticDir(): string {.inline.} =
+  ## `static` directory under `baseDir()`.
+  result = baseDir() / "static"
+
+# One section of a multipart/form-data body, as filled in by parseMultipart.
+type MultipartPart = object
+ # `name=` parameter of the part's Content-Disposition header
+ name: string
+ # `filename=` parameter of the same header; empty when the part has none
+ filename: string
+ # value of the part's Content-Type header; parseMultipart defaults it to
+ # "application/octet-stream"
+ contentType: string
+ # everything after the blank line that ends the part's headers
+ content: string
+
+# helpers
+proc htmlEscape(value: string): string =
+  ## Replaces `&`, `<`, `>`, `"` and `'` with their HTML entities so `value`
+  ## can be dropped into markup or an attribute.
+  result = newStringOfCap(value.len)
+  for ch in value:
+    case ch
+    of '&':
+      result.add("&amp;")
+    of '<':
+      result.add("&lt;")
+    of '>':
+      result.add("&gt;")
+    of '"':
+      result.add("&quot;")
+    of '\'':
+      result.add("&#39;")
+    else:
+      result.add(ch)
+
+proc randomHex(length: int): string =
+  ## Returns `length` lower-case hex digits drawn from std/random's global
+  ## generator.
+  const digits = "0123456789abcdef"
+  result = newStringOfCap(length)
+  var remaining = length
+  while remaining > 0:
+    result.add(digits[rand(digits.high)])
+    dec remaining
+
+proc renderTemplate(
+    filePath: string, replacements: openArray[(string, string)]
+): string =
+  ## Reads the file at `filePath` and applies each (token, replacement) pair
+  ## in order, replacing every occurrence of the token.
+  result = readFile(filePath)
+  for pair in replacements:
+    result = result.replace(pair[0], pair[1])
+
+proc decodeFormComponent(value: string): string =
+  ## Decodes one form-encoded key or value: `+` becomes a space, then the
+  ## percent escapes are resolved.
+  let spaced = value.replace('+', ' ')
+  result = decodeUrl(spaced)
+
+proc parseUrlEncoded(body: string): Table[string, string] =
+  ## Parses an `&`-separated, `=`-delimited form body into a table.
+  ##
+  ## Empty pairs are skipped, a pair without `=` maps its decoded text to "",
+  ## and a key that appears more than once keeps its last value.
+  result = initTable[string, string]()
+  for field in body.split('&'):
+    if field.len > 0:
+      let eq = field.find('=')
+      if eq >= 0:
+        result[decodeFormComponent(field[0 ..< eq])] =
+          decodeFormComponent(field[eq + 1 .. ^1])
+      else:
+        result[decodeFormComponent(field)] = ""
+
+# "options" are optional, defaults are forever.
+proc pickOption(value: string, fallback: string, options: openArray[string]): string =
+  ## Returns `value` when it is one of `options`, otherwise `fallback`.
+  if value in options: value else: fallback
+
+proc sanitizeFilename(filename: string): string =
+  ## Keeps ASCII letters, digits, `-`, `_` and `.`, turns each space into `_`
+  ## and drops every other byte.
+  const keep = {'a' .. 'z', 'A' .. 'Z', '0' .. '9', '-', '_', '.'}
+  result = newStringOfCap(filename.len)
+  for ch in filename:
+    if ch in keep:
+      result.add(ch)
+    elif ch == ' ':
+      result.add('_')
+
+proc baseFilename(value: string): string =
+  ## Returns what follows the last `/` or `\` in `value`: the whole string when
+  ## there is no separator, "" when the string ends with one.
+  let unified = value.replace('\\', '/')
+  let cut = unified.rfind('/')
+  if cut < 0:
+    unified
+  elif cut == unified.high:
+    ""
+  else:
+    unified[cut + 1 .. ^1]
+
+proc isAllowedImage(filename: string): bool =
+  ## True when `filename` has a non-empty stem, a dot and an extension that,
+  ## lower-cased, is listed in `AllowedImageExtensions`.
+  let dot = filename.rfind('.')
+  # the dot may be neither the first nor the last character
+  if dot >= 1 and dot != filename.high:
+    result = filename[dot + 1 .. ^1].toLowerAscii() in AllowedImageExtensions
+
+proc tailText(value: string, maxLen: int = 1200): string =
+  ## Returns the last `maxLen` bytes of `value`, or all of it when it is no
+  ## longer than that.
+  let start = value.len - maxLen
+  if start <= 0: value else: value[start .. ^1]
+
+proc extractBoundary(contentType: string): string =
+  ## Returns the value of the first `boundary=` parameter in a Content-Type
+  ## header, with surrounding quotes removed, or "" when there is none.
+  ## The parameter name is matched case-insensitively.
+  const prefix = "boundary="
+  result = ""
+  for rawParam in contentType.split(';'):
+    let param = rawParam.strip()
+    if param.toLowerAscii().startsWith(prefix):
+      result = param[prefix.len .. ^1].strip(chars = {'"', '\''})
+      break
+
+proc stripTrailingCrlf(value: string): string =
+  ## Returns `value` without one trailing "\r\n", if it ends with one.
+  if value.endsWith("\r\n"):
+    value[0 ..< value.len - 2]
+  else:
+    value
+
+# hand-rolled multipart parsing, yes i am aware that this is "eh"
+proc parseMultipart(body: string, boundary: string): seq[MultipartPart] =
+  ## Splits a multipart/form-data `body` on `"--" & boundary` and returns one
+  ## `MultipartPart` for every section that has a header block and a `name=`
+  ## parameter in its Content-Disposition header.
+  ##
+  ## Sections without a blank line between headers and payload, and the
+  ## closing `--` marker, are skipped. `contentType` defaults to
+  ## "application/octet-stream" when the section has no Content-Type header.
+  ## The payload is returned byte-for-byte.
+  let delimiter = "--" & boundary
+  for rawChunk in body.split(delimiter):
+    var chunk = rawChunk
+    if chunk.len == 0:
+      continue
+    if chunk == "--" or chunk == "--\r\n":
+      continue
+    if chunk.startsWith("\r\n"):
+      chunk = chunk[2 .. ^1]
+
+    # the CRLF in front of the next delimiter belongs to the delimiter, not to
+    # the part, so it is removed here, exactly once
+    chunk = stripTrailingCrlf(chunk)
+
+    if chunk == "--":
+      continue
+
+    let splitIndex = chunk.find("\r\n\r\n")
+    if splitIndex < 0:
+      continue
+
+    let headerBlock = chunk[0 ..< splitIndex]
+    # no second strip: a CRLF still present here is part of the uploaded data
+    let content = chunk[splitIndex + 4 .. ^1]
+
+    var name = ""
+    var filename = ""
+    var contentType = "application/octet-stream"
+
+    for line in headerBlock.split("\r\n"):
+      let separator = line.find(':')
+      if separator <= 0:
+        continue
+      let headerName = line[0 ..< separator].strip().toLowerAscii()
+      let headerValue = line[separator + 1 .. ^1].strip()
+
+      if headerName == "content-disposition":
+        for part in headerValue.split(';'):
+          let token = part.strip()
+          if token.startsWith("name="):
+            name = token[5 .. ^1].strip(chars = {'\"', '\''})
+          elif token.startsWith("filename="):
+            filename = token[9 .. ^1].strip(chars = {'\"', '\''})
+      elif headerName == "content-type":
+        contentType = headerValue
+
+    if name.len > 0:
+      result.add(
+        MultipartPart(
+          name: name, filename: filename, contentType: contentType, content: content
+        )
+      )
+
+proc isSafeRelativePath(pathPart: string): bool =
+  ## True when `pathPart` is non-empty, does not start with `/` and contains
+  ## neither ".." nor a backslash.
+  if pathPart.len == 0 or pathPart.startsWith("/"):
+    return false
+  ".." notin pathPart and '\\' notin pathPart
+
+proc fileContentType(filePath: string): string =
+  ## Picks a Content-Type from the (case-insensitive) file name suffix;
+  ## unknown suffixes get "application/octet-stream".
+  const knownTypes = [
+    (".js", "application/javascript; charset=utf-8"),
+    (".css", "text/css; charset=utf-8"),
+    (".html", "text/html; charset=utf-8"),
+    (".png", "image/png"),
+    (".jpg", "image/jpeg"),
+    (".jpeg", "image/jpeg"),
+    (".gif", "image/gif"),
+    (".webp", "image/webp"),
+    (".svg", "image/svg+xml"),
+    (".pdf", "application/pdf"),
+  ]
+  let lowered = filePath.toLowerAscii()
+  for (suffix, mime) in knownTypes:
+    if lowered.endsWith(suffix):
+      return mime
+  "application/octet-stream"
+
+# response wrappers
+proc respondHtml(req: Request, code: HttpCode, content: string) {.async.} =
+  ## Answers `req` with status `code` and `content` labelled as UTF-8 HTML.
+  await req.respond(
+    code, content, newHttpHeaders({"Content-Type": "text/html; charset=utf-8"})
+  )
+
+proc respondText(req: Request, code: HttpCode, content: string) {.async.} =
+  ## Answers `req` with status `code` and `content` labelled as UTF-8 plain text.
+  await req.respond(
+    code, content, newHttpHeaders({"Content-Type": "text/plain; charset=utf-8"})
+  )
+
+proc respondFile(
+    req: Request,
+    filePath: string,
+    asAttachment: bool = false,
+    attachmentName: string = "",
+) {.async.} =
+  ## Sends the file at `filePath` with a Content-Type chosen by
+  ## `fileContentType`, or a plain-text 404 when the file does not exist.
+  ## With `asAttachment` set and a non-empty `attachmentName`, a
+  ## Content-Disposition header asks the client to download it under that name.
+  if fileExists(filePath):
+    let headers = newHttpHeaders({"Content-Type": fileContentType(filePath)})
+    if asAttachment and attachmentName.len > 0:
+      headers["Content-Disposition"] =
+        "attachment; filename=\"" & attachmentName & "\""
+    await req.respond(Http200, readFile(filePath), headers)
+  else:
+    await respondText(req, Http404, "Not found")
+
+# pandoc does the heavy lifting
+proc runPandoc(
+ sourceMarkdown: string,
+ outputPath: string,
+ paperSize: string,
+ margin: string,
+ mainFont: string,
+ lineSpacing: string,
+ showPageNumbers: bool,
+): tuple[ok: bool, output: string, missingPandoc: bool] =
+ ## Converts `sourceMarkdown` to a PDF at `outputPath` by running `pandoc`
+ ## (found via PATH) with the lualatex engine and the project's LaTeX template.
+ ##
+ ## Returns `ok = true` with an empty `output` when pandoc exits with 0.
+ ## Otherwise `ok` is false and `output` holds pandoc's combined stdout and
+ ## stderr. If the process cannot be started (OSError), `missingPandoc` is
+ ## true and `output` is a fixed explanatory message.
+ ## Work files live in a fresh directory under the system temp dir, which is
+ ## removed again on every exit path.
+ let tempDir = getTempDir() / (AppName & "-" & randomHex(10))
+ createDir(tempDir)
+ let tempMarkdownPath = tempDir / "source.md"
+ let tempRawPath = tempDir / "raw.md"
+
+ try:
+ # write raw markdown first
+ writeFile(tempRawPath, sourceMarkdown)
+
+ # preprocess markdown: convert to ascii with transliteration and normalize quotes
+ # NOTE(review): once Nim's escapes are resolved the sed argument reads
+ # 's/'\''/'/g; s/""/"/g' -- the unquoted `;` appears to end the sed command
+ # and a `"` is left open, so the shell may reject the whole line and the
+ # fallback below would then always run. Confirm the intended sed script.
+ let iconvCmd =
+ "iconv -c -t ASCII//TRANSLIT " & quoteShell(tempRawPath) &
+ " | sed 's/'\\''/'/g; s/\"\"/\"/g' > " & quoteShell(tempMarkdownPath)
+ let (_, iconvExitCode) = execCmdEx(iconvCmd)
+
+ if iconvExitCode != 0:
+ # if preprocessing fails, fall back to original content
+ writeFile(tempMarkdownPath, sourceMarkdown)
+
+ # base pandoc arguments; paper and page-number variables are appended below
+ var args = @[
+ tempMarkdownPath,
+ "--from",
+ "markdown+emoji+hard_line_breaks",
+ "--pdf-engine=lualatex",
+ "--template",
+ latexTemplatePath(),
+ "-V",
+ "margin=" & margin,
+ "-V",
+ "mainfont=" & mainFont,
+ "-V",
+ "linespacing=" & lineSpacing,
+ "--resource-path",
+ baseDir() & ":" & uploadsDir() & ":" & tempDir,
+ "-o",
+ outputPath,
+ ]
+
+ # sizes listed in CustomPaperDimensions are passed as explicit width and
+ # height; every other size is passed by name
+ let dims = lookupCustomPaper(paperSize)
+ if dims.width.len > 0:
+ args.add("-V")
+ args.add("paperwidth=" & dims.width)
+ args.add("-V")
+ args.add("paperheight=" & dims.height)
+ else:
+ args.add("-V")
+ args.add("papersize=" & paperSize)
+
+ if not showPageNumbers:
+ args.add("-V")
+ args.add("hidepages=true")
+
+ var process: Process
+ try:
+ # poStdErrToStdOut merges stderr into the stream read below
+ process =
+ startProcess("pandoc", args = args, options = {poUsePath, poStdErrToStdOut})
+ except OSError:
+ # any OSError while starting the process is reported as "pandoc missing"
+ return (
+ ok: false,
+ output: "Pandoc is not installed or not in PATH.",
+ missingPandoc: true,
+ )
+
+ # read all output before waiting for the exit code
+ let output = process.outputStream.readAll()
+ let exitCode = process.waitForExit()
+ process.close()
+
+ if exitCode == 0:
+ return (ok: true, output: "", missingPandoc: false)
+ return (ok: false, output: output, missingPandoc: false)
+ finally:
+ # best-effort cleanup: an OSError while removing temp files is ignored
+ try:
+ if fileExists(tempRawPath):
+ removeFile(tempRawPath)
+ if fileExists(tempMarkdownPath):
+ removeFile(tempMarkdownPath)
+ if dirExists(tempDir):
+ removeDir(tempDir)
+ except OSError:
+ discard
+
+# app endpoint: strict inputs, loud errors.
+proc handleConvert(req: Request) {.async.} =
+ ## Handles POST /convert: reads a url-encoded form, renders the "markdown"
+ ## field to a PDF in the generated directory and answers with an HTML partial.
+ ##
+ ## Responses: 400 + error.html when "markdown" is empty after stripping or
+ ## when pandoc fails, 500 + error.html when pandoc could not be started,
+ ## 200 + result.html (file name and download URL) on success.
+ let formData = parseUrlEncoded(req.body)
+ let markdown = formData.getOrDefault("markdown", "").strip()
+
+ if markdown.len == 0:
+ let html = renderTemplate(
+ partialsDir() / "error.html", [("{{ message }}", "Markdown content is required.")]
+ )
+ await respondHtml(req, Http400, html)
+ return
+
+ # every option falls back to a default when the submitted value is not
+ # in its allow-list
+ let paperSize =
+ pickOption(formData.getOrDefault("paper_size", ""), "a4paper", ValidPaperSizes)
+ let margin = pickOption(formData.getOrDefault("margin", ""), "1in", ValidMargins)
+
+ var mainFontFamily = formData.getOrDefault("main_font", "serif")
+ if mainFontFamily != "serif" and mainFontFamily != "sans":
+ mainFontFamily = "serif"
+
+ # the form sends a family keyword; pandoc gets a concrete font name
+ let mainFont = if mainFontFamily == "sans": "TeX Gyre Heros" else: "TeX Gyre Pagella"
+ let lineSpacing =
+ pickOption(formData.getOrDefault("line_spacing", ""), "1", ValidLineSpacings)
+ let showPageNumbers = formData.getOrDefault("page_numbers", "") == "on"
+ # output name: <AppName>_<unix seconds>_<32 hex digits>.pdf
+ let epoch = int(getTime().toUnix())
+ let outputName = AppName & "_" & $epoch & "_" & randomHex(32) & ".pdf"
+ let outputPath = generatedDir() / outputName
+
+ let conversion = runPandoc(
+ markdown, outputPath, paperSize, margin, mainFont, lineSpacing, showPageNumbers
+ )
+
+ if not conversion.ok:
+ # show the fixed "missing pandoc" text, else the tail of pandoc's output,
+ # else a generic message when pandoc printed nothing
+ let message =
+ if conversion.missingPandoc:
+ conversion.output
+ else:
+ let stderr = conversion.output.strip()
+ if stderr.len > 0:
+ tailText(stderr)
+ else:
+ "PDF conversion failed."
+
+ let html = renderTemplate(
+ partialsDir() / "error.html", [("{{ message }}", htmlEscape(message))]
+ )
+ let code = if conversion.missingPandoc: Http500 else: Http400
+ await respondHtml(req, code, html)
+ return
+
+ let html = renderTemplate(
+ partialsDir() / "result.html",
+ [
+ ("{{ filename }}", htmlEscape(outputName)),
+ ("{{ download_url }}", "/download/" & encodeUrl(outputName)),
+ ],
+ )
+ await respondHtml(req, Http200, html)
+
+# upload endpoint. accepts image, returns markdown snippet
+proc handleUploadImage(req: Request) {.async.} =
+ ## Handles POST /upload-image: takes the multipart part named "image",
+ ## stores it in the uploads directory under a generated name and answers with
+ ## an HTML partial containing a markdown snippet and a preview URL.
+ ##
+ ## Responses: 400 + upload_error.html when the request has no boundary, no
+ ## "image" part, an empty file name or an extension that is not allowed;
+ ## 200 + upload_result.html on success.
+ let contentType = req.headers.getOrDefault("Content-Type")
+ let boundary = extractBoundary(contentType)
+
+ if boundary.len == 0:
+ let html = renderTemplate(
+ partialsDir() / "upload_error.html",
+ [("{{ message }}", "image file is required.")],
+ )
+ await respondHtml(req, Http400, html)
+ return
+
+ # use the first part whose field name is "image"
+ let parts = parseMultipart(req.body, boundary)
+ var imagePart: MultipartPart
+ var foundImage = false
+ for part in parts:
+ if part.name == "image":
+ imagePart = part
+ foundImage = true
+ break
+
+ if not foundImage or imagePart.filename.strip().len == 0:
+ let html = renderTemplate(
+ partialsDir() / "upload_error.html",
+ [("{{ message }}", "image file is required.")],
+ )
+ await respondHtml(req, Http400, html)
+ return
+
+ # the client's file name is only used to validate and extract the extension
+ let originalName = sanitizeFilename(baseFilename(imagePart.filename))
+ if originalName.len == 0 or not isAllowedImage(originalName):
+ let html = renderTemplate(
+ partialsDir() / "upload_error.html",
+ [("{{ message }}", "unsupported image type.")],
+ )
+ await respondHtml(req, Http400, html)
+ return
+
+ let extensionStart = originalName.rfind('.')
+ let extension = originalName[extensionStart + 1 .. ^1].toLowerAscii()
+
+ # stored name: img_<unix seconds>_<32 hex digits>.<extension>
+ let epoch = int(getTime().toUnix())
+ let storedName = "img_" & $epoch & "_" & randomHex(32) & "." & extension
+ let imagePath = uploadsDir() / storedName
+
+ writeFile(imagePath, imagePart.content)
+
+ let markdownSnippet = "![](uploads/" & storedName & ")"
+ let html = renderTemplate(
+ partialsDir() / "upload_result.html",
+ [
+ ("{{ filename }}", htmlEscape(storedName)),
+ ("{{ markdown_snippet }}", htmlEscape(markdownSnippet)),
+ ("{{ preview_url }}", "/uploads/" & encodeUrl(storedName)),
+ ],
+ )
+ await respondHtml(req, Http200, html)
+
+# router table
+proc route(req: Request) {.async.} =
+  ## Routes one request by HTTP method and path.
+  ##
+  ## GET "/" serves index.html; GET under "/static/", "/uploads/" and
+  ## "/download/" serves a file from the matching directory once the decoded
+  ## remainder of the path passes `isSafeRelativePath` (400 otherwise); POST
+  ## "/convert" and "/upload-image" are handed to their handlers; anything
+  ## else is answered with a plain-text 404.
+  let
+    path = req.url.path
+    isGet = req.reqMethod == HttpGet
+    isPost = req.reqMethod == HttpPost
+
+  if isGet and path == "/":
+    await respondFile(req, templatesDir() / "index.html")
+  elif isGet and path.startsWith("/static/"):
+    let relativePath = decodeUrl(path["/static/".len .. ^1])
+    if isSafeRelativePath(relativePath):
+      await respondFile(req, staticDir() / relativePath)
+    else:
+      await respondText(req, Http400, "Invalid path")
+  elif isGet and path.startsWith("/uploads/"):
+    let relativePath = decodeUrl(path["/uploads/".len .. ^1])
+    if isSafeRelativePath(relativePath):
+      await respondFile(req, uploadsDir() / relativePath)
+    else:
+      await respondText(req, Http400, "Invalid path")
+  elif isGet and path.startsWith("/download/"):
+    let relativePath = decodeUrl(path["/download/".len .. ^1])
+    if isSafeRelativePath(relativePath):
+      # downloads are sent as attachments named after the requested path
+      await respondFile(
+        req,
+        generatedDir() / relativePath,
+        asAttachment = true,
+        attachmentName = relativePath,
+      )
+    else:
+      await respondText(req, Http400, "Invalid path")
+  elif isPost and path == "/convert":
+    await handleConvert(req)
+  elif isPost and path == "/upload-image":
+    await handleUploadImage(req)
+  else:
+    await respondText(req, Http404, "Not found")
+
+# server boot, then we let htmx do htmx things.
+when isMainModule:
+  # seed std/random's global generator, which randomHex draws from
+  randomize()
+
+  # create the output directories when they are missing
+  for dir in [generatedDir(), uploadsDir()]:
+    if not dirExists(dir):
+      createDir(dir)
+
+  let server = newAsyncHttpServer()
+  echo "listening on http://localhost:5001"
+  waitFor server.serve(Port(5001), route)
\ No newline at end of file
diff --git a/src/app.py b/src/app.py
index 88deef4..89666ad 100644
--- a/src/app.py
+++ b/src/app.py
@@ -1,35 +1,37 @@
#!/usr/bin/env python3
# likha-pdf — markdown to pdf, no latex required
-# converts markdown to html, then html to pdf via weasyprint
-# falls back to reportlab if weasyprint chokes — a pdf is always produced
+# production-friendly flask app with weasyprint + reportlab fallback
+import logging
import os
-import re
import secrets
import time
+from pathlib import Path, PurePosixPath
from flask import (
Flask,
+ Response,
+ current_app,
request,
send_from_directory,
- render_template_string,
abort,
)
from markupsafe import escape
from markdown import markdown
-from pygments.formatters import HtmlFormatter
from weasyprint import HTML
+from werkzeug.middleware.proxy_fix import ProxyFix
APP_NAME = "likha-pdf"
-PORT = 5001
+DEFAULT_HOST = "0.0.0.0"
+DEFAULT_PORT = 5001
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-GENERATED_DIR = os.path.join(BASE_DIR, "generated")
-UPLOADS_DIR = os.path.join(BASE_DIR, "uploads")
-TEMPLATES_DIR = os.path.join(BASE_DIR, "templates")
-PARTIALS_DIR = os.path.join(TEMPLATES_DIR, "partials")
-STATIC_DIR = os.path.join(BASE_DIR, "static")
+BASE_DIR = Path(__file__).resolve().parent
+GENERATED_DIR = BASE_DIR / "generated"
+UPLOADS_DIR = BASE_DIR / "uploads"
+TEMPLATES_DIR = BASE_DIR / "templates"
+PARTIALS_DIR = TEMPLATES_DIR / "partials"
+STATIC_DIR = BASE_DIR / "static"
ALLOWED_IMAGE_EXTS = {"png", "jpg", "jpeg", "gif", "webp", "svg"}
@@ -93,16 +95,20 @@ MARKDOWN_EXT_CONFIG = {
},
}
-app = Flask(
- __name__,
- template_folder=TEMPLATES_DIR,
- static_folder=STATIC_DIR,
- static_url_path="/static",
-)
-app.config["MAX_CONTENT_LENGTH"] = 64 * 1024 * 1024 # 64 MB
-
# helpers
+def env_bool(name, default=False):
+ raw = os.getenv(name)
+ if raw is None:
+ return default
+ return raw.strip().lower() in {"1", "true", "yes", "on"}
+
+
+def ensure_runtime_dirs():
+ GENERATED_DIR.mkdir(parents=True, exist_ok=True)
+ UPLOADS_DIR.mkdir(parents=True, exist_ok=True)
+
+
def random_hex(length=32):
return secrets.token_hex(length // 2)
@@ -132,19 +138,15 @@ def is_allowed_image(filename):
def is_safe_relative_path(path_part):
- return (
- bool(path_part)
- and ".." not in path_part
- and "\\" not in path_part
- and not path_part.startswith("/")
- )
+ if not path_part or "\\" in path_part:
+ return False
+ safe_path = PurePosixPath(path_part)
+ return not safe_path.is_absolute() and ".." not in safe_path.parts
def read_partial(name, replacements=None):
"""read a partial html template and apply replacements"""
- path = os.path.join(PARTIALS_DIR, name)
- with open(path, "r", encoding="utf-8") as f:
- content = f.read()
+ content = (PARTIALS_DIR / name).read_text(encoding="utf-8")
if replacements:
for token, value in replacements.items():
content = content.replace(token, value)
@@ -312,7 +314,7 @@ def convert_with_weasyprint(full_html, output_path):
try:
doc = HTML(
string=full_html,
- base_url=BASE_DIR,
+ base_url=str(BASE_DIR),
)
doc.write_pdf(output_path)
return True, ""
@@ -452,6 +454,7 @@ def generate_pdf(source_markdown, output_path, paper_size, margin,
# weasyprint failed — fall back to reportlab
try:
+ current_app.logger.warning("weasyprint failed, using reportlab fallback: %s", err)
convert_with_reportlab(
source_markdown, output_path,
paper_size, margin, font_family, line_spacing,
@@ -461,108 +464,145 @@ def generate_pdf(source_markdown, output_path, paper_size, margin,
return False, f"weasyprint: {err} | reportlab: {fallback_err}"
-# routes
-@app.route("/")
-def index():
- index_path = os.path.join(TEMPLATES_DIR, "index.html")
- with open(index_path, "r", encoding="utf-8") as f:
- return f.read()
+def create_app():
+ ensure_runtime_dirs()
+ app = Flask(
+ __name__,
+ template_folder=str(TEMPLATES_DIR),
+ static_folder=str(STATIC_DIR),
+ static_url_path="/static",
+ )
-@app.route("/convert", methods=["POST"])
-def convert():
- md = request.form.get("markdown", "").strip()
- if not md:
- return read_partial("error.html", {
- "{{ message }}": "Markdown content is required.",
- }), 400
+ app.config["MAX_CONTENT_LENGTH"] = int(os.getenv("MAX_CONTENT_LENGTH", str(64 * 1024 * 1024)))
- paper_size = pick_option(
- request.form.get("paper_size", ""), "letterpaper", VALID_PAPER_SIZES,
- )
- margin = pick_option(
- request.form.get("margin", ""), "1in", VALID_MARGINS,
- )
+ if env_bool("TRUST_PROXY", default=True):
+ app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_port=1)
- font_family = request.form.get("main_font", "serif")
- if font_family not in ("serif", "sans"):
- font_family = "serif"
+ log_level = os.getenv("LOG_LEVEL", "INFO").upper()
+ app.logger.setLevel(log_level)
- line_spacing = pick_option(
- request.form.get("line_spacing", ""), "1", VALID_LINE_SPACINGS,
- )
- show_page_numbers = request.form.get("page_numbers") == "on"
+ @app.after_request
+ def add_security_headers(resp):
+ resp.headers.setdefault("X-Content-Type-Options", "nosniff")
+ resp.headers.setdefault("X-Frame-Options", "DENY")
+ resp.headers.setdefault("Referrer-Policy", "no-referrer")
+ return resp
- epoch = int(time.time())
- output_name = f"{APP_NAME}_{epoch}_{random_hex()}.pdf"
- output_path = os.path.join(GENERATED_DIR, output_name)
+ @app.errorhandler(413)
+ def payload_too_large(_err):
+ return read_partial("upload_error.html", {
+ "{{ message }}": "request body too large.",
+ }), 413
+
+ @app.route("/healthz")
+ def healthz():
+ return Response("ok\n", mimetype="text/plain")
+
+ @app.route("/")
+ def index():
+ return send_from_directory(str(TEMPLATES_DIR), "index.html")
+
+ @app.route("/convert", methods=["POST"])
+ def convert():
+ md = request.form.get("markdown", "").strip()
+ if not md:
+ return read_partial("error.html", {
+ "{{ message }}": "Markdown content is required.",
+ }), 400
+
+ paper_size = pick_option(
+ request.form.get("paper_size", ""), "letterpaper", VALID_PAPER_SIZES,
+ )
+ margin = pick_option(
+ request.form.get("margin", ""), "1in", VALID_MARGINS,
+ )
- ok, err = generate_pdf(
- md, output_path,
- paper_size, margin, font_family, line_spacing, show_page_numbers,
- )
+ font_family = request.form.get("main_font", "serif")
+ if font_family not in ("serif", "sans"):
+ font_family = "serif"
- if not ok:
- return read_partial("error.html", {
- "{{ message }}": str(escape(tail_text(err))),
- }), 500
+ line_spacing = pick_option(
+ request.form.get("line_spacing", ""), "1", VALID_LINE_SPACINGS,
+ )
+ show_page_numbers = request.form.get("page_numbers") == "on"
+
+ output_name = f"{APP_NAME}_{int(time.time())}_{random_hex()}.pdf"
+ output_path = GENERATED_DIR / output_name
+
+ ok, err = generate_pdf(
+ md,
+ str(output_path),
+ paper_size,
+ margin,
+ font_family,
+ line_spacing,
+ show_page_numbers,
+ )
- return read_partial("result.html", {
- "{{ filename }}": str(escape(output_name)),
- "{{ download_url }}": f"/download/{output_name}",
- })
+ if not ok:
+ app.logger.error("pdf generation failed: %s", err)
+ return read_partial("error.html", {
+ "{{ message }}": str(escape(tail_text(err))),
+ }), 500
+
+ return read_partial("result.html", {
+ "{{ filename }}": str(escape(output_name)),
+ "{{ download_url }}": f"/download/{output_name}",
+ })
+
+ @app.route("/upload-image", methods=["POST"])
+ def upload_image():
+ uploaded = request.files.get("image")
+ if not uploaded or not uploaded.filename or not uploaded.filename.strip():
+ return read_partial("upload_error.html", {
+ "{{ message }}": "image file is required.",
+ }), 400
+
+ original = sanitize_filename(uploaded.filename)
+ if not original or not is_allowed_image(original):
+ return read_partial("upload_error.html", {
+ "{{ message }}": "unsupported image type.",
+ }), 400
+
+ ext = original.rsplit(".", 1)[-1].lower()
+ stored_name = f"img_{int(time.time())}_{random_hex()}.{ext}"
+ image_path = UPLOADS_DIR / stored_name
+ uploaded.save(str(image_path))
+
+ snippet = f"![](uploads/{stored_name})"
+ return read_partial("upload_result.html", {
+ "{{ filename }}": str(escape(stored_name)),
+ "{{ markdown_snippet }}": str(escape(snippet)),
+ "{{ preview_url }}": f"/uploads/{stored_name}",
+ })
+
+ @app.route("/uploads/<path:filename>")
+ def serve_upload(filename):
+ if not is_safe_relative_path(filename):
+ abort(400)
+ return send_from_directory(str(UPLOADS_DIR), filename, conditional=True)
+
+ @app.route("/download/<path:filename>")
+ def download(filename):
+ if not is_safe_relative_path(filename):
+ abort(400)
+ return send_from_directory(
+ str(GENERATED_DIR),
+ filename,
+ as_attachment=True,
+ download_name=filename,
+ conditional=True,
+ )
+ return app
-@app.route("/upload-image", methods=["POST"])
-def upload_image():
- uploaded = request.files.get("image")
- if not uploaded or not uploaded.filename or not uploaded.filename.strip():
- return read_partial("upload_error.html", {
- "{{ message }}": "image file is required.",
- }), 400
- original = sanitize_filename(uploaded.filename)
- if not original or not is_allowed_image(original):
- return read_partial("upload_error.html", {
- "{{ message }}": "unsupported image type.",
- }), 400
-
- ext = original.rsplit(".", 1)[-1].lower()
- epoch = int(time.time())
- stored_name = f"img_{epoch}_{random_hex()}.{ext}"
- image_path = os.path.join(UPLOADS_DIR, stored_name)
- uploaded.save(image_path)
-
- snippet = f"![](uploads/{stored_name})"
- return read_partial("upload_result.html", {
- "{{ filename }}": str(escape(stored_name)),
- "{{ markdown_snippet }}": str(escape(snippet)),
- "{{ preview_url }}": f"/uploads/{stored_name}",
- })
-
-
-@app.route("/uploads/<path:filename>")
-def serve_upload(filename):
- if not is_safe_relative_path(filename):
- abort(400)
- return send_from_directory(UPLOADS_DIR, filename)
-
-
-@app.route("/download/<path:filename>")
-def download(filename):
- if not is_safe_relative_path(filename):
- abort(400)
- return send_from_directory(
- GENERATED_DIR, filename,
- as_attachment=True,
- download_name=filename,
- )
+app = create_app()
-# main
if __name__ == "__main__":
- os.makedirs(GENERATED_DIR, exist_ok=True)
- os.makedirs(UPLOADS_DIR, exist_ok=True)
-
- print(f" {APP_NAME} listening on http://localhost:{PORT}")
- app.run(host="0.0.0.0", port=PORT, debug=False)
+ host = os.getenv("HOST", DEFAULT_HOST)
+ port = int(os.getenv("PORT", str(DEFAULT_PORT)))
+ print(f" {APP_NAME} listening on http://{host}:{port}")
+ app.run(host=host, port=port, debug=False)