#!/usr/bin/env python3
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright (C) 2026 SWGY, Inc.
"""Generate a static, syntax-highlighted HTML site of this repo's tracked source.
Walks `git ls-files`, renders each file with Pygments (style `lightbulb`,
line numbers on), and writes per-directory `index.html` listing pages so
the resulting tree can be dropped on any static web server and browsed
by clicking.
Run from the repo root:
uv run python tools/highlight.py [--output PATH] [--repo PATH]
"""
from __future__ import annotations

import argparse
import html
import subprocess
import sys
from pathlib import Path
from typing import Any
from urllib.parse import quote

from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import TextLexer, get_lexer_for_filename, guess_lexer
from pygments.util import ClassNotFound
# Name of the Pygments style handed to HtmlFormatter; it determines the token
# colours emitted into the generated stylesheet.
STYLE = "lightbulb"
# File name of the single shared stylesheet written at the output root; every
# generated page links to it with a relative `../` prefix.
CSS_FILENAME = "style.css"
def git_tracked_files(repo: Path) -> list[str]:
    """List every path git tracks inside `repo`, relative to the repo root.

    Runs `git -C <repo> ls-files -z` and splits the NUL-delimited output, so
    names containing spaces or newlines survive intact. Bytes that are not
    valid UTF-8 are decoded with replacement characters.

    Raises:
        subprocess.CalledProcessError: if the git command exits non-zero.
    """
    command = ["git", "-C", str(repo), "ls-files", "-z"]
    completed = subprocess.run(command, check=True, capture_output=True)
    listing = completed.stdout.decode("utf-8", errors="replace")
    # The output ends with a trailing NUL, which yields one empty entry to drop.
    return list(filter(None, listing.split("\0")))
def ensure_empty_output_dir(path: Path) -> None:
    """Guarantee that `path` is an existing, empty directory.

    A missing path is created (including parents). An existing path that is
    not a directory, or a directory that already has entries, terminates the
    program via `sys.exit` with an explanatory message; nothing is deleted.
    """
    if not path.exists():
        path.mkdir(parents=True)
        return

    if not path.is_dir():
        sys.exit(f"error: --output {path} exists and is not a directory")

    # Peek at the first directory entry only; no need to list everything.
    first_entry = next(path.iterdir(), None)
    if first_entry is not None:
        sys.exit(
            f"error: output dir {path} is not empty; "
            "delete it manually or choose a different --output"
        )
def pick_lexer(name: str, code: str) -> Any:
    """Choose a Pygments lexer for a file, degrading gracefully.

    Strategies are tried in order: match on the file name (with `code` passed
    along to help disambiguate), then guess from the content alone. If both
    raise `ClassNotFound`, a plain `TextLexer` is returned.
    """
    strategies = (
        lambda: get_lexer_for_filename(name, code),
        lambda: guess_lexer(code),
    )
    for attempt in strategies:
        try:
            return attempt()
        except ClassNotFound:
            continue
    return TextLexer()
def page_shell(title: str, breadcrumb: str, depth: int, body: str) -> str:
    """Return a complete HTML document wrapping `breadcrumb` and `body`.

    `title` is HTML-escaped; `breadcrumb` and `body` are inserted verbatim, so
    callers must pass already-safe markup. `depth` is the number of directory
    levels between the page and the output root, and is used to build the
    relative link to the shared stylesheet.
    """
    stylesheet = "../" * depth + CSS_FILENAME
    rows = [
        "<!doctype html>",
        '<html lang="en">',
        "<head>",
        ' <meta charset="utf-8">',
        f" <title>{html.escape(title)}</title>",
        f' <link rel="stylesheet" href="{html.escape(stylesheet)}">',
        "</head>",
        "<body>",
        f' <nav class="crumbs">{breadcrumb}</nav>',
        f" <main>{body}</main>",
        "</body>",
        "</html>",
    ]
    # Every row, including the last, is newline-terminated.
    return "\n".join(rows) + "\n"
def breadcrumb_html(rel_parts: list[str], repo_label: str, is_file: bool) -> str:
    """Build the breadcrumb trail markup for a file page or a directory page.

    The trail starts with a link labelled `repo_label` pointing at the root
    `index.html`, followed by one entry per component of `rel_parts`, joined
    with " / ". Every directory component links to that directory's
    `index.html` via a relative `../` chain. When `is_file` is true the final
    component is the file itself and is rendered as a plain `<span>`; on a
    directory page the final component links to the page's own `index.html`.
    All labels are HTML-escaped.
    """
    last = len(rel_parts) - 1
    # A file page lives in the directory of its parent components, so it sits
    # one level shallower than the number of path components.
    levels_up = last if is_file else len(rel_parts)

    def link(hops: int, label: str) -> str:
        target = "../" * hops + "index.html"
        return f'<a href="{html.escape(target)}">{html.escape(label)}</a>'

    crumbs = [link(levels_up, repo_label)]
    for idx, segment in enumerate(rel_parts):
        if is_file and idx == last:
            crumbs.append(f"<span>{html.escape(segment)}</span>")
            continue
        # Component idx is (levels_up - idx - 1) directories above this page.
        crumbs.append(link(levels_up - idx - 1, segment))
    return " / ".join(crumbs)
def render_file(
    src: Path,
    rel: str,
    out_root: Path,
    repo_label: str,
    formatter: HtmlFormatter,
) -> None:
    """Highlight `src` and write the page to `<out_root>/<rel>.html`.

    `rel` is the "/"-separated path of the file relative to the repo root; it
    drives the page title, heading, breadcrumb trail and output location. The
    file is read as UTF-8 with undecodable bytes replaced. Parent directories
    of the destination are created as needed.
    """
    source_text = src.read_text(encoding="utf-8", errors="replace")
    markup = highlight(source_text, pick_lexer(src.name, source_text), formatter)

    segments = rel.split("/")
    page = page_shell(
        f"{rel} — {repo_label}",
        breadcrumb_html(segments, repo_label, is_file=True),
        len(segments) - 1,  # directory levels between this page and the root
        f"<h1>{html.escape(rel)}</h1>\n{markup}",
    )

    target = out_root / f"{rel}.html"
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(page, encoding="utf-8")
def build_tree(rels: list[str]) -> dict[str, Any]:
    """Build a nested dict-of-dicts tree from a list of relative file paths.

    Files are stored as `{name: None}`; directories as `{name: {...}}`.

    Args:
        rels: "/"-separated relative file paths.

    Returns:
        The tree rooted at the top-level directory.

    Raises:
        RuntimeError: if one path uses a name as a file while another uses the
            same name as a directory, in either order.
    """
    root: dict[str, Any] = {}
    for rel in rels:
        *dirs, leaf = rel.split("/")
        node = root
        for part in dirs:
            node = node.setdefault(part, {})
            if node is None:
                # `part` was already recorded as a file, so it cannot also be
                # a directory.
                raise RuntimeError(f"path collision at {rel}")
        if isinstance(node.get(leaf), dict):
            # `leaf` is already a directory; overwriting it with a file marker
            # would silently discard everything recorded beneath it.
            raise RuntimeError(f"path collision at {rel}")
        node[leaf] = None
    return root
def render_index(
    node: dict[str, Any],
    rel_parts: list[str],
    out_root: Path,
    repo_label: str,
) -> None:
    """Write `index.html` for one directory and recurse into subdirs.

    Args:
        node: Tree node for this directory; values are dicts for
            subdirectories and `None` for files.
        rel_parts: Path components of this directory relative to the output
            root (empty for the root itself).
        out_root: Root directory of the generated site.
        repo_label: Label shown for the repository root in titles and
            breadcrumbs.

    The listing shows a `..` entry (except at the root), then subdirectories,
    then files, each group sorted by name. Entry names are percent-encoded in
    link targets so that names containing characters such as `#`, `?` or `%`
    still resolve to the right page.

    NOTE: a file literally named `index` is rendered elsewhere in this script
    to `index.html` in the same directory, which this function overwrites.
    """
    depth = len(rel_parts)
    if rel_parts:
        dir_label = rel_parts[-1] + "/"
        title = f"{'/'.join(rel_parts)}/ — {repo_label}"
    else:
        dir_label = repo_label
        title = f"{repo_label} — source"
    crumb = breadcrumb_html(rel_parts, repo_label, is_file=False)

    subdirs = sorted(k for k, v in node.items() if isinstance(v, dict))
    files = sorted(k for k, v in node.items() if v is None)

    lines: list[str] = [f"<h1>{html.escape(dir_label)}</h1>", '<ul class="listing">']
    if rel_parts:
        lines.append(' <li class="dir"><a href="../index.html">..</a></li>')
    for d in subdirs:
        # Each name is a single path component, so "/" needs no exemption.
        href = f"{quote(d, safe='')}/index.html"
        lines.append(f' <li class="dir"><a href="{html.escape(href)}">{html.escape(d)}/</a></li>')
    for f in files:
        href = f"{quote(f, safe='')}.html"
        lines.append(f' <li class="file"><a href="{html.escape(href)}">{html.escape(f)}</a></li>')
    lines.append("</ul>")
    body = "\n".join(lines)

    out_dir = out_root.joinpath(*rel_parts)
    out_dir.mkdir(parents=True, exist_ok=True)
    (out_dir / "index.html").write_text(page_shell(title, crumb, depth, body), encoding="utf-8")

    for name in subdirs:
        render_index(node[name], [*rel_parts, name], out_root, repo_label)
# Hand-written CSS appended after the Pygments-generated token styles in the
# shared stylesheet. It covers the page body, links, headings, the breadcrumb
# nav, directory listings, and the `.highlight` code block with its line
# numbers.
LISTING_CSS = """
body {
background: #1d2331;
color: #e6e6e6;
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Helvetica,
Arial, sans-serif;
margin: 0;
padding: 1.5rem 2rem 3rem;
line-height: 1.45;
}
main { max-width: 1100px; }
a { color: #7ec9ff; text-decoration: none; }
a:hover { text-decoration: underline; }
h1 {
font-size: 1.4rem;
margin: 0.5rem 0 1rem;
font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
color: #f0f0f0;
}
nav.crumbs {
font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
font-size: 0.9rem;
padding-bottom: 0.5rem;
border-bottom: 1px solid #333;
margin-bottom: 1rem;
color: #888;
}
nav.crumbs a { color: #9ad1ff; }
nav.crumbs span { color: #d0d0d0; }
ul.listing {
list-style: none;
padding: 0;
margin: 0;
font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;
}
ul.listing li { padding: 0.15rem 0; }
ul.listing li.dir a { font-weight: 600; }
.highlight {
background: #1d2331;
padding: 0.5rem 0;
overflow-x: auto;
border-radius: 4px;
}
.highlight pre { margin: 0; }
.highlight .linenos {
color: #555;
padding-right: 0.75rem;
border-right: 1px solid #2c2c2c;
user-select: none;
}
.highlight .linenos a { color: #555; }
.highlight .linenos a:hover { color: #aaa; }
"""
def main() -> None:
    """Entry point: parse args, enumerate tracked files, emit the static site.

    Command-line options:
        --output PATH  Destination directory; defaults to `<repo>/site`. It
                       must be missing or empty.
        --repo PATH    Repository to render; defaults to the git toplevel of
                       the current working directory.

    Exits via `sys.exit` when the output directory is unusable or git reports
    no tracked files. A failing git command raises
    `subprocess.CalledProcessError`.
    """
    # __doc__ is None when docstrings are stripped (python -OO).
    description = __doc__.splitlines()[0] if __doc__ else None
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument("--output", type=Path, default=None)
    parser.add_argument("--repo", type=Path, default=None)
    args = parser.parse_args()

    if args.repo is not None:
        repo = args.repo.resolve()
    else:
        result = subprocess.run(
            ["git", "rev-parse", "--show-toplevel"],
            check=True,
            capture_output=True,
            text=True,
        )
        repo = Path(result.stdout.strip()).resolve()

    out_root: Path = (args.output or (repo / "site")).resolve()
    ensure_empty_output_dir(out_root)
    repo_label = repo.name

    rels = git_tracked_files(repo)
    if not rels:
        sys.exit(f"error: `git ls-files` returned no files in {repo}")

    # Tracked entries that are not regular files on disk are skipped. Filter
    # once so rendering, the index tree and the final count all agree.
    files = [rel for rel in rels if (repo / rel).is_file()]

    formatter = HtmlFormatter(
        style=STYLE,
        linenos=1,
        cssclass="highlight",
        anchorlinenos=True,
        lineanchors="L",
    )
    (out_root / CSS_FILENAME).write_text(
        formatter.get_style_defs(".highlight") + LISTING_CSS,
        encoding="utf-8",
    )

    for rel in files:
        render_file(repo / rel, rel, out_root, repo_label, formatter)

    render_index(build_tree(files), [], out_root, repo_label)
    print(f"Wrote {len(files)} files to {out_root}")


if __name__ == "__main__":
    main()