webapp-scaffold/bin/inject-hashed-filenames.py
Uwe Schuster b3b2903c75 #3: inject-hashed-filenames.py — tag-aware HTML rewrite
Replace substring `html.replace(old_src, new_src)` with a regex anchored
to <script src="…"> / <link href="…"> attribute values. Inert occurrences
in comments, JSON literals, or unrelated attributes are left alone.

Loud warning (stderr) when zero matches occur — previously the script
silently skipped a typo'd old_src.

Also rewrites <link href> in the same pass so adjacent CSS hashing doesn't
need a follow-up edit.

Tests: tests/test_inject_hashed_filenames.py covers happy path (both quote
styles, extra attributes), inert-substring cases (comment, JSON literal,
data-attr, anchor href), and link-href rewriting.

Closes #3

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-25 21:38:19 +02:00

102 lines
3.3 KiB
Python
Executable file

#!/usr/bin/env python3
"""Rewrite HTML <script src> / <link href> attributes to point at Vite's hashed production bundle.
Usage:
inject-hashed-filenames.py CONFIG_JSON [BUILD_DIR]
CONFIG_JSON is a path to a JSON file listing the entries to rewrite. The
schema mirrors the two hard-coded entries fewo-webapp had baked into an
earlier version of this script:
[
{
"manifest": "static/dist/.vite/manifest.json",
"html": "static/index.html",
"old_src": "/static/dist/app.js"
},
{
"manifest": "static/guest/dist/.vite/manifest.json",
"html": "static/guest/index.html",
"old_src": "/guest/dist/guest-app.js"
}
]
All paths are resolved relative to BUILD_DIR, which defaults to the current
working directory (`$PWD`) when the argument is omitted.
"""
import json
import os
import re
import sys
def _build_pattern(old_src: str) -> "re.Pattern[str]":
    """Compile a regex matching ``old_src`` as a <script>/<link> URL attribute.

    The pattern matches a ``src`` or ``href`` attribute, single- or
    double-quoted, inside a ``<script …>`` or ``<link …>`` start tag whose
    value is exactly ``old_src``.

    Capture groups (numbering is part of the contract with ``rewrite``):
        1 / ``head``  -- everything from ``<`` up to and including the
                         opening quote.
        2 / ``quote`` -- the opening quote character; the closing quote must
                         be the same character.

    Args:
        old_src: Literal attribute value to look for (regex-escaped here).

    Returns:
        The compiled pattern.
    """
    return re.compile(
        r'(?P<head>'
        # Tag and attribute names are case-insensitive in HTML, so only this
        # part is wrapped in a scoped (?i:…). The URL itself stays
        # case-sensitive.
        #  * (?![\w:-]) after the tag name rejects custom elements such as
        #    <script-loader>.
        #  * (?<![\w:-]) before the attribute name rejects data-src,
        #    data-href and xlink:href, which a plain \b would accept.
        r'(?i:<(?:script|link)(?![\w:-])[^>]*?(?<![\w:-])(?:src|href))'
        r'\s*=\s*(?P<quote>["\'])'
        r')'
        + re.escape(old_src)
        # Closing quote must pair with the opening one.
        + r'(?P=quote)'
    )


def rewrite(html: str, old_src: str, new_src: str) -> "tuple[str, int]":
    """Replace ``old_src`` with ``new_src`` in <script src> / <link href>.

    Tag-aware: only attribute values on ``<script>`` / ``<link>`` start tags
    are touched. Occurrences of ``old_src`` in text, JSON literals, other
    tags, or ``data-…`` attributes are left alone.

    NOTE: the match is purely lexical; a complete ``<script src="…">`` tag
    written inside an HTML comment is still treated as a tag.

    Args:
        html: Document text to scan.
        old_src: Exact (case-sensitive) attribute value to replace.
        new_src: Replacement value, inserted verbatim.

    Returns:
        ``(new_html, count)`` where ``count`` is the number of attributes
        rewritten. An empty ``old_src`` never matches and yields
        ``(html, 0)``.
    """
    if not old_src:
        # An empty needle would match every `src=""`; treat it as "no match"
        # so the caller's zero-match warning fires instead.
        return html, 0

    def _swap(match: "re.Match[str]") -> str:
        # A callable replacement keeps backslashes in new_src literal
        # (a template string would interpret them as group references).
        return match.group("head") + new_src + match.group("quote")

    return _build_pattern(old_src).subn(_swap, html)
def inject(manifest_path: str, html_path: str, old_src: str) -> None:
    """Point ``html_path`` at the hashed bundle named in a Vite manifest.

    Loads the manifest, takes the first row flagged ``isEntry``, and rewrites
    every <script src> / <link href> in the HTML file whose value is
    ``old_src`` so that it references that row's ``file`` in the same URL
    directory as ``old_src``.

    Outcomes are reported on stdout, except the zero-match warning, which
    goes to stderr. The HTML file is only written when at least one attribute
    was rewritten.

    Args:
        manifest_path: Path to the manifest JSON.
        html_path: Path to the HTML file to rewrite in place.
        old_src: Attribute value currently present in the HTML.
    """
    if not os.path.exists(manifest_path):
        print(f"skip: no manifest at {manifest_path}")
        return
    if not os.path.exists(html_path):
        print(f"skip: no html at {html_path}")
        return

    # Explicit UTF-8: do not let the platform locale decide how JSON/HTML
    # are decoded.
    with open(manifest_path, encoding="utf-8") as f:
        manifest = json.load(f)

    # Only the first entry row is used, matching the one-bundle-per-config
    # schema described in the module docstring.
    entry = next((row for row in manifest.values() if row.get("isEntry")), None)
    if entry is None:
        print(f"skip: no isEntry row in {manifest_path}")
        return

    hashed = entry["file"]
    # Swap only the last URL segment. rpartition keeps the separator, so
    # "/app.js" becomes "/<hashed>" (not the protocol-relative "//<hashed>")
    # and a bare "app.js" stays relative instead of becoming root-absolute.
    url_dir, slash, _ = old_src.rpartition("/")
    new_src = f"{url_dir}{slash}{hashed}"

    # newline="" disables newline translation so the file's existing line
    # endings survive the round trip untouched.
    with open(html_path, encoding="utf-8", newline="") as f:
        html = f.read()

    new_html, count = rewrite(html, old_src, new_src)
    if count == 0:
        # Loud warning — silent skip used to mask typos in `old_src`.
        print(
            f"WARN: no <script src> or <link href> matching {old_src!r} "
            f"in {html_path} — leaving file unchanged",
            file=sys.stderr,
        )
        return

    with open(html_path, "w", encoding="utf-8", newline="") as f:
        f.write(new_html)
    print(f"{old_src} -> {new_src} ({count} occurrence{'s' if count != 1 else ''})")
def main() -> int:
    """Command-line entry point: process every entry listed in CONFIG_JSON.

    Reads ``sys.argv``: the first argument is the path to the JSON config (a
    list of objects with ``manifest``, ``html`` and ``old_src`` keys); the
    optional second argument is BUILD_DIR, against which the ``manifest`` and
    ``html`` paths are joined. BUILD_DIR defaults to the current working
    directory.

    Returns:
        2 if CONFIG_JSON was not given (usage is printed to stderr),
        otherwise 0 once all entries have been handed to ``inject``.
    """
    if len(sys.argv) < 2:
        print("usage: inject-hashed-filenames.py CONFIG_JSON [BUILD_DIR]", file=sys.stderr)
        return 2

    cfg_path = sys.argv[1]
    build_dir = sys.argv[2] if len(sys.argv) > 2 else os.getcwd()

    # Explicit UTF-8 so non-ASCII paths in the config decode the same way
    # regardless of the platform's locale encoding.
    with open(cfg_path, encoding="utf-8") as f:
        entries = json.load(f)

    for e in entries:
        inject(
            os.path.join(build_dir, e["manifest"]),
            os.path.join(build_dir, e["html"]),
            e["old_src"],
        )
    return 0
# Script entry: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())