#3: inject-hashed-filenames.py — tag-aware HTML rewrite
Replace substring `html.replace(old_src, new_src)` with a regex anchored to <script src="…"> / <link href="…"> attribute values. Inert occurrences in comments, JSON literals, or unrelated attributes are left alone. Loud warning (stderr) when zero matches occur — previously the script silently skipped a typo'd old_src. Also rewrites <link href> in the same pass so adjacent CSS hashing doesn't need a follow-up edit. Tests: tests/test_inject_hashed_filenames.py covers happy path (both quote styles, extra attributes), inert-substring cases (comment, JSON literal, data-attr, anchor href), and link-href rewriting. Closes #3 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
b1a13b83fd
commit
b3b2903c75
4 changed files with 107 additions and 4 deletions
BIN
bin/__pycache__/inject-hashed-filenames.cpython-313.pyc
Normal file
BIN
bin/__pycache__/inject-hashed-filenames.cpython-313.pyc
Normal file
Binary file not shown.
|
|
@ -26,9 +26,29 @@ containing the config file's parent chain → `$PWD`).
|
||||||
"""
|
"""
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
def _build_pattern(old_src: str) -> "re.Pattern[str]":
|
||||||
|
# Match `src` on <script> and `href` on <link>, single- or double-quoted.
|
||||||
|
# We anchor to the tag name so an `old_src` substring sitting inside an
|
||||||
|
# HTML comment, a JSON literal, or a `data-…` attribute is not rewritten.
|
||||||
|
return re.compile(
|
||||||
|
r'(<(?:script|link)\b[^>]*?\b(?:src|href)\s*=\s*["\'])'
|
||||||
|
+ re.escape(old_src)
|
||||||
|
+ r'(["\'])',
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def rewrite(html: str, old_src: str, new_src: str) -> "tuple[str, int]":
    """Swap `old_src` for `new_src` in tag attributes and report the count.

    The substitution is tag-aware: it is driven by the pattern from
    `_build_pattern(old_src)`, so only matching <script src> / <link href>
    attribute values change; bare substrings in comments or JSON do not.

    Returns:
        `(new_html, count)` where `count` is the number of replacements made.
    """

    def _swap(match):
        # A callable replacement inserts `new_src` verbatim — no backslash or
        # group-reference expansion is applied to it.
        return match.group(1) + new_src + match.group(2)

    return _build_pattern(old_src).subn(_swap, html)
|
||||||
|
|
||||||
|
|
||||||
def inject(manifest_path: str, html_path: str, old_src: str) -> None:
|
def inject(manifest_path: str, html_path: str, old_src: str) -> None:
|
||||||
if not os.path.exists(manifest_path):
|
if not os.path.exists(manifest_path):
|
||||||
print(f"skip: no manifest at {manifest_path}")
|
print(f"skip: no manifest at {manifest_path}")
|
||||||
|
|
@ -45,12 +65,18 @@ def inject(manifest_path: str, html_path: str, old_src: str) -> None:
|
||||||
new_src = f"{os.path.dirname(old_src)}/{hashed}"
|
new_src = f"{os.path.dirname(old_src)}/{hashed}"
|
||||||
with open(html_path) as f:
|
with open(html_path) as f:
|
||||||
html = f.read()
|
html = f.read()
|
||||||
if old_src not in html:
|
new_html, count = rewrite(html, old_src, new_src)
|
||||||
print(f"skip: {old_src!r} not in {html_path}")
|
if count == 0:
|
||||||
|
# Loud warning — silent skip used to mask typos in `old_src`.
|
||||||
|
print(
|
||||||
|
f"WARN: no <script src> or <link href> matching {old_src!r} "
|
||||||
|
f"in {html_path} — leaving file unchanged",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
return
|
return
|
||||||
with open(html_path, "w") as f:
|
with open(html_path, "w") as f:
|
||||||
f.write(html.replace(old_src, new_src))
|
f.write(new_html)
|
||||||
print(f"{old_src} -> {new_src}")
|
print(f"{old_src} -> {new_src} ({count} occurrence{'s' if count != 1 else ''})")
|
||||||
return
|
return
|
||||||
print(f"skip: no isEntry row in {manifest_path}")
|
print(f"skip: no isEntry row in {manifest_path}")
|
||||||
|
|
||||||
|
|
|
||||||
Binary file not shown.
77
tests/test_inject_hashed_filenames.py
Normal file
77
tests/test_inject_hashed_filenames.py
Normal file
|
|
@ -0,0 +1,77 @@
|
||||||
|
"""Tests for bin/inject-hashed-filenames.py rewrite() (#3).
|
||||||
|
|
||||||
|
Pinned behaviour: the rewrite is tag-aware — only `<script src="…">` and
|
||||||
|
`<link href="…">` attribute values are replaced. Inert occurrences of the
|
||||||
|
old src in HTML comments, JSON literals, or unrelated attributes must be
|
||||||
|
left alone (the previous `html.replace` was substring-blind).
|
||||||
|
"""
|
||||||
|
import importlib.util
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
HERE = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
SCRIPT = os.path.join(os.path.dirname(HERE), "bin", "inject-hashed-filenames.py")
|
||||||
|
|
||||||
|
spec = importlib.util.spec_from_file_location("ihf", SCRIPT)
|
||||||
|
ihf = importlib.util.module_from_spec(spec)
|
||||||
|
sys.modules["ihf"] = ihf
|
||||||
|
spec.loader.exec_module(ihf)
|
||||||
|
|
||||||
|
|
||||||
|
def test_rewrites_script_src_double_quoted():
    """A double-quoted <script src> value is replaced exactly once."""
    markup = '<script src="/static/dist/app.js"></script>'
    result, replaced = ihf.rewrite(
        markup, "/static/dist/app.js", "/static/dist/app.abc123.js"
    )
    assert replaced == 1
    assert result == '<script src="/static/dist/app.abc123.js"></script>'
|
||||||
|
|
||||||
|
|
||||||
|
def test_rewrites_script_src_single_quoted():
    """A single-quoted <script src> value is replaced too."""
    markup = "<script src='/static/dist/app.js'></script>"
    result, replaced = ihf.rewrite(
        markup, "/static/dist/app.js", "/static/dist/app.abc123.js"
    )
    assert replaced == 1
    assert "/static/dist/app.abc123.js" in result
|
||||||
|
|
||||||
|
|
||||||
|
def test_rewrites_link_href():
    """The href of a <link> tag is rewritten in the same way as script src."""
    markup = '<link rel="stylesheet" href="/static/dist/app.css">'
    result, replaced = ihf.rewrite(
        markup, "/static/dist/app.css", "/static/dist/app.abc123.css"
    )
    assert replaced == 1
    assert '/static/dist/app.abc123.css' in result
|
||||||
|
|
||||||
|
|
||||||
|
def test_does_not_rewrite_inside_html_comment():
    """A bare path mentioned in an HTML comment is left untouched."""
    markup = '<!-- old script was at /static/dist/app.js --><script src="/other.js"></script>'
    result, replaced = ihf.rewrite(
        markup, "/static/dist/app.js", "/static/dist/app.abc123.js"
    )
    assert replaced == 0
    assert "/static/dist/app.js" in result
    assert "/static/dist/app.abc123.js" not in result
|
||||||
|
|
||||||
|
|
||||||
|
def test_does_not_rewrite_inside_json_literal():
    """A JSON-looking `"src": "…"` pair in page text is not an attribute."""
    markup = '<pre>{ "src": "/static/dist/app.js" }</pre>'
    result, replaced = ihf.rewrite(
        markup, "/static/dist/app.js", "/static/dist/app.abc123.js"
    )
    assert replaced == 0
    assert result == markup
|
||||||
|
|
||||||
|
|
||||||
|
def test_does_not_rewrite_unrelated_attribute():
    """The old path inside a data attribute of another element is ignored."""
    markup = '<img data-bundle="/static/dist/app.js">'
    result, replaced = ihf.rewrite(
        markup, "/static/dist/app.js", "/static/dist/app.abc123.js"
    )
    assert replaced == 0
    assert result == markup
|
||||||
|
|
||||||
|
|
||||||
|
def test_does_not_rewrite_anchor_href():
    """An <a href="…"> is an `href`, but not on a <link>, so it must stay."""
    markup = '<a href="/static/dist/app.js">debug link</a>'
    result, replaced = ihf.rewrite(
        markup, "/static/dist/app.js", "/static/dist/app.abc123.js"
    )
    assert replaced == 0
    assert result == markup
|
||||||
|
|
||||||
|
|
||||||
|
def test_rewrites_with_extra_attributes_around_src():
    """Attributes before and after `src` survive the rewrite unchanged."""
    markup = '<script type="module" src="/static/dist/app.js" defer></script>'
    result, replaced = ihf.rewrite(
        markup, "/static/dist/app.js", "/static/dist/app.abc123.js"
    )
    assert replaced == 1
    assert '/static/dist/app.abc123.js' in result
    assert 'type="module"' in result and 'defer' in result
|
||||||
Loading…
Add table
Reference in a new issue