#3: inject-hashed-filenames.py — tag-aware HTML rewrite
Replace the substring-based `html.replace(old_src, new_src)` with a regex anchored to <script src="…"> / <link href="…"> attribute values. Inert occurrences in comments, JSON literals, or unrelated attributes are left alone. A loud warning is printed to stderr when zero matches occur — previously the script silently skipped a mistyped old_src. The same pass also rewrites <link href>, so adjacent CSS hashing doesn't need a follow-up edit. Tests: tests/test_inject_hashed_filenames.py covers the happy path (both quote styles, extra attributes), inert-substring cases (comment, JSON literal, data-attr, anchor href), and link-href rewriting. Closes #3. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
b1a13b83fd
commit
b3b2903c75
4 changed files with 107 additions and 4 deletions
BIN
bin/__pycache__/inject-hashed-filenames.cpython-313.pyc
Normal file
BIN
bin/__pycache__/inject-hashed-filenames.cpython-313.pyc
Normal file
Binary file not shown.
|
|
@ -26,9 +26,29 @@ containing the config file's parent chain → `$PWD`).
|
|||
"""
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
|
||||
def _build_pattern(old_src: str) -> "re.Pattern[str]":
    """Compile a regex that finds ``old_src`` as a quoted script/link URL.

    The pattern matches a ``src`` or ``href`` attribute inside a ``<script``
    or ``<link`` tag whose quoted value is exactly ``old_src``.

    Args:
        old_src: The attribute value to look for. It is escaped, so regex
            metacharacters in it (such as ``.``) match literally.

    Returns:
        A compiled pattern with two groups: ``head`` (group 1) is everything
        from ``<`` up to and including the opening quote, and ``quote``
        (group 2) is that opening quote character. The closing quote is
        matched by backreference and is not captured separately.
    """
    return re.compile(
        # Tag and attribute names are case-insensitive in HTML, but the URL
        # itself is not, so the ignore-case flag is scoped to the names only.
        #
        # `(?<![\w-])` rather than `\b`: a plain word boundary also sits
        # between `-` and `s`, which would let `data-src="…"` match.
        r'(?P<head>(?i:<(?:script|link)\b[^>]*?(?<![\w-])(?:src|href))'
        r'\s*=\s*(?P<quote>["\']))'
        + re.escape(old_src)
        # The value must be closed by the same quote character that opened it.
        + r'(?P=quote)'
    )


def rewrite(html: str, old_src: str, new_src: str) -> "tuple[str, int]":
    """Replace ``old_src`` with ``new_src`` in script/link URL attributes.

    Tag-aware: only a ``src``/``href`` attribute on a ``<script>`` or
    ``<link>`` tag whose whole quoted value equals ``old_src`` is rewritten.
    Occurrences in comments, JSON text, other tags, or ``data-…`` attributes
    are left untouched.

    Args:
        html: The document text to scan.
        old_src: The attribute value to replace (matched literally and
            case-sensitively).
        new_src: The value to put in its place, inserted verbatim.

    Returns:
        ``(new_html, count)`` where ``count`` is the number of attributes
        rewritten; ``new_html`` equals ``html`` when ``count`` is 0.
    """
    pattern = _build_pattern(old_src)

    def _swap(match: "re.Match[str]") -> str:
        # Concatenate instead of using a replacement template so that any
        # backslashes in `new_src` are inserted literally.
        return match.group("head") + new_src + match.group("quote")

    return pattern.subn(_swap, html)
|
||||
|
||||
|
||||
def inject(manifest_path: str, html_path: str, old_src: str) -> None:
|
||||
if not os.path.exists(manifest_path):
|
||||
print(f"skip: no manifest at {manifest_path}")
|
||||
|
|
@ -45,12 +65,18 @@ def inject(manifest_path: str, html_path: str, old_src: str) -> None:
|
|||
new_src = f"{os.path.dirname(old_src)}/{hashed}"
|
||||
with open(html_path) as f:
|
||||
html = f.read()
|
||||
if old_src not in html:
|
||||
print(f"skip: {old_src!r} not in {html_path}")
|
||||
new_html, count = rewrite(html, old_src, new_src)
|
||||
if count == 0:
|
||||
# Loud warning — silent skip used to mask typos in `old_src`.
|
||||
print(
|
||||
f"WARN: no <script src> or <link href> matching {old_src!r} "
|
||||
f"in {html_path} — leaving file unchanged",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return
|
||||
with open(html_path, "w") as f:
|
||||
f.write(html.replace(old_src, new_src))
|
||||
print(f"{old_src} -> {new_src}")
|
||||
f.write(new_html)
|
||||
print(f"{old_src} -> {new_src} ({count} occurrence{'s' if count != 1 else ''})")
|
||||
return
|
||||
print(f"skip: no isEntry row in {manifest_path}")
|
||||
|
||||
|
|
|
|||
Binary file not shown.
77
tests/test_inject_hashed_filenames.py
Normal file
77
tests/test_inject_hashed_filenames.py
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
"""Tests for bin/inject-hashed-filenames.py rewrite() (#3).
|
||||
|
||||
Pinned behaviour: the rewrite is tag-aware — only `<script src="…">` and
|
||||
`<link href="…">` attribute values are replaced. Inert occurrences of the
|
||||
old src in HTML comments, JSON literals, or unrelated attributes must be
|
||||
left alone (the previous `html.replace` was substring-blind).
|
||||
"""
|
||||
import importlib.util
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Directory that holds this test module.
HERE = os.path.dirname(os.path.abspath(__file__))
# The script under test sits in the sibling bin/ directory. Its filename
# contains hyphens, so it cannot be named in an `import` statement and is
# loaded by path instead.
SCRIPT = os.path.join(
    os.path.dirname(HERE),
    "bin",
    "inject-hashed-filenames.py",
)

spec = importlib.util.spec_from_file_location("ihf", SCRIPT)
ihf = importlib.util.module_from_spec(spec)
# Register the module under the spec's name ("ihf") before running its body.
sys.modules[spec.name] = ihf
spec.loader.exec_module(ihf)
|
||||
|
||||
|
||||
def test_rewrites_script_src_double_quoted():
    """A double-quoted <script src> value is replaced exactly once."""
    page = '<script src="/static/dist/app.js"></script>'
    result = ihf.rewrite(page, "/static/dist/app.js", "/static/dist/app.abc123.js")
    rewritten, hits = result
    assert hits == 1
    assert rewritten == '<script src="/static/dist/app.abc123.js"></script>'
|
||||
|
||||
|
||||
def test_rewrites_script_src_single_quoted():
    """A single-quoted <script src> value is replaced as well."""
    page = "<script src='/static/dist/app.js'></script>"
    result = ihf.rewrite(page, "/static/dist/app.js", "/static/dist/app.abc123.js")
    rewritten, hits = result
    assert hits == 1
    assert "/static/dist/app.abc123.js" in rewritten
|
||||
|
||||
|
||||
def test_rewrites_link_href():
    """The href of a <link> tag is rewritten in the same way as script src."""
    page = '<link rel="stylesheet" href="/static/dist/app.css">'
    result = ihf.rewrite(page, "/static/dist/app.css", "/static/dist/app.abc123.css")
    rewritten, hits = result
    assert hits == 1
    assert '/static/dist/app.abc123.css' in rewritten
|
||||
|
||||
|
||||
def test_does_not_rewrite_inside_html_comment():
    """The old src mentioned only inside an HTML comment is left as-is."""
    page = '<!-- old script was at /static/dist/app.js --><script src="/other.js"></script>'
    result = ihf.rewrite(page, "/static/dist/app.js", "/static/dist/app.abc123.js")
    rewritten, hits = result
    assert hits == 0
    # The comment text survives and the hashed name never appears.
    assert "/static/dist/app.js" in rewritten
    assert "/static/dist/app.abc123.js" not in rewritten
|
||||
|
||||
|
||||
def test_does_not_rewrite_inside_json_literal():
    """A JSON-looking "src" key in page text is not an attribute; no rewrite."""
    page = '<pre>{ "src": "/static/dist/app.js" }</pre>'
    result = ihf.rewrite(page, "/static/dist/app.js", "/static/dist/app.abc123.js")
    rewritten, hits = result
    assert hits == 0
    assert rewritten == page
|
||||
|
||||
|
||||
def test_does_not_rewrite_unrelated_attribute():
    """A data-bundle attribute on an <img> carrying the old src is untouched."""
    page = '<img data-bundle="/static/dist/app.js">'
    result = ihf.rewrite(page, "/static/dist/app.js", "/static/dist/app.abc123.js")
    rewritten, hits = result
    assert hits == 0
    assert rewritten == page
|
||||
|
||||
|
||||
def test_does_not_rewrite_anchor_href():
    """An <a href> is a href attribute but not on a <link>, so it is skipped."""
    page = '<a href="/static/dist/app.js">debug link</a>'
    result = ihf.rewrite(page, "/static/dist/app.js", "/static/dist/app.abc123.js")
    rewritten, hits = result
    assert hits == 0
    assert rewritten == page
|
||||
|
||||
|
||||
def test_rewrites_with_extra_attributes_around_src():
    """Attributes before and after src do not block the rewrite and survive it."""
    page = '<script type="module" src="/static/dist/app.js" defer></script>'
    result = ihf.rewrite(page, "/static/dist/app.js", "/static/dist/app.abc123.js")
    rewritten, hits = result
    assert hits == 1
    assert '/static/dist/app.abc123.js' in rewritten
    # Neighbouring attributes must come through unchanged.
    assert 'type="module"' in rewritten
    assert 'defer' in rewritten
|
||||
Loading…
Add table
Reference in a new issue