Files
2026-05-26 00:31:45 -07:00

79 lines
3.3 KiB
Python

import os
import re
import hashlib
from html.parser import HTMLParser
from .security import get_safe_path, PROJECTS_DIR
from .state import load_ledger, update_failure_count
class TagBalanceParser(HTMLParser):
    """HTML parser that records unbalanced structural tags.

    Only tags listed in ``tracked_tags`` are checked; every other tag is
    ignored. After a document has been fed:

    * ``errors`` holds one message per stray or mismatched end tag.
    * ``stack`` holds ``(tag, position)`` pairs for tracked tags that were
      opened but never closed, innermost last. ``position`` is the
      ``(line, offset)`` tuple returned by ``getpos()``.
    """

    def __init__(self):
        super().__init__()
        self.stack = []
        self.errors = []
        # Exclude 'p' and 'font' to avoid false positives on standard HTML5 tag-omissions
        self.tracked_tags = {
            'table', 'tr', 'td', 'div', 'nav', 'header', 'footer', 'main', 'section',
        }

    def handle_starttag(self, tag, attrs):
        """Remember a tracked start tag together with its source position."""
        if tag in self.tracked_tags:
            self.stack.append((tag, self.getpos()))

    def handle_endtag(self, tag):
        """Match a tracked end tag against the stack of open tags.

        An end tag with no open counterpart is reported as orphaned and the
        stack is left untouched, so one stray tag cannot desynchronise the
        checks that follow. Otherwise the stack is unwound down to the
        matching start tag and every tag skipped on the way is reported as
        mismatched.
        """
        if tag not in self.tracked_tags:
            return

        line = self.getpos()[0]
        if not any(open_tag == tag for open_tag, _ in self.stack):
            self.errors.append(f"Orphaned </{tag}> near line {line}")
            return

        # A matching opener exists, so this loop always ends on the `break`.
        while self.stack:
            last_open, pos = self.stack.pop()
            if last_open == tag:
                break
            self.errors.append(
                f"Mismatched tag: <{last_open}> (L{pos[0]}) closed by </{tag}> (L{line})"
            )
# Number of consecutive failed audits after which a page is escalated.
_MAX_STRIKES = 3

# Links starting with one of these are not checked against the task queue.
# The trailing "://" matters: a bare "http" prefix would also match relative
# file names such as "httpdocs.html" and let a dead link slip through.
_UNCHECKED_LINK_PREFIXES = ("http://", "https://", "mailto:", "#")

_HREF_PATTERN = re.compile(r'href=["\']([^"\']+)["\']', re.IGNORECASE)


def _collect_tag_errors(content):
    """Return the tag-balance error messages for the HTML in *content*."""
    parser = TagBalanceParser()
    try:
        parser.feed(content)
        # Flush anything the parser is still buffering at end of input.
        parser.close()
    except Exception as e:
        # Deliberately broad: a parser crash is reported as an audit error
        # instead of aborting the whole audit.
        return [f"Parser failure: {str(e)}"]

    errors = list(parser.errors)
    errors.extend(
        f"Unclosed <{unclosed}> near line {pos[0]}" for unclosed, pos in parser.stack
    )
    return errors


def _find_dead_links(content, allowed_files):
    """Return an error message for every href that is not in *allowed_files*."""
    errors = []
    for link in _HREF_PATTERN.findall(content):
        # URI schemes are case-insensitive, so compare in lower case.
        if link.lower().startswith(_UNCHECKED_LINK_PREFIXES):
            continue
        # "about.html#team" and "about.html?x=1" both refer to "about.html".
        target = link.split("#", 1)[0].split("?", 1)[0]
        if target not in allowed_files:
            errors.append(f"Dead Link Found: '{link}' is not in the project task queue.")
    return errors


def run_audit(project_name, page_id):
    """Audit the compiled file of one page and track repeated failures.

    The checks run in order: the file's SHA-256 must match the hash stored
    in the ledger, tracked HTML tags must be balanced, and every internal
    ``href`` must name a file from the project's task queue.

    Args:
        project_name: Project whose ledger and ``dist`` directory are used.
        page_id: ``page_id`` of the task to audit.

    Returns:
        A dict whose ``status`` is one of:

        * ``"success"`` - all checks passed; the failure count is reset.
        * ``"error"`` with ``reason`` - the task or file is missing, or the
          hash does not match. The failure count is not touched.
        * ``"error"`` with ``strikes`` and ``errors`` - structural problems
          were found.
        * ``"circuit_breaker"`` with ``reason`` and ``errors`` - structural
          problems were found and the strike count reached the limit.
    """
    ledger = load_ledger(project_name)
    task_queue = ledger.get("task_queue", [])

    task = next((t for t in task_queue if t["page_id"] == page_id), None)
    if task is None:
        return {"status": "error", "reason": "Task not found."}

    dest_file = get_safe_path(PROJECTS_DIR, project_name, "dist", task["filename"])
    if not os.path.exists(dest_file):
        return {"status": "error", "reason": "Compiled file not found on disk."}

    # 1. Hash Freshness Check
    # NOTE(review): the hash covers the decoded text re-encoded as UTF-8,
    # presumably mirroring how the ledger hash is produced - confirm against
    # the code that writes it.
    with open(dest_file, "r", encoding="utf-8") as f:
        disk_content = f.read()
    disk_hash = hashlib.sha256(disk_content.encode('utf-8')).hexdigest()
    ledger_hash = ledger.get("hashes", {}).get(page_id, "")
    if disk_hash != ledger_hash:
        return {"status": "error", "reason": "Security Fault: Disk hash does not match Ledger hash. Stale or tampered file."}

    # 2. Tag Balance Audit
    errors = _collect_tag_errors(disk_content)

    # 3. Link Matrix Audit
    allowed_files = {t["filename"] for t in task_queue}
    errors.extend(_find_dead_links(disk_content, allowed_files))

    # 4. Three-Strike Circuit Breaker
    if errors:
        # Assumes update_failure_count returns the updated strike count - TODO confirm.
        strikes = update_failure_count(project_name, page_id, increment=True)
        if strikes >= _MAX_STRIKES:
            return {
                "status": "circuit_breaker",
                "reason": f"Page '{page_id}' failed structural audit {_MAX_STRIKES} consecutive times. Escalating to human.",
                "errors": errors,
            }
        return {"status": "error", "strikes": strikes, "errors": errors}

    # Reset strikes on success
    update_failure_count(project_name, page_id, increment=False)
    return {"status": "success", "message": "Audit passed. File is structurally sound."}