"""SQLite-backed bookkeeping for staged log files.

Two tables are kept in the database at ``LOG_DB_PATH``:

* ``logs`` -- one row per file path with creation time, last access time
  and an access counter.
* ``refs`` -- one row per file path with a reference counter.

``collect_garbage`` combines both to decide which files to delete.
"""

import sqlite3
from contextlib import contextmanager
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterable, Iterator, List

LOG_DB_PATH = Path("./instance/mathstream_logs.sqlite")

# Shared by every code path that counts an access: a brand-new path starts
# at one access, an existing path gets its counter bumped by exactly one.
_RECORD_ACCESS_SQL = """
    INSERT INTO logs (path, created_at, last_access, access_count)
    VALUES (?, ?, ?, 1)
    ON CONFLICT(path) DO UPDATE SET
        last_access = excluded.last_access,
        access_count = logs.access_count + 1
"""


def _normalize_paths(paths: Iterable[Path]) -> List[str]:
    """Return each path as an absolute, symlink-resolved string."""
    return [str(Path(p).resolve()) for p in paths]


def _utc_timestamp() -> float:
    """Return the current time as a POSIX timestamp taken from a UTC clock."""
    return datetime.now(timezone.utc).timestamp()


@contextmanager
def _connection() -> Iterator[sqlite3.Connection]:
    """Yield a connection to the log database, committing and closing it.

    ``sqlite3.Connection`` used directly as a context manager only commits
    or rolls back the transaction; it does not close the connection. This
    wrapper adds the missing ``close()`` so handles are not leaked.
    """
    conn = sqlite3.connect(LOG_DB_PATH)
    try:
        with conn:  # commit on success, roll back on error
            yield conn
    finally:
        conn.close()


def _ensure_db(reset: bool = False) -> None:
    """Create the database file and its tables if they do not exist yet.

    Args:
        reset: When true, delete every row from both tables after making
            sure they exist.
    """
    LOG_DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    with _connection() as conn:
        conn.execute(
            """
            CREATE TABLE IF NOT EXISTS logs (
                path TEXT PRIMARY KEY,
                created_at REAL,
                last_access REAL,
                access_count INTEGER DEFAULT 0
            )
            """
        )
        conn.execute(
            """
            CREATE TABLE IF NOT EXISTS refs (
                path TEXT PRIMARY KEY,
                ref_count INTEGER DEFAULT 0
            )
            """
        )
        if reset:
            conn.execute("DELETE FROM logs")
            conn.execute("DELETE FROM refs")


# NOTE: runs at import time, so importing this module clears all bookkeeping
# rows stored at LOG_DB_PATH.
_ensure_db(reset=True)


def register_log_file(path: Path) -> None:
    """Ensure the log database is aware of a file's existence.

    Inserts zeroed rows for ``path`` into both tables; rows that already
    exist are left untouched.
    """
    normalized = _normalize_paths([path])[0]
    _ensure_db()
    timestamp = _utc_timestamp()
    with _connection() as conn:
        conn.execute(
            """
            INSERT INTO logs (path, created_at, last_access, access_count)
            VALUES (?, ?, ?, 0)
            ON CONFLICT(path) DO NOTHING
            """,
            (normalized, timestamp, timestamp),
        )
        conn.execute(
            """
            INSERT INTO refs (path, ref_count) VALUES (?, 0)
            ON CONFLICT(path) DO NOTHING
            """,
            (normalized,),
        )


def register_reference(path: Path) -> None:
    """Increment reference count similarly to Python's ref counter.

    Each call adds one to the path's ``ref_count`` and one to its
    ``access_count`` and refreshes ``last_access``. Unknown paths are
    created on the fly with both counters at one.
    """
    normalized = _normalize_paths([path])[0]
    _ensure_db()
    timestamp = _utc_timestamp()
    with _connection() as conn:
        # A single upsert keeps the first reference at one access; a separate
        # INSERT(1) followed by an unconditional UPDATE(+1) would count it
        # twice.
        conn.execute(_RECORD_ACCESS_SQL, (normalized, timestamp, timestamp))
        conn.execute(
            """
            INSERT INTO refs (path, ref_count) VALUES (?, 1)
            ON CONFLICT(path) DO UPDATE SET ref_count = ref_count + 1
            """,
            (normalized,),
        )


def touch_log_file(path: Path) -> None:
    """Refresh access metadata when a file is streamed.

    Bumps ``access_count`` and ``last_access`` for ``path``, creating the
    ``logs`` row if needed. The ``refs`` table is not modified.
    """
    normalized = _normalize_paths([path])[0]
    _ensure_db()
    timestamp = _utc_timestamp()
    with _connection() as conn:
        conn.execute(_RECORD_ACCESS_SQL, (normalized, timestamp, timestamp))


def wipe_log_records() -> None:
    """Drop all bookkeeping (used after manual log purges)."""
    _ensure_db()
    with _connection() as conn:
        conn.execute("DELETE FROM logs")
        conn.execute("DELETE FROM refs")


def _delete_records(paths: List[Path]) -> None:
    """Delete the ``logs`` and ``refs`` rows for the given paths."""
    if not paths:
        return
    params = [(normalized,) for normalized in _normalize_paths(paths)]
    with _connection() as conn:
        conn.executemany("DELETE FROM logs WHERE path = ?", params)
        conn.executemany("DELETE FROM refs WHERE path = ?", params)


def collect_garbage(score_threshold: float) -> list[Path]:
    """Remove seldom-used staged files based on an age/refcount score.

    For every tracked path the score is::

        seconds_since_last_access / ((ref_count + 1) * (access_count + 1))

    Paths whose score is at least ``score_threshold`` are deleted from
    disk and from the database.

    Args:
        score_threshold: Minimum score at which a file is removed.

    Returns:
        The paths whose records were dropped. A path that was already
        missing on disk is included; a path whose deletion failed is not
        and its record is kept.

    Raises:
        ValueError: If ``score_threshold`` is negative.
    """
    if score_threshold < 0:
        raise ValueError("score_threshold must be non-negative")

    _ensure_db()
    now = _utc_timestamp()

    with _connection() as conn:
        rows = conn.execute(
            """
            SELECT l.path,
                   COALESCE(l.created_at, ?),
                   COALESCE(l.last_access, l.created_at, ?),
                   COALESCE(l.access_count, 0),
                   COALESCE(r.ref_count, 0)
            FROM logs l
            LEFT JOIN refs r ON l.path = r.path
            """,
            (now, now),
        ).fetchall()

    removed: list[Path] = []
    for path_str, created_at, last_access, access_count, ref_count in rows:
        path = Path(path_str)
        age = now - (last_access or created_at or now)
        score = age / ((ref_count + 1) * (access_count + 1))
        if score < score_threshold:
            continue
        try:
            # missing_ok avoids an exists()/unlink() race: a file that is
            # already gone still gets its stale record dropped.
            path.unlink(missing_ok=True)
        except OSError:
            # Could not delete (permissions, directory, ...): keep the record
            # so a later run can retry.
            continue
        removed.append(path)

    _delete_records(removed)
    return removed