Added better garbage collection
This commit is contained in:
parent
df9b2b5f29
commit
443f9f4f4b
@ -23,6 +23,7 @@ from mathstream import (
|
|||||||
pow,
|
pow,
|
||||||
is_even,
|
is_even,
|
||||||
is_odd,
|
is_odd,
|
||||||
|
collect_garbage,
|
||||||
)
|
)
|
||||||
|
|
||||||
a = StreamNumber("instance/log/huge.txt")
|
a = StreamNumber("instance/log/huge.txt")
|
||||||
@ -37,6 +38,9 @@ print("modulo =", "".join(mod(a, b).stream()))
|
|||||||
print("power =", "".join(pow(a, e).stream()))
|
print("power =", "".join(pow(a, e).stream()))
|
||||||
print("a is even?", is_even(a))
|
print("a is even?", is_even(a))
|
||||||
print("b is odd?", is_odd(b))
|
print("b is odd?", is_odd(b))
|
||||||
|
|
||||||
|
# reclaim space for files whose age outweighs their use
|
||||||
|
collect_garbage(0.5)
|
||||||
```
|
```
|
||||||
|
|
||||||
Each arithmetic call writes its result back into `instance/log` (configurable via `mathstream.number.LOG_DIR`) so you can stream the digits later or reuse them in further operations.
|
Each arithmetic call writes its result back into `instance/log` (configurable via `mathstream.number.LOG_DIR`) so you can stream the digits later or reuse them in further operations.
|
||||||
@ -49,6 +53,9 @@ Each arithmetic call writes its result back into `instance/log` (configurable vi
|
|||||||
- **Sign-aware** – Addition, subtraction, multiplication, division (`//` behavior), modulo, and exponentiation (non-negative exponents) all respect operand sign. Division/modulo follow Python’s floor-division rules.
|
- **Sign-aware** – Addition, subtraction, multiplication, division (`//` behavior), modulo, and exponentiation (non-negative exponents) all respect operand sign. Division/modulo follow Python’s floor-division rules.
|
||||||
- **Utilities** – `clear_logs()` wipes prior staged results so you can start fresh.
|
- **Utilities** – `clear_logs()` wipes prior staged results so you can start fresh.
|
||||||
- **Parity helpers** – `is_even` and `is_odd` inspect the streamed digits without materializing the integer.
|
- **Parity helpers** – `is_even` and `is_odd` inspect the streamed digits without materializing the integer.
|
||||||
|
- **Garbage collection** – `collect_garbage(score_threshold)` computes a score from file age, access count, and reference count (tracked in `instance/mathstream_logs.sqlite`, freshly truncated each run). Files whose score meets or exceeds the threshold are deleted, letting you tune how aggressively to reclaim space. Both staged results and literal caches participate.
|
||||||
|
|
||||||
|
Divide-by-zero scenarios raise the custom `DivideByZeroError` so callers can distinguish mathstream issues from Python’s native exceptions.
|
||||||
|
|
||||||
## Example Script
|
## Example Script
|
||||||
|
|
||||||
|
|||||||
@ -1,11 +1,20 @@
|
|||||||
from .engine import clear_logs, add, sub, mul, div, mod, pow, is_even, is_odd
|
from .engine import clear_logs, add, sub, mul, div, mod, pow, is_even, is_odd
|
||||||
|
from .exceptions import MathStreamError, DivideByZeroError
|
||||||
from .number import StreamNumber
|
from .number import StreamNumber
|
||||||
|
from .utils import collect_garbage
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"clear_logs",
|
"clear_logs",
|
||||||
"add", "sub",
|
"collect_garbage",
|
||||||
"mul", "div", "mod",
|
"add",
|
||||||
|
"sub",
|
||||||
|
"mul",
|
||||||
|
"div",
|
||||||
|
"mod",
|
||||||
"pow",
|
"pow",
|
||||||
"is_even", "is_odd",
|
"is_even",
|
||||||
|
"is_odd",
|
||||||
"StreamNumber",
|
"StreamNumber",
|
||||||
|
"MathStreamError",
|
||||||
|
"DivideByZeroError",
|
||||||
]
|
]
|
||||||
|
|||||||
@ -2,7 +2,9 @@ from __future__ import annotations
|
|||||||
|
|
||||||
from typing import Iterable, Tuple
|
from typing import Iterable, Tuple
|
||||||
|
|
||||||
|
from .exceptions import DivideByZeroError
|
||||||
from .number import StreamNumber, LOG_DIR
|
from .number import StreamNumber, LOG_DIR
|
||||||
|
from .utils import register_log_file, wipe_log_records
|
||||||
|
|
||||||
|
|
||||||
def _ensure_log_dir() -> None:
|
def _ensure_log_dir() -> None:
|
||||||
@ -123,7 +125,7 @@ def _multiply_digit(num: str, digit: int) -> str:
|
|||||||
|
|
||||||
def _divide_abs(dividend: str, divisor: str) -> Tuple[str, str]:
|
def _divide_abs(dividend: str, divisor: str) -> Tuple[str, str]:
|
||||||
if divisor == "0":
|
if divisor == "0":
|
||||||
raise ZeroDivisionError("division by zero")
|
raise DivideByZeroError("division by zero")
|
||||||
if dividend == "0":
|
if dividend == "0":
|
||||||
return "0", "0"
|
return "0", "0"
|
||||||
|
|
||||||
@ -171,6 +173,7 @@ def _write_result(operation: str, operands: Iterable[StreamNumber], digits: str)
|
|||||||
out_file = LOG_DIR / f"{operation}_{operand_hash}.bin"
|
out_file = LOG_DIR / f"{operation}_{operand_hash}.bin"
|
||||||
with open(out_file, "w", encoding="utf-8") as out:
|
with open(out_file, "w", encoding="utf-8") as out:
|
||||||
out.write(digits)
|
out.write(digits)
|
||||||
|
register_log_file(out_file)
|
||||||
return StreamNumber(out_file)
|
return StreamNumber(out_file)
|
||||||
|
|
||||||
|
|
||||||
@ -179,6 +182,7 @@ def clear_logs():
|
|||||||
for p in LOG_DIR.glob("*"):
|
for p in LOG_DIR.glob("*"):
|
||||||
p.unlink()
|
p.unlink()
|
||||||
_ensure_log_dir()
|
_ensure_log_dir()
|
||||||
|
wipe_log_records()
|
||||||
|
|
||||||
|
|
||||||
def add(num_a: StreamNumber, num_b: StreamNumber) -> StreamNumber:
|
def add(num_a: StreamNumber, num_b: StreamNumber) -> StreamNumber:
|
||||||
@ -266,6 +270,9 @@ def mod(num_a: StreamNumber, num_b: StreamNumber) -> StreamNumber:
|
|||||||
sign_a, a_digits = _normalize_stream(num_a)
|
sign_a, a_digits = _normalize_stream(num_a)
|
||||||
sign_b, b_digits = _normalize_stream(num_b)
|
sign_b, b_digits = _normalize_stream(num_b)
|
||||||
|
|
||||||
|
if b_digits == "0":
|
||||||
|
raise DivideByZeroError("modulo by zero")
|
||||||
|
|
||||||
_, remainder = _divide_abs(a_digits, b_digits)
|
_, remainder = _divide_abs(a_digits, b_digits)
|
||||||
|
|
||||||
if remainder == "0":
|
if remainder == "0":
|
||||||
|
|||||||
6
mathstream/exceptions.py
Normal file
6
mathstream/exceptions.py
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
class MathStreamError(Exception):
    """Root of the exception hierarchy raised by mathstream itself."""
|
||||||
|
|
||||||
|
|
||||||
|
class DivideByZeroError(MathStreamError):
    """Signals that a division or modulo operation was given a zero divisor."""
|
||||||
@ -2,6 +2,8 @@ import hashlib
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional, Union
|
from typing import Optional, Union
|
||||||
|
|
||||||
|
from .utils import register_log_file, register_reference, touch_log_file
|
||||||
|
|
||||||
LOG_DIR = Path("./instance/log")
|
LOG_DIR = Path("./instance/log")
|
||||||
|
|
||||||
|
|
||||||
@ -29,6 +31,14 @@ def _canonicalize_literal(value: str) -> str:
|
|||||||
return f"{sign}{digits}"
|
return f"{sign}{digits}"
|
||||||
|
|
||||||
|
|
||||||
|
def _is_in_log_dir(path: Path) -> bool:
    """Return True when ``path`` resolves to a location under ``LOG_DIR``."""
    log_root = LOG_DIR.resolve()
    candidate = path.resolve()
    try:
        # relative_to raises ValueError when candidate is not below log_root.
        candidate.relative_to(log_root)
    except ValueError:
        return False
    else:
        return True
|
||||||
|
|
||||||
|
|
||||||
class StreamNumber:
|
class StreamNumber:
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@ -50,6 +60,10 @@ class StreamNumber:
|
|||||||
if not self.path.exists():
|
if not self.path.exists():
|
||||||
raise FileNotFoundError(self.path)
|
raise FileNotFoundError(self.path)
|
||||||
|
|
||||||
|
if _is_in_log_dir(self.path):
|
||||||
|
register_log_file(self.path)
|
||||||
|
register_reference(self.path)
|
||||||
|
|
||||||
self.hash = hashlib.sha1(str(self.path).encode()).hexdigest()[:10]
|
self.hash = hashlib.sha1(str(self.path).encode()).hexdigest()[:10]
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
@ -57,6 +71,8 @@ class StreamNumber:
|
|||||||
|
|
||||||
def stream(self, chunk_size=4096):
|
def stream(self, chunk_size=4096):
|
||||||
"""Yield chunks of digits as strings."""
|
"""Yield chunks of digits as strings."""
|
||||||
|
if _is_in_log_dir(self.path):
|
||||||
|
touch_log_file(self.path)
|
||||||
with open(self.path, "r", encoding="utf-8") as f:
|
with open(self.path, "r", encoding="utf-8") as f:
|
||||||
while chunk := f.read(chunk_size):
|
while chunk := f.read(chunk_size):
|
||||||
yield chunk.strip().replace(",", ".")
|
yield chunk.strip().replace(",", ".")
|
||||||
@ -67,4 +83,5 @@ class StreamNumber:
|
|||||||
stage_file = LOG_DIR / f"{self.hash}_stage_{stage}.bin"
|
stage_file = LOG_DIR / f"{self.hash}_stage_{stage}.bin"
|
||||||
with open(stage_file, "wb") as f:
|
with open(stage_file, "wb") as f:
|
||||||
f.write(data.encode())
|
f.write(data.encode())
|
||||||
|
register_log_file(stage_file)
|
||||||
return stage_file
|
return stage_file
|
||||||
|
|||||||
@ -0,0 +1,180 @@
|
|||||||
|
import sqlite3
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Iterable, List
|
||||||
|
|
||||||
|
# SQLite file that stores the ``logs`` and ``refs`` bookkeeping tables.
# The path is relative, so it is resolved against the current working directory.
LOG_DB_PATH = Path("./instance/mathstream_logs.sqlite")
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_paths(paths: Iterable[Path]) -> List[str]:
|
||||||
|
return [str(Path(p).resolve()) for p in paths]
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_db(reset: bool = False) -> None:
    """Create the bookkeeping database and its tables when they are missing.

    The parent directory of ``LOG_DB_PATH`` is created if needed, then the
    ``logs`` and ``refs`` tables are created with ``IF NOT EXISTS``.

    Args:
        reset: When true, every row of ``logs`` and ``refs`` is deleted after
            the tables have been ensured.
    """
    LOG_DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(LOG_DB_PATH)
    try:
        # ``with conn`` only scopes the transaction (commit on success,
        # rollback on error); it never closes the connection, so the
        # surrounding try/finally is what releases the handle.
        with conn:
            conn.execute(
                """
                CREATE TABLE IF NOT EXISTS logs (
                    path TEXT PRIMARY KEY,
                    created_at REAL,
                    last_access REAL,
                    access_count INTEGER DEFAULT 0
                )
                """
            )
            conn.execute(
                """
                CREATE TABLE IF NOT EXISTS refs (
                    path TEXT PRIMARY KEY,
                    ref_count INTEGER DEFAULT 0
                )
                """
            )
            if reset:
                conn.execute("DELETE FROM logs")
                conn.execute("DELETE FROM refs")
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
# Import-time side effect: make sure both tables exist and empty them, so
# bookkeeping rows written by an earlier process are discarded.
_ensure_db(reset=True)
|
||||||
|
|
||||||
|
|
||||||
|
def register_log_file(path: Path) -> None:
    """Make sure the bookkeeping database has a row for ``path``.

    A ``logs`` row (creation and last-access time set to now, zero accesses)
    and a ``refs`` row (zero references) are inserted. Rows that already
    exist are left untouched.

    Args:
        path: File to track; it is stored under its resolved path.
    """
    normalized = _normalize_paths([path])[0]
    _ensure_db()
    timestamp = datetime.now(timezone.utc).timestamp()
    conn = sqlite3.connect(LOG_DB_PATH)
    try:
        # ``with conn`` commits or rolls back but does not close the
        # connection; the finally clause releases it.
        with conn:
            conn.execute(
                """
                INSERT INTO logs (path, created_at, last_access, access_count)
                VALUES (?, ?, ?, 0)
                ON CONFLICT(path)
                DO NOTHING
                """,
                (normalized, timestamp, timestamp),
            )
            conn.execute(
                """
                INSERT INTO refs (path, ref_count)
                VALUES (?, 0)
                ON CONFLICT(path)
                DO NOTHING
                """,
                (normalized,),
            )
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def register_reference(path: Path) -> None:
    """Record one more reference to ``path`` and count it as an access.

    The ``refs`` row for the path is created with a count of 1, or
    incremented when it already exists. The ``logs`` row has its
    ``last_access`` refreshed and its ``access_count`` increased by exactly
    one, whether or not the row existed before this call.

    Args:
        path: File being referenced; it is stored under its resolved path.
    """
    normalized = _normalize_paths([path])[0]
    _ensure_db()
    timestamp = datetime.now(timezone.utc).timestamp()
    conn = sqlite3.connect(LOG_DB_PATH)
    try:
        # ``with conn`` commits or rolls back but does not close the
        # connection; the finally clause releases it.
        with conn:
            # Seed a missing row with zero accesses: the UPDATE below always
            # runs and adds the single access for this call. Seeding with 1
            # would count a first reference twice.
            conn.execute(
                """
                INSERT INTO logs (path, created_at, last_access, access_count)
                VALUES (?, ?, ?, 0)
                ON CONFLICT(path)
                DO NOTHING
                """,
                (normalized, timestamp, timestamp),
            )
            conn.execute(
                """
                INSERT INTO refs (path, ref_count)
                VALUES (?, 1)
                ON CONFLICT(path)
                DO UPDATE SET ref_count = ref_count + 1
                """,
                (normalized,),
            )
            conn.execute(
                """
                UPDATE logs
                SET last_access = ?, access_count = access_count + 1
                WHERE path = ?
                """,
                (timestamp, normalized),
            )
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def touch_log_file(path: Path) -> None:
    """Refresh access metadata for ``path`` when the file is streamed.

    An existing ``logs`` row gets ``last_access`` set to now and
    ``access_count`` incremented. A missing row is created with an access
    count of 1.

    Args:
        path: File that was accessed; it is stored under its resolved path.
    """
    normalized = _normalize_paths([path])[0]
    _ensure_db()
    timestamp = datetime.now(timezone.utc).timestamp()
    conn = sqlite3.connect(LOG_DB_PATH)
    try:
        # ``with conn`` commits or rolls back but does not close the
        # connection; the finally clause releases it.
        with conn:
            conn.execute(
                """
                INSERT INTO logs (path, created_at, last_access, access_count)
                VALUES (?, ?, ?, 1)
                ON CONFLICT(path)
                DO UPDATE SET
                    last_access = excluded.last_access,
                    access_count = logs.access_count + 1
                """,
                (normalized, timestamp, timestamp),
            )
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def wipe_log_records() -> None:
    """Delete every row from ``logs`` and ``refs`` (used after manual log purges)."""
    _ensure_db()
    conn = sqlite3.connect(LOG_DB_PATH)
    try:
        # ``with conn`` commits or rolls back but does not close the
        # connection; the finally clause releases it.
        with conn:
            conn.execute("DELETE FROM logs")
            conn.execute("DELETE FROM refs")
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _delete_records(paths: List[Path]) -> None:
|
||||||
|
if not paths:
|
||||||
|
return
|
||||||
|
normalized = [(str(p.resolve()),) for p in paths]
|
||||||
|
with sqlite3.connect(LOG_DB_PATH) as conn:
|
||||||
|
conn.executemany("DELETE FROM logs WHERE path = ?", normalized)
|
||||||
|
conn.executemany("DELETE FROM refs WHERE path = ?", normalized)
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def collect_garbage(score_threshold: float) -> List[Path]:
    """Delete tracked files whose staleness score reaches ``score_threshold``.

    For every row in ``logs`` the score is
    ``age / ((ref_count + 1) * (access_count + 1))``, where ``age`` is the
    number of seconds since the last recorded access (falling back to the
    creation time). Files scoring below the threshold are kept.

    Args:
        score_threshold: Minimum score at which a file is removed. Must be
            non-negative; ``0`` selects every file with a non-negative age.

    Returns:
        The paths that were removed from disk or were already missing. Their
        bookkeeping rows are deleted as well. Files that could not be
        deleted are left out and stay tracked.

    Raises:
        ValueError: If ``score_threshold`` is negative.
    """
    if score_threshold < 0:
        raise ValueError("score_threshold must be non-negative")
    _ensure_db()
    now = datetime.now(timezone.utc).timestamp()
    conn = sqlite3.connect(LOG_DB_PATH)
    try:
        rows = conn.execute(
            """
            SELECT
                l.path,
                COALESCE(l.created_at, ?),
                COALESCE(l.last_access, l.created_at, ?),
                COALESCE(l.access_count, 0),
                COALESCE(r.ref_count, 0)
            FROM logs l
            LEFT JOIN refs r ON l.path = r.path
            """,
            (now, now),
        ).fetchall()
    finally:
        # The sqlite3 connection context manager would not close the
        # connection, so release it explicitly once the rows are in memory.
        conn.close()

    removed: List[Path] = []
    for path_str, created_at, last_access, access_count, ref_count in rows:
        path = Path(path_str)
        age = now - (last_access or created_at or now)
        # The +1 terms keep the divisor positive for never-used files.
        score = age / ((ref_count + 1) * (access_count + 1))
        if score < score_threshold:
            continue
        try:
            # missing_ok avoids the exists()/unlink() race: a file that is
            # already gone still has its stale record dropped.
            path.unlink(missing_ok=True)
        except OSError:
            # Could not delete (permissions, directory, ...): keep tracking it.
            continue
        removed.append(path)

    _delete_records(removed)
    return removed
|
||||||
24
test.py
24
test.py
@ -13,6 +13,8 @@ from mathstream import (
|
|||||||
is_even,
|
is_even,
|
||||||
is_odd,
|
is_odd,
|
||||||
clear_logs,
|
clear_logs,
|
||||||
|
collect_garbage,
|
||||||
|
DivideByZeroError,
|
||||||
)
|
)
|
||||||
|
|
||||||
NUMBERS_DIR = Path(__file__).parent / "tests"
|
NUMBERS_DIR = Path(__file__).parent / "tests"
|
||||||
@ -54,6 +56,7 @@ def main() -> None:
|
|||||||
negative_divisor = write_number("neg_divisor", "-34567")
|
negative_divisor = write_number("neg_divisor", "-34567")
|
||||||
literal_even = StreamNumber(literal="2000")
|
literal_even = StreamNumber(literal="2000")
|
||||||
literal_odd = StreamNumber(literal="-3")
|
literal_odd = StreamNumber(literal="-3")
|
||||||
|
zero_literal = StreamNumber(literal="0")
|
||||||
|
|
||||||
# Showcase the core operations.
|
# Showcase the core operations.
|
||||||
total = add(big, small)
|
total = add(big, small)
|
||||||
@ -85,6 +88,27 @@ def main() -> None:
|
|||||||
check_bool("is_even(literal_even)", is_even(literal_even), True)
|
check_bool("is_even(literal_even)", is_even(literal_even), True)
|
||||||
check_bool("is_odd(literal_odd)", is_odd(literal_odd), True)
|
check_bool("is_odd(literal_odd)", is_odd(literal_odd), True)
|
||||||
|
|
||||||
|
# Custom exception coverage
|
||||||
|
try:
|
||||||
|
div(literal_even, zero_literal)
|
||||||
|
except DivideByZeroError:
|
||||||
|
print("div(literal_even, zero_literal) raised DivideByZeroError as expected")
|
||||||
|
else:
|
||||||
|
raise AssertionError("div by zero did not raise DivideByZeroError")
|
||||||
|
|
||||||
|
try:
|
||||||
|
mod(literal_even, zero_literal)
|
||||||
|
except DivideByZeroError:
|
||||||
|
print("mod(literal_even, zero_literal) raised DivideByZeroError as expected")
|
||||||
|
else:
|
||||||
|
raise AssertionError("mod by zero did not raise DivideByZeroError")
|
||||||
|
|
||||||
|
removed = collect_garbage(0)
|
||||||
|
print(f"collect_garbage removed {len(removed)} files")
|
||||||
|
check_bool("total exists post GC", total.path.exists(), False)
|
||||||
|
check_bool("literal_even exists post GC", literal_even.path.exists(), False)
|
||||||
|
check_bool("huge operand persists", big.path.exists(), True)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user