MakerFLOSS/scripts/gen_overview.py

289 lines
8.8 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""Generate a category overview Markdown file from per-item YAML frontmatter.
Reads `scripts/overview_config.yml`, picks the block named by `--category`,
walks `source_dir/*.md` (excluding `output_file`), validates each file's
frontmatter, and writes a grouped+sorted table to `output_file`.
Exits non-zero on any schema violation. Deterministic, offline, stdlib + PyYAML.
"""
from __future__ import annotations
import argparse
import re
import sys
from pathlib import Path
import yaml
# This script lives in <repo>/scripts/, so the repository root is two levels up.
REPO_ROOT = Path(__file__).resolve().parent.parent
CONFIG_PATH = REPO_ROOT / "scripts" / "overview_config.yml"
# Matches a leading '---' fence, the YAML body (group 1) and the closing '---'
# fence. The closing fence may be followed by a newline OR by end-of-file, so a
# file that consists only of frontmatter and lacks a final newline still matches.
FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*(?:\n|\Z)", re.DOTALL)
# Shown at the bottom of every error report so a newcomer knows where to look.
GUIDE_URL = "https://docs.makerfloss.eu/guides/editing-hardware-docs/"
class SchemaError(Exception):
    """Raised when a file's frontmatter cannot be parsed or breaks the schema."""
def _allowed_hint(field: str, enums: dict) -> str:
    """Return an ' Allowed values: ...' sentence for *field*, or '' if it has no enum."""
    choices = enums.get(field)
    if not choices:
        return ""
    listing = ", ".join(str(choice) for choice in choices)
    return f" Allowed values: {listing}."
def _example_value(field: str, enums: dict) -> str:
    """Return a sample value for *field*: its first enum entry, or '...' if none."""
    choices = enums.get(field)
    if choices:
        return str(choices[0])
    return "..."
def report_errors(errors: list[str], category: str) -> None:
    """Write all collected problems to stderr, framed with guidance for newcomers.

    Args:
        errors: One message per problem; each is printed on its own line.
        category: Category name, used in the heading of the report.
    """
    report = [f"\ngen_overview: found {len(errors)} problem(s) in the {category} docs:"]
    report.extend(f"{problem}" for problem in errors)
    report.append(
        "\nFix the field(s) named above, then run 'make docs-index' again.\n"
        f"Guide: {GUIDE_URL}"
    )
    # One write instead of several; the bytes sent to stderr are unchanged.
    print("\n".join(report), file=sys.stderr)
def parse_frontmatter(path: Path) -> dict | None:
    """Read *path* and return its YAML frontmatter as a dict.

    Returns:
        The parsed mapping, or None when the file text does not match
        ``FRONTMATTER_RE`` (i.e. it has no leading frontmatter block).

    Raises:
        SchemaError: If the block is not valid YAML or is not a mapping.
    """
    match = FRONTMATTER_RE.match(path.read_text(encoding="utf-8"))
    if match is None:
        return None
    try:
        loaded = yaml.safe_load(match.group(1))
    except yaml.YAMLError as exc:
        raise SchemaError(f"{path}: invalid YAML frontmatter: {exc}") from exc
    if isinstance(loaded, dict):
        return loaded
    raise SchemaError(f"{path}: frontmatter is not a mapping")
def validate(path: Path, fm: dict, cfg: dict) -> None:
    """Check one file's frontmatter against the category schema.

    Checks run in this order and stop at the first failure: every entry of
    ``cfg["required_fields"]`` is present; every field listed in
    ``cfg["enums"]`` that appears in *fm* holds an allowed value; the key
    field (``cfg["key_field"]``, default ``hostname``) is present and equals
    the filename without its extension.

    Raises:
        SchemaError: Describing the first violation found.
    """
    filename = path.name
    enums = cfg.get("enums", {})

    for required in cfg["required_fields"]:
        if required in fm:
            continue
        raise SchemaError(
            f"{filename}: missing required field '{required}'. Add a line like "
            f"'{required}: {_example_value(required, enums)}' to the frontmatter."
            f"{_allowed_hint(required, enums)}"
        )

    for enum_field, choices in enums.items():
        if enum_field not in fm:
            continue
        given = fm[enum_field]
        if given in choices:
            continue
        raise SchemaError(
            f"{filename}: {enum_field} {given!r} is not allowed. "
            f"Use one of: {', '.join(map(str, choices))}."
        )

    key_field = cfg.get("key_field", "hostname")
    expected = path.stem
    if key_field not in fm:
        raise SchemaError(
            f"{filename}: missing the '{key_field}' field (the device's id). It must "
            f"match the filename, e.g. '{key_field}: {expected}'."
        )
    actual = fm[key_field]
    if actual == expected:
        return
    raise SchemaError(
        f"{filename}: '{key_field}: {actual}' does not match the filename '{filename}'. "
        f"Rename the file to '{actual}.md', or set {key_field} to '{expected}'."
    )
def fmt_cpu(fm: dict) -> str:
    """Format the CPU cell: model name plus an optional core/thread suffix.

    The suffix is ' · <cores>c/<threads>t' when both counts are ints and
    differ, ' · <cores>c' when only the core count is usable (or both are
    equal), and absent when ``cpu_cores`` is not an int.
    """
    cores = fm.get("cpu_cores")
    threads = fm.get("cpu_threads")
    pieces = [str(fm.get("cpu") or "")]
    if isinstance(cores, int):
        if isinstance(threads, int) and threads != cores:
            pieces.append(f" · {cores}c/{threads}t")
        else:
            pieces.append(f" · {cores}c")
    return "".join(pieces).strip()
def fmt_ram(fm: dict) -> str:
    """Format the RAM cell from ``ram_gb``.

    An int becomes '<n> GB', a non-empty string is passed through unchanged,
    and anything else yields an empty cell.
    """
    amount = fm.get("ram_gb")
    if isinstance(amount, int):
        return f"{amount} GB"
    is_text = isinstance(amount, str) and bool(amount)
    return amount if is_text else ""
def _fmt_size_gb(n: int) -> str:
    """Render a capacity given in GB, switching to TB from 1000 GB upward.

    Exact multiples of 1000 print as whole TB; other values of 1000 or more
    print with one decimal place.
    """
    if n < 1000:
        return f"{n} GB"
    whole_tb, leftover_gb = divmod(n, 1000)
    if leftover_gb == 0:
        return f"{whole_tb} TB"
    return f"{n / 1000:.1f} TB"
def fmt_storage(fm: dict) -> str:
    """Format the storage cell.

    Supported frontmatter shapes, checked in this order:

    * ``storage`` as a non-empty list: each mapping entry contributes
      '<size> <TYPE>' (or just the type when ``gb`` is not an int); entries
      are joined with ' + '. Entries that are not mappings are passed through
      as text instead of crashing, and null entries are ignored.
    * ``storage`` as a non-empty string: returned unchanged.
    * legacy ``storage_gb`` / ``storage_type`` pair: '<size> <TYPE>', or the
      type alone when no int capacity is given.

    Returns:
        The cell text, possibly empty.
    """
    drives = fm.get("storage")
    if isinstance(drives, list) and drives:
        parts = []
        for drive in drives:
            if not isinstance(drive, dict):
                # Free-form entry such as '- 512 GB SSD': keep it verbatim
                # rather than failing on the missing .get().
                text = "" if drive is None else str(drive).strip()
                if text:
                    parts.append(text)
                continue
            gb = drive.get("gb")
            # str() guards against a non-string type value in the YAML.
            kind = str(drive.get("type") or "").upper()
            if isinstance(gb, int):
                parts.append(f"{_fmt_size_gb(gb)} {kind}".strip())
            elif kind:
                parts.append(kind)
        return " + ".join(parts)
    if isinstance(drives, str) and drives:
        return drives

    capacity = fm.get("storage_gb")
    kind = str(fm.get("storage_type") or "").upper()
    if not isinstance(capacity, int):
        return kind  # type alone if no capacity
    return f"{_fmt_size_gb(capacity)} {kind}".strip()
def fmt_nic(fm: dict) -> str:
    """Format the network cell from ``nic_gbps``.

    Missing or empty values give an empty cell, strings pass through
    unchanged, a number becomes '<n> GbE', and a non-empty list of numbers
    becomes '<a>/<b>/... GbE'. Whole-valued floats print without a decimal.
    """
    speed = fm.get("nic_gbps")
    if speed is None or speed == "":
        return ""
    if isinstance(speed, str):
        return speed

    def as_text(value: float) -> str:
        fractional = isinstance(value, float) and not value.is_integer()
        return f"{value}" if fractional else f"{int(value)}"

    if not isinstance(speed, list):
        return f"{as_text(speed)} GbE"
    if not speed:
        return ""
    return "/".join(map(as_text, speed)) + " GbE"
def cell(fm: dict, col: dict) -> str:
    """Render one table cell for item *fm* according to column spec *col*.

    ``col["kind"]`` selects the renderer: 'key-link' links to '<value>.md',
    'url-link' links to the URL with the http(s) scheme removed from the
    label, and 'cpu' / 'ram' / 'storage' / 'nic' delegate to the matching
    ``fmt_*`` helper. Any other kind prints ``fm[col["field"]]`` as text,
    with a missing or None value shown as an empty cell.
    """
    kind = col.get("kind")

    if kind == "key-link":
        target = fm[col["field"]]
        return f"[{target}]({target}.md)"

    if kind == "url-link":
        url = fm.get(col["field"], "")
        if not url:
            return ""
        label = url
        for scheme in ("https://", "http://"):
            label = label.removeprefix(scheme)
        return f"[{label}]({url})"

    if kind in ("cpu", "ram", "storage", "nic"):
        formatter = {
            "cpu": fmt_cpu,
            "ram": fmt_ram,
            "storage": fmt_storage,
            "nic": fmt_nic,
        }[kind]
        return formatter(fm)

    raw = fm.get(col["field"], "")
    if raw is None:
        return ""
    return str(raw)
def render(cfg: dict, items: list[dict]) -> str:
    """Build the overview Markdown document for one category.

    Args:
        cfg: Category config. Reads ``title``, ``source_dir`` and ``columns``
            (required) plus ``group_by``, ``sort_by`` (default ``hostname``)
            and ``group_titles`` (optional).
        items: Frontmatter dicts, one per table row. The list is not modified.

    Returns:
        The document text ending in exactly one newline: a heading, an
        "auto-generated" notice, then one table per group (a single table
        when ``group_by`` is unset), with rows sorted by ``sort_by``.
    """
    columns = cfg["columns"]
    group_by = cfg.get("group_by")
    sort_by = cfg.get("sort_by", "hostname")
    group_titles = cfg.get("group_titles", {})

    if group_by:
        groups: dict = {}
        for fm in items:
            groups.setdefault(fm.get(group_by, ""), []).append(fm)
        ordered = sorted(groups.items(), key=lambda pair: pair[0])
    else:
        ordered = [("", items)]

    lines: list[str] = []
    lines.append(f"# {cfg['title']}")
    lines.append("")
    lines.append(
        f"_Auto-generated from `{cfg['source_dir']}/*.md` — do not edit by hand. "
        f"Run `make docs-index` after changing a file._"
    )
    lines.append("")

    for group_key, rows in ordered:
        if group_by:
            # Look the title up first and only then derive a default, so a
            # non-string key (e.g. a YAML int) with a configured title no
            # longer fails on .title().
            title = group_titles.get(group_key)
            if title is None:
                title = str(group_key).title() + "s"
            lines.append(f"## {title}")
            lines.append("")
        header = "| " + " | ".join(c["header"] for c in columns) + " |"
        sep = "|" + "|".join("---" for _ in columns) + "|"
        lines.append(header)
        lines.append(sep)
        # sorted() instead of list.sort(): without grouping, `rows` IS the
        # caller's `items` list and must not be reordered behind its back.
        for fm in sorted(rows, key=lambda r: r.get(sort_by, "")):
            lines.append("| " + " | ".join(cell(fm, c) for c in columns) + " |")
        lines.append("")

    return "\n".join(lines).rstrip() + "\n"
def main() -> int:
    """Command-line entry point: build the overview file for one category.

    Loads the config, parses and validates the frontmatter of every ``*.md``
    file in the category's source directory (the output file itself is
    skipped; files without frontmatter are skipped with a warning), then
    writes the rendered overview via a temporary file that replaces the
    target.

    Returns:
        0 on success, 1 if any file failed parsing or validation (nothing is
        written in that case), 2 if ``--category`` is not in the config.
    """
    cli = argparse.ArgumentParser(description=__doc__.splitlines()[0])
    cli.add_argument("--category", required=True, help="Category key from overview_config.yml")
    category = cli.parse_args().category

    all_configs = yaml.safe_load(CONFIG_PATH.read_text(encoding="utf-8"))
    if category not in all_configs:
        print(f"ERROR: category {category!r} not in {CONFIG_PATH}", file=sys.stderr)
        return 2

    cfg = all_configs[category]
    source_dir = REPO_ROOT / cfg["source_dir"]
    output_file = REPO_ROOT / cfg["output_file"]
    resolved_output = output_file.resolve()

    items: list[dict] = []
    problems: list[str] = []
    for path in sorted(source_dir.glob("*.md")):
        # The generated overview lives next to its inputs; never index it.
        if path.resolve() == resolved_output:
            continue
        try:
            fm = parse_frontmatter(path)
            if fm is None:
                print(
                    f"WARNING: {path.name}: no '---' frontmatter block — skipping "
                    f"(it will not appear in the {category} index).",
                    file=sys.stderr,
                )
                continue
            validate(path, fm, cfg)
        except SchemaError as exc:
            # Collect instead of aborting so one run reports every bad file.
            problems.append(str(exc))
        else:
            items.append(fm)

    if problems:
        report_errors(problems, category)
        return 1

    output_file.parent.mkdir(parents=True, exist_ok=True)
    # Write to a sibling temp file and swap it in, so a failed run does not
    # leave a half-written overview behind.
    scratch = output_file.with_suffix(output_file.suffix + ".tmp")
    scratch.write_text(render(cfg, items), encoding="utf-8")
    scratch.replace(output_file)
    print(f"Wrote {output_file.relative_to(REPO_ROOT)} ({len(items)} item(s))")
    return 0
# Run only when executed as a script; main()'s return value is the exit status.
if __name__ == "__main__":
    raise SystemExit(main())