#!/usr/bin/env python3 """Generate a category overview Markdown file from per-item YAML frontmatter. Reads `scripts/overview_config.yml`, picks the block named by `--category`, walks `source_dir/*.md` (excluding `output_file`), validates each file's frontmatter, and writes a grouped+sorted table to `output_file`. Exits non-zero on any schema violation. Deterministic, offline, stdlib + PyYAML. """ from __future__ import annotations import argparse import re import sys from pathlib import Path import yaml REPO_ROOT = Path(__file__).resolve().parent.parent CONFIG_PATH = REPO_ROOT / "scripts" / "overview_config.yml" FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL) class SchemaError(Exception): pass def parse_frontmatter(path: Path) -> dict | None: text = path.read_text(encoding="utf-8") m = FRONTMATTER_RE.match(text) if not m: return None try: data = yaml.safe_load(m.group(1)) except yaml.YAMLError as e: raise SchemaError(f"{path}: invalid YAML frontmatter: {e}") from e if not isinstance(data, dict): raise SchemaError(f"{path}: frontmatter is not a mapping") return data def validate(path: Path, fm: dict, cfg: dict) -> None: for field in cfg["required_fields"]: if field not in fm: raise SchemaError(f"{path}: missing required field '{field}'") for field, allowed in cfg.get("enums", {}).items(): if field in fm and fm[field] not in allowed: raise SchemaError( f"{path}: {field}={fm[field]!r} not in {allowed}" ) key_field = cfg.get("key_field", "hostname") if key_field not in fm: raise SchemaError( f"{path}: missing key field {key_field!r}" ) stem = path.stem value = fm[key_field] if stem != value: raise SchemaError( f"{path}: filename stem {stem!r} != {key_field} {value!r}" ) def fmt_cpu(fm: dict) -> str: model = fm.get("cpu", "") cores = fm.get("cpu_cores") threads = fm.get("cpu_threads") suffix = "" if cores and threads and threads != cores: suffix = f" · {cores}c/{threads}t" elif cores: suffix = f" · {cores}c" return (model + suffix).strip() def fmt_ram(fm: dict) -> str: n = fm.get("ram_gb") return f"{n} GB" if isinstance(n, int) else "" def _fmt_size_gb(n: int) -> str: if n >= 1000 and n % 1000 == 0: return f"{n // 1000} TB" if n >= 1000: return f"{n / 1000:.1f} TB" return f"{n} GB" def fmt_storage(fm: dict) -> str: drives = fm.get("storage") if isinstance(drives, list) and drives: parts = [] for d in drives: gb = d.get("gb") t = (d.get("type") or "").upper() if isinstance(gb, int): parts.append(f"{_fmt_size_gb(gb)} {t}".strip()) elif t: parts.append(t) return " + ".join(parts) n = fm.get("storage_gb") t = fm.get("storage_type", "").upper() if fm.get("storage_type") else "" if not isinstance(n, int): return t # type alone if no capacity return f"{_fmt_size_gb(n)} {t}".strip() def fmt_nic(fm: dict) -> str: g = fm.get("nic_gbps") if g is None: return "" def one(v: float | int) -> str: if isinstance(v, float) and not v.is_integer(): return f"{v}" return f"{int(v)}" if isinstance(g, list): if not g: return "" return "/".join(one(v) for v in g) + " GbE" return f"{one(g)} GbE" def cell(fm: dict, col: dict) -> str: kind = col.get("kind") if kind == "key-link": v = fm[col["field"]] return f"[{v}]({v}.md)" if kind == "url-link": u = fm.get(col["field"], "") if not u: return "" label = u.removeprefix("https://").removeprefix("http://") return f"[{label}]({u})" if kind == "cpu": return fmt_cpu(fm) if kind == "ram": return fmt_ram(fm) if kind == "storage": return fmt_storage(fm) if kind == "nic": return fmt_nic(fm) value = fm.get(col["field"], "") return "" if value is None else str(value) def render(cfg: dict, items: list[dict]) -> str: columns = cfg["columns"] group_by = cfg.get("group_by") sort_by = cfg.get("sort_by", "hostname") group_titles = cfg.get("group_titles", {}) if group_by: groups: dict[str, list[dict]] = {} for fm in items: groups.setdefault(fm.get(group_by, ""), []).append(fm) ordered = sorted(groups.items()) else: ordered = [("", items)] lines: list[str] = [] lines.append(f"# {cfg['title']}") lines.append("") lines.append( f"_Auto-generated from `{cfg['source_dir']}/*.md` — do not edit by hand. " f"Run `make docs-index` after changing a file._" ) lines.append("") for group_key, rows in ordered: rows.sort(key=lambda r: r.get(sort_by, "")) if group_by: title = group_titles.get(group_key, group_key.title() + "s") lines.append(f"## {title}") lines.append("") header = "| " + " | ".join(c["header"] for c in columns) + " |" sep = "|" + "|".join("---" for _ in columns) + "|" lines.append(header) lines.append(sep) for fm in rows: lines.append("| " + " | ".join(cell(fm, c) for c in columns) + " |") lines.append("") return "\n".join(lines).rstrip() + "\n" def main() -> int: parser = argparse.ArgumentParser(description=__doc__.splitlines()[0]) parser.add_argument("--category", required=True, help="Category key from overview_config.yml") args = parser.parse_args() config_all = yaml.safe_load(CONFIG_PATH.read_text(encoding="utf-8")) if args.category not in config_all: print(f"ERROR: category {args.category!r} not in {CONFIG_PATH}", file=sys.stderr) return 2 cfg = config_all[args.category] source_dir = REPO_ROOT / cfg["source_dir"] output_file = REPO_ROOT / cfg["output_file"] output_abs = output_file.resolve() items: list[dict] = [] errors: list[str] = [] for path in sorted(source_dir.glob("*.md")): if path.resolve() == output_abs: continue try: fm = parse_frontmatter(path) except SchemaError as e: errors.append(str(e)) continue if fm is None: print(f"WARNING: {path}: no YAML frontmatter, skipping", file=sys.stderr) continue try: validate(path, fm, cfg) except SchemaError as e: errors.append(str(e)) continue items.append(fm) if errors: for err in errors: print(f"ERROR: {err}", file=sys.stderr) return 1 output_file.parent.mkdir(parents=True, exist_ok=True) tmp = output_file.with_suffix(output_file.suffix + ".tmp") tmp.write_text(render(cfg, items), encoding="utf-8") tmp.replace(output_file) print(f"Wrote {output_file.relative_to(REPO_ROOT)} ({len(items)} item(s))") return 0 if __name__ == "__main__": sys.exit(main())