#!/usr/bin/env python3
"""Generate a category overview Markdown file from per-item YAML frontmatter.

Reads `scripts/overview_config.yml`, picks the block named by `--category`,
walks `source_dir/*.md` (excluding `output_file`), validates each file's
frontmatter, and writes a grouped+sorted table to `output_file`.

Exits non-zero on any schema violation. Deterministic, offline, stdlib + PyYAML.
"""

from __future__ import annotations

import argparse
import re
import sys
from pathlib import Path

import yaml

REPO_ROOT = Path(__file__).resolve().parent.parent
CONFIG_PATH = REPO_ROOT / "scripts" / "overview_config.yml"

# A frontmatter block is a '---' fence at the very start of the text, the YAML
# payload (group 1), and a closing '---' fence. The closing fence may be
# followed by a newline OR by the end of the text, so a file that consists of
# frontmatter only and lacks a trailing newline is still recognised.
FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*(?:\n|\Z)", re.DOTALL)

# Shown at the bottom of every error report so a newcomer knows where to look.
GUIDE_URL = "https://docs.makerfloss.eu/guides/editing-hardware-docs/"


class SchemaError(Exception):
    """Raised when a file's frontmatter is unparseable or violates the schema."""


def _allowed_hint(field: str, enums: dict) -> str:
    """Return an ' Allowed values: ...' sentence for *field*, or '' if unconstrained.

    The result starts with a space so it can be appended directly to a message.
    """
    allowed = enums.get(field)
    if not allowed:
        return ""
    return f" Allowed values: {', '.join(map(str, allowed))}."


def _example_value(field: str, enums: dict) -> str:
    """Return a sample value for *field*: its first allowed value, else '...'."""
    allowed = enums.get(field)
    if not allowed:
        return "..."
    return str(allowed[0])
def report_errors(errors: list[str], category: str) -> None: """Print a collected list of problems with orientation for newcomers.""" print( f"\ngen_overview: found {len(errors)} problem(s) in the {category} docs:", file=sys.stderr, ) for err in errors: print(f" ✗ {err}", file=sys.stderr) print( "\nFix the field(s) named above, then run 'make docs-index' again.\n" f"Guide: {GUIDE_URL}", file=sys.stderr, ) def parse_frontmatter(path: Path) -> dict | None: text = path.read_text(encoding="utf-8") m = FRONTMATTER_RE.match(text) if not m: return None try: data = yaml.safe_load(m.group(1)) except yaml.YAMLError as e: raise SchemaError(f"{path}: invalid YAML frontmatter: {e}") from e if not isinstance(data, dict): raise SchemaError(f"{path}: frontmatter is not a mapping") return data def validate(path: Path, fm: dict, cfg: dict) -> None: enums = cfg.get("enums", {}) name = path.name for field in cfg["required_fields"]: if field not in fm: raise SchemaError( f"{name}: missing required field '{field}'. Add a line like " f"'{field}: {_example_value(field, enums)}' to the frontmatter." f"{_allowed_hint(field, enums)}" ) for field, allowed in enums.items(): if field in fm and fm[field] not in allowed: raise SchemaError( f"{name}: {field} {fm[field]!r} is not allowed. " f"Use one of: {', '.join(map(str, allowed))}." ) key_field = cfg.get("key_field", "hostname") if key_field not in fm: raise SchemaError( f"{name}: missing the '{key_field}' field (the device's id). It must " f"match the filename, e.g. '{key_field}: {path.stem}'." ) stem = path.stem value = fm[key_field] if stem != value: raise SchemaError( f"{name}: '{key_field}: {value}' does not match the filename '{name}'. " f"Rename the file to '{value}.md', or set {key_field} to '{stem}'." 
) def fmt_cpu(fm: dict) -> str: model = fm.get("cpu") or "" cores = fm.get("cpu_cores") threads = fm.get("cpu_threads") suffix = "" if isinstance(cores, int) and isinstance(threads, int) and threads != cores: suffix = f" · {cores}c/{threads}t" elif isinstance(cores, int): suffix = f" · {cores}c" return (str(model) + suffix).strip() def fmt_ram(fm: dict) -> str: n = fm.get("ram_gb") if isinstance(n, int): return f"{n} GB" if isinstance(n, str) and n: return n return "" def _fmt_size_gb(n: int) -> str: if n >= 1000 and n % 1000 == 0: return f"{n // 1000} TB" if n >= 1000: return f"{n / 1000:.1f} TB" return f"{n} GB" def fmt_storage(fm: dict) -> str: drives = fm.get("storage") if isinstance(drives, list) and drives: parts = [] for d in drives: gb = d.get("gb") t = (d.get("type") or "").upper() if isinstance(gb, int): parts.append(f"{_fmt_size_gb(gb)} {t}".strip()) elif t: parts.append(t) return " + ".join(parts) if isinstance(drives, str) and drives: return drives n = fm.get("storage_gb") t = fm.get("storage_type", "").upper() if fm.get("storage_type") else "" if not isinstance(n, int): return t # type alone if no capacity return f"{_fmt_size_gb(n)} {t}".strip() def fmt_nic(fm: dict) -> str: g = fm.get("nic_gbps") if g is None or g == "": return "" if isinstance(g, str): return g def one(v: float | int) -> str: if isinstance(v, float) and not v.is_integer(): return f"{v}" return f"{int(v)}" if isinstance(g, list): if not g: return "" return "/".join(one(v) for v in g) + " GbE" return f"{one(g)} GbE" def cell(fm: dict, col: dict) -> str: kind = col.get("kind") if kind == "key-link": v = fm[col["field"]] return f"[{v}]({v}.md)" if kind == "url-link": u = fm.get(col["field"], "") if not u: return "" label = u.removeprefix("https://").removeprefix("http://") return f"[{label}]({u})" if kind == "cpu": return fmt_cpu(fm) if kind == "ram": return fmt_ram(fm) if kind == "storage": return fmt_storage(fm) if kind == "nic": return fmt_nic(fm) value = fm.get(col["field"], "") 
return "" if value is None else str(value) def render(cfg: dict, items: list[dict]) -> str: columns = cfg["columns"] group_by = cfg.get("group_by") sort_by = cfg.get("sort_by", "hostname") group_titles = cfg.get("group_titles", {}) if group_by: groups: dict[str, list[dict]] = {} for fm in items: groups.setdefault(fm.get(group_by, ""), []).append(fm) ordered = sorted(groups.items()) else: ordered = [("", items)] lines: list[str] = [] lines.append(f"# {cfg['title']}") lines.append("") lines.append( f"_Auto-generated from `{cfg['source_dir']}/*.md` — do not edit by hand. " f"Run `make docs-index` after changing a file._" ) lines.append("") for group_key, rows in ordered: rows.sort(key=lambda r: r.get(sort_by, "")) if group_by: title = group_titles.get(group_key, group_key.title() + "s") lines.append(f"## {title}") lines.append("") header = "| " + " | ".join(c["header"] for c in columns) + " |" sep = "|" + "|".join("---" for _ in columns) + "|" lines.append(header) lines.append(sep) for fm in rows: lines.append("| " + " | ".join(cell(fm, c) for c in columns) + " |") lines.append("") return "\n".join(lines).rstrip() + "\n" def main() -> int: parser = argparse.ArgumentParser(description=__doc__.splitlines()[0]) parser.add_argument("--category", required=True, help="Category key from overview_config.yml") args = parser.parse_args() config_all = yaml.safe_load(CONFIG_PATH.read_text(encoding="utf-8")) if args.category not in config_all: print(f"ERROR: category {args.category!r} not in {CONFIG_PATH}", file=sys.stderr) return 2 cfg = config_all[args.category] source_dir = REPO_ROOT / cfg["source_dir"] output_file = REPO_ROOT / cfg["output_file"] output_abs = output_file.resolve() items: list[dict] = [] errors: list[str] = [] for path in sorted(source_dir.glob("*.md")): if path.resolve() == output_abs: continue try: fm = parse_frontmatter(path) except SchemaError as e: errors.append(str(e)) continue if fm is None: print( f"WARNING: {path.name}: no '---' frontmatter block — 
skipping " f"(it will not appear in the {args.category} index).", file=sys.stderr, ) continue try: validate(path, fm, cfg) except SchemaError as e: errors.append(str(e)) continue items.append(fm) if errors: report_errors(errors, args.category) return 1 output_file.parent.mkdir(parents=True, exist_ok=True) tmp = output_file.with_suffix(output_file.suffix + ".tmp") tmp.write_text(render(cfg, items), encoding="utf-8") tmp.replace(output_file) print(f"Wrote {output_file.relative_to(REPO_ROOT)} ({len(items)} item(s))") return 0 if __name__ == "__main__": sys.exit(main())