def _author_history_entries(rankings) -> dict:
    """Build the per-author snapshot mapping, keyed by author name.

    Rows without a non-empty ``name`` are skipped. Each value is a dict of
    compact stat keys copied from the ranking row, with 0 as the fallback
    for any missing field.
    """
    entries = {}
    for row in rankings:
        name = row.get("name", "")
        if not name:
            continue
        entries[name] = {
            "rank": row.get("rank", 0),
            "score": row.get("combined_score", 0),
            "as": row.get("artifact_score", 0),
            "aes": row.get("ae_score", 0),
            "tp": row.get("total_papers", 0),
            "ta": row.get("artifact_count", 0),
            "ar": row.get("artifact_pct", 0),
            "rr": row.get("repro_pct", 0),
        }
    return entries


def _institution_history_entries(inst_rankings) -> dict:
    """Build the per-institution snapshot mapping, keyed by affiliation.

    Rows without a non-empty ``affiliation`` are skipped. The rank is the
    1-based position of the row in ``inst_rankings`` (skipped rows still
    consume a position), and the repro rate is derived here from
    ``badges_reproducible`` / ``artifact_count``.
    """
    entries = {}
    for position, row in enumerate(inst_rankings, start=1):
        name = row.get("affiliation", "")
        if not name:
            continue
        # Calculate repro rate for institution. ``or 0`` also covers keys
        # that are present but null, which would otherwise raise TypeError
        # in the comparison/division below.
        artifact_count = row.get("artifact_count") or 0
        reproducible = row.get("badges_reproducible") or 0
        repro_rate = 0
        if artifact_count > 0:
            repro_rate = round((reproducible / artifact_count) * 100, 1)
        entries[name] = {
            "rank": position,
            "score": row.get("combined_score", 0),
            "as": row.get("artifact_score", 0),
            "aes": row.get("ae_score", 0),
            "tp": row.get("total_papers", 0),
            "ta": row.get("artifact_count", 0),
            "ar": row.get("artifact_pct", 0),
            "rr": repro_rate,
            "r": row.get("author_count", 0),
        }
    return entries


def _refresh_ranking_history(label: str, rankings_path: Path, hist_path: Path, build_entries, date, force: bool) -> None:
    """Add the snapshot for ``date`` to one ranking-history file.

    Args:
        label: Prefix used in log messages ("Author" or "Institution").
        rankings_path: JSON file holding the current rankings.
        hist_path: JSON history file to update.
        build_entries: Callable turning the loaded rankings into the
            ``{name: stats}`` mapping stored in the snapshot. It is only
            invoked when a snapshot is actually going to be written.
        date: Snapshot date as returned by ``_snapshot_date()``.
        force: When true, proceed even if ``_has_snapshot`` reports that a
            snapshot for ``date`` is already present.
    """
    # Rankings are loaded before the history, matching the original order.
    rankings = _load_json(rankings_path)
    history: list = _load_json(hist_path)  # type: ignore[assignment]

    if _has_snapshot(history, date) and not force:
        logger.warning(
            f" {label} ranking history: snapshot for {date} already exists, skipping (use --force to overwrite)"
        )
        return

    entries = build_entries(rankings)
    history = _update_history(history, entries, date)
    save_json(hist_path, history, compact=True)
    logger.info(f" {label} ranking history: {len(history)} snapshots, {len(entries)} entries for {date}")
    # Report the on-disk size only after the file has really been written.
    logger.info(f" Wrote {hist_path} ({hist_path.stat().st_size / 1024:.0f}KB)")


def generate_ranking_history(data_dir: str, force: bool = False) -> None:
    """Generate/update author and institution ranking history.

    Reads the current author and institution rankings from
    ``<data_dir>/assets/data`` and records a snapshot for the current
    snapshot date in the matching history file. A history that already has
    a snapshot for that date is left alone (with a warning) unless
    ``force`` is true.

    Args:
        data_dir: Root directory containing ``assets/data``.
        force: Update the history even if a snapshot for the date exists.
    """
    date = _snapshot_date()
    assets_dir = Path(data_dir) / "assets/data"

    # ── Author rankings ──────────────────────────────────────────────────
    _refresh_ranking_history(
        "Author",
        assets_dir / "combined_rankings.json",
        assets_dir / "ranking_history.json",
        _author_history_entries,
        date,
        force,
    )

    # ── Institution rankings ─────────────────────────────────────────────
    _refresh_ranking_history(
        "Institution",
        assets_dir / "institution_rankings.json",
        assets_dir / "institution_ranking_history.json",
        _institution_history_entries,
        date,
        force,
    )