def generate_ranking_history(data_dir: str, force: bool = False) -> None:
    """Generate/update author and institution ranking history.

    For authors and then for institutions, this loads the current rankings
    file and the matching history file from ``<data_dir>/assets/data``,
    builds a compact per-name entry dict, passes it to ``_update_history``
    together with the snapshot date, and rewrites the history file as
    compact UTF-8 JSON.

    Args:
        data_dir: Directory that contains the ``assets/data`` tree.
        force: When false, a history for which ``_has_snapshot`` already
            reports the current snapshot date is left untouched (a warning
            is logged instead). When true, the entries are rebuilt and handed
            to ``_update_history`` regardless.
    """
    date = _snapshot_date()

    # ── Author rankings ──────────────────────────────────────────────────
    cr_path = os.path.join(data_dir, "assets/data/combined_rankings.json")
    author_hist_path = os.path.join(data_dir, "assets/data/ranking_history.json")
    rankings = _load_json(cr_path)
    author_history: list = _load_json(author_hist_path)  # type: ignore[assignment]
    if _has_snapshot(author_history, date) and not force:
        logger.warning(
            f" Author ranking history: snapshot for {date} already exists, skipping (use --force to overwrite)"
        )
    else:
        author_entries = _author_history_entries(rankings)
        author_history = _update_history(author_history, author_entries, date)
        _write_ranking_history(author_hist_path, author_history)
        logger.info(
            f" Author ranking history: {len(author_history)} snapshots, {len(author_entries)} entries for {date}"
        )
        logger.info(f" Wrote {author_hist_path} ({os.path.getsize(author_hist_path) / 1024:.0f}KB)")

    # ── Institution rankings ─────────────────────────────────────────────
    ir_path = os.path.join(data_dir, "assets/data/institution_rankings.json")
    inst_hist_path = os.path.join(data_dir, "assets/data/institution_ranking_history.json")
    inst_rankings = _load_json(ir_path)
    inst_history: list = _load_json(inst_hist_path)  # type: ignore[assignment]
    if _has_snapshot(inst_history, date) and not force:
        logger.warning(
            f" Institution ranking history: snapshot for {date} already exists, skipping (use --force to overwrite)"
        )
    else:
        inst_entries = _institution_history_entries(inst_rankings)
        inst_history = _update_history(inst_history, inst_entries, date)
        _write_ranking_history(inst_hist_path, inst_history)
        logger.info(
            f" Institution ranking history: {len(inst_history)} snapshots, {len(inst_entries)} entries for {date}"
        )
        logger.info(f" Wrote {inst_hist_path} ({os.path.getsize(inst_hist_path) / 1024:.0f}KB)")


def _author_history_entries(rankings) -> dict:
    """Map each named author ranking record to its compact history entry.

    Records with a missing or empty ``"name"`` are skipped. If a name occurs
    more than once, the last record wins. Missing numeric fields default to 0.
    """
    entries = {}
    for r in rankings:
        name = r.get("name", "")
        if not name:
            continue
        # Short keys keep the history file small.
        entries[name] = {
            "rank": r.get("rank", 0),
            "score": r.get("combined_score", 0),
            "as": r.get("artifact_score", 0),
            "aes": r.get("ae_score", 0),
            "tp": r.get("total_papers", 0),
            "ta": r.get("artifacts", 0),
            "ar": r.get("artifact_rate", 0),
            "rr": r.get("repro_rate", 0),
        }
    return entries


def _institution_history_entries(inst_rankings) -> dict:
    """Map each named institution ranking record to its compact history entry.

    Records with a missing or empty ``"affiliation"`` are skipped. The rank is
    the record's 1-based position in ``inst_rankings``; skipped records still
    consume a position. The repro rate is computed here as
    ``badges_reproducible / artifacts`` in percent, rounded to one decimal,
    and is 0 when the record has no artifacts.
    """
    entries = {}
    for idx, r in enumerate(inst_rankings):
        name = r.get("affiliation", "")
        if not name:
            continue
        artifacts = r.get("artifacts", 0)
        # Guard the division: institutions without artifacts get a rate of 0.
        inst_rr = 0
        if artifacts > 0:
            inst_rr = round((r.get("badges_reproducible", 0) / artifacts) * 100, 1)
        entries[name] = {
            "rank": idx + 1,
            "score": r.get("combined_score", 0),
            "as": r.get("artifact_score", 0),
            "aes": r.get("ae_score", 0),
            "tp": r.get("total_papers", 0),
            "ta": artifacts,
            "ar": r.get("artifact_rate", 0),
            "rr": inst_rr,
            "r": r.get("num_authors", 0),
        }
    return entries


def _write_ranking_history(path: str, history: list) -> None:
    """Write *history* to *path* as compact (no-whitespace) UTF-8 JSON."""
    # ensure_ascii=False emits non-ASCII characters verbatim, so the file must
    # be opened as UTF-8 explicitly; the locale default encoding may be unable
    # to encode them (or would produce JSON that is not UTF-8).
    with open(path, "w", encoding="utf-8") as f:
        json.dump(history, f, ensure_ascii=False, separators=(",", ":"))