187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
def run_pipeline(cfg: PipelineConfig, *, max_workers: int = 4, message: str = "") -> bool:
    """Execute the full pipeline using the stage dependency graph.

    Stages are grouped into tiers by ``parallel_groups(STAGES)``.  Tiers run
    in order; a tier holding a single stage runs inline, a tier holding
    several stages runs on a thread pool.  Once every tier has finished, the
    invariant checks and (when a reference snapshot exists) the cross-run
    monotonicity checks are evaluated, the reference snapshot is replaced
    with the current one, run metadata is written and, if
    ``cfg.save_results`` is set, a results snapshot is saved.

    Args:
        cfg: Pipeline configuration (directories, proxies, DBLP file, ...).
        max_workers: Upper bound on worker threads used for a multi-stage
            tier; the pool is never larger than the tier itself.
        message: Passed through to ``save_results`` when results are saved.

    Returns:
        ``True`` if all required stages succeeded and neither the invariant
        nor the monotonicity checks reported an error; ``False`` otherwise.
    """
    cfg.ensure_dirs()
    _detect_github_token()
    _seed_staging(cfg)
    _check_dblp(cfg)

    # Export proxies under both spellings; setdefault keeps any value that is
    # already present in the environment.
    for scheme, proxy in (("http", cfg.http_proxy), ("https", cfg.https_proxy)):
        if proxy:
            os.environ.setdefault(f"{scheme}_proxy", proxy)
            os.environ.setdefault(f"{scheme.upper()}_PROXY", proxy)

    groups = parallel_groups(STAGES)
    total_stages = sum(len(g) for g in groups)
    completed = 0
    failed: list[str] = []
    timings: dict[str, float] = {}
    pipeline_start = time.monotonic()

    for tier_idx, tier in enumerate(groups):
        logger.info("── Tier %d/%d (%d stages) ──", tier_idx + 1, len(groups), len(tier))

        if len(tier) == 1:
            # A lone stage does not need a thread pool.
            name, ok, elapsed = _run_stage(tier[0], cfg)
            completed += 1
            timings[name] = elapsed
            if not ok:
                failed.append(name)
        else:
            with ThreadPoolExecutor(max_workers=min(max_workers, len(tier))) as pool:
                futures = {pool.submit(_run_stage, stage, cfg): stage for stage in tier}
                for future in as_completed(futures):
                    name, ok, elapsed = future.result()
                    completed += 1
                    timings[name] = elapsed
                    if not ok:
                        failed.append(name)

        # Single exit point for stage failures so the abort is always logged,
        # whether the tier ran inline or on the pool.
        if failed:
            logger.error("Pipeline aborted — required stage(s) failed: %s", ", ".join(failed))
            return False

    total_elapsed = time.monotonic() - pipeline_start
    logger.info("Pipeline complete! %d/%d stages in %.1fs", completed, total_stages, total_elapsed)

    # Log timing summary, slowest stage first; zero-duration entries are omitted.
    for name, elapsed in sorted(timings.items(), key=lambda item: -item[1]):
        if elapsed > 0:
            logger.info("  %-25s %6.1fs", name, elapsed)

    # ── Post-pipeline invariant checks ───────────────────────────────────
    logger.info("── Running invariant checks ──")
    violations = check_invariants(cfg.output_dir)
    inv_errors = [v for v in violations if v.severity == "error"]
    inv_warnings = [v for v in violations if v.severity == "warning"]
    for v in inv_warnings:
        logger.warning("⚠ %s", v)
    for v in inv_errors:
        logger.error("✗ %s", v)
    if inv_errors:
        logger.error(
            "Invariant check failed: %d error(s), %d warning(s)",
            len(inv_errors),
            len(inv_warnings),
        )
        return False
    if inv_warnings:
        logger.info("Invariant checks passed with %d warning(s)", len(inv_warnings))
    else:
        logger.info("✓ All invariant checks passed")

    # ── Cross-run monotonicity checks ────────────────────────────────────
    reference = load_snapshot()
    current_snapshot = create_summary(cfg.output_dir)
    if reference is not None:
        logger.info("── Running monotonicity checks ──")
        mono_violations = check_monotonicity(reference, current_snapshot)
        mono_errors = [mv for mv in mono_violations if mv.severity == "error"]
        mono_warnings = [mv for mv in mono_violations if mv.severity == "warning"]
        for mw in mono_warnings:
            logger.warning("⚠ %s", mw)
        for me in mono_errors:
            logger.error("✗ %s", me)
        if mono_errors:
            logger.error(
                "Monotonicity check failed: %d error(s), %d warning(s)",
                len(mono_errors),
                len(mono_warnings),
            )
            # Return before the snapshot update below, so the stored
            # reference is left as it was.
            return False
        if mono_warnings:
            logger.info("Monotonicity checks passed with %d warning(s)", len(mono_warnings))
        else:
            logger.info("✓ All monotonicity checks passed")
    else:
        logger.info("No reference snapshot — skipping monotonicity checks")

    # Update reference snapshot
    save_snapshot(current_snapshot)

    # ── Write run metadata ───────────────────────────────────────────────
    write_run_metadata(
        cfg.output_dir,
        timings=timings,
        dblp_file=cfg.dblp_file,
    )

    # ── Save results snapshot ────────────────────────────────────────────
    if cfg.save_results:
        logger.info("── Saving results snapshot ──")
        save_results(cfg, message=message)

    return True
|