Skip to content

author_stats

src.models.authors.author_stats

Author statistics schema.

Generated by generate_author_stats.py → authors.json, authors.yml. Variants: systems_authors.yml, security_authors.yml.

ArtifactPaper

Bases: BaseModel

A paper that has an associated artifact evaluation.

Source code in src/models/authors/author_stats.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
class ArtifactPaper(BaseModel):
    """A paper that has an associated artifact evaluation."""

    # Strict schema: keys that are not declared below are rejected, not ignored.
    model_config = {"extra": "forbid"}

    # --- Bibliographic identity -------------------------------------------
    title: str = Field(
        description="Full paper title as it appears in the proceedings.",
        examples=["Understanding and Detecting Software Upgrade Failures in Distributed Systems"],
    )
    conference: str = Field(
        description="Conference abbreviation, e.g. 'OSDI', 'USENIXSEC'.",
        examples=["OSDI"],
    )
    year: int = Field(
        description="Publication year, e.g. 2023.",
        examples=[2023],
    )

    # --- Artifact evaluation outcome --------------------------------------
    # Badges are typed as plain strings: the canonical values are documented
    # in the description but not enforced by the type.
    badges: list[str] = Field(
        description=(
            "Artifact evaluation badges awarded by the AE committee. Canonical lowercase values: "
            "'available', 'functional', 'reproduced', 'reusable', 'replicated'."
        ),
        examples=[["available", "functional", "reproduced"]],
    )
    # Per-paper domain; unlike the author-level category there is no 'both' here.
    category: Literal["systems", "security", "unknown"] = Field(
        description=(
            "Research domain: 'systems' or 'security', determined by the conference. "
            "'unknown' if metadata is missing."
        ),
    )
    artifact_citations: int = Field(
        ge=0,
        description="Number of times this artifact has been cited (via DOI tracking). 0 if not yet tracked.",
        examples=[3],
    )

PlainPaper

Bases: BaseModel

A published paper without an artifact evaluation.

Source code in src/models/authors/author_stats.py
42
43
44
45
46
47
48
49
50
51
52
class PlainPaper(BaseModel):
    """A published paper without an artifact evaluation."""

    # Strict schema: keys that are not declared below are rejected, not ignored.
    model_config = {"extra": "forbid"}

    # Only bibliographic identity is recorded; this model declares no badge,
    # category, or citation fields.
    title: str = Field(
        description="Full paper title as it appears in the proceedings.",
        examples=["Understanding and Detecting Software Upgrade Failures in Distributed Systems"],
    )
    conference: str = Field(
        description="Conference abbreviation, e.g. 'OSDI', 'USENIXSEC'.",
        examples=["OSDI"],
    )
    year: int = Field(
        description="Publication year, e.g. 2023.",
        examples=[2023],
    )

AuthorCore

Bases: BaseModel

Common author fields shared by per-author statistics and profile records.

Source code in src/models/authors/author_stats.py
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
class AuthorCore(BaseModel):
    """Common author fields shared by per-author statistics and profile records."""

    # Strict schema: keys that are not declared below are rejected, not ignored.
    model_config = {"extra": "forbid"}

    # --- Identity ---------------------------------------------------------
    name: str = Field(
        description="Full name in DBLP format, e.g. 'Mathias Payer' or 'Jing Liu 0074' (with disambiguation suffix).",
        examples=["Mathias Payer"],
    )
    affiliation: str = Field(
        description="Normalized institution affiliation, e.g. 'EPFL', 'MIT'. Empty string if unknown.",
        examples=["ETH Zurich"],
    )

    # --- Embedded paper records (both default to an empty list) -----------
    papers: list[ArtifactPaper] = Field(
        default_factory=list,
        description="Papers by this author that have evaluated artifacts, with badge and citation details.",
    )
    papers_without_artifacts: list[PlainPaper] = Field(
        default_factory=list,
        description="Papers by this author at tracked conferences that do not have evaluated artifacts.",
    )

    # --- Publication footprint --------------------------------------------
    conferences: list[str] = Field(
        description="Conference abbreviations where author has published, e.g. ['OSDI', 'USENIXSEC'].",
        examples=[["OSDI", "ATC", "USENIXSEC"]],
    )
    years: list[int] = Field(
        description="Sorted list of years with activity, e.g. [2020, 2021, 2023].",
        examples=[[2021, 2022, 2023]],
    )

    # --- Aggregate counters (all constrained to be non-negative) ----------
    artifact_count: int = Field(
        ge=0,
        description="Total number of artifacts authored across all conferences.",
        examples=[5],
    )
    total_papers: int = Field(
        ge=0,
        description="Total papers published at tracked conferences (both with and without artifacts).",
        examples=[42],
    )
    # Expressed on a 0-100 scale, not as a 0-1 fraction.
    artifact_pct: float = Field(
        ge=0,
        le=100,
        description="Percentage of papers with artifacts: (artifact_count / total_papers) * 100.",
        examples=[71.4],
    )
    artifact_citations: int = Field(
        ge=0,
        description="Sum of citation counts across all this author's artifacts.",
        examples=[3],
    )

    # --- Badge tallies ----------------------------------------------------
    badges_available: int = Field(
        ge=0,
        description="Total number of 'available' badges across all this author's artifacts.",
        examples=[12],
    )
    badges_functional: int = Field(
        ge=0,
        description="Total number of 'functional' badges across all this author's artifacts.",
        examples=[10],
    )
    # Note the naming mismatch: the field is "reproducible" while the badge
    # value it counts is spelled 'reproduced'.
    badges_reproducible: int = Field(
        ge=0,
        description="Total number of 'reproduced' badges across all this author's artifacts.",
        examples=[8],
    )

    # --- Cross-references by stable ID (both default to an empty list) ----
    paper_ids: list[int] = Field(
        default_factory=list,
        description="Stable integer IDs referencing papers in papers.json that have evaluated artifacts.",
    )
    papers_without_artifact_ids: list[int] = Field(
        default_factory=list,
        description="Stable integer IDs referencing papers in papers.json that do NOT have evaluated artifacts.",
    )

    # --- Classification and index linkage ---------------------------------
    category: Literal["systems", "security", "both", "unknown"] = Field(
        description="Research domain based on conferences published at: 'systems', 'security', 'both', or 'unknown'.",
    )
    # Optional; when present the ID must be >= 1.
    author_id: int | None = Field(
        default=None,
        ge=1,
        description="Stable integer ID referencing the canonical author_index. Null for authors not yet indexed.",
        examples=[42],
    )

AuthorStats

Bases: AuthorCore

Per-author artifact statistics including badge counts, paper breakdowns, and conference history.

Source code in src/models/authors/author_stats.py
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
class AuthorStats(AuthorCore):
    """Per-author artifact statistics including badge counts, paper breakdowns, and conference history."""

    # Restated explicitly on the subclass; same setting as on AuthorCore.
    model_config = {"extra": "forbid"}

    # --- Presentation -----------------------------------------------------
    display_name: str = Field(
        description="Human-readable name without DBLP disambiguation suffix, e.g. 'Mathias Payer'.",
        examples=["Mathias Payer"],
    )

    # --- Per-conference breakdowns ----------------------------------------
    total_papers_by_conf: dict[str, int] = Field(
        description="Conference name → paper count, e.g. {'OSDI': 3, 'USENIXSEC': 5}.",
    )
    # Year keys are strings (e.g. '2023'), not ints.
    total_papers_by_conf_year: dict[str, dict[str, int]] = Field(
        description="Nested mapping: conference → year → count, e.g. {'OSDI': {'2023': 2}}.",
    )

    # --- Badge rates, on a 0-100 scale ------------------------------------
    repro_pct: float = Field(
        ge=0,
        le=100,
        description="Percentage of artifacts with a 'reproduced' badge: (reproduced / artifact_count) * 100.",
        examples=[66.7],
    )
    functional_pct: float = Field(
        ge=0,
        le=100,
        description="Percentage of artifacts with a 'functional' badge: (functional / artifact_count) * 100.",
        examples=[83.3],
    )

    # Overrides AuthorCore.category: same type, different description text.
    category: Literal["systems", "security", "both", "unknown"] = Field(
        description="Research domain: 'systems', 'security', 'both', or 'unknown' (if no publications yet).",
    )

    # --- Activity window --------------------------------------------------
    # The pattern requires two four-digit years joined by a hyphen.
    year_range: str = Field(
        pattern=r"^\d{4}-\d{4}$",
        description="Activity range as 'YYYY-YYYY', e.g. '2019-2024'.",
        examples=["2017-2026"],
    )
    recent_count: int = Field(
        ge=0,
        description="Number of papers published in the most recent 3 calendar years.",
        examples=[3],
    )

    # --- Cross-references -------------------------------------------------
    # Both fields override the AuthorCore declarations, changing the
    # description and adding examples; the type and default are unchanged.
    paper_ids: list[int] = Field(
        default_factory=list,
        description="List of stable paper IDs (from paper_index) for this author's artifact papers.",
        examples=[[1, 42, 137]],
    )
    papers_without_artifact_ids: list[int] = Field(
        default_factory=list,
        description="List of stable paper IDs (from paper_index) for this author's non-artifact papers.",
        examples=[[5, 88]],
    )