#!/usr/bin/env python3
"""Build the focused anti-amyloid translation frontier.

The script is intentionally CLI-backed. It uses `vela finding add`,
`vela link add`, `vela revise`, `vela caveat`, `vela clinical-trial-import`,
and `vela artifact-add` so the generated repository is normal Vela state,
not a hand-written shadow format.
"""

from __future__ import annotations

import argparse
import datetime as dt
import hashlib
import json
import re
import shlex
import shutil
import subprocess
from pathlib import Path
from typing import Any


# Directory two levels above this file (the parent of the directory that
# contains the script); used as the default working directory by `run`.
ROOT = Path(__file__).resolve().parents[1]
# Path of the `vela` executable that every CLI-backed helper invokes.
VELA = ROOT / "target" / "release" / "vela"
# Identity passed as `--author` on `finding add` and `--reviewer` on `review`.
ACTOR = "reviewer:will-blair-bot"
# Key files under the current user's home directory.
# NOTE(review): not referenced in this part of the file; presumably used for
# signing or publishing further down. Confirm before relocating.
PUBKEY = Path.home() / ".vela" / "keys" / "will-blair-bot" / "public.key"
PRIVATE_KEY = Path.home() / ".vela" / "keys" / "will-blair-bot" / "private.key"
# NOTE(review): identifier and hub URL that look like they point at the broad
# source frontier the seeds are drawn from; their use is not visible here.
BROAD_VFR = "vfr_06cfcbe7c449d86a"
BROAD_HUB = "https://vela-hub.fly.dev/entries"

# Allowed values for `--type`. `cli_finding_args` passes an item's "type"
# through `normalized_choice` against this set and falls back to
# "observational" when the value is not a member.
VALID_ASSERTION_TYPES = {
    "mechanism",
    "therapeutic",
    "diagnostic",
    "epidemiological",
    "observational",
    "review",
    "methodological",
    "computational",
    "theoretical",
    "negative",
}

# Allowed values for `--evidence-type`; the fallback used by
# `cli_finding_args` is "observational".
VALID_EVIDENCE_TYPES = {
    "experimental",
    "observational",
    "computational",
    "theoretical",
    "meta_analysis",
    "systematic_review",
    "case_report",
}

# Allowed values for `--source-type`; the fallback used by
# `cli_finding_args` is "published_paper".
VALID_SOURCE_TYPES = {
    "published_paper",
    "preprint",
    "clinical_trial",
    "lab_notebook",
    "model_output",
    "expert_assertion",
    "database_record",
}


# Keyword -> weight table used by `score_finding`.
#
# Every keyword is lowercase because it is tested with `in` against the
# lowercased JSON dump of a whole finding. The test is a plain substring
# match, so short entries such as "aria", "tfr", "bace", "emerge", "engage",
# "graduate" or "ahead" also hit inside longer, unrelated words. Each keyword
# contributes its weight at most once per finding.
KEYWORDS = {
    "lecanemab": 120,
    "donanemab": 120,
    "aducanumab": 95,
    "bace": 85,
    "verubecestat": 120,
    "lanabecestat": 120,
    "atabecestat": 120,
    "semagacestat": 100,
    "solanezumab": 95,
    "gantenerumab": 95,
    "anti-amyloid": 95,
    "anti-aβ": 95,
    # NOTE(review): trailing space is significant for the substring match;
    # this may be a deliberate ASCII fallback for "anti-aβ" or a mangled
    # character. Confirm the intended spelling.
    "anti-a ": 80,
    "amyloid pet": 70,
    "centiloid": 70,
    "cdr-sb": 80,
    "adas-cog": 70,
    "iadrs": 85,
    "clinical dementia rating": 70,
    "aria": 100,
    "apoe4": 95,
    "apoe ε4": 95,
    "apoe e4": 95,
    "apolipoprotein e4": 80,
    "trailblazer": 110,
    "clarity ad": 110,
    "emerge": 90,
    "engage": 90,
    "graduate": 90,
    "ahead": 90,
    "dian-tu": 80,
    "a4 study": 80,
    "transferrin receptor": 80,
    "tfr": 70,
    "focused ultrasound": 90,
    "blood-brain barrier": 45,
    "brain shuttle": 90,
}


# Curation lanes. Each entry carries:
#   "lane"            - short lane identifier
#   "compiler"        - name assigned to the lane
#   "scope"           - free-text description of what the lane covers
#   "sources"         - DOIs, NCT numbers, or free-text source descriptions
#   "do_not_deposit"  - free-text exclusion rules for the lane
# NOTE(review): the code that consumes this table is not in this part of the
# file; confirm how the fields are used before renaming any key.
SUBAGENT_LANES = [
    {
        "lane": "trial_outcomes",
        "compiler": "Euler",
        "scope": "CLARITY AD, TRAILBLAZER-ALZ 2/3, EMERGE/ENGAGE, FDA, CMS, and approval or withdrawal records.",
        "sources": [
            "10.1056/NEJMoa2212948",
            "NCT03887455",
            "10.1001/jama.2023.13239",
            "NCT04437511",
            "NCT05026866",
            "NCT02484547",
            "NCT02477800",
            "FDA Drugs@FDA records for lecanemab, donanemab, and aducanumab",
            "CMS NCD 200.3",
        ],
        "do_not_deposit": [
            "Sponsor press-release claims without matching registry, label, or publication evidence.",
            "Paywalled PDF bytes. Store locators and source hashes only when bytes are legally available.",
        ],
    },
    {
        "lane": "failed_programs",
        "compiler": "Avicenna",
        "scope": "BACE inhibitors, soluble amyloid targeting, gamma-secretase comparator failures, and antibody failures with biomarker movement.",
        "sources": [
            "10.1056/NEJMoa1706441",
            "10.1056/NEJMoa1812840",
            "10.1001/jamaneurol.2019.3988",
            "10.1056/NEJMc1813435",
            "10.1001/jamaneurol.2020.4857",
            "10.1056/NEJMoa2305032",
            "10.1038/s41591-021-01369-8",
            "10.1056/NEJMoa2304430",
            "10.1056/NEJMoa1210951",
        ],
        "do_not_deposit": [
            "Mechanism-only summaries that do not bear on human translation or safety.",
            "Duplicate trial registry rows without a distinct endpoint, phase, or status role.",
        ],
    },
    {
        "lane": "biomarkers_vs_cognition",
        "compiler": "Sagan",
        "scope": "Amyloid PET, tau PET or CSF, clinical endpoint movement, and meaningfulness thresholds.",
        "sources": [
            "10.1056/NEJMoa2212948",
            "10.1001/jama.2023.13239",
            "10.1001/jamaneurol.2025.3869",
            "10.1002/alz.13770",
            "10.1038/s41598-024-75204-8",
            "10.1186/s13195-021-00880-x",
        ],
        "do_not_deposit": [
            "Surrogate-validity claims that are not scoped to endpoint, disease stage, and intervention class.",
            "Post hoc correlations presented as prospective surrogate validation.",
        ],
    },
    {
        "lane": "risk_and_delivery",
        "compiler": "McClintock",
        "scope": "APOE4, ARIA-E/H, anticoagulants, MRI exclusion rules, BBB penetration, TfR shuttle, and focused ultrasound.",
        "sources": [
            "LEQEMBI label",
            "KISUNLA label",
            "10.14283/jpad.2023.30",
            "10.1080/19420862.2023.2261509",
            "NCT04639050",
            "10.1126/science.ads3204",
            "10.1038/s41467-018-04529-6",
            "10.1073/pnas.2002571117",
            "10.1056/NEJMoa2308719",
        ],
        "do_not_deposit": [
            "Patient-level risk recommendations. Store trial and label scope only.",
            "Preclinical delivery claims generalized to clinical benefit.",
        ],
    },
]


# Hand-curated findings, written in the item shape that `cli_finding_args`
# reads:
#   "assertion"                       - required claim text
#   "type", "evidence_type",
#   "source_type"                     - normalized against the VALID_* sets
#   "source", "conditions",
#   "entities", "confidence"          - passed through as CLI option values
#   "doi", "url", "year", "journal"   - optional; emitted only when truthy
#   "human", "trial", "in_vivo"       - booleans that switch on CLI flags
# "entities" is a comma-separated list of `name:type` pairs.
# "key" is not read by `cli_finding_args`; the same identifiers appear as the
# second element of each TRIALS tuple and as "target_key" in
# ARTIFACT_MANIFESTS.
MANUAL_FINDINGS: list[dict[str, Any]] = [
    {
        "key": "lecanemab_clarity_primary",
        "assertion": "Lecanemab slowed decline on CDR-SB in early Alzheimer's disease in CLARITY AD, with a modest absolute effect that requires patient-level risk stratification.",
        "type": "therapeutic",
        "evidence_type": "experimental",
        "source_type": "clinical_trial",
        "source": "van Dyck et al. 2023 NEJM, CLARITY AD",
        "doi": "10.1056/NEJMoa2212948",
        "year": "2023",
        "journal": "New England Journal of Medicine",
        "conditions": "Early Alzheimer's disease with amyloid confirmation; lecanemab 10 mg/kg every two weeks for 18 months; primary endpoint CDR-SB.",
        "entities": "lecanemab:compound,amyloid-beta:protein,Alzheimer's disease:disease,CDR-SB:assay",
        "confidence": "0.78",
        "human": True,
        "trial": True,
    },
    {
        "key": "lecanemab_biomarker",
        "assertion": "Lecanemab reduced amyloid burden and shifted downstream biomarkers in CLARITY AD, but biomarker movement remained larger than the observed clinical effect.",
        "type": "therapeutic",
        "evidence_type": "experimental",
        "source_type": "clinical_trial",
        "source": "CLARITY AD biomarker results",
        "doi": "10.1056/NEJMoa2212948",
        "year": "2023",
        "journal": "New England Journal of Medicine",
        "conditions": "Amyloid-positive early Alzheimer's disease; biomarker endpoints include amyloid PET and downstream tau/neurodegeneration measures.",
        "entities": "lecanemab:compound,amyloid PET:assay,tau:protein,CDR-SB:assay",
        "confidence": "0.72",
        "human": True,
        "trial": True,
    },
    {
        "key": "donanemab_trailblazer_primary",
        "assertion": "Donanemab slowed clinical decline in early symptomatic Alzheimer's disease in TRAILBLAZER-ALZ 2, with efficacy interpreted against amyloid and tau-selection criteria.",
        "type": "therapeutic",
        "evidence_type": "experimental",
        "source_type": "clinical_trial",
        "source": "Sims et al. 2023 JAMA, TRAILBLAZER-ALZ 2",
        "doi": "10.1001/jama.2023.13239",
        "year": "2023",
        "journal": "JAMA",
        "conditions": "Early symptomatic Alzheimer's disease with amyloid and tau pathology; donanemab dosing in TRAILBLAZER-ALZ 2.",
        "entities": "donanemab:compound,amyloid-beta:protein,tau:protein,iADRS:assay,Alzheimer's disease:disease",
        "confidence": "0.77",
        "human": True,
        "trial": True,
    },
    {
        "key": "donanemab_aria",
        "assertion": "Donanemab treatment increases ARIA risk, making benefit interpretation inseparable from imaging surveillance and patient selection.",
        "type": "therapeutic",
        "evidence_type": "experimental",
        "source_type": "clinical_trial",
        "source": "TRAILBLAZER-ALZ 2 safety results",
        "doi": "10.1001/jama.2023.13239",
        "year": "2023",
        "journal": "JAMA",
        "conditions": "Anti-amyloid antibody treatment in early symptomatic Alzheimer's disease; ARIA-E/H monitoring by MRI.",
        "entities": "donanemab:compound,ARIA:other,amyloid-beta:protein,MRI:assay",
        "confidence": "0.76",
        "human": True,
        "trial": True,
    },
    {
        "key": "aducanumab_emerge_engage_discordance",
        "assertion": "Aducanumab's EMERGE and ENGAGE phase 3 trials produced discordant efficacy results, making aducanumab a contested anti-amyloid translation precedent.",
        "type": "therapeutic",
        "evidence_type": "experimental",
        "source_type": "clinical_trial",
        "source": "EMERGE and ENGAGE phase 3 trial records and FDA review materials",
        "url": "https://clinicaltrials.gov/search?term=aducanumab%20EMERGE%20ENGAGE",
        "year": "2021",
        "conditions": "Early Alzheimer's disease anti-amyloid antibody trials; two nominally parallel phase 3 studies.",
        "entities": "aducanumab:compound,amyloid-beta:protein,Alzheimer's disease:disease,CDR-SB:assay",
        "confidence": "0.68",
        "human": True,
        "trial": True,
    },
    {
        "key": "aducanumab_withdrawal",
        "assertion": "Aducanumab approval did not establish a stable clinical translation pathway for anti-amyloid therapy because the program was discontinued after contested evidence and restricted uptake.",
        "type": "review",
        "evidence_type": "observational",
        "source_type": "database_record",
        "source": "FDA approval materials, CMS coverage decisions, and sponsor discontinuation notices",
        "url": "https://www.fda.gov/drugs/postmarket-drug-safety-information-patients-and-providers/aduhelm-aducanumab-avwa-information",
        "year": "2024",
        "conditions": "Regulatory and coverage history after accelerated approval of aducanumab.",
        "entities": "aducanumab:compound,FDA:other,CMS:other,Alzheimer's disease:disease",
        "confidence": "0.67",
        "human": True,
    },
    {
        "key": "bace_verubecestat_epoch",
        "assertion": "Verubecestat failed to improve cognition or function in mild-to-moderate Alzheimer's disease despite BACE1 target engagement.",
        "type": "negative",
        "evidence_type": "experimental",
        "source_type": "clinical_trial",
        "source": "Egan et al. 2018 NEJM, EPOCH",
        "doi": "10.1056/NEJMoa1706441",
        "year": "2018",
        "journal": "New England Journal of Medicine",
        "conditions": "Mild-to-moderate Alzheimer's disease; oral BACE1 inhibition with verubecestat; phase 3 EPOCH trial.",
        "entities": "verubecestat:compound,BACE1:protein,amyloid-beta:protein,Alzheimer's disease:disease",
        "confidence": "0.74",
        "human": True,
        "trial": True,
    },
    {
        "key": "bace_verubecestat_apecs",
        "assertion": "Verubecestat failed in prodromal Alzheimer's disease and raised concern that BACE1 inhibition can worsen cognition.",
        "type": "negative",
        "evidence_type": "experimental",
        "source_type": "clinical_trial",
        "source": "Egan et al. 2019 NEJM, APECS",
        "doi": "10.1056/NEJMoa1812840",
        "year": "2019",
        "journal": "New England Journal of Medicine",
        "conditions": "Prodromal Alzheimer's disease; BACE1 inhibition with verubecestat; phase 3 APECS trial.",
        "entities": "verubecestat:compound,BACE1:protein,cognition:other,Alzheimer's disease:disease",
        "confidence": "0.74",
        "human": True,
        "trial": True,
    },
    {
        "key": "lanabecestat_failure",
        "assertion": "Lanabecestat phase 3 trials were stopped for futility, adding class-level pressure against broad BACE1 inhibition as an Alzheimer's disease treatment strategy.",
        "type": "negative",
        "evidence_type": "experimental",
        "source_type": "clinical_trial",
        "source": "AMARANTH and DAYBREAK-ALZ trial records and publications",
        "url": "https://clinicaltrials.gov/search?term=lanabecestat%20AMARANTH%20DAYBREAK",
        "year": "2019",
        "conditions": "Early and mild Alzheimer's disease BACE inhibitor trials; futility stopping.",
        "entities": "lanabecestat:compound,BACE1:protein,Alzheimer's disease:disease",
        "confidence": "0.76",
        "human": True,
        "trial": True,
    },
    {
        "key": "atabecestat_liver",
        "assertion": "Atabecestat development was halted after liver toxicity signals, separating BACE1 target engagement from acceptable translational safety.",
        "type": "negative",
        "evidence_type": "experimental",
        "source_type": "clinical_trial",
        "source": "Atabecestat clinical development records",
        "url": "https://clinicaltrials.gov/search?term=atabecestat",
        "year": "2018",
        "conditions": "Preclinical or early Alzheimer's disease risk populations treated with a BACE inhibitor; liver safety monitoring.",
        "entities": "atabecestat:compound,BACE1:protein,liver toxicity:other,Alzheimer's disease:disease",
        "confidence": "0.70",
        "human": True,
        "trial": True,
    },
    {
        "key": "solanezumab_a4_failure",
        "assertion": "Solanezumab did not slow cognitive decline in the A4 preclinical Alzheimer's disease trial, weakening soluble amyloid-beta targeting as prevention monotherapy.",
        "type": "negative",
        "evidence_type": "experimental",
        "source_type": "clinical_trial",
        "source": "A4 solanezumab trial",
        "url": "https://clinicaltrials.gov/study/NCT02008357",
        "year": "2023",
        "conditions": "Preclinical Alzheimer's disease with elevated amyloid; solanezumab prevention trial.",
        "entities": "solanezumab:compound,amyloid-beta:protein,preclinical Alzheimer's disease:disease",
        "confidence": "0.72",
        "human": True,
        "trial": True,
    },
    {
        "key": "gantenerumab_graduate_failure",
        "assertion": "Gantenerumab GRADUATE I and II failed to meet primary clinical endpoints despite amyloid-lowering rationale, reinforcing that target engagement alone does not guarantee clinical translation.",
        "type": "negative",
        "evidence_type": "experimental",
        "source_type": "clinical_trial",
        "source": "GRADUATE I/II gantenerumab trial records",
        "url": "https://clinicaltrials.gov/search?term=gantenerumab%20GRADUATE",
        "year": "2022",
        "conditions": "Early Alzheimer's disease phase 3 trials of gantenerumab.",
        "entities": "gantenerumab:compound,amyloid-beta:protein,Alzheimer's disease:disease",
        "confidence": "0.74",
        "human": True,
        "trial": True,
    },
    {
        "key": "apoe4_aria_risk",
        "assertion": "APOE4 carrier status increases the importance of ARIA risk stratification for anti-amyloid antibody treatment.",
        "type": "therapeutic",
        "evidence_type": "observational",
        "source_type": "published_paper",
        "source": "Anti-amyloid antibody labels and trial safety analyses",
        "url": "https://www.accessdata.fda.gov/scripts/cder/daf/",
        "year": "2024",
        "conditions": "Anti-amyloid monoclonal antibody treatment with MRI monitoring; APOE genotype as a risk stratification variable.",
        "entities": "APOE4:gene,ARIA:other,lecanemab:compound,donanemab:compound,MRI:assay",
        "confidence": "0.78",
        "human": True,
    },
    {
        "key": "anticoagulant_risk",
        "assertion": "Anticoagulant use and hemorrhage risk modify the eligible population for anti-amyloid antibody treatment.",
        "type": "therapeutic",
        "evidence_type": "observational",
        "source_type": "database_record",
        "source": "FDA anti-amyloid antibody prescribing information",
        "url": "https://www.accessdata.fda.gov/scripts/cder/daf/",
        "year": "2024",
        "conditions": "Treatment selection for anti-amyloid monoclonal antibodies; ARIA-H and intracerebral hemorrhage risk context.",
        "entities": "anticoagulants:compound,ARIA-H:other,lecanemab:compound,donanemab:compound",
        "confidence": "0.70",
        "human": True,
    },
    {
        "key": "biomarker_cognition_gap",
        "assertion": "Anti-amyloid programs can produce large amyloid biomarker changes while producing modest, absent, or discordant cognitive effects.",
        "type": "review",
        "evidence_type": "systematic_review",
        "source_type": "published_paper",
        "source": "Anti-amyloid antibody trial synthesis",
        "url": "https://www.cochranelibrary.com/",
        "year": "2024",
        "conditions": "Across anti-amyloid antibody and BACE inhibitor programs; biomarker endpoints compared with cognitive and functional endpoints.",
        "entities": "amyloid PET:assay,CDR-SB:assay,ADAS-Cog:assay,amyloid-beta:protein",
        "confidence": "0.74",
        "human": True,
    },
    {
        # NOTE(review): the key reads "mcia" although the assertion is about
        # the minimum clinically important difference (MCID). Check for other
        # references to this key before renaming it.
        "key": "mcia_threshold_gap",
        "assertion": "The minimum clinically important difference for CDR-SB in early Alzheimer's disease remains a decision-critical threshold for interpreting anti-amyloid trial effects.",
        "type": "methodological",
        "evidence_type": "observational",
        "source_type": "published_paper",
        "source": "CDR-SB meaningful change literature",
        "url": "https://pubmed.ncbi.nlm.nih.gov/?term=CDR-SB+minimal+clinically+important+difference+Alzheimer",
        "year": "2023",
        "conditions": "Clinical interpretation of early Alzheimer's disease trial endpoints.",
        "entities": "CDR-SB:assay,minimum clinically important difference:quantity,Alzheimer's disease:disease",
        "confidence": "0.61",
        "human": True,
    },
    {
        "key": "tfr_bace_delivery",
        "assertion": "Transferrin-receptor shuttle designs can increase brain exposure of anti-BACE or anti-amyloid antibodies in preclinical systems, but clinical translation remains unresolved.",
        "type": "therapeutic",
        "evidence_type": "experimental",
        "source_type": "published_paper",
        "source": "TfR/BACE1 brain shuttle preclinical literature",
        "doi": "10.1126/scitranslmed.3009835",
        "year": "2014",
        "journal": "Science Translational Medicine",
        "conditions": "Human TfR knock-in mouse and non-human primate antibody-delivery systems; preclinical exposure and amyloid-beta lowering endpoints.",
        "entities": "transferrin receptor:protein,BACE1:protein,amyloid-beta:protein,blood-brain barrier:anatomical_structure",
        "confidence": "0.57",
        "in_vivo": True,
    },
    {
        "key": "focused_ultrasound_aducanumab",
        "assertion": "Focused ultrasound blood-brain barrier opening may increase regional amyloid removal during aducanumab treatment, but current human evidence is very small and not sufficient for clinical translation.",
        "type": "therapeutic",
        "evidence_type": "case_report",
        "source_type": "published_paper",
        "source": "Focused ultrasound with aducanumab human pilot",
        "doi": "10.1056/NEJMoa2308719",
        "year": "2024",
        "journal": "New England Journal of Medicine",
        "conditions": "Three participants receiving aducanumab with focused ultrasound BBB opening in selected regions; regional amyloid PET comparison.",
        "entities": "focused ultrasound:instrument,aducanumab:compound,blood-brain barrier:anatomical_structure,amyloid PET:assay",
        "confidence": "0.50",
        "human": True,
        "trial": False,
    },
]


# Open questions, one plain string per entry.
# NOTE(review): the consumer is not in this part of the file; presumably each
# string becomes a finding added with the `--gap` flag that
# `cli_finding_args` supports. Confirm against the caller.
GAP_FINDINGS = [
    "Which subgroup has a benefit-risk profile large enough to justify anti-amyloid antibody treatment once ARIA, monitoring burden, and absolute cognitive effect are modeled together?",
    "What trial design can distinguish amyloid lowering that is causally upstream of cognitive benefit from amyloid lowering that is an insufficient biomarker response?",
    "How should anti-amyloid trial results be interpreted when amyloid PET changes substantially but CDR-SB or iADRS changes remain near clinical meaningfulness thresholds?",
    "Which non-APP BACE1 substrates explain cognitive worsening or toxicity in failed BACE inhibitor trials?",
    "Can APOE4-specific risk models prospectively identify patients whose ARIA risk outweighs expected anti-amyloid benefit?",
    "Does earlier intervention before tau spread materially change the clinical effect size of anti-amyloid therapy?",
    "Can delivery-enhanced antibodies improve central target engagement without increasing vascular amyloid-related safety risks?",
    "What public dataset can link individual amyloid clearance, tau progression, ARIA events, and cognitive trajectories across antibody programs?",
]


# External artifacts described by locator only. Each entry carries:
#   "kind"          - "registry_record" or "dataset" in the entries below
#   "name"          - display name
#   "url"           - locator for the artifact
#   "source_title"  - title of the hosting source
#   "target_key"    - a "key" value from MANUAL_FINDINGS
#   "metadata"      - small free-form dict
# NOTE(review): the consumer is not in this part of the file; presumably
# these feed `vela artifact-add`, which the module docstring mentions.
ARTIFACT_MANIFESTS = [
    {
        "kind": "registry_record",
        "name": "LEQEMBI prescribing information and FDA materials",
        "url": "https://www.accessdata.fda.gov/scripts/cder/daf/index.cfm?event=overview.process&ApplNo=761269",
        "source_title": "FDA Drugs@FDA: Leqembi",
        "target_key": "lecanemab_clarity_primary",
        "metadata": {"drug": "lecanemab", "agency": "FDA"},
    },
    {
        "kind": "registry_record",
        "name": "KISUNLA prescribing information and FDA materials",
        "url": "https://www.accessdata.fda.gov/scripts/cder/daf/index.cfm?event=overview.process&ApplNo=761248",
        "source_title": "FDA Drugs@FDA: Kisunla",
        "target_key": "donanemab_trailblazer_primary",
        "metadata": {"drug": "donanemab", "agency": "FDA"},
    },
    {
        "kind": "registry_record",
        "name": "Aduhelm FDA information page",
        "url": "https://www.fda.gov/drugs/postmarket-drug-safety-information-patients-and-providers/aduhelm-aducanumab-avwa-information",
        "source_title": "FDA Aduhelm information",
        "target_key": "aducanumab_withdrawal",
        "metadata": {"drug": "aducanumab", "agency": "FDA"},
    },
    {
        "kind": "registry_record",
        "name": "CMS monoclonal antibodies directed against amyloid coverage decision",
        "url": "https://www.cms.gov/medicare-coverage-database/view/ncd.aspx?ncdid=375",
        "source_title": "CMS NCD 200.3",
        "target_key": "aducanumab_withdrawal",
        "metadata": {"agency": "CMS", "policy": "NCD 200.3"},
    },
    {
        "kind": "dataset",
        "name": "ADNI public data access record",
        "url": "https://adni.loni.usc.edu/data-samples/access-data/",
        "source_title": "Alzheimer's Disease Neuroimaging Initiative data access",
        "target_key": "biomarker_cognition_gap",
        "metadata": {"dataset": "ADNI", "access": "registration_required"},
    },
    {
        "kind": "dataset",
        "name": "A4 Study data and samples access record",
        "url": "https://www.a4studydata.org/",
        "source_title": "A4 Study data access",
        "target_key": "solanezumab_a4_failure",
        "metadata": {"dataset": "A4", "access": "application_required"},
    },
    {
        "kind": "dataset",
        "name": "DIAN observational and trial data access record",
        "url": "https://dian.wustl.edu/our-research/for-investigators/",
        "source_title": "Dominantly Inherited Alzheimer Network investigator resources",
        "target_key": "solanezumab_a4_failure",
        "metadata": {"dataset": "DIAN", "access": "application_required"},
    },
    {
        "kind": "registry_record",
        "name": "ALZ-NET anti-amyloid treatment registry",
        "url": "https://www.alz-net.org/",
        "source_title": "ALZ-NET registry",
        "target_key": "apoe4_aria_risk",
        "metadata": {"registry": "ALZ-NET", "use": "postmarket_evidence"},
    },
]


# (NCT registry number, MANUAL_FINDINGS "key") pairs. A finding key may appear
# more than once when several registry records relate to the same finding.
# NOTE(review): the consumer is not in this part of the file; presumably
# these drive `vela clinical-trial-import`, which the module docstring
# mentions, and link each imported trial to the keyed finding.
TRIALS = [
    ("NCT03887455", "lecanemab_clarity_primary"),
    ("NCT04437511", "donanemab_trailblazer_primary"),
    ("NCT05026866", "donanemab_trailblazer_primary"),
    ("NCT02484547", "aducanumab_emerge_engage_discordance"),
    ("NCT02477800", "aducanumab_emerge_engage_discordance"),
    ("NCT01739348", "bace_verubecestat_epoch"),
    ("NCT01953601", "bace_verubecestat_apecs"),
    ("NCT02245737", "lanabecestat_failure"),
    ("NCT02783573", "lanabecestat_failure"),
    ("NCT02569398", "atabecestat_liver"),
    ("NCT02008357", "solanezumab_a4_failure"),
    ("NCT03444870", "gantenerumab_graduate_failure"),
    ("NCT03443973", "gantenerumab_graduate_failure"),
    ("NCT04468659", "biomarker_cognition_gap"),
    ("NCT05310071", "aducanumab_withdrawal"),
    ("NCT04639050", "tfr_bace_delivery"),
    ("NCT02986932", "focused_ultrasound_aducanumab"),
    ("NCT05469009", "focused_ultrasound_aducanumab"),
]


def run(args: list[str], *, cwd: Path = ROOT, capture: bool = False) -> str:
    """Run an external command and abort the script if it does not succeed.

    Args:
        args: Full argument vector, executable first. It is handed to
            ``subprocess.run`` as a list, so no shell is involved.
        cwd: Working directory for the child process.
        capture: When true, stdout is piped and returned. Otherwise the
            child writes to the parent's stdout and ``""`` is returned.

    Returns:
        The captured stdout text, or ``""`` when nothing was captured.

    Raises:
        SystemExit: If the command cannot be started (for example the
            executable or ``cwd`` does not exist) or exits non-zero.
    """
    # Arguments routinely contain whole sentences, so quote them in the
    # diagnostic to keep the echoed command unambiguous and copy-pasteable.
    command = shlex.join(args)
    try:
        result = subprocess.run(
            args,
            cwd=cwd,
            text=True,
            stdout=subprocess.PIPE if capture else None,
            # Always piped so it can be quoted in the failure message.
            stderr=subprocess.PIPE,
            check=False,
        )
    except OSError as exc:
        # Report start-up failures the same way as command failures instead
        # of surfacing a raw traceback.
        raise SystemExit(f"command could not be started: {command}\n{exc}") from exc
    if result.returncode != 0:
        details = result.stderr
        if result.stdout:
            # Captured stdout would otherwise be lost on failure.
            details = f"{details}\n{result.stdout}"
        raise SystemExit(f"command failed ({result.returncode}): {command}\n{details}")
    return result.stdout or ""


def trim_assertion(text: str) -> str:
    """Normalize an assertion string to a single short line.

    Whitespace runs are collapsed to one space. Anything from a trailing
    " — Introduction", " — Background", " — Objective" or " — Methods"
    marker onward is dropped, and stray leading/trailing spaces and hyphens
    are removed. Results longer than 320 characters are cut at the last
    space within the first 317 characters and suffixed with "...".
    """
    cleaned = re.sub(r"\s+", " ", text).strip()
    # The markers are applied in this fixed order, one after another.
    for heading in ("Introduction", "Background", "Objective", "Methods"):
        cleaned = cleaned.split(f" — {heading}", 1)[0].strip()
    cleaned = cleaned.strip(" -")
    if len(cleaned) <= 320:
        return cleaned
    window = cleaned[:317]
    cut = window.rfind(" ")
    if cut != -1:
        # Back up to a word boundary when one exists in the window.
        window = window[:cut]
    return window + "..."


def normalized_choice(value: Any, valid: set[str], default: str) -> str:
    """Map a loosely formatted label onto a member of ``valid``.

    Non-string input yields ``default``. Strings are stripped, lowercased,
    and have hyphens and spaces turned into underscores. A small synonym
    table is then applied, with "unknown" standing for ``default``. The
    result is returned only if it is a member of ``valid``; otherwise
    ``default`` is returned.
    """
    if not isinstance(value, str):
        return default
    token = value.strip().lower().translate(str.maketrans("- ", "__"))
    canonical = {
        "trial": "clinical_trial",
        "clinical": "clinical_trial",
        "paper": "published_paper",
        "publication": "published_paper",
        "dataset": "database_record",
        "registry": "database_record",
        "database": "database_record",
        "review_article": "published_paper",
        "randomized_trial": "experimental",
        "rct": "experimental",
        "human_trial": "experimental",
        "measurement": "observational",
        "cohort": "observational",
    }
    if token == "unknown":
        token = default
    else:
        token = canonical.get(token, token)
    if token in valid:
        return token
    return default


def score_finding(finding: dict[str, Any]) -> int:
    """Score how relevant a finding is to the anti-amyloid frontier.

    The finding is dumped to JSON and lowercased. Every ``KEYWORDS`` entry
    found in that text adds its weight once. Fixed bonuses follow: +25 for
    ``conditions.human_data``, +45 for ``conditions.clinical_trial``, +10
    when ``provenance.source_type`` is "published_paper", +8 for a DOI, and
    +8 when ``confidence.score`` is a number of at least 0.55.

    Sections that are missing, null, or not a dict are treated as empty,
    and a non-numeric confidence score earns no bonus instead of raising.

    Args:
        finding: JSON-serializable finding record.

    Returns:
        The accumulated integer score.
    """

    def section(key: str) -> dict[str, Any]:
        # `.get(key, {})` only covers a missing key; a present-but-null or
        # malformed section must not crash the scoring pass.
        value = finding.get(key)
        return value if isinstance(value, dict) else {}

    # NOTE(review): this is a substring test over the whole serialized
    # record, so short keywords can match inside longer words.
    hay = json.dumps(finding, ensure_ascii=False).lower()
    score = sum(weight for keyword, weight in KEYWORDS.items() if keyword in hay)

    conditions = section("conditions")
    provenance = section("provenance")
    if conditions.get("human_data"):
        score += 25
    if conditions.get("clinical_trial"):
        score += 45
    if provenance.get("source_type") == "published_paper":
        score += 10
    if provenance.get("doi"):
        score += 8
    confidence = section("confidence").get("score")
    if isinstance(confidence, (int, float)) and confidence >= 0.55:
        score += 8
    return score


def entity_arg(finding: dict[str, Any]) -> str:
    """Render a finding's entities as a comma-separated ``name:type`` list.

    Entities are read from ``finding["assertion"]["entities"]``. Entries
    without a truthy name are skipped, and a missing or empty type becomes
    "other". At most the first eight usable entities are kept. A missing or
    null assertion, a missing entity list, and entries that are not dicts
    are tolerated and contribute nothing.

    Args:
        finding: Finding record.

    Returns:
        The joined string, or ``""`` when there are no usable entities.
    """
    assertion = finding.get("assertion")
    raw = assertion.get("entities") if isinstance(assertion, dict) else None
    if not isinstance(raw, (list, tuple)):
        return ""
    pairs = []
    for entity in raw:
        if not isinstance(entity, dict):
            continue
        name = entity.get("name")
        if name:
            pairs.append(f"{name}:{entity.get('type') or 'other'}")
    return ",".join(pairs[:8])


def cli_finding_args(frontier: Path, item: dict[str, Any]) -> list[str]:
    """Build the ``vela finding add`` argument vector for one item.

    Args:
        frontier: Frontier path passed as the positional argument.
        item: Finding description. "assertion" is required. "type",
            "evidence_type" and "source_type" are normalized against the
            VALID_* sets. "source", "conditions" and "confidence" fall back
            to defaults when missing, and also when present but null, so
            that ``None`` never lands in the argument vector. "doi",
            "pmid", "year", "journal", "url", "source_authors" and
            "entities" are emitted only when truthy. "gap",
            "negative_space", "human", "species", "in_vivo", "in_vitro"
            and "trial" switch on the corresponding flags.

    Returns:
        The argument list, executable first, ready for ``run``.
    """
    # Normalized once; it is needed both for the option value and for the
    # species fallback below.
    evidence_type = normalized_choice(
        item.get("evidence_type"), VALID_EVIDENCE_TYPES, "observational"
    )
    confidence = item.get("confidence")
    args = [
        str(VELA),
        "finding",
        "add",
        str(frontier),
        "--assertion",
        item["assertion"],
        "--type",
        normalized_choice(item.get("type"), VALID_ASSERTION_TYPES, "observational"),
        "--source",
        item.get("source") or "anti-amyloid curation",
        "--source-type",
        normalized_choice(item.get("source_type"), VALID_SOURCE_TYPES, "published_paper"),
        "--author",
        ACTOR,
        "--confidence",
        # `is None` rather than truthiness so an explicit 0 is preserved.
        "0.55" if confidence is None else str(confidence),
        "--evidence-type",
        evidence_type,
        "--conditions-text",
        item.get("conditions") or "Scope requires review.",
        "--evidence-span",
        json.dumps(evidence_span(item), sort_keys=True),
        "--entities-reviewed",
        "--apply",
        "--json",
    ]
    for flag in ("doi", "pmid", "year", "journal", "url"):
        if item.get(flag):
            args.extend([f"--{flag}", str(item[flag])])
    if item.get("source_authors"):
        args.extend(["--source-authors", item["source_authors"]])
    if item.get("entities"):
        args.extend(["--entities", item["entities"]])
    if item.get("gap"):
        args.append("--gap")
    if item.get("negative_space"):
        args.append("--negative-space")
    # Species: human data wins, then an explicit species, then a generic
    # placeholder for experimental evidence with no species given.
    if item.get("human"):
        args.append("--human-data")
        args.extend(["--species", "Homo sapiens"])
    elif item.get("species"):
        args.extend(["--species", item["species"]])
    elif evidence_type == "experimental":
        args.extend(["--species", "source-reported model organism"])
    # Human data is always flagged as in vivo as well.
    if item.get("in_vivo") or item.get("human"):
        args.append("--in-vivo")
    if item.get("in_vitro"):
        args.append("--in-vitro")
    if item.get("trial"):
        args.append("--clinical-trial")
    return args


def evidence_span(item: dict[str, Any]) -> dict[str, Any]:
    """Build the locator-backed evidence span recorded with a finding.

    The locator is the first truthy value among "doi", "pmid", "url" and
    the source label, rendered as a string. The source label defaults to
    "anti-amyloid curation" when "source" is missing or empty, so the
    locator never degrades to the literal text "None".

    Args:
        item: Finding description; "assertion" is required.

    Returns:
        A dict with "section", "text", "locator", "source" and
        "access_note" keys.
    """
    source = item.get("source") or "anti-amyloid curation"
    locator = item.get("doi") or item.get("pmid") or item.get("url") or source
    return {
        "section": "curator_source",
        "text": item["assertion"],
        "locator": str(locator),
        "source": source,
        "access_note": "locator-backed evidence span; source bytes are not copied unless stored as an artifact",
    }


def add_finding(frontier: Path, item: dict[str, Any]) -> str:
    """Add one finding through the CLI and return its identifier.

    The command built by ``cli_finding_args`` is executed with stdout
    captured. That output is parsed as JSON and its "finding_id" value is
    returned.
    """
    command = cli_finding_args(frontier, item)
    payload = json.loads(run(command, capture=True))
    return payload["finding_id"]


def review_finding(frontier: Path, fid: str, status: str, reason: str) -> None:
    """Record a review for finding ``fid`` via ``vela review``.

    The review is attributed to ``ACTOR`` and submitted with ``--apply``.
    The command's stdout is captured and discarded.
    """
    command = [str(VELA), "review", str(frontier), fid]
    command += ["--status", status, "--reason", reason, "--reviewer", ACTOR]
    command += ["--apply", "--json"]
    run(command, capture=True)


def build_seed_items(source: dict[str, Any], limit: int) -> list[dict[str, Any]]:
    """Select and normalise high-scoring findings from the source export.

    A finding is kept when ``score_finding`` returns at least 115 and its
    trimmed assertion text is non-empty and not a duplicate. Duplicates are
    detected on a normalised form (lower-cased, runs of non-alphanumerics
    collapsed to one space).

    Args:
        source: Parsed export; findings are read from its ``findings`` list.
        limit: Maximum number of items to return.

    Returns:
        Item dicts shaped for ``add_finding``, ordered by descending score
        and then descending assertion text, truncated to ``limit``.
        Confidence is clamped to the range 0.40-0.78 (0.55 when the source
        gives none). At most the first eight authors are kept, joined
        with ``;``.
    """
    scored: list[tuple[int, dict[str, Any]]] = []
    seen: set[str] = set()
    for finding in source.get("findings") or []:
        score = score_finding(finding)
        if score < 115:
            continue
        # dict.get only substitutes its default for *missing* keys. A JSON
        # null arrives as None, so every nested block falls back explicitly
        # to keep a sparse record from raising AttributeError/TypeError.
        assertion_block = finding.get("assertion") or {}
        assertion = trim_assertion(assertion_block.get("text") or "")
        norm = re.sub(r"[^a-z0-9]+", " ", assertion.lower()).strip()
        if not assertion or norm in seen:
            continue
        seen.add(norm)
        provenance = finding.get("provenance") or {}
        conditions = finding.get("conditions") or {}
        evidence = finding.get("evidence") or {}
        raw_confidence = (finding.get("confidence") or {}).get("score")
        # Only a missing/null score takes the default; an explicit 0 is
        # still a real value and is clamped up to the 0.40 floor below.
        confidence = 0.55 if raw_confidence is None else raw_confidence
        item = {
            "assertion": assertion,
            "type": assertion_block.get("type") or "therapeutic",
            "evidence_type": evidence.get("type") or "observational",
            "source_type": provenance.get("source_type") or "published_paper",
            "source": provenance.get("title") or finding.get("id", "broad frontier seed"),
            "confidence": min(0.78, max(0.40, confidence)),
            "conditions": conditions.get("text") or "Scope imported from broad Alzheimer's Therapeutics frontier; curator review required.",
            "entities": entity_arg(finding),
            "doi": provenance.get("doi"),
            "pmid": provenance.get("pmid"),
            "year": provenance.get("year"),
            "journal": provenance.get("journal"),
            "human": bool(conditions.get("human_data")),
            "trial": bool(conditions.get("clinical_trial")),
            "in_vivo": bool(conditions.get("in_vivo")),
            "in_vitro": bool(conditions.get("in_vitro")),
            "seeded_from": finding.get("id"),
            "seed_score": score,
        }
        authors = provenance.get("authors") or []
        if authors:
            # Authors may be {"name": ...} dicts or plain strings; anything
            # else (and dicts without a usable name) is skipped.
            names = []
            for author in authors[:8]:
                if isinstance(author, dict) and author.get("name"):
                    names.append(author["name"])
                elif isinstance(author, str):
                    names.append(author)
            if names:
                item["source_authors"] = ";".join(names)
        scored.append((score, item))
    # Assertion text is the tie-breaker so equal scores order deterministically.
    scored.sort(key=lambda pair: (pair[0], pair[1]["assertion"]), reverse=True)
    return [item for _, item in scored[:limit]]


def write_scope(frontier: Path) -> None:
    """Write the frontier's ``SCOPE.md`` (inclusions, exclusions, demo question).

    The file is created or overwritten inside ``frontier``; the directory
    itself must already exist.
    """
    scope_markdown = """# Anti-amyloid translation in Alzheimer's disease

This frontier asks why anti-amyloid interventions can move amyloid
biomarkers, sometimes modestly move cognition, and often fail or
create unacceptable risk.

## Included

- Lecanemab, donanemab, aducanumab, solanezumab, gantenerumab.
- BACE-pathway programs when they bear on amyloid translation:
  verubecestat, lanabecestat, atabecestat, semagacestat.
- Biomarker-to-outcome links: amyloid PET, tau PET/CSF, CDR-SB,
  ADAS-Cog, ADCS-MCI-ADL, iADRS, clinical meaningfulness thresholds.
- Risk stratification: APOE4, ARIA-E/H, anticoagulants, MRI screening,
  inclusion and exclusion criteria.
- Delivery constraints when they change anti-amyloid translation:
  transferrin receptor shuttles, BBB penetration, focused ultrasound.

## Excluded

- Broad Alzheimer's biology unless it changes this translation question.
- Tau-only, inflammation-only, or vascular-only programs unless they
  explain a dependency, contradiction, or endpoint interpretation for
  anti-amyloid translation.
- Private datasets, copyrighted PDFs, or restricted files. Use locators
  and access notes unless bytes can be stored lawfully.

## Demo question

What is the current state of anti-amyloid translation, and what should a
researcher trust about clinical benefit, biomarker movement, safety risk,
and failed mechanisms?
"""
    scope_path = frontier / "SCOPE.md"
    scope_path.write_text(scope_markdown)


def write_manifest(frontier: Path, manifest: dict[str, Any]) -> Path:
    """Write the integrator packet and the subagent-lane list as JSON.

    Both files go under ``<frontier>/packets`` (created if needed) and are
    serialised with two-space indentation and sorted keys.

    Returns:
        Path of ``integrator-packet.json``.
    """
    packet_dir = frontier / "packets"
    packet_dir.mkdir(parents=True, exist_ok=True)

    def render(payload: Any) -> str:
        return json.dumps(payload, indent=2, sort_keys=True)

    packet_path = packet_dir / "integrator-packet.json"
    packet_path.write_text(render(manifest))
    (packet_dir / "subagent-lanes.json").write_text(render(SUBAGENT_LANES))
    return packet_path


def add_trial_artifacts(frontier: Path, finding_ids: dict[str, str]) -> list[dict[str, str]]:
    """Import each trial in ``TRIALS`` and attach it to its target finding.

    Args:
        frontier: Frontier directory passed to the CLI.
        finding_ids: Maps finding keys to deposited finding ids. Trials whose
            key has no id here are skipped.

    Returns:
        One record per imported trial with ``nct_id``, ``artifact_id`` and
        ``target``. The artifact id is taken from the CLI response's
        ``artifact_id`` field, falling back to ``id``.
    """
    records = []
    for nct_id, finding_key in TRIALS:
        finding_id = finding_ids.get(finding_key)
        if not finding_id:
            continue
        command = [str(VELA), "clinical-trial-import", str(frontier), nct_id]
        command += ["--target", finding_id]
        command += ["--deposited-by", ACTOR]
        command += ["--reason", f"public trial record for anti-amyloid frontier ({nct_id})"]
        command.append("--json")
        response = json.loads(run(command, capture=True))
        artifact_id = response.get("artifact_id") or response["id"]
        records.append({"nct_id": nct_id, "artifact_id": artifact_id, "target": finding_id})
    return records


def add_manifest_artifacts(frontier: Path, finding_ids: dict[str, str]) -> list[dict[str, str]]:
    """Write a locator manifest per ``ARTIFACT_MANIFESTS`` entry and register it.

    For every spec whose ``target_key`` resolves in ``finding_ids``, a JSON
    manifest is written under ``<frontier>/packets/artifact-manifests`` (file
    name is a slug of the spec name) and then deposited with
    ``vela artifact-add``. Only locator metadata goes into the manifest.

    Returns:
        One record per deposited manifest with ``name``, ``artifact_id`` and
        ``target``. The artifact id is taken from the CLI response's
        ``artifact_id`` field, falling back to ``id``.
    """
    manifest_dir = frontier / "packets" / "artifact-manifests"
    manifest_dir.mkdir(parents=True, exist_ok=True)
    records = []
    for spec in ARTIFACT_MANIFESTS:
        finding_id = finding_ids.get(spec["target_key"])
        if not finding_id:
            continue
        name = spec["name"]
        url = spec["url"]
        source_title = spec["source_title"]
        metadata = spec["metadata"]

        manifest_body = {
            "name": name,
            "url": url,
            "source_title": source_title,
            "access_terms": "public locator; stored as a Vela manifest, not copied source bytes",
            "metadata": metadata,
            "retrieved_for": "Anti-amyloid translation in Alzheimer's disease",
            "created_at": dt.datetime.now(dt.UTC).isoformat(),
        }
        slug = re.sub(r"[^a-z0-9]+", "-", name.lower()).strip("-")
        manifest_path = manifest_dir / f"{slug}.json"
        manifest_path.write_text(json.dumps(manifest_body, indent=2, sort_keys=True))

        command = [str(VELA), "artifact-add", str(frontier)]
        command += ["--kind", spec["kind"]]
        command += ["--name", name]
        command += ["--file", str(manifest_path)]
        command += ["--url", url]
        command += ["--media-type", "application/json"]
        command += ["--license", "Public locator manifest; source access terms apply"]
        command += ["--source-title", source_title]
        command += ["--source-url", url]
        command += ["--target", finding_id]
        command += ["--deposited-by", ACTOR]
        command += ["--reason", "public regulatory or dataset locator manifest"]
        command.append("--json")
        # Each metadata pair is forwarded as its own repeated --metadata flag.
        for meta_key, meta_value in metadata.items():
            command += ["--metadata", f"{meta_key}={meta_value}"]

        response = json.loads(run(command, capture=True))
        artifact_id = response.get("artifact_id") or response["id"]
        records.append({"name": name, "artifact_id": artifact_id, "target": finding_id})
    return records


def add_links_and_reviews(frontier: Path, finding_ids: dict[str, str]) -> None:
    """Apply the curated links, reviews, confidence revisions and caveats.

    Four fixed tables are processed in order: typed links between findings,
    review statuses, confidence revisions, and caveats. Every row refers to
    findings by key; a row is skipped when any key it needs is absent from
    ``finding_ids``.

    Args:
        frontier: Frontier directory passed to the CLI.
        finding_ids: Maps finding keys to deposited finding ids.
    """
    frontier_arg = str(frontier)
    lookup = finding_ids.get

    def vela(*argv: str) -> None:
        # Every command here is run for its effect; captured output is dropped.
        run([str(VELA), *argv], capture=True)

    # (from key, to key, link type, note)
    link_table = [
        ("lecanemab_biomarker", "lecanemab_clarity_primary", "supports", "biomarker movement bears on lecanemab clinical interpretation"),
        ("donanemab_aria", "donanemab_trailblazer_primary", "depends", "safety risk changes the eligible benefit-risk population"),
        ("aducanumab_withdrawal", "aducanumab_emerge_engage_discordance", "extends", "post-approval history extends the contested efficacy record"),
        ("bace_verubecestat_apecs", "bace_verubecestat_epoch", "replicates", "prodromal failure reinforces EPOCH class concern"),
        ("lanabecestat_failure", "bace_verubecestat_epoch", "supports", "second BACE inhibitor failure supports class-level concern"),
        ("atabecestat_liver", "bace_verubecestat_epoch", "supports", "toxicity separates target engagement from acceptable translation"),
        ("gantenerumab_graduate_failure", "lecanemab_clarity_primary", "contradicts", "amyloid antibody programs diverge in clinical efficacy despite shared target class"),
        ("solanezumab_a4_failure", "lecanemab_clarity_primary", "contradicts", "prevention-stage soluble amyloid targeting did not reproduce clinical benefit"),
        ("aducanumab_emerge_engage_discordance", "lecanemab_clarity_primary", "contradicts", "aducanumab remains a cautionary same-target precedent for interpreting antibody benefit"),
        ("bace_verubecestat_epoch", "lecanemab_biomarker", "contradicts", "amyloid-pathway target engagement did not translate to clinical benefit in this BACE program"),
        ("bace_verubecestat_apecs", "lecanemab_biomarker", "contradicts", "prodromal BACE inhibition weakens simple amyloid-lowering translation"),
        ("lanabecestat_failure", "donanemab_trailblazer_primary", "contradicts", "BACE class futility conflicts with broad amyloid-lowering optimism"),
        ("atabecestat_liver", "donanemab_trailblazer_primary", "contradicts", "safety-limited BACE development shows target class does not determine usable therapy"),
        ("biomarker_cognition_gap", "lecanemab_biomarker", "contradicts", "biomarker movement can exceed clinical effect"),
        ("apoe4_aria_risk", "lecanemab_clarity_primary", "depends", "genotype-dependent safety context modifies treatment interpretation"),
        ("anticoagulant_risk", "apoe4_aria_risk", "extends", "hemorrhage-risk exclusions extend ARIA stratification"),
        ("focused_ultrasound_aducanumab", "aducanumab_emerge_engage_discordance", "extends", "delivery enhancement changes regional amyloid-removal interpretation"),
        ("tfr_bace_delivery", "bace_verubecestat_epoch", "extends", "delivery can alter exposure but not the clinical BACE class failure"),
        ("mcia_threshold_gap", "biomarker_cognition_gap", "depends", "clinical meaningfulness threshold controls biomarker-to-cognition interpretation"),
    ]
    for origin_key, dest_key, relation, rationale in link_table:
        origin = lookup(origin_key)
        dest = lookup(dest_key)
        if origin and dest:
            # Unlike the commands below, `link add` is called without
            # --apply or --json.
            vela(
                "link", "add", frontier_arg,
                "--from", origin,
                "--to", dest,
                "--type", relation,
                "--note", rationale,
            )

    # (finding key, review status, reason)
    review_table = [
        ("aducanumab_emerge_engage_discordance", "contested", "Parallel phase 3 trials were discordant; preserve plural interpretation."),
        ("aducanumab_withdrawal", "contested", "Regulatory history is factual, but its implication for the anti-amyloid class remains contested."),
        ("biomarker_cognition_gap", "needs_revision", "Needs quantitative per-trial effect table before it should be treated as settled."),
        ("mcia_threshold_gap", "needs_revision", "Threshold differs by disease stage and stakeholder; keep as review lead."),
        ("focused_ultrasound_aducanumab", "needs_revision", "Very small human sample; delivery signal should not be generalized."),
        ("gantenerumab_graduate_failure", "contested", "Reviewed as a class-tension candidate against positive antibody programs."),
        ("solanezumab_a4_failure", "contested", "Reviewed as a stage and target-form tension against positive antibody programs."),
        ("bace_verubecestat_epoch", "contested", "Reviewed as amyloid-pathway translation tension, not as a direct antibody contradiction."),
        ("bace_verubecestat_apecs", "contested", "Reviewed as amyloid-pathway translation tension, not as a direct antibody contradiction."),
        ("lanabecestat_failure", "contested", "Reviewed as BACE class tension against broad amyloid-lowering optimism."),
        ("atabecestat_liver", "contested", "Reviewed as safety-driven translation tension."),
    ]
    for finding_key, verdict, rationale in review_table:
        finding_id = lookup(finding_key)
        if finding_id:
            review_finding(frontier, finding_id, verdict, rationale)

    # (finding key, new confidence as CLI string, reason)
    revision_table = [
        ("focused_ultrasound_aducanumab", "0.42", "Small pilot evidence lowers translation confidence until replicated."),
        ("tfr_bace_delivery", "0.49", "Preclinical delivery improvement does not rescue failed clinical BACE programs."),
        ("aducanumab_emerge_engage_discordance", "0.46", "Discordant phase 3 record lowers confidence in aducanumab as a stable translation anchor."),
        ("mcia_threshold_gap", "0.58", "Decision-critical threshold deserves high review priority but is not itself a therapeutic effect."),
        ("biomarker_cognition_gap", "0.69", "Cross-program pattern is strong, but quantitative synthesis remains incomplete."),
    ]
    for finding_key, new_confidence, rationale in revision_table:
        finding_id = lookup(finding_key)
        if finding_id:
            vela(
                "revise", frontier_arg, finding_id,
                "--confidence", new_confidence,
                "--reason", rationale,
                "--reviewer", ACTOR,
                "--apply",
                "--json",
            )

    # (finding key, caveat text)
    caveat_table = [
        ("lecanemab_clarity_primary", "Clinical effect is modest and should be interpreted with ARIA risk, monitoring load, and eligibility constraints."),
        ("donanemab_trailblazer_primary", "Tau-selection criteria affect generalization to broader clinical populations."),
        ("bace_verubecestat_epoch", "BACE failures should not be treated as proof that every amyloid-lowering approach fails."),
        ("apoe4_aria_risk", "Risk estimates should be read from current labels and trial safety tables before patient-level decisions."),
        ("biomarker_cognition_gap", "This is a frontier-level pattern, not a per-patient surrogate validation statement."),
    ]
    for finding_key, caveat_text in caveat_table:
        finding_id = lookup(finding_key)
        if finding_id:
            vela(
                "caveat", frontier_arg, finding_id,
                "--text", caveat_text,
                "--author", ACTOR,
                "--apply",
                "--json",
            )


def main() -> None:
    """Build the anti-amyloid frontier end to end through the Vela CLI.

    Steps, in order: parse options; load the source export and select seed
    findings; optionally delete and then initialise the frontier; write
    SCOPE.md; deposit manual, gap and seed findings; add links, reviews,
    revisions and caveats; import trial and manifest artifacts; sign; write
    the integrator packet; run the strict check and minimum-count checks.

    Options:
        --source: Path to the source export JSON.
        --frontier: Frontier directory, resolved relative to ``ROOT``.
        --seed-limit: Maximum number of seed findings to deposit.
        --reset: Delete an existing frontier directory before building.

    Raises:
        SystemExit: If ``vela check --strict`` does not report ``ok``, or if
            fewer than 120 findings or 12 artifacts exist afterwards.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--source", default="/tmp/vela-anti-amyloid/source.json")
    parser.add_argument("--frontier", default="projects/anti-amyloid-translation")
    parser.add_argument("--seed-limit", type=int, default=112)
    parser.add_argument("--reset", action="store_true")
    args = parser.parse_args()

    source_path = Path(args.source)
    frontier = ROOT / args.frontier

    # Load and select from the source export BEFORE touching the frontier.
    # --reset is destructive, so a missing or malformed --source has to fail
    # while the previous build is still on disk.
    source = json.loads(source_path.read_text())
    seed_items = build_seed_items(source, args.seed_limit)

    if args.reset and frontier.exists():
        shutil.rmtree(frontier)
    if not frontier.exists():
        run([str(VELA), "init", str(frontier), "--name", "Anti-amyloid translation in Alzheimer's disease"])
        run(
            [
                str(VELA),
                "actor",
                "add",
                str(frontier),
                ACTOR,
                "--pubkey",
                PUBKEY.read_text().strip(),
                "--json",
            ],
            capture=True,
        )

    write_scope(frontier)

    # Only manual and gap findings are registered by key; links, reviews and
    # artifacts below resolve their targets through this mapping.
    finding_ids: dict[str, str] = {}
    deposited: list[dict[str, Any]] = []
    for item in MANUAL_FINDINGS:
        fid = add_finding(frontier, item)
        finding_ids[item["key"]] = fid
        deposited.append({"kind": "manual", "id": fid, **item})

    for i, gap in enumerate(GAP_FINDINGS, start=1):
        item = {
            "key": f"gap_{i}",
            "assertion": gap,
            "type": "theoretical",
            "evidence_type": "theoretical",
            "source_type": "expert_assertion",
            "source": "anti-amyloid frontier gap review",
            "confidence": "0.44",
            "conditions": "Candidate gap for maintainer review; not a guaranteed experiment target.",
            "entities": "Alzheimer's disease:disease,amyloid-beta:protein,trial design:other",
            "gap": True,
        }
        fid = add_finding(frontier, item)
        finding_ids[item["key"]] = fid
        deposited.append({"kind": "gap", "id": fid, **item})

    for item in seed_items:
        fid = add_finding(frontier, item)
        # Seeds whose normalised source type is an expert assertion or model
        # output get an explicit "accepted" review right after deposit.
        if normalized_choice(item.get("source_type"), VALID_SOURCE_TYPES, "published_paper") in {
            "expert_assertion",
            "model_output",
        }:
            review_finding(
                frontier,
                fid,
                "accepted",
                "Accepted as a broad-frontier seed inside the anti-amyloid scope; source locator remains the authority.",
            )
        deposited.append({"kind": "seed", "id": fid, **item})

    add_links_and_reviews(frontier, finding_ids)
    trial_artifacts = add_trial_artifacts(frontier, finding_ids)
    manifest_artifacts = add_manifest_artifacts(frontier, finding_ids)
    run([str(VELA), "sign", "apply", str(frontier), "--private-key", str(PRIVATE_KEY), "--json"], capture=True)

    manifest = {
        "frontier": "Anti-amyloid translation in Alzheimer's disease",
        "generated_at": dt.datetime.now(dt.UTC).isoformat(),
        "source_frontier": BROAD_VFR,
        "source_hub": BROAD_HUB,
        "counts": {
            "manual_findings": len(MANUAL_FINDINGS),
            "gap_findings": len(GAP_FINDINGS),
            "seeded_findings": len(seed_items),
            "trial_artifacts": len(trial_artifacts),
            "manifest_artifacts": len(manifest_artifacts),
        },
        # Fingerprint of the keyword weight table, recorded with the packet.
        "keyword_weights_sha256": hashlib.sha256(
            json.dumps(KEYWORDS, sort_keys=True).encode()
        ).hexdigest(),
        "deposited_findings": deposited,
        "trial_artifacts": trial_artifacts,
        "manifest_artifacts": manifest_artifacts,
        "subagent_lanes": SUBAGENT_LANES,
        "do_not_deposit": [
            "Private patient-level datasets without explicit redistribution permission.",
            "Publisher PDFs or supplements whose bytes are not clearly redistributable.",
            "Broad Alzheimer's mechanisms unrelated to anti-amyloid translation.",
            "Unverified claims from press releases unless backed by trial registry or regulator records.",
        ],
    }
    write_manifest(frontier, manifest)

    # Post-build gates: strict check first, then minimum content counts.
    check = json.loads(run([str(VELA), "check", str(frontier), "--strict", "--json"], capture=True))
    if not check.get("ok"):
        raise SystemExit(json.dumps(check, indent=2))
    stats = json.loads(run([str(VELA), "stats", str(frontier), "--json"], capture=True))
    artifacts = json.loads(run([str(VELA), "artifacts", str(frontier), "--json"], capture=True))
    if stats["stats"]["findings"] < 120:
        raise SystemExit(f"expected >=120 findings, saw {stats['stats']['findings']}")
    if artifacts["count"] < 12:
        raise SystemExit(f"expected >=12 artifacts, saw {artifacts['count']}")
    print(json.dumps({"ok": True, "stats": stats["stats"], "artifacts": artifacts["count"]}, indent=2))


# Run the build only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
