#!/usr/bin/env python3
"""RANKIGI chain verifier. Stdlib only.

Usage:
    python3 verify.py chain.json
    python3 verify.py chain.json --rekor

The --rekor flag, when supplied alongside a closure bundle that carries a
sigstore_log_index, fetches the matching entry from rekor.sigstore.dev and
confirms that the hash anchored in the public transparency log matches the
chain head this bundle reports. If the network is unavailable, verification
falls through with a SKIPPED notice and exit code 0; local verification
proceeds independently.

Multi-version verifier:

  hash_version = 1 (legacy)
      Hash input is pipe-concatenated:
          prev_hash | chain_ts | org_id | agent_id | payload_canonical
                    [| intent_hash] [| decision_token_hash]
      chain_ts is COALESCE(server_received_at, occurred_at) cast to Postgres
      timestamptz::text. Chains are per-agent: prev_hash continuity and
      chain_index monotonicity are checked per agent_id.

  hash_version = 2
      Hash input is canonical JSON of:
          {
            "action":      <string>,
            "agent_id":    <uuid>,
            "chain_id":    <uuid or null>,
            "occurred_at": <ISO 8601 UTC, ms precision>,
            "org_id":      <uuid>,
            "payload":     <jsonb>,
            "prev_hash":   <hex>,
            "severity":    <string or null>,
            "tool":        <string or null>
          }
      Keys sorted lexicographically, no whitespace. prev_hash is scoped to
      chain_id when present, else to the agent global chain. Continuity and
      monotonicity are checked per (agent_id, chain_id) when chain_id is
      present, else per agent_id.

  hash_version = 3 (current)
      v2 input plus canon_id and canon_version, slotted alphabetically
      between agent_id and chain_id:
          {
            "action":        ...,
            "agent_id":      ...,
            "canon_id":      <string or null>,
            "canon_version": <string or null>,
            "chain_id":      ...,
            ...
          }
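      Same canonicalization rules as v2. Existing v2 rows continue to
      verify under their own branch.

  hash_version = 4
      v3 input plus passport_fingerprint (slotted between org_id and
      payload) and signature_hash (slotted between severity and tool).
      Both are emitted as JSON null when missing.

Per-event `hash_version` defaults to 3 when absent or null. Legacy v1
exports must set `hash_version` explicitly to 1.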
"""
import base64
import hashlib
import json
import sys
import urllib.error
import urllib.request
from datetime import datetime, timezone

try:
    from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PublicKey
    from cryptography.hazmat.primitives.serialization import load_pem_public_key
    from cryptography.exceptions import InvalidSignature
    CRYPTO_AVAILABLE = True
except ImportError:
    CRYPTO_AVAILABLE = False


# ── v1 helpers ────────────────────────────────────────────────────────────

def pg_timestamptz_text(iso):
    """Render an ISO 8601 timestamp in the Postgres timestamptz::text shape.

    The value is converted to UTC and formatted as
    ``YYYY-MM-DD HH:MM:SS[.fraction]+00``. Trailing zeros of the fractional
    part are dropped, and the fraction is omitted entirely when it is zero.
    A falsy input yields the empty string.
    """
    if not iso:
        return ""
    text = str(iso).strip()
    if text.endswith("Z"):
        # Spell the UTC designator as an explicit offset before parsing.
        text = text[:-1] + "+00:00"
    moment = datetime.fromisoformat(text).astimezone(timezone.utc)
    rendered = moment.strftime("%Y-%m-%d %H:%M:%S")
    # "000000" strips down to "", so whole-second values get no fraction.
    fraction = ("%06d" % moment.microsecond).rstrip("0")
    if fraction:
        rendered += "." + fraction
    return rendered + "+00"


def event_hash_v1(e):
    """Recompute the legacy (hash_version 1) event hash.

    The hash input is the pipe-joined sequence
    prev_hash | chain_ts | org_id | agent_id | payload_canonical, followed by
    intent_hash when present and then a decision-token hash when present.
    chain_ts is server_received_at (falling back to occurred_at) rendered via
    pg_timestamptz_text. Returns the SHA-256 hex digest of that string.
    """
    chain_ts_text = pg_timestamptz_text(
        e.get("server_received_at") or e.get("occurred_at") or ""
    )
    fields = [
        e.get("prev_hash", ""),
        chain_ts_text,
        str(e.get("org_id", "")),
        str(e.get("agent_id", "")),
        e.get("payload_canonical", ""),
    ]

    intent = e.get("intent_hash")
    if intent:
        fields.append(intent)

    # A raw decision_token takes precedence and is hashed here; otherwise a
    # precomputed decision_token_hash is used verbatim.
    token = e.get("decision_token")
    if token:
        fields.append(hashlib.sha256(token.encode()).hexdigest())
    else:
        token_hash = e.get("decision_token_hash")
        if token_hash:
            fields.append(token_hash)

    joined = "|".join(fields)
    return hashlib.sha256(joined.encode()).hexdigest()


# ── v2 helpers ────────────────────────────────────────────────────────────

def normalize_occurred_at(s):
    """Normalize a timestamp to ``YYYY-MM-DDTHH:MM:SS.mmmZ`` (UTC, ms).

    This is the shape the v2+ canonical hash input uses for occurred_at, so
    the canonical bytes do not depend on the input's timezone offset or on
    how many fractional digits it carried. Sub-millisecond digits are
    truncated, not rounded.

    Raises ValueError when ``s`` is empty (and whatever
    datetime.fromisoformat raises for unparseable text).
    """
    if not s:
        raise ValueError("normalize_occurred_at: empty timestamp")

    text = s.replace("Z", "+00:00")
    dot = text.rfind(".")
    if dot != -1:
        # The fraction runs from the last "." up to the timezone sign, if a
        # sign appears after the dot; date hyphens sit before it and lose.
        sign = max(text.rfind("+"), text.rfind("-"))
        if sign > dot:
            fraction, offset = text[dot + 1:sign], text[sign:]
        else:
            fraction, offset = text[dot + 1:], ""
        # Force exactly six digits so fromisoformat accepts the value on
        # interpreters that only take 3- or 6-digit fractions.
        digits = "".join(filter(str.isdigit, fraction))[:6].ljust(6, "0")
        text = text[:dot + 1] + digits + offset

    utc = datetime.fromisoformat(text).astimezone(timezone.utc)
    return "%s.%03dZ" % (
        utc.strftime("%Y-%m-%dT%H:%M:%S"),
        utc.microsecond // 1000,
    )


def _canonical(value):
    """Serialize ``value`` as canonical JSON text.

    Rules (intended to match src/lib/crypto/canonical-json.ts): object keys
    sorted lexicographically; no whitespace; strings escaped by
    _escape_string; -0 becomes 0; integral floats print without a decimal
    point; no scientific notation. Python has no ``undefined``, so every
    dict key is emitted and a None value serializes as ``null``.

    Raises:
        ValueError: for NaN or +/-infinity.
        TypeError: for any type other than None, bool, str, int, float,
            list or dict.
    """
    if value is None:
        return "null"
    if value is True:
        return "true"
    if value is False:
        return "false"
    if isinstance(value, str):
        return _escape_string(value)
    if isinstance(value, int) and not isinstance(value, bool):
        return str(value)
    if isinstance(value, float):
        if value != value or value in (float("inf"), float("-inf")):
            raise ValueError("non-finite number not canonicalizable")
        if value == 0:
            # Covers -0.0 as well, which compares equal to zero.
            return "0"
        if value.is_integer():
            return str(int(value))
        s = repr(value)
        if "e" in s or "E" in s:
            # repr switched to exponent form; expand to fixed notation and
            # trim the padding zeros.
            s = ("%.20f" % value).rstrip("0").rstrip(".")
        return s
    if isinstance(value, list):
        return "[" + ",".join(_canonical(v) for v in value) + "]"
    if isinstance(value, dict):
        # The previous code computed a filtered key list and immediately
        # overwrote it with sorted(value.keys()); only the latter ever took
        # effect, so it is the single source of truth here.
        members = (
            _escape_string(k) + ":" + _canonical(value[k])
            for k in sorted(value.keys())
        )
        return "{" + ",".join(members) + "}"
    raise TypeError("cannot canonicalize type %s" % type(value).__name__)


def _explicit_null(_d, _k):
    """Report whether key ``_k`` of dict ``_d`` counts as explicitly present.

    Always True: both arguments are ignored, because a Python dict cannot
    hold an "undefined" entry, so every key that exists is explicit.
    """
    return True


def _escape_string(s):
    """Return ``s`` as a double-quoted JSON string literal.

    The quote and backslash are backslash-escaped; backspace, form feed,
    newline, carriage return and tab use their short escapes; every other
    code point below U+0020 becomes a lowercase ``\\u00xx`` escape. All
    remaining characters are copied through unchanged.
    """
    short_escapes = {
        '"': "\\\"",
        "\\": "\\\\",
        "\x08": "\\b",
        "\x0c": "\\f",
        "\x0a": "\\n",
        "\x0d": "\\r",
        "\x09": "\\t",
    }
    pieces = []
    for ch in s:
        replacement = short_escapes.get(ch)
        if replacement is not None:
            pieces.append(replacement)
        elif ord(ch) < 0x20:
            pieces.append("\\u%04x" % ord(ch))
        else:
            pieces.append(ch)
    return '"' + "".join(pieces) + '"'


def event_hash_v2(e):
    """Recompute the hash_version 2 event hash.

    Builds the nine-field object (action, agent_id, chain_id, occurred_at,
    org_id, payload, prev_hash, severity, tool), with occurred_at normalized
    to UTC millisecond form and absent fields as None, then returns the
    SHA-256 hex digest of its canonical JSON encoded as UTF-8.
    """
    copied_fields = (
        "action",
        "agent_id",
        "chain_id",
        "org_id",
        "payload",
        "prev_hash",
        "severity",
        "tool",
    )
    # Insertion order is irrelevant: _canonical sorts the keys.
    obj = {name: e.get(name) for name in copied_fields}
    obj["occurred_at"] = normalize_occurred_at(e.get("occurred_at"))
    canonical_bytes = _canonical(obj).encode("utf-8")
    return hashlib.sha256(canonical_bytes).hexdigest()


# - v3 helpers ------------------------------------------------------------

def event_hash_v3(e):
    """Recompute the hash_version 3 event hash.

    Same construction as v2 with two extra fields, canon_id and
    canon_version (None when absent). Returns the SHA-256 hex digest of the
    canonical JSON of the eleven-field object, encoded as UTF-8.
    """
    copied_fields = (
        "action",
        "agent_id",
        "canon_id",
        "canon_version",
        "chain_id",
        "org_id",
        "payload",
        "prev_hash",
        "severity",
        "tool",
    )
    # Insertion order is irrelevant: _canonical sorts the keys.
    obj = {name: e.get(name) for name in copied_fields}
    obj["occurred_at"] = normalize_occurred_at(e.get("occurred_at"))
    canonical_bytes = _canonical(obj).encode("utf-8")
    return hashlib.sha256(canonical_bytes).hexdigest()


# - v4 helpers ------------------------------------------------------------

def event_hash_v4(e):
    """Recompute the hash_version 4 event hash.

    Extends the v3 object with passport_fingerprint and signature_hash. Both
    are included as None (JSON null) when the event lacks them, so signed
    and unsigned events share one canonical shape. Returns the SHA-256 hex
    digest of the canonical JSON of the object, encoded as UTF-8.
    """
    copied_fields = (
        "action",
        "agent_id",
        "canon_id",
        "canon_version",
        "chain_id",
        "org_id",
        "passport_fingerprint",
        "payload",
        "prev_hash",
        "severity",
        "signature_hash",
        "tool",
    )
    # Insertion order is irrelevant: _canonical sorts the keys.
    obj = {name: e.get(name) for name in copied_fields}
    obj["occurred_at"] = normalize_occurred_at(e.get("occurred_at"))
    canonical_bytes = _canonical(obj).encode("utf-8")
    return hashlib.sha256(canonical_bytes).hexdigest()


def event_hash(e):
    """Recompute an event's hash using the scheme named by its hash_version.

    hash_version is read from the event itself and coerced with int(). A
    missing or null value is treated as 3. Versions 2, 3 and 4 map to their
    canonical-JSON recomputations; every other value (including an explicit
    1) falls through to the legacy pipe-concatenated v1 form, so legacy
    exports must set hash_version to 1 explicitly.
    """
    raw_version = e.get("hash_version")
    version = 3 if raw_version is None else int(raw_version)
    canonical_json_versions = {
        2: event_hash_v2,
        3: event_hash_v3,
        4: event_hash_v4,
    }
    recompute = canonical_json_versions.get(version, event_hash_v1)
    return recompute(e)


# === AGENT-A-BLOCK: TSA VERIFICATION ===
# RFC 3161 TSA per-batch verification. Surfaces tsa_tokens[] from the
# closure bundle. Offline verifier prints the token metadata; full
# cryptographic validation against FreeTSA CA is performed server-side.
def verify_tsa_tokens(doc):
    """Print the RFC 3161 TSA token metadata carried by a closure bundle.

    Prints a single NOT YET ANCHORED line when ``doc`` is not a dict or its
    tsa_tokens value is not a non-empty list. Otherwise prints, for each
    dict entry, the TSA URL, the message imprint (falling back to the
    ``hash`` key), the verified-at value, and whether a token is present.
    Non-dict entries are skipped. Nothing is validated cryptographically.
    """
    tokens = doc.get("tsa_tokens") if isinstance(doc, dict) else None
    if not (isinstance(tokens, list) and len(tokens) > 0):
        print("RFC 3161 TSA: NOT YET ANCHORED")
        return

    for tok in tokens:
        if isinstance(tok, dict):
            imprint = tok.get("message_imprint", tok.get("hash", "unknown"))
            print("TSA token: %s" % tok.get("tsa_url", "unknown"))
            print("  Message imprint: %s" % imprint)
            print("  Verified at: %s" % tok.get("verified_at", "unknown"))
            print("  Token: PRESENT" if tok.get("tsa_token_present") else "  Token: PENDING")
# === END AGENT-A-BLOCK ===


# === AGENT-C-BLOCK: REKOR VERIFICATION ===
# Day 1 Agent C will harden this section. Already fixed in Day 0 prep to
# compare against anchor_payload_hash instead of last_event_hash.
# - Rekor binding (optional, --rekor flag) ---------------------------------

# Log-entries endpoint of the public Rekor instance; fetch_rekor_hash appends
# "?logIndex=<n>" to it.
REKOR_BASE = "https://rekor.sigstore.dev/api/v1/log/entries"


def _rekor_body_hash(decoded):
    """Return ``spec.data.hash.value`` from a decoded Rekor body, else None.

    The path is walked one key at a time so that a non-dict node anywhere
    along it (null, string, list, ...) yields None instead of raising.
    """
    node = decoded
    for key in ("spec", "data", "hash", "value"):
        if not isinstance(node, dict):
            return None
        node = node.get(key)
    if isinstance(node, str) and node:
        return node
    return None


def fetch_rekor_hash(log_index):
    """Fetch the hash anchored at a specific Rekor log index.

    Args:
        log_index: Rekor log index; anything ``int()`` accepts.

    Returns:
        ``(rekor_hash_hex, None)`` on success, with the hash lowercased, or
        ``(None, reason)`` on any failure (bad index, network error,
        malformed response). Never raises; offline verification is
        authoritative.
    """
    try:
        url = "%s?logIndex=%d" % (REKOR_BASE, int(log_index))
        req = urllib.request.Request(
            url, headers={"Accept": "application/json"}
        )
        with urllib.request.urlopen(req, timeout=5) as resp:
            raw = resp.read()
    except OSError as e:
        # URLError, HTTPError and TimeoutError are all OSError subclasses.
        return None, "network unavailable (%s)" % type(e).__name__
    except Exception as e:
        # Deliberate catch-all: e.g. a non-numeric log_index.
        return None, "fetch failed (%s)" % type(e).__name__

    try:
        entries = json.loads(raw.decode("utf-8"))
    except Exception:
        return None, "Rekor returned non-JSON"

    if not isinstance(entries, dict) or not entries:
        return None, "Rekor returned empty entry set"

    # The response is keyed by entry id; only the first entry is inspected.
    entry = entries[next(iter(entries))]
    body_b64 = entry.get("body") if isinstance(entry, dict) else None
    if not isinstance(body_b64, str):
        return None, "Rekor entry has no body"

    try:
        decoded = json.loads(base64.b64decode(body_b64).decode("utf-8"))
    except Exception:
        return None, "Rekor body decode failed"

    # Previously a chained .get() here raised AttributeError when spec, data
    # or hash was present but not an object, breaking the never-raises
    # contract.
    value = _rekor_body_hash(decoded)
    if value is None:
        return None, "Rekor body missing hash.value"

    return value.lower(), None


def verify_rekor_binding(log_index, expected_hash):
    """Compare ``expected_hash`` against the Rekor entry at ``log_index``.

    Prints a structured block describing the transparency-log witness: the
    log index, a 32-character preview of the expected hash, the verdict and
    a public re-verification URL. The comparison is case-insensitive.

    Returns 'verified' when the hashes match and 'skipped' when the Rekor
    hash could not be fetched (offline chain verification remains
    authoritative). On a mismatch the process is terminated with
    ``sys.exit(1)`` after the block is printed; no value is returned.
    """
    expected = (expected_hash or "").lower()
    public_url = "https://search.sigstore.dev/?logIndex=%s" % log_index

    rekor_hash, reason = fetch_rekor_hash(log_index)
    fetched = rekor_hash is not None
    matched = fetched and rekor_hash == expected

    print("Rekor anchor: logIndex %s" % log_index)
    # Only the skipped and mismatch reports fall back to "(none)" when no
    # expected hash was supplied.
    if matched or expected:
        print("Anchor hash: sha256:%s..." % expected[:32])
    else:
        print("Anchor hash: (none)")

    if not fetched:
        # Network/availability failure: report SKIPPED rather than failing.
        print("Verified at search.sigstore.dev: SKIPPED (%s)" % reason)
        print("Independent re-verify: %s" % public_url)
        return "skipped"

    if matched:
        print("Verified at search.sigstore.dev: YES")
        print("Independent re-verify: %s" % public_url)
        return "verified"

    print("Verified at search.sigstore.dev: MISMATCH")
    print("  expected: %s..." % expected[:32])
    print("  rekor:    %s..." % rekor_hash[:32])
    print("Independent re-verify: %s" % public_url)
    sys.exit(1)


# === END AGENT-C-BLOCK ===


# === AGENT-E-BLOCK: CCAP VERIFICATION ===
# Day 1 Agent E: CCAP (Counterparty Closure Attestation Protocol) receipt
# verification. Reports receipts present in the closure bundle, the per-event
# coverage ratio, and per-receipt provider / witness class / status.
# Signature validation against /.well-known/ccap-keys.json is roadmap and
# intentionally not performed here; the verifier prints the witness_class
# label so an offline operator can see whether a receipt was counterparty
# signed, provider HMAC verified, or RANKIGI witness-of-last-resort signed.
def verify_ccap_receipts(doc):
    """Print a summary of the counterparty receipts in a closure bundle.

    Prints nothing unless ``doc`` is a dict whose counterparty_receipts is a
    non-empty list. Otherwise prints the receipt count, a coverage line
    (using receipt_count when it is an int, else the number of receipts, and
    the length of event_hashes when that is a non-empty list), then the
    provider, witness class and status of each dict receipt. Signatures are
    not validated.
    """
    if not isinstance(doc, dict):
        return
    receipts = doc.get("counterparty_receipts")
    if not (isinstance(receipts, list) and len(receipts) > 0):
        return

    hashes = doc.get("event_hashes") or []
    total_events = len(hashes) if isinstance(hashes, list) else 0
    declared = doc.get("receipt_count")
    receipted = declared if isinstance(declared, int) else len(receipts)

    print("Counterparty receipts: %d" % len(receipts))
    if total_events > 0:
        coverage = "Receipt coverage: %d of %d events" % (receipted, total_events)
    else:
        coverage = "Receipt coverage: %d events" % receipted
    print(coverage)

    for receipt in receipts:
        if isinstance(receipt, dict):
            print("  Provider: %s" % receipt.get("provider", "unknown"))
            print("  Witness class: %s" % receipt.get("witness_class", "unattested"))
            print("  Status: %s" % receipt.get("status", "unknown"))
# === END AGENT-E-BLOCK ===


# === AGENT-D-BLOCK: DOSSIER OUTPUT ===
# Action Dossier availability output. Renders the FRE 902(14) certification
# block summary and download pointer in a court-receivable text format.
def report_action_dossier(doc):
    """Print the Action Dossier availability summary for a bundle.

    Prints nothing unless ``doc`` is a dict with a truthy dossier_version.
    Otherwise prints the version, the fre_applicable flag (default False),
    the uar_completeness percentage (default 0) and a download pointer built
    from chain_id, falling back to chainId and then to the literal
    ``{chainId}`` placeholder.
    """
    version = doc.get("dossier_version") if isinstance(doc, dict) else None
    if not version:
        return
    chain_id = doc.get("chain_id") or doc.get("chainId") or "{chainId}"
    summary = (
        "Action Dossier: AVAILABLE",
        "  Version: %s" % version,
        "  FRE applicable: %s" % doc.get("fre_applicable", False),
        "  UAR completeness: %s%%" % doc.get("uar_completeness", 0),
        "  Download: POST /api/chains/%s/dossier" % chain_id,
    )
    for line in summary:
        print(line)
# === END AGENT-D-BLOCK ===


# === AGENT-B-BLOCK: AUTHORIZATIONS ===
def report_authorizations(doc):
    """Print the authorization records carried by a bundle.

    Prints ``Authorization records: NONE`` when ``doc`` is not a dict or its
    authorizations value is not a non-empty list. Otherwise prints the
    record count, then the type, grantor, scope hash and authorization hash
    of each dict record (missing fields shown as "unknown"), and a closing
    PRESENT line. The hashes are displayed only, not recomputed.
    """
    auths = doc.get("authorizations") if isinstance(doc, dict) else None
    if not (isinstance(auths, list) and len(auths) > 0):
        print("Authorization records: NONE")
        return

    print("Authorization records: %d" % len(auths))
    for record in auths:
        if isinstance(record, dict):
            print("  Type: %s" % record.get("authority_type", "unknown"))
            print("  Granted by: %s" % record.get("granted_by", "unknown"))
            print("  Scope hash: %s" % record.get("scope_hash", "unknown"))
            print("  Hash: %s" % record.get("authorization_hash", "unknown"))
    print("Authorization hashes: PRESENT")
# === END AGENT-B-BLOCK ===


# === AGENT-F-BLOCK: CITATIONS ===
def report_citations(doc):
    """Print citation provenance carried by a bundle.

    Prints ``Citation provenance: NONE`` when ``doc`` is not a dict or has
    no citations value. citations may be a list of items, or a dict with an
    ``items`` list and an optional ``lock_status`` that overrides the
    bundle-level citation_lock_status / lock_status. Any other type is
    treated as zero items. For each dict item the source, type,
    used-in-output flag and hash are printed.
    """
    citations = doc.get("citations") if isinstance(doc, dict) else None
    if citations is None:
        print("Citation provenance: NONE")
        return

    lock_status = doc.get("citation_lock_status") or doc.get("lock_status") or "unknown"
    items = []
    if isinstance(citations, dict):
        items = citations.get("items") or []
        lock_status = citations.get("lock_status", lock_status)
    elif isinstance(citations, list):
        items = citations

    print("Citation lock: %s" % lock_status)
    print("Citations: %d" % len(items))
    for entry in items:
        if isinstance(entry, dict):
            print("  Source: %s" % entry.get("source_name", "unknown"))
            print("  Type: %s" % entry.get("citation_type", "unknown"))
            print("  Used in output: %s" % entry.get("used_in_output", False))
            print("  Hash: %s" % entry.get("citation_hash", "unknown"))
# === END AGENT-F-BLOCK ===


# === AGENT-CROSSING-BLOCK ===
def verify_crossings(doc):
    """Report world receipts (universal crossing layer) bound to this chain.

    A crossing is a world receipt (Stripe webhook, GitHub event, DKIM-signed
    email, Rekor entry, CloudTrail digest, etc.) bound to a chain event.

    Prints ``World receipts: NONE`` when ``doc`` is not a dict or its
    crossings value is not a non-empty list. Otherwise prints the total, the
    number of receipts whose witness_class is neither "unattested" nor
    "sidecar_observed", and one summary per dict entry with a warning when
    it has no bound_event_id. Non-dict entries are skipped.

    A receipt with no witness_class is treated as unattested: it used to be
    counted as counterparty signed because None is not in the exclusion
    list, which overstated the attested total.
    """
    crossings = doc.get("crossings") if isinstance(doc, dict) else None
    if not isinstance(crossings, list) or not crossings:
        print("World receipts: NONE")
        return

    total = len(crossings)
    entries = [c for c in crossings if isinstance(c, dict)]
    verified = sum(
        1 for c in entries
        if (c.get("witness_class") or "unattested")
        not in ("unattested", "sidecar_observed")
    )
    print("World receipts: %d" % total)
    print("Counterparty signed: %d/%d" % (verified, total))
    for c in entries:
        provider = c.get("provider", "unknown")
        strength = c.get("claim_strength", "unknown")
        witness = c.get("witness_class", "unknown")
        bound = c.get("bound_event_id")
        print("  %s: %s (%s)" % (provider, strength, witness))
        if bound:
            print("    Bound to event: %s..." % str(bound)[:8])
        else:
            print("    WARNING: unbound world receipt")
# === END AGENT-CROSSING-BLOCK ===


# === AGENT-G-BLOCK: XACT ===
def verify_xact_commitments(doc):
    """Print a summary of the XACT commitments carried by a bundle.

    xact_commitments may be a list of commitments, or a dict that nests the
    list under its own ``xact_commitments`` key. Prints
    ``XACT commitments: NONE`` when ``doc`` is not a dict or no non-empty
    list is found. Otherwise prints the total, how many entries have a
    party_b_attestation_class of "bilateral_signed", and per dict entry its
    id preview, state, attestation class, any attestation note, and whether
    it is anchored. Non-dict entries are skipped rather than raising, and
    the commitment id is stringified before being truncated.
    """
    xact = (doc.get("xact_commitments") if isinstance(doc, dict) else None) or {}
    if isinstance(xact, list):
        commitments = xact
    elif isinstance(xact, dict):
        commitments = xact.get("xact_commitments", [])
    else:
        commitments = []
    if not isinstance(commitments, list) or not commitments:
        print("XACT commitments: NONE")
        return

    count = len(commitments)
    entries = [c for c in commitments if isinstance(c, dict)]
    bilateral = sum(
        1 for c in entries
        if c.get("party_b_attestation_class") == "bilateral_signed"
    )
    print(f"XACT commitments: {count}")
    print(f"Bilateral signed: {bilateral} of {count}")
    for c in entries:
        cid = str(c.get("commitment_id") or "unknown")[:8]
        state = c.get("state", "unknown")
        attestation = c.get("party_b_attestation_class", "unknown")
        print(f"  [{cid}] {state} | {attestation}")
        if attestation == "provider_hmac_bridge":
            print("    WARNING: Counterparty did not sign.")
            print("    RANKIGI witnessed provider HMAC only.")
        note = c.get("attestation_note")
        if note:
            print(f"    Note: {note}")
        if c.get("anchored_at"):
            idx = c.get("rekor_log_index")
            print(f"    Anchored: YES (Rekor #{idx})")
        else:
            print("    Anchored: PENDING")
# === END AGENT-G-BLOCK ===


def report_v4_binding(events):
    """Print previews of the v4 binding fields found on the events.

    An event counts as v4-bound when it is a dict with a truthy
    passport_fingerprint or signature_hash. Prints nothing when no event is
    bound. Otherwise prints 16-character previews taken from the first bound
    event, the bound/total ratio (with a note when some events are not
    bound), and a closure-attestation line for the first event whose payload
    closure_binding is "server_attested" or "v2_fallback".
    """
    def carries_binding(event):
        if not isinstance(event, dict):
            return False
        return bool(event.get("passport_fingerprint") or event.get("signature_hash"))

    bound = [e for e in events if carries_binding(e)]
    if len(bound) == 0:
        return

    first = bound[0]
    fp = first.get("passport_fingerprint") or ""
    sh = first.get("signature_hash") or ""
    if fp:
        print("Passport fingerprint: %s..." % fp[:16])
    if sh:
        print("Signature hash: %s..." % sh[:16])

    print("v4-bound events: %d of %d" % (len(bound), len(events)))
    if len(bound) < len(events):
        print("Note: events recorded before v4 SDK was installed are v3-bound.")
        print("All new events use v4 passport binding.")

    # Only the first event that declares a recognised closure binding is
    # reported; later ones are ignored.
    for e in events:
        if not isinstance(e, dict):
            continue
        payload = e.get("payload")
        if not isinstance(payload, dict):
            continue
        binding = payload.get("closure_binding")
        if binding == "server_attested":
            print("Closure attestation: server-attested. RANKIGI sealed the closure event;")
            print("                     event hashes verified by passport fingerprint.")
            break
        if binding == "v2_fallback":
            print("Closure attestation: v2 fallback (no passport binding)")
            break


def verify_passport_binding(envelope, events):
    """Cross-check the envelope passport fingerprint against the events.

    Compares the envelope's passport_key_fingerprint with the
    passport_fingerprint of every dict event that carries one.

    Returns:
        'no_binding' (silently) when neither side declares a fingerprint;
        'mismatch' when only one side declares one, when the events
        disagree among themselves, or when the single event fingerprint
        differs from the envelope's (compared case-insensitively);
        'verified' otherwise. Every outcome except 'no_binding' prints a
        verdict block.
    """
    envelope_fp = None
    if isinstance(envelope, dict):
        envelope_fp = envelope.get("passport_key_fingerprint")
    event_fps = [
        e.get("passport_fingerprint")
        for e in events
        if isinstance(e, dict) and e.get("passport_fingerprint")
    ]

    if not event_fps:
        if not envelope_fp:
            return "no_binding"
        print("Passport binding: NO V4 EVENTS")
        print("Envelope declares passport %s... but no events carry v4 binding."
              % (envelope_fp or "")[:16])
        return "mismatch"

    if not envelope_fp:
        print("Passport binding: ENVELOPE MISSING FINGERPRINT")
        print("Events declare passport %s... but the envelope has no key." % event_fps[0][:16])
        return "mismatch"

    distinct = sorted(set(event_fps))
    if len(distinct) != 1:
        print("Passport binding: MIXED FINGERPRINTS ACROSS EVENTS")
        for fp in distinct:
            print("  fingerprint: %s..." % fp[:16])
        return "mismatch"

    event_fp = distinct[0]
    if event_fp.lower() == envelope_fp.lower():
        print("Passport fingerprint binding: VERIFIED")
        print("Passport: %s..." % envelope_fp[:16])
        print("Events bound to envelope passport: %d of %d" % (len(event_fps), len(events)))
        return "verified"

    print("Passport binding: MISMATCH")
    print("Envelope: %s..." % envelope_fp[:16])
    print("Events:   %s..." % event_fp[:16])
    return "mismatch"


def fingerprint_pem(pem_text):
    """Return the SHA-256 hex digest of a PEM public key.

    ``pem_text`` may be bytes (hashed as-is) or str (UTF-8 encoded first).
    The digest covers the PEM text itself rather than the decoded key
    material, which is how the envelope's passport_key_fingerprint is
    described as being computed.
    """
    raw = pem_text if isinstance(pem_text, bytes) else pem_text.encode("utf-8")
    return hashlib.sha256(raw).hexdigest()


def load_pinned_pubkey(path):
    """Read a pinned PEM public key from ``path``.

    Returns ``(pem_text, public_key)``. pem_text is the file content decoded
    as UTF-8 with undecodable bytes replaced. public_key is the parsed key
    object, or None when the cryptography package is not installed. I/O and
    key-parsing errors propagate to the caller.
    """
    with open(path, "rb") as handle:
        pem_bytes = handle.read()
    public_key = load_pem_public_key(pem_bytes) if CRYPTO_AVAILABLE else None
    pem_text = pem_bytes.decode("utf-8", errors="replace")
    return (pem_text, public_key)


def check_nonce_uniqueness(events):
    """Check that no nonce appears twice within the bundle.

    Non-dict events and events with a falsy nonce are ignored. Returns
    ``(True, None)`` when every nonce is unique, otherwise ``(False, reason)``
    where reason names the nonce and the two event indices of the first
    repeat encountered.
    """
    first_seen = {}
    for position, event in enumerate(events):
        nonce = event.get("nonce") if isinstance(event, dict) else None
        if not nonce:
            continue
        # setdefault returns the earlier index when the nonce was already
        # recorded, and this position when it is new.
        earlier = first_seen.setdefault(nonce, position)
        if earlier != position:
            reason = "Duplicate nonce %r at events[%d] and events[%d]" % (nonce, earlier, position)
            return False, reason
    return True, None


def verify_event_signatures(envelope, events, pinned_key=None):
    """Optional Ed25519 signature verification.

    When pinned_key is provided, it is used as the trust anchor (the
    envelope's public key is ignored for verification). Otherwise the
    envelope's self-reported passport_public_key_pem is used, and a warning
    about self-trust is printed.

    Returns a tuple (status, verified_count, failed_idx, signed_total,
    server_attested_count). status is one of:
      'no_key'        no usable public key: no pin was given and the
                      envelope key is absent or failed to parse
      'skipped'       cryptography library not installed
      'no_signatures' no events carried both a signature and a nonce
      'passed'        every signed event verified successfully
      'failed'        the signature of the event at failed_idx could not be
                      base64-decoded or did not verify
    server_attested_count counts chain_closure events that legitimately lack a
    signature because the closure was server-attested (RANKIGI does not hold
    the passport private key). Callers use it to relax envelope-level
    'no_signatures' hard-fails when every unsigned event is a server-attested
    closure.

    Side effects: when the envelope carries passport_key_fingerprint, any
    event missing a signature or nonce that is not a server-attested
    chain_closure causes an error message to be printed and the process to
    terminate via sys.exit(1).
    """
    # Checked first, so none of the prints or exits below happen when the
    # cryptography package is missing.
    if not CRYPTO_AVAILABLE:
        return ("skipped", 0, 0, 0, 0)

    # If the bundle claims a v4 passport binding (envelope carries
    # passport_key_fingerprint), every event must carry signature + nonce.
    # Missing signatures on a claimed-v4 bundle are treated as tampering and
    # hard-fail rather than silently skipping verification.
    claims_v4 = bool(
        isinstance(envelope, dict) and envelope.get("passport_key_fingerprint")
    )

    # Make self-trust visible when no external key was pinned.
    if pinned_key is None and isinstance(envelope, dict) and envelope.get("passport_public_key_pem"):
        print("")
        print("WARNING: No external public key provided. Verification trusts the passport key shipped inside this bundle.")
        print("For maximum assurance run:")
        print("  python3 verify.py bundle.json --pubkey rankigi-public-key.pem")
        print("")

    # Resolve the verification key: the pin wins; otherwise parse the PEM the
    # envelope ships with.
    public_key = pinned_key
    if public_key is None:
        pem = envelope.get("passport_public_key_pem") if isinstance(envelope, dict) else None
        if not pem:
            return ("no_key", 0, 0, 0, 0)
        try:
            public_key = load_pem_public_key(pem.encode("utf-8") if isinstance(pem, str) else pem)
        except Exception:
            # An unparseable envelope key is reported the same way as a
            # missing one.
            return ("no_key", 0, 0, 0, 0)

    verified_count = 0
    signed_seen = 0
    server_attested_count = 0
    for idx, ev in enumerate(events):
        if not isinstance(ev, dict):
            continue
        sig_b64 = ev.get("signature")
        nonce = ev.get("nonce")
        if not sig_b64 or not nonce:
            # Unsigned event. Without a v4 claim it is simply skipped; with
            # one, only a labelled server-attested closure is tolerated.
            if claims_v4:
                action = ev.get("action")
                payload = ev.get("payload") if isinstance(ev.get("payload"), dict) else {}
                closure_binding = payload.get("closure_binding") if isinstance(payload, dict) else None
                if action == "chain_closure" and closure_binding == "server_attested":
                    print("Closure event %d: server-attested by RANKIGI (no agent signature)." % idx)
                    server_attested_count += 1
                    continue
                if action == "chain_closure" and not closure_binding:
                    print("ERROR: Closure event %d lacks both signature and closure_binding label." % idx)
                    sys.exit(1)
                print("SIGNATURE MISSING: Event %d claims v4 binding but has no signature or nonce. This bundle may have been tampered with." % idx)
                sys.exit(1)
            continue
        signed_seen += 1
        # The signed message is the canonical JSON of these seven fields.
        # occurred_at is used exactly as it appears on the event (it is not
        # passed through normalize_occurred_at here).
        signing_obj = {
            "action":      ev.get("action"),
            "agent_id":    ev.get("agent_id"),
            "chain_id":    ev.get("chain_id"),
            "nonce":       nonce,
            "occurred_at": ev.get("occurred_at"),
            "payload":     ev.get("payload"),
            "tool":        ev.get("tool"),
        }
        signing_input = _canonical(signing_obj)
        try:
            sig = base64.b64decode(sig_b64)
        except Exception:
            # Undecodable signature counts as a verification failure.
            return ("failed", verified_count, idx, signed_seen, server_attested_count)
        try:
            public_key.verify(sig, signing_input.encode("utf-8"))
        except InvalidSignature:
            return ("failed", verified_count, idx, signed_seen, server_attested_count)
        except Exception:
            # Any other error raised by verify() is also reported as a
            # failure at this index rather than propagated.
            return ("failed", verified_count, idx, signed_seen, server_attested_count)
        verified_count += 1

    if signed_seen == 0:
        return ("no_signatures", 0, 0, 0, server_attested_count)
    return ("passed", verified_count, 0, signed_seen, server_attested_count)


# ── output banner / disclaimer ────────────────────────────────────────────

# Heading printed first by print_banner.
BANNER = "=== RANKIGI CHAIN VERIFICATION ==="

# Scope statement printed under the banner.
DISCLAIMER = "\n".join((
    "DISCLAIMER: This verifier checks hash chain integrity, passport fingerprint",
    "binding, and Ed25519 signature validity when signatures are present. It does",
    "not verify agent conduct or regulatory compliance. The closure event is",
    "server-attested (RANKIGI-signed), not passport-signed, by design.",
))

# Printed by print_banner only when no key was pinned via the command line.
PUBKEY_NOTICE = "\n".join((
    "Note: passport public key is self-reported in this bundle.",
    "For maximum assurance use: python3 verify.py bundle.json --pubkey path/to/key.pem",
    "Download from: https://rankigi.com/.well-known/rankigi-public-key.json",
))


def print_banner(pinned_via_flag):
    """Print the report header: banner, disclaimer and, optionally, the key notice.

    Each section is followed by one blank line. The self-reported-key notice
    is included only when ``pinned_via_flag`` is falsy.
    """
    sections = [BANNER, DISCLAIMER]
    if not pinned_via_flag:
        sections.append(PUBKEY_NOTICE)
    for section in sections:
        print(section)
        print()


# ── main ──────────────────────────────────────────────────────────────────

def verify_closure_envelope(doc, check_rekor=False, pinned_pem=None, pinned_key=None, force_ccap=False, force_xact=False):
    """Verify a closure envelope (a bundle that carries a rankigi_schema key).

    Checks run in this order and stop at the first failure:
      1. Passport pin (only with pinned_pem): the fingerprint of the pinned
         PEM must equal the envelope's passport_key_fingerprint.
      2. Schema gate: rankigi_schema, when declared, must be 1.4 or 1.5.
      3. Structural gate: every entry of events[] must be a JSON object.
      4. content_hash self-seal: recompute SHA-256 of canonical JSON of the
         envelope minus content_hash (and minus closure_export_id) and
         confirm it matches.
      5. events[] and event_hashes[] are lists of equal length.
      6. v4 guards (only when the envelope declares
         passport_key_fingerprint): every event must claim hash_version >= 4
         (missing or unparseable counts as 3), and every chain_closure event
         must carry payload.closure_binding == "server_attested".
      7. Per-event hash recomputation through the shared event_hash()
         dispatch; each result must equal both the event's recorded hash
         and event_hashes[i].
      8. chain_verified must not be false.
      9. Nonce uniqueness, passport binding cross-check, Ed25519 signatures.

    After that the external anchor, TSA, authorization, citation,
    counterparty receipt, crossing, XACT and dossier sections are reported.

    Per-event records carry their hash under 'hash'; it is copied to
    'event_hash' before being handed to event_hash().

    Args:
        doc: Parsed envelope (the caller passes a dict).
        check_rekor: When true and the envelope has both sigstore_log_index
            and anchor_payload_hash, run verify_rekor_binding().
        pinned_pem: PEM pinned out-of-band, or None.
        pinned_key: Key object forwarded to verify_event_signatures().
        force_ccap: Only changes the wording of the "no receipts" line.
        force_xact: Run verify_xact_commitments() even when the envelope has
            no xact_commitments key.

    Returns:
        0 when everything verified, 1 on a reported failure. Some hard-fail
        paths (count mismatch, v4 guards, signature failure) terminate via
        sys.exit(1) instead of returning.
    """
    envelope_fp = doc.get("passport_key_fingerprint") if isinstance(doc, dict) else None
    if pinned_pem is not None:
        pinned_fp = fingerprint_pem(pinned_pem)
        if not envelope_fp or pinned_fp.lower() != str(envelope_fp).lower():
            print("PASSPORT PIN MISMATCH")
            print("Pinned key fingerprint:   %s..." % pinned_fp[:16])
            print("Envelope key fingerprint: %s..." % (str(envelope_fp or "")[:16]))
            return 1

    schema_version = doc.get("rankigi_schema")
    if schema_version is not None and str(schema_version) not in ("1.4", "1.5"):
        print("SCHEMA VERSION UNSUPPORTED")
        print("Bundle declares rankigi_schema=%s; this verifier supports 1.4 and 1.5." % schema_version)
        return 1

    print("--- Hash Chain ---")
    expected_content_hash = doc.get("content_hash")
    if not isinstance(expected_content_hash, str):
        print("Hash chain integrity: COMPROMISED")
        print("Missing content_hash on envelope")
        return 1

    # Structural gate. The v4 guards below inspect events as dicts while the
    # hashing loop builds dict(ev); an event encoded as a list of [key, value]
    # pairs would be skipped by the former and accepted by the latter. Reject
    # anything that is not a JSON object before any check relies on it.
    events = doc.get("events")
    if isinstance(events, list):
        for n, ev in enumerate(events):
            if not isinstance(ev, dict):
                print("Hash chain integrity: COMPROMISED")
                print("Event %d is not a JSON object" % n)
                return 1

    envelope_without_seal = {k: v for k, v in doc.items() if k != "content_hash"}
    # closure_export_id is appended to the response body but is not part of
    # the signed envelope; strip it when present so verification matches
    # what the server actually hashed.
    envelope_without_seal.pop("closure_export_id", None)
    computed_content_hash = hashlib.sha256(
        _canonical(envelope_without_seal).encode("utf-8")
    ).hexdigest()
    if computed_content_hash != expected_content_hash:
        print("Hash chain integrity: COMPROMISED")
        print("content_hash mismatch")
        print("Expected: %s..." % expected_content_hash[:16])
        print("Computed: %s..." % computed_content_hash[:16])
        return 1

    event_hashes = doc.get("event_hashes")
    if not isinstance(events, list) or not isinstance(event_hashes, list):
        print("Hash chain integrity: COMPROMISED")
        print("Envelope missing events[] or event_hashes[]")
        return 1
    if len(event_hashes) != len(events):
        print("EVENT COUNT MISMATCH: envelope declares %d hashes but bundle contains %d events"
              % (len(event_hashes), len(events)))
        sys.exit(1)

    # FIX 2: Block v4-to-v3 downgrade. If the envelope declares a v4 passport
    # binding via passport_key_fingerprint, every event must claim v4 or
    # higher; otherwise the bundle is a downgrade attack.
    has_v4_binding = bool(envelope_fp)
    if has_v4_binding:
        for n, ev in enumerate(events):
            raw_v = ev.get("hash_version")
            try:
                v = int(raw_v) if raw_v is not None else 3
            except (TypeError, ValueError):
                v = 3
            if v < 4:
                print("VERSION DOWNGRADE DETECTED: envelope claims v4 binding but event %d is v%d"
                      % (n, v))
                sys.exit(1)

    # FIX B (defense in depth): For v4 bundles, any chain_closure event must
    # carry payload.closure_binding == "server_attested". Server-attested is
    # the only sanctioned way a v4 closure can lack an Ed25519 signature; any
    # other closure_binding value (or a missing label) on a v4 chain is a
    # forged or downgraded closure attempt and must hard-fail. Non-v4 bundles
    # are unaffected.
    if has_v4_binding:
        for n, ev in enumerate(events):
            if ev.get("action") != "chain_closure":
                continue
            payload = ev.get("payload")
            cb = payload.get("closure_binding") if isinstance(payload, dict) else None
            if cb != "server_attested":
                print("CLOSURE BINDING INVALID: v4 bundle chain_closure at event %d has closure_binding=%r (expected 'server_attested')"
                      % (n, cb))
                sys.exit(1)

    computed_hashes = []
    for i, ev in enumerate(events):
        # Closure records carry the per-event hash under 'hash'; map it to
        # 'event_hash' so the shared event_hash() dispatch can consume it.
        normalized = dict(ev)
        if "event_hash" not in normalized and "hash" in normalized:
            normalized["event_hash"] = normalized["hash"]
        computed = event_hash(normalized)
        recorded = normalized.get("event_hash", "")
        if computed != recorded:
            print("Hash chain integrity: COMPROMISED")
            print("First broken event at index %d (chain_index %s)"
                  % (i, normalized.get("chain_index")))
            # str(): a null or non-string recorded hash must still be
            # reportable instead of raising on the slice.
            print("Expected hash: %s..." % str(recorded)[:16])
            print("Computed hash: %s..." % computed[:16])
            return 1
        if computed != event_hashes[i]:
            print("Hash chain integrity: COMPROMISED")
            print("event_hashes[%d] does not match recomputed hash" % i)
            print("Expected: %s..." % str(event_hashes[i])[:16])
            print("Computed: %s..." % computed[:16])
            return 1
        computed_hashes.append(computed)

    if doc.get("chain_verified") is False:
        print("Hash chain integrity: COMPROMISED")
        print("Reason: chain_verified is false in bundle")
        return 1

    print("Hash chain integrity: VERIFIED")
    print("Schema: %s" % doc.get("rankigi_schema"))
    print("Events: %d" % len(computed_hashes))
    print("Content hash: %s" % expected_content_hash)

    # In-bundle nonce uniqueness (replay defense)
    nonce_ok, nonce_reason = check_nonce_uniqueness(events)
    if not nonce_ok:
        print()
        print("DUPLICATE NONCE DETECTED")
        print(nonce_reason)
        return 1

    print()
    print("--- Passport Binding ---")
    report_v4_binding(events)
    binding_result = verify_passport_binding(doc, events)
    if binding_result == "mismatch":
        print("Hash chain integrity: COMPROMISED")
        print("Passport binding cross-check failed.")
        return 1
    pinned_via = "--pubkey" if pinned_pem is not None else "envelope (self-reported)"
    print("Pinned via: %s" % pinned_via)

    print()
    print("--- Signature Verification ---")
    sig_status, sig_count, sig_failed_idx, signed_total, server_attested_total = verify_event_signatures(
        doc, events, pinned_key=pinned_key
    )
    total_events = len(events)
    if sig_status == "passed":
        print("Ed25519 signatures: VERIFIED")
        unsigned = total_events - signed_total
        if unsigned > 0:
            print("Verified: %d of %d events (others lack signature field)" % (sig_count, total_events))
        else:
            print("Verified: %d of %d events" % (sig_count, total_events))
    elif sig_status == "no_signatures":
        if has_v4_binding:
            if server_attested_total > 0:
                print("Ed25519 signatures: NOT REQUIRED")
                print("Note: every event lacking a signature is a server-attested chain_closure.")
            else:
                print("ERROR: Bundle envelope claims v4 passport binding but no events carry signatures. Hard fail.")
                sys.exit(1)
        else:
            print("Ed25519 signatures: SKIPPED")
            print("Reason: no events carry signature/nonce fields (legacy or v3 bundle)")
    elif sig_status == "skipped":
        print("Ed25519 signatures: SKIPPED")
        print("Reason: cryptography library not installed (pip install cryptography)")
    elif sig_status == "no_key":
        print("Ed25519 signatures: SKIPPED")
        print("Reason: no public key available (envelope omits passport_public_key_pem and no --pubkey provided)")
    elif sig_status == "failed":
        print("Ed25519 signatures: FAILED")
        print("SIGNATURE INVALID at event %d" % sig_failed_idx)
        print()
        print("=== RESULT: COMPROMISED ===")
        sys.exit(1)

    print()
    print("--- External Anchor ---")
    rekor = doc.get("sigstore_log_index")
    anchor_printed = False
    if rekor is not None:
        if check_rekor:
            # Compare against anchor_payload_hash if present (Day 0 prep fix).
            # The Rekor entry stores sha256({org_id, previous_sealed_head,
            # snapshot_date, snapshot_hash}), not last_event_hash. Bundles
            # exported before Day 0 prep have no anchor_payload_hash and
            # cannot be compared safely; SKIP rather than print MISMATCH.
            expected = doc.get("anchor_payload_hash")
            if expected:
                verify_rekor_binding(rekor, expected)
            else:
                print("Rekor binding: SKIPPED (pre-Day-0 bundle has no anchor_payload_hash)")
        else:
            print("Rekor logIndex: %s (PENDING --rekor flag)" % rekor)
        anchor_printed = True
    elif check_rekor:
        print("Rekor logIndex: NOT YET ANCHORED (daily snapshot pending)")
        anchor_printed = True
    rfc = doc.get("rfc3161_timestamp")
    if isinstance(rfc, dict) and rfc.get("tsa_timestamp"):
        print("TSA verification: UNVERIFIED (token present but not cryptographically verified offline)")
        anchor_printed = True
    if not anchor_printed:
        print("Rekor logIndex: SKIPPED (none in bundle)")
        print("TSA timestamp: SKIPPED (none in bundle)")

    print()
    print("--- RFC 3161 TSA ---")
    verify_tsa_tokens(doc)

    print()
    print("--- Authorizations ---")
    report_authorizations(doc)

    print()
    print("--- Citations ---")
    report_citations(doc)

    print()
    print("--- Counterparty Receipts ---")
    receipts = doc.get("counterparty_receipts")
    if isinstance(receipts, list) and receipts:
        verify_ccap_receipts(doc)
    elif force_ccap:
        print("Counterparty receipts: NONE (--ccap requested)")
    else:
        print("Counterparty receipts: NONE")

    print()
    print("--- Crossings (world receipts) ---")
    verify_crossings(doc)

    print()
    print("--- XACT Commitments ---")
    if "xact_commitments" in doc or force_xact:
        verify_xact_commitments(doc)
    else:
        print("XACT commitments: NONE")

    print()
    print("--- Action Dossier ---")
    if doc.get("dossier_version"):
        report_action_dossier(doc)
    else:
        print("Action Dossier: NOT AVAILABLE")

    print()
    print("=== RESULT: VERIFIED ===")
    return 0


def main(path, check_rekor=False, pubkey_path=None, force_ccap=False, force_xact=False):
    """Verify the bundle at *path* and return a process exit code.

    A document carrying a rankigi_schema key is handed to
    verify_closure_envelope() (mode B). Anything else is treated as an
    events-only bundle (mode A): either a bare list of events or a dict with
    an "events" list. In mode A the events are grouped by chain scope,
    ordered by chain_index, and checked for index monotonicity, prev_hash
    continuity and recomputed hash equality, followed by nonce uniqueness,
    signature verification and the optional Rekor cross-check.

    Args:
        path: Path to the JSON bundle.
        check_rekor: Cross-check against Rekor when the bundle has a log index.
        pubkey_path: Optional path to a pinned passport public key PEM.
        force_ccap: Forwarded to verify_closure_envelope().
        force_xact: Forwarded to verify_closure_envelope().

    Returns:
        0 when verification succeeded, 1 on a reported failure. Signature
        hard-fail paths terminate via sys.exit(1) instead of returning.
    """
    # JSON interchange text is UTF-8; relying on the locale default would
    # mis-decode non-ASCII payloads on non-UTF-8 systems and make intact
    # chains look tampered.
    with open(path, encoding="utf-8") as f:
        doc = json.load(f)

    pinned_pem = None
    pinned_key = None
    if pubkey_path is not None:
        try:
            pinned_pem, pinned_key = load_pinned_pubkey(pubkey_path)
        except Exception as e:
            print("PASSPORT PIN LOAD FAILED")
            print("Path: %s" % pubkey_path)
            print("Reason: %s" % e)
            return 1

    print_banner(pinned_via_flag=pubkey_path is not None)

    # Closure envelope (mode B) is identified by the rankigi_schema key.
    # Anything else falls through to the events-only mode (mode A).
    if isinstance(doc, dict) and "rankigi_schema" in doc:
        return verify_closure_envelope(
            doc, check_rekor=check_rekor, pinned_pem=pinned_pem, pinned_key=pinned_key,
            force_ccap=force_ccap, force_xact=force_xact,
        )

    events = doc.get("events", doc) if isinstance(doc, dict) else doc
    print("--- Hash Chain ---")
    # A dict without "events", a scalar, or a list holding non-objects cannot
    # be walked as a chain; report it instead of failing on ev.get().
    if not isinstance(events, list) or not all(isinstance(ev, dict) for ev in events):
        print("Hash chain integrity: COMPROMISED")
        print("Bundle does not contain a list of event objects")
        return 1
    print("Verifying %d events..." % len(events))

    def hash_version_of(ev):
        # Missing or unparseable hash_version counts as 3, matching the
        # fallback used for closure envelopes.
        raw = ev.get("hash_version")
        try:
            return int(raw) if raw is not None else 3
        except (TypeError, ValueError):
            return 3

    # Group by chain scope. v2+ events with chain_id chain per (agent_id,
    # chain_id). Everything else chains per agent_id.
    def scope_key(ev):
        if hash_version_of(ev) >= 2 and ev.get("chain_id"):
            return (ev.get("agent_id", ""), ev.get("chain_id"))
        return (ev.get("agent_id", ""), None)

    def scope_order(scope):
        # Scopes mix None with strings (unscoped vs chain-scoped events of
        # the same agent). Python 3 refuses to order None against str, so
        # map every part to a string before comparing.
        return tuple("" if part is None else str(part) for part in scope)

    events_sorted = sorted(
        events,
        key=lambda e: (scope_order(scope_key(e)), e.get("chain_index", 0) or 0),
    )

    ZERO = "0" * 64
    last_by_scope = {}
    versions_seen = set()
    for e in events_sorted:
        version = hash_version_of(e)
        versions_seen.add(version)
        key = scope_key(e)
        idx = e.get("chain_index", 0) or 0
        recorded = e.get("hash", e.get("event_hash", ""))
        prev_hash, prev_index = last_by_scope.get(key, (None, -1))
        # The first event of a scope must link to the all-zero hash.
        expected_prev = ZERO if prev_hash is None else prev_hash
        if idx <= prev_index:
            print("Hash chain integrity: COMPROMISED")
            print("First broken link at chain_index: %s (hash_version %s)" % (idx, version))
            print("Index not monotonic for scope %s (previous %s)" % (key, prev_index))
            return 1
        if e.get("prev_hash", "") != expected_prev:
            print("Hash chain integrity: COMPROMISED")
            print("First broken link at chain_index: %s (scope %s, hash_version %s)" % (idx, key, version))
            print("Expected prev_hash: %s..." % expected_prev[:16])
            # str(): a null or non-string value must still be reportable.
            print("Found prev_hash:    %s..." % str(e.get("prev_hash", ""))[:16])
            return 1
        computed = event_hash(e)
        if computed != recorded:
            print("Hash chain integrity: COMPROMISED")
            print("First broken link at chain_index: %s (scope %s, hash_version %s)" % (idx, key, version))
            print("Expected hash: %s..." % str(recorded)[:16])
            print("Computed hash: %s..." % computed[:16])
            return 1
        last_by_scope[key] = (recorded, idx)

    print("Hash chain integrity: VERIFIED")
    print("Events: %d" % len(events))
    print("Hash versions seen: %s" % sorted(versions_seen))
    for k in sorted(last_by_scope, key=scope_order):
        h, _ = last_by_scope[k]
        print("Head[%s]: %s..." % (k, h[:16]))

    # In-bundle nonce uniqueness (replay defense)
    nonce_ok, nonce_reason = check_nonce_uniqueness(events)
    if not nonce_ok:
        print()
        print("DUPLICATE NONCE DETECTED")
        print(nonce_reason)
        return 1

    print()
    print("--- Passport Binding ---")
    report_v4_binding(events)
    pinned_via = "--pubkey" if pinned_pem is not None else "envelope (self-reported)"
    print("Pinned via: %s" % pinned_via)

    print()
    print("--- Signature Verification ---")
    envelope_for_sig = doc if isinstance(doc, dict) else {}
    sig_status, sig_count, sig_failed_idx, signed_total, server_attested_total = verify_event_signatures(
        envelope_for_sig, events, pinned_key=pinned_key
    )
    total_events = len(events)
    if sig_status == "passed":
        unsigned = total_events - signed_total
        print("Ed25519 signatures: VERIFIED")
        if unsigned > 0:
            print("Verified: %d of %d events (others lack signature field)" % (sig_count, total_events))
        else:
            print("Verified: %d of %d events" % (sig_count, total_events))
    elif sig_status == "no_signatures":
        if isinstance(doc, dict) and doc.get("passport_key_fingerprint"):
            if server_attested_total > 0:
                print("Ed25519 signatures: NOT REQUIRED")
                print("Note: every event lacking a signature is a server-attested chain_closure.")
            else:
                print("ERROR: Bundle envelope claims v4 passport binding but no events carry signatures. Hard fail.")
                sys.exit(1)
        else:
            print("Ed25519 signatures: SKIPPED")
            print("Reason: no events carry signature/nonce fields (legacy or v3 bundle)")
    elif sig_status == "skipped":
        print("Ed25519 signatures: SKIPPED")
        print("Reason: cryptography library not installed (pip install cryptography)")
    elif sig_status == "no_key":
        print("Ed25519 signatures: SKIPPED")
        print("Reason: no public key available (no envelope key and no --pubkey provided)")
    elif sig_status == "failed":
        print("Ed25519 signatures: FAILED")
        print("SIGNATURE INVALID at event %d" % sig_failed_idx)
        print()
        print("=== RESULT: COMPROMISED ===")
        sys.exit(1)

    print()
    print("--- External Anchor ---")
    # NOTE(review): closure envelopes read "sigstore_log_index" while this
    # mode reads "rekor_log_index" -- confirm the events-only export really
    # uses a different key.
    rekor = doc.get("rekor_log_index") if isinstance(doc, dict) else None
    if rekor is not None:
        if check_rekor:
            head_hash = ""
            if last_by_scope:
                # The head compared against Rekor is the last scope in
                # scope order, the same ordering used for the Head[] lines.
                last_key = max(last_by_scope, key=scope_order)
                head_hash, _ = last_by_scope[last_key]
            verify_rekor_binding(rekor, head_hash)
        else:
            print("Rekor logIndex: %s (PENDING --rekor flag)" % rekor)
    else:
        print("Rekor logIndex: SKIPPED (none in bundle)")
    print("TSA timestamp: SKIPPED (events-only mode)")

    print()
    print("=== RESULT: VERIFIED ===")
    return 0


if __name__ == "__main__":
    import argparse

    # Command-line surface, declared in the order it appears in --help.
    cli_arguments = (
        ("bundle", {
            "help": "Path to chain.json or closure bundle.",
        }),
        ("--rekor", {
            "action": "store_true",
            "help": "Cross-check the chain head against the Rekor transparency log.",
        }),
        ("--pubkey", {
            "help": "Path to expected passport public key PEM. Pins trust out-of-band so the verifier does not have to trust the key shipped inside the bundle.",
        }),
        ("--ccap", {
            "action": "store_true",
            "help": "Force CCAP (Counterparty Closure Attestation Protocol) receipt reporting even when no receipts are present in the bundle.",
        }),
        ("--xact", {
            "action": "store_true",
            "help": "Force XACT bilateral commitment reporting even when no xact_commitments are present in the bundle.",
        }),
    )

    cli = argparse.ArgumentParser(
        prog="verify.py",
        description=(
            "RANKIGI chain verifier. Checks hash chain integrity, "
            "passport fingerprint binding, and Ed25519 signatures."
        ),
    )
    for flag, options in cli_arguments:
        cli.add_argument(flag, **options)
    args = cli.parse_args()

    # main() returns the exit code for ordinary outcomes; propagate it.
    exit_code = main(
        args.bundle,
        check_rekor=args.rekor,
        pubkey_path=args.pubkey,
        force_ccap=args.ccap,
        force_xact=args.xact,
    )
    sys.exit(exit_code)
