{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://llm-token-heatmap.venuiti.com/schemas/activation.schema.json",
  "title": "LLM Token Heatmap Activation Trace",
  "description": "Companion to `trace.schema.json` describing the activation summary captured by an `ActivationProbe` for a single generation. Carries per-step `activations: ActivationLayerEntry[]` plus a top-level `activation_metadata` block, and a cross-tokenizer alignment field (`decoded_text_offset`) on every step so two traces produced by different tokenizers can still be aligned position-for-position. Consumed by `compare_activations` and the Activations tab UI. One file == one prompt == one generation.",
  "type": "object",
  "required": ["schema_version", "activation_metadata", "steps"],
  "additionalProperties": false,
  "properties": {
    "schema_version": {
      "type": "string",
      "description": "Semver of this schema. Bumped independently of the main trace schema. Consumers must hard-fail on unknown major. 1.1.0 adds the optional per-step `activation_sidecar_ref` field as a backwards-compatible additive change for the Tier 2 sidecar.",
      "pattern": "^[0-9]+\\.[0-9]+\\.[0-9]+$",
      "examples": ["1.1.0"]
    },
    "activation_metadata": {
      "$ref": "#/$defs/ActivationMetadata",
      "description": "Architecture and tokenizer metadata describing the activation captures referenced by per-step `activations` blocks. Required whenever any step carries an `activations` array."
    },
    "steps": {
      "type": "array",
      "description": "Per-generation-step activation records. `steps[i].step` MUST equal `i`. Iteration ends early on EOS, so length matches the parent trace's `steps` length.",
      "items": { "$ref": "#/$defs/ActivationStep" }
    }
  },
  "$defs": {
    "ActivationMetadata": {
      "type": "object",
      "description": "Per-trace metadata describing the ActivationProbe captures. Required whenever any step carries an `activations` array.",
      "required": [
        "captured_submodules",
        "num_layers",
        "hidden_dim",
        "tokenizer_fingerprint"
      ],
      "additionalProperties": false,
      "properties": {
        "captured_submodules": {
          "type": "array",
          "description": "Submodule names captured by the probe, in the order their per-(layer, submodule) entries appear inside each step's `activations` array (e.g. `[\"resid_pre\", \"resid_post\", \"mlp.down_proj\", \"o_proj\"]`).",
          "minItems": 1,
          "items": { "type": "string" }
        },
        "num_layers": {
          "type": "integer",
          "minimum": 1,
          "description": "Total number of decoder layers in the model (regardless of how many were captured)."
        },
        "hidden_dim": {
          "type": "integer",
          "minimum": 1,
          "description": "Per-layer residual-stream / hidden dimension. Used by consumers to interpret `top_neurons.index` ranges."
        },
        "tokenizer_fingerprint": {
          "type": "string",
          "description": "Stable identifier for the tokenizer that produced this trace's token ids. Two traces with matching fingerprints can be aligned by `token_id`; mismatching fingerprints must fall back to position alignment via `decoded_text_offset`."
        },
        "captured_layers": {
          "type": "array",
          "description": "Zero-indexed decoder layer indices that the probe captured (ascending; duplicate-free). When omitted, consumers MAY assume all layers `[0, num_layers)` are captured.",
          "items": { "type": "integer", "minimum": 0 }
        }
      }
    },
    "ActivationStep": {
      "type": "object",
      "description": "Activation record for a single generation step. The `token_id` and `decoded_text_offset` fields carry the cross-tokenizer alignment information so this entry can be matched against the corresponding entry in another trace.",
      "required": ["step", "token_id", "decoded_text_offset", "activations"],
      "additionalProperties": false,
      "properties": {
        "step": {
          "type": "integer",
          "minimum": 0,
          "description": "Zero-indexed position in the generation loop. MUST equal the array index."
        },
        "token_id": {
          "type": "integer",
          "minimum": 0,
          "description": "Selected token id at this step. Used for `token_id` alignment when the two compared traces share a tokenizer (matching `tokenizer_fingerprint`)."
        },
        "decoded_text_offset": {
          "type": "integer",
          "minimum": 0,
          "description": "Character offset of this step's decoded token in the concatenated decoded text. Used for `position` alignment when the two compared traces use different tokenizers — alignment falls back to matching equal-or-overlapping character offsets in the rendered text."
        },
        "activations": {
          "type": "array",
          "description": "Per (layer, submodule) activation summary entries for this step. Ordering is layer-major then submodule-major following `activation_metadata.captured_submodules`.",
          "items": { "$ref": "#/$defs/ActivationLayerEntry" }
        },
        "activation_sidecar_ref": {
          "type": ["string", "null"],
          "description": "Relative path (or URL) of the Tier 2 `.npz` activation sidecar for this step, or `null` when no sidecar was written (the default; sidecars are opt-in via `--capture-full-activations`). The referenced file conforms to `activation-sidecar.schema.json`."
        }
      }
    },
    "ActivationLayerEntry": {
      "type": "object",
      "description": "Summary statistics for one (step, layer, submodule) activation tensor. Captured inline (Tier 1); the full tensor lives in an optional sidecar.",
      "required": [
        "layer",
        "submodule",
        "l2_norm",
        "mean_abs",
        "sparsity",
        "top_neurons"
      ],
      "additionalProperties": false,
      "properties": {
        "layer": {
          "type": "integer",
          "minimum": 0,
          "description": "Zero-indexed decoder layer index; when `activation_metadata.captured_layers` is present, MUST appear in it."
        },
        "submodule": {
          "type": "string",
          "description": "Submodule name (e.g. `resid_pre`, `mlp.down_proj`, `o_proj`). MUST appear in `activation_metadata.captured_submodules`."
        },
        "l2_norm": {
          "type": "number",
          "minimum": 0,
          "description": "L2 norm of the captured activation vector at this (step, layer, submodule)."
        },
        "mean_abs": {
          "type": "number",
          "minimum": 0,
          "description": "Mean absolute value across the `hidden_dim` neurons of the captured activation vector."
        },
        "sparsity": {
          "type": "number",
          "minimum": 0,
          "maximum": 1,
          "description": "Fraction of neurons whose absolute value lies below the probe's near-zero threshold."
        },
        "top_neurons": {
          "type": "array",
          "description": "Highest-magnitude neurons within this (step, layer, submodule), sorted by descending `|value|`.",
          "items": { "$ref": "#/$defs/TopNeuron" }
        }
      }
    },
    "TopNeuron": {
      "type": "object",
      "required": ["index", "value"],
      "additionalProperties": false,
      "properties": {
        "index": {
          "type": "integer",
          "minimum": 0,
          "description": "Zero-indexed neuron position within the layer's hidden dimension. MUST be `< activation_metadata.hidden_dim`."
        },
        "value": {
          "type": "number",
          "description": "Signed activation value at this neuron index (not absolute)."
        }
      }
    }
  }
}
