{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://llm-token-heatmap.venuiti.com/schemas/attention-sidecar.schema.json",
  "title": "LLM Token Heatmap Attention Sidecar",
  "description": "Schema describing the structure of a Tier 2 attention sidecar payload referenced by a trace step's `attention_sidecar_ref`. The sidecar is typically stored as an `.npz` archive (numpy.savez); the keys and array shapes listed below mirror that archive. The corresponding `read_sidecar` helper materializes it as a dict matching this schema. One sidecar file == one (trace, step) pair.",
  "type": "object",
  "required": [
    "schema_version",
    "step",
    "num_attention_heads",
    "num_key_value_heads",
    "head_dim",
    "layers"
  ],
  "additionalProperties": false,
  "properties": {
    "schema_version": {
      "type": "string",
      "description": "Semver of this sidecar schema. Bumps independently of the main trace schema.",
      "pattern": "^[0-9]+\\.[0-9]+\\.[0-9]+$",
      "examples": ["2.0.0"]
    },
    "step": {
      "type": "integer",
      "minimum": 0,
      "description": "Zero-indexed generation step the sidecar belongs to."
    },
    "num_attention_heads": {
      "type": "integer",
      "minimum": 1
    },
    "num_key_value_heads": {
      "type": "integer",
      "minimum": 1
    },
    "head_dim": {
      "type": "integer",
      "minimum": 1
    },
    "layers": {
      "type": "array",
      "description": "Per-captured-layer payloads. Order matches the `attention_metadata.captured_layers` array on the parent trace.",
      "items": { "$ref": "#/$defs/SidecarLayer" }
    }
  },
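  "$comment": "The instance below is illustrative only: a hypothetical minimal sidecar (2 attention heads, 1 key/value head via grouped-query attention, head_dim 2, one captured layer) as `read_sidecar` would materialize it. Real payloads carry full-size arrays.",
  "examples": [
    {
      "schema_version": "2.0.0",
      "step": 0,
      "num_attention_heads": 2,
      "num_key_value_heads": 1,
      "head_dim": 2,
      "layers": [
        {
          "layer": 0,
          "attention_weights": [
            [0.75, 0.25],
            [0.5, 0.5]
          ],
          "q_last": [
            [0.12, -0.3],
            [0.7, 0.05]
          ],
          "k_last": [
            [0.9, -0.1]
          ],
          "v_last": [
            [0.2, 0.4]
          ]
        }
      ]
    }
  ],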
  "$defs": {
    "SidecarLayer": {
      "type": "object",
      "required": ["layer", "attention_weights"],
      "additionalProperties": false,
      "properties": {
        "layer": {
          "type": "integer",
          "minimum": 0,
          "description": "Zero-indexed decoder layer index."
        },
        "attention_weights": {
          "type": "array",
          "description": "Full attention distribution for this layer: shape [num_attention_heads, key_sequence_length]; each row sums to 1.",
          "items": {
            "type": "array",
            "items": {
              "type": "number",
              "minimum": 0,
              "maximum": 1
            }
          }
        },
        "q_last": {
          "type": ["array", "null"],
          "description": "Q vector for the current (last) position: shape [num_attention_heads, head_dim]. Null when capture_qkv=false.",
          "items": {
            "type": "array",
            "items": { "type": "number" }
          }
        },
        "k_last": {
          "type": ["array", "null"],
          "description": "K vector for the current (last) position: shape [num_key_value_heads, head_dim]. Null when capture_qkv=false.",
          "items": {
            "type": "array",
            "items": { "type": "number" }
          }
        },
        "v_last": {
          "type": ["array", "null"],
          "description": "V vector for the current (last) position: shape [num_key_value_heads, head_dim]. Null when capture_qkv=false.",
          "items": {
            "type": "array",
            "items": { "type": "number" }
          }
        }
      }
    }
  }
}
