{
  "version": "v2.0",
  "date": "2026-06-25",
  "schema_url": "/methodology.v2.schema.json",
  "license": "CC-BY-4.0",
  "formula": "agi_score = round((autonomy + multimodality + production) / 15 * 100)",
  "blend": {
    "curator_weight": 0.6,
    "community_weight": 0.4,
    "community_quorum": 5,
    "max_per_category": 5,
    "category_count": 3
  },
  "rubric": [
    {
      "key": "autonomy",
      "label_en": "Autonomy",
      "label_de": "Autonomie",
      "description_en": "Self-direction, planning, tool-use, self-correction, runtime hours without a human in the loop.",
      "description_de": "Self-direction, Planung, Tool-Use, Selbstkorrektur, Lauflängen ohne Mensch im Loop.",
      "levels": [
        {
          "level": 1,
          "label": "Scripted",
          "anchor_en": "Single-prompt chains, no planner, no tool selection at runtime.",
          "anchor_de": "Einzel-Prompt-Ketten, kein Planner, keine Tool-Wahl zur Laufzeit.",
          "evidence": [
            "repo example without planner",
            "no tool-router module"
          ]
        },
        {
          "level": 2,
          "label": "Reactive",
          "anchor_en": "ReAct-style tool calls, one-shot retries, no memory across runs.",
          "anchor_de": "ReAct-Tool-Calls, einmalige Retries, kein Cross-Run-Memory.",
          "evidence": [
            "tool-call loop in core",
            "retry policy in code"
          ]
        },
        {
          "level": 3,
          "label": "Planning",
          "anchor_en": "Explicit planner module, subgoal decomposition, bounded self-critique.",
          "anchor_de": "Expliziter Planner, Subziel-Zerlegung, begrenzte Selbstkritik.",
          "evidence": [
            "planner class/graph",
            "critique step in docs"
          ]
        },
        {
          "level": 4,
          "label": "Persistent",
          "anchor_en": "Multi-step persistent state, long-horizon memory, supervised self-correction.",
          "anchor_de": "Persistenter Zustand, Long-Horizon-Memory, überwachte Selbstkorrektur.",
          "evidence": [
            "persistent memory store",
            "documented horizon > 30 steps"
          ]
        },
        {
          "level": 5,
          "label": "Open-ended",
          "anchor_en": "Open-ended task pursuit, reproducible runs > 8h with no human input, published evals.",
          "anchor_de": "Open-Ended-Aufgaben, reproduzierbare Runs > 8h ohne menschlichen Input, publizierte Evals.",
          "evidence": [
            "published eval (e.g. GAIA/SWE-bench)",
            "public run trace > 8h"
          ]
        }
      ]
    },
    {
      "key": "multimodality",
      "label_en": "Multimodality",
      "label_de": "Multimodalität",
      "description_en": "Text · vision · voice · code · computer-use · world models — how natively the framework reaches in.",
      "description_de": "Text · Vision · Voice · Code · Computer-Use · World-Models — wie nativ das Framework zugreift.",
      "levels": [
        {
          "level": 1,
          "label": "Text-only",
          "anchor_en": "Text in / text out. No vision, audio, or code execution loops.",
          "anchor_de": "Text rein / Text raus. Keine Vision, kein Audio, keine Code-Execution-Loops.",
          "evidence": [
            "only text adapters in repo"
          ]
        },
        {
          "level": 2,
          "label": "Code-capable",
          "anchor_en": "Code execution sandbox or first-class code-interpreter integration.",
          "anchor_de": "Code-Execution-Sandbox oder first-class Code-Interpreter.",
          "evidence": [
            "code-exec module",
            "sandbox docs"
          ]
        },
        {
          "level": 3,
          "label": "Bi-modal",
          "anchor_en": "Native vision OR voice in addition to text+code.",
          "anchor_de": "Native Vision ODER Voice zusätzlich zu Text+Code.",
          "evidence": [
            "vision adapter",
            "voice adapter"
          ]
        },
        {
          "level": 4,
          "label": "Tri-modal",
          "anchor_en": "Native vision AND voice in addition to text+code, with documented examples.",
          "anchor_de": "Native Vision UND Voice zusätzlich zu Text+Code, mit dokumentierten Beispielen.",
          "evidence": [
            "multi-modal example",
            "released cookbook"
          ]
        },
        {
          "level": 5,
          "label": "Computer-use",
          "anchor_en": "Computer-use, GUI control, or world-model integration shipped as a supported feature.",
          "anchor_de": "Computer-Use, GUI-Steuerung oder World-Model als unterstütztes Feature.",
          "evidence": [
            "computer-use module",
            "release notes shipping it"
          ]
        }
      ]
    },
    {
      "key": "production",
      "label_en": "Production",
      "label_de": "Production",
      "description_en": "Eval harnesses, observability, cost control, scale, deployment paths.",
      "description_de": "Eval-Harnesses, Observability, Cost-Control, Skalierung, Deployment-Pfade.",
      "levels": [
        {
          "level": 1,
          "label": "Demo",
          "anchor_en": "Notebook-grade. No tests, no observability, no deployment guide.",
          "anchor_de": "Notebook-Niveau. Keine Tests, keine Observability, kein Deployment-Guide.",
          "evidence": [
            "no CI",
            "no tracing"
          ]
        },
        {
          "level": 2,
          "label": "Tested",
          "anchor_en": "CI test suite, basic logging, manual deployment recipe.",
          "anchor_de": "CI-Tests, Basis-Logging, manuelles Deployment-Rezept.",
          "evidence": [
            "CI workflow",
            "logging module"
          ]
        },
        {
          "level": 3,
          "label": "Observable",
          "anchor_en": "Built-in tracing (OTel/LangSmith/W&B), cost telemetry, structured retries.",
          "anchor_de": "Eingebautes Tracing (OTel/LangSmith/W&B), Cost-Telemetrie, strukturierte Retries.",
          "evidence": [
            "tracing integration",
            "cost metering hooks"
          ]
        },
        {
          "level": 4,
          "label": "Scaled",
          "anchor_en": "Async/distributed runtime, queueing, horizontal scaling docs, SLO guidance.",
          "anchor_de": "Async/Distributed-Runtime, Queueing, Scale-Out-Docs, SLO-Guidance.",
          "evidence": [
            "distributed runner",
            "scale-out guide"
          ]
        },
        {
          "level": 5,
          "label": "Hardened",
          "anchor_en": "Reference deployment (k8s/serverless), eval harness gating releases, SOC2-ready logging.",
          "anchor_de": "Referenz-Deployment (k8s/serverless), Eval-Harness als Release-Gate, SOC2-fähiges Logging.",
          "evidence": [
            "reference k8s/serverless template",
            "eval gating release notes"
          ]
        }
      ]
    }
  ],
  "changelog": [
    {
      "version": "v2.0",
      "date": "2026-06-25",
      "summary_en": "Added a 5-level anchor rubric per category, evidence requirements, a confidence score, and a JSON Schema for the framework record.",
      "summary_de": "5-stufige Anker-Rubrik pro Kategorie, Evidenz-Anforderungen, Confidence-Score und JSON-Schema für den Framework-Record ergänzt."
    },
    {
      "version": "v1.0",
      "date": "2026-04-01",
      "summary_en": "Initial release: three categories (autonomy, multimodality, production), 60/40 curator/community blend, 1–5 scale.",
      "summary_de": "Erstveröffentlichung: Drei Kategorien (Autonomie, Multimodalität, Production), 60/40 Kurator/Community, 1–5-Skala."
    }
  ]
}