spec: update Spec 049 artifacts

2026-01-11 16:54:57 +01:00 · 2026-01-11 16:54:57 +01:00 · 66d8d90c30
commit 66d8d90c30
parent 6aac45ce2e
7 changed files with 697 additions and 17 deletions
--- a/specs/049-backup-restore-job-orchestration/contracts/admin-runs.openapi.yaml
+++ b/specs/049-backup-restore-job-orchestration/contracts/admin-runs.openapi.yaml
@ -0,0 +1,169 @@
 # OpenAPI 3.0.3 contract describing the Spec 049 admin run endpoints.
 openapi: 3.0.3
 info:
  title: TenantPilot Admin Run Orchestration (049)
  version: 0.1.0
  description: |
    Internal admin contracts for starting long-running backup/restore operations
    and reading run status/progress. These endpoints are tenant-scoped.
 servers:
  # Single server entry; the paths in this document are resolved against it.
  - url: /admin
 paths:
  # Start endpoint: POST creates (or reuses) a run of {runType} for one tenant.
  /t/{tenantExternalId}/runs/{runType}:
    post:
      operationId: startRun
      summary: Start a background run
      description: |
        Starts an operation by creating (or reusing) a Run Record and enqueueing
        background work. Must return quickly.
      parameters:
        # Tenant identifier taken from the URL path.
        - in: path
          name: tenantExternalId
          required: true
          schema:
            type: string
        # Kind of operation to start; limited to the values enumerated below.
        - in: path
          name: runType
          required: true
          schema:
            type: string
            enum:
              - backup_set_add_policies
              - restore_execute
              - restore_preview
              - snapshot_capture
      # The body is optional; when present it must match RunStartRequest.
      requestBody:
        required: false
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RunStartRequest'
      responses:
        # 201: a new run was created and queued.
        '201':
          description: Run created and queued
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RunStartResponse'
        # 200: an already-active run was returned instead of creating a new one.
        '200':
          description: Existing active run reused
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RunStartResponse'
        # 403: declared without a response body.
        '403':
          description: Forbidden
  # Read endpoint: GET returns one run record addressed by tenant, run type and run id.
  /t/{tenantExternalId}/runs/{runType}/{runId}:
    get:
      operationId: getRun
      summary: Get run status and progress
      parameters:
        # Tenant identifier taken from the URL path.
        - in: path
          name: tenantExternalId
          required: true
          schema:
            type: string
        # NOTE(review): unlike startRun, runType is a free-form string here
        # (no enum) — confirm whether that is intentional.
        - in: path
          name: runType
          required: true
          schema:
            type: string
        # Identifier of the run to read; typed as a plain string.
        - in: path
          name: runId
          required: true
          schema:
            type: string
      # NOTE(review): no 403 response is declared here although startRun
      # declares one — confirm how cross-tenant access is reported.
      responses:
        '200':
          description: Run record
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RunRecord'
        # 404: declared without a response body.
        '404':
          description: Not found
 components:
  schemas:
    # Optional request body for startRun. No property is required, every
    # property is nullable, and unknown properties are rejected.
    RunStartRequest:
      type: object
      additionalProperties: false
      properties:
        targetObjectId:
          type: string
          nullable: true
          description: Operation target used for de-duplication.
        payloadHash:
          type: string
          nullable: true
          description: Optional stable hash of relevant payload to strengthen idempotency.
        # List of string ids; the array itself may be null.
        itemIds:
          type: array
          items:
            type: string
          nullable: true
          description: Optional internal item ids to process.
    # Response envelope used by startRun for both its 201 and 200 responses.
    # Only `run` is required.
    RunStartResponse:
      type: object
      required: [run]
      properties:
        # Defaults to false when omitted; presumably true on the 200
        # "existing active run reused" response — confirm against the server.
        reused:
          type: boolean
          default: false
        run:
          $ref: '#/components/schemas/RunRecord'
    # Run status/progress record: returned by getRun and embedded in
    # RunStartResponse. Only id, tenantExternalId, type and status are required.
    RunRecord:
      type: object
      required:
        - id
        - tenantExternalId
        - type
        - status
      properties:
        id:
          type: string
        tenantExternalId:
          type: string
        # NOTE(review): free-form string; not tied to the runType enum used by
        # startRun — confirm which vocabulary is returned here.
        type:
          type: string
        # Lifecycle documented in data-model.md:
        # queued -> running -> succeeded | failed | partial.
        status:
          type: string
          enum: [queued, running, succeeded, failed, partial]
        # Optional (not in the required list) and, unlike the two timestamps
        # below, not nullable.
        createdAt:
          type: string
          format: date-time
        startedAt:
          type: string
          format: date-time
          nullable: true
        finishedAt:
          type: string
          format: date-time
          nullable: true
        # Item counters; each is a non-negative integer and no other keys are
        # allowed. None of the counters is required.
        counts:
          type: object
          additionalProperties: false
          properties:
            total:
              type: integer
              minimum: 0
            succeeded:
              type: integer
              minimum: 0
            failed:
              type: integer
              minimum: 0
        # Error summary object; may be null. Only `code` and `context` are allowed.
        safeError:
          type: object
          nullable: true
          additionalProperties: false
          properties:
            code:
              type: string
            # NOTE(review): context accepts arbitrary keys, while plan.md asks
            # for whitelisted context only — confirm the whitelist is enforced
            # server-side.
            context:
              type: object
              additionalProperties: true
--- a/specs/049-backup-restore-job-orchestration/data-model.md
+++ b/specs/049-backup-restore-job-orchestration/data-model.md
@ -0,0 +1,94 @@
 # Data Model: Backup/Restore Job Orchestration (049)
 This feature relies on existing “run record” models/tables and (optionally) extends them to meet the orchestration requirements.
 ## Entities
 ## 1) RestoreRun (`restore_runs`)
 **Purpose:** Run record for restore executions and dry-run/preview workflows.
 **Model:** `App\Models\RestoreRun`
 **Key fields (existing):**
 - `id` (PK)
 - `tenant_id` (FK → tenants)
 - `backup_set_id` (FK → backup_sets)
 - `requested_by` (string|null)
 - `is_dry_run` (bool)
 - `status` (string)
 - `requested_items` (json|null)
 - `preview` (json|null) — persisted preview output
 - `results` (json|null) — persisted execution output (may include per-item outcomes)
 - `failure_reason` (text|null)
 - `started_at` / `completed_at` (timestamp|null)
 - `metadata` (json|null)
 **Relationships:**
 - `RestoreRun belongsTo Tenant`
 - `RestoreRun belongsTo BackupSet`
 **State transitions (target):**
 - `queued → running → succeeded|failed|partial`
 **Validation constraints (creation/dispatch):**
 - tenant-scoped access required
 - `backup_set_id` must belong to tenant
 - preview/dry-run must not perform writes (constitution Read/Write Separation)
 ---
 ## 2) BulkOperationRun (`bulk_operation_runs`)
 **Purpose:** Run record for background operations that process many internal items, including backup-set capture-like actions.
 **Model:** `App\Models\BulkOperationRun`
 **Key fields (existing):**
 - `id` (PK)
 - `tenant_id` (FK → tenants)
 - `user_id` (FK → users)
 - `resource` (string) — e.g. `policy`, `backup_set`
 - `action` (string) — e.g. `export`, `add_policies`
 - `status` (string) — `pending`, `running`, `completed`, `completed_with_errors`, `failed`, `aborted`
 - `total_items`, `processed_items`, `succeeded`, `failed`, `skipped`
 - `item_ids` (jsonb)
 - `failures` (jsonb|null) — safe per-item error summaries
 - `audit_log_id` (FK → audit_logs|null)
 **Relationships:**
 - `BulkOperationRun belongsTo Tenant`
 - `BulkOperationRun belongsTo User`
 **Recommended additions (to satisfy FR-002/FR-004 cleanly):**
 - `idempotency_key` (string, indexed; uniqueness enforced for active statuses via partial index)
 - `started_at` / `finished_at` (timestampTz)
 - `error_code` (string|null)
 - `error_context` (jsonb|null)
 **State transitions (target):**
 - `queued → running → succeeded|failed|partial`
  - `pending` maps to `queued`
  - `completed` maps to `succeeded`
  - `completed_with_errors` maps to `partial`
  - `failed` and `aborted` map to `failed`
 ---
 ## 3) Notification Event (DB notifications)
 **Purpose:** Persist state transitions and completion notices for the initiating user.
 **Storage:** Laravel Notifications (DB channel).
 **Payload shape (target):**
 - `tenant_id`
 - `run_type` (restore_run / bulk_operation_run)
 - `run_id`
 - `status` (queued/running/succeeded/failed/partial)
 - `counts` (optional)
 - `safe_error_code` + `safe_error_context` (optional)
 ## Notes on “per-item outcomes” (FR-005)
 - For restore workflows, per-item outcomes can initially be stored in `restore_runs.results` as a structured JSON array/object keyed by internal item identifiers.
 - For bulk operations, per-item outcomes are already persisted as `bulk_operation_runs.failures` plus the counter columns.
 - If Phase 1 needs relational per-item tables for querying/filtering, introduce a dedicated “run item results” table per run type (Phase 2+ preferred).
--- a/specs/049-backup-restore-job-orchestration/plan.md
+++ b/specs/049-backup-restore-job-orchestration/plan.md
@ -0,0 +1,102 @@
 # Implementation Plan: Backup/Restore Job Orchestration (049)
 **Branch**: `feat/049-backup-restore-job-orchestration-session-1768091854` | **Date**: 2026-01-11 | **Spec**: [specs/049-backup-restore-job-orchestration/spec.md](./spec.md)
 **Input**: Feature specification from `specs/049-backup-restore-job-orchestration/spec.md`
 **Note**: This template is filled in by the `/speckit.plan` command. See `.specify/scripts/` for helper scripts.
 ## Summary
 Move all backup/restore “start/execute” actions off the interactive request path.
 - Interactive actions must only create (or reuse) a tenant-scoped Run Record and enqueue work.
 - Background jobs perform Graph calls, capture/restore work, and update run records with status + counts + safe error summaries.
 - Idempotency prevents double-click duplicates by reusing an active run for the same `(tenant + operation type + target)`.
 Design choices are captured in [specs/049-backup-restore-job-orchestration/research.md](./research.md).
 ## Phasing
 ### Phase 1 (this spec’s implementation target)
 - Ensure all in-scope operations are job-only (no heavy work inline).
 - Create/reuse run records with idempotency for active runs.
 - Provide **Run detail** views for progress (status + counts) and **DB notifications** for state transitions.
 ### Phase 2 (explicitly out-of-scope for Phase 1)
 - Add a **global progress widget** that surfaces all run types (not just bulk ops) across the admin UI.
 ## Technical Context
 **Language/Version**: PHP 8.4.15  
 **Primary Dependencies**: Laravel 12, Filament 4, Livewire 3  
 **Storage**: PostgreSQL (JSONB used for run payloads/summaries where appropriate)  
 **Testing**: Pest 4 (feature tests + job tests)  
 **Target Platform**: Containerized web app (Sail for local dev; Dokploy for staging/prod)
 **Project Type**: Web application (Laravel monolith)  
 **Performance Goals**: 95% of start actions confirm “queued” within 2 seconds (SC-001)  
 **Constraints**: No heavy work during interactive requests; jobs must be idempotent + observable; no secrets in run records  
 **Scale/Scope**: Multi-tenant MSP usage; long-running Graph operations; frequent retries/double-click scenarios
 ## Constitution Check
 *GATE: Must pass before Phase 0 research. Re-check after Phase 1 design.*
 - Inventory-first: orchestration is run-record centric; inventory stays “last observed”, backups remain explicit actions.
 - Read/write separation: preview/dry-run stays read-only; live restore remains behind explicit confirmation + audit + tests.
 - Graph contract path: all Graph calls remain behind `GraphClientInterface` and contract registry (`config/graph_contracts.php`).
 - Deterministic capabilities: no new capability derivation introduced by this feature (existing resolver remains authoritative).
 - Tenant isolation: all run visibility + execution is tenant-scoped; no cross-tenant run access.
 - Automation: enforce de-duplication for active runs; jobs use locks/backoff for 429/503 where applicable.
 - Data minimization: run records store only safe summaries (error codes + whitelisted context), never secrets/tokens.
 ## Project Structure
 ### Documentation (this feature)
 ```text
 specs/049-backup-restore-job-orchestration/
 ├── plan.md              # This file (/speckit.plan command output)
 ├── research.md          # Phase 0 output (/speckit.plan command)
 ├── data-model.md        # Phase 1 output (/speckit.plan command)
 ├── quickstart.md        # Phase 1 output (/speckit.plan command)
 ├── contracts/           # Phase 1 output (/speckit.plan command)
 └── tasks.md             # Phase 2 output (/speckit.tasks command - NOT created by /speckit.plan)
 ```
 ### Source Code (repository root)
 ```text
 app/
 ├── Filament/
 │   └── Resources/
 ├── Jobs/
 ├── Livewire/
 ├── Models/
 ├── Services/
 └── Support/
 database/
 └── migrations/
 resources/
 └── views/
 tests/
 ├── Feature/
 └── Unit/
 ```
 **Structure Decision**: Laravel monolith; orchestration implemented via queued jobs + run records in existing models/tables.
 ## Complexity Tracking
 > **Fill ONLY if Constitution Check has violations that must be justified**
 | Violation | Why Needed | Simpler Alternative Rejected Because |
 |-----------|------------|-------------------------------------|
 | None | — | — |
 No constitution violations are required for this feature.
--- a/specs/049-backup-restore-job-orchestration/quickstart.md
+++ b/specs/049-backup-restore-job-orchestration/quickstart.md
@ -0,0 +1,26 @@
 # Quickstart: Backup/Restore Job Orchestration (049)
 ## Goal
 Ensure backup/restore “start/execute” actions never run heavy work inline. They create (or reuse) a Run Record and queue the work.
 ## Local development
 - Bring Sail up: `./vendor/bin/sail up -d`
 - Run migrations: `./vendor/bin/sail artisan migrate`
 - Run a queue worker (separate terminal): `./vendor/bin/sail artisan queue:work`
 ## Testing
 Run the most relevant tests first:
 - Unit helpers: `./vendor/bin/sail artisan test tests/Unit/RunIdempotencyTest.php`
 - Snapshot capture orchestration: `./vendor/bin/sail artisan test --filter=PolicyCaptureSnapshot`
 - Restore orchestration: `./vendor/bin/sail artisan test --filter=RestoreRun`
 - Cross-tenant authorization: `./vendor/bin/sail artisan test --filter=RunAuthorization`
 ## Operational notes
 - Run records must be tenant-scoped and never contain secrets.
 - Preview/dry-run must remain read-only.
 - Use de-duplication for active runs to prevent double-click duplication.
--- a/specs/049-backup-restore-job-orchestration/research.md
+++ b/specs/049-backup-restore-job-orchestration/research.md
@ -0,0 +1,78 @@
 # Research: Backup/Restore Job Orchestration (049)
 This document resolves Phase 0 open questions and records design choices.
 ## Decisions
 ### 1) Run Record storage strategy
 **Decision:** Reuse existing run-record primitives instead of introducing a brand-new “unified run” subsystem in Phase 1.
 - Restore + re-run restore + dry-run/preview: use the existing `restore_runs` table / `App\Models\RestoreRun`.
 - Backup set capture-like operations (e.g., “add policies and capture”): reuse `bulk_operation_runs` / `App\Models\BulkOperationRun` (already used for long-running background work like bulk exports) and (if needed) extend it to satisfy FR-002 fields.
 **Rationale:**
 - The codebase already has multiple proven “run tables” (`restore_runs`, `inventory_sync_runs`, `backup_schedule_runs`, `bulk_operation_runs`).
 - Minimizes migration risk and avoids broad refactors.
 - Lets Phase 1 focus on eliminating inline heavy work while keeping UX consistent.
 **Alternatives considered:**
 - **Create a new generic `operation_runs` + `operation_run_items` data model** for all queued automation.
  - Rejected (Phase 1): higher migration + backfill cost; high coordination risk across many features.
 ### 2) Status lifecycle mapping
 **Decision:** Standardize at the *UI + plan* level on `queued → running → (succeeded | failed | partial)` while allowing underlying storage to keep its existing status vocabulary.
 - `BulkOperationRun.status` mapping: `pending→queued`, `running→running`, `completed→succeeded`, `completed_with_errors→partial`, `failed/aborted→failed`.
 - `RestoreRun.status` mapping will be aligned (e.g., `pending→queued`, `running→running`, etc.) as part of implementation.
 **Rationale:**
 - Keeps the spec’s lifecycle consistent without forcing an immediate cross-table refactor.
 **Alternatives considered:**
 - **Rename and normalize all run statuses across all run tables.**
  - Rejected (Phase 1): touches many workflows and tests.
 ### 3) Idempotency & de-duplication
 **Decision:** Enforce de-duplication for *active* runs via a deterministic key and a DB query gate, with an optional lock for race reduction.
 - Dedupe key format: `tenant_id + operation_type + target_object_id` (plus a stable hash of relevant payload if needed).
 - Behavior: if an identical run is `queued`/`running`, reuse it and return/link to it; allow a new run only after terminal.
 **Rationale:**
 - Matches the constitution (“Automation must be Idempotent & Observable”) and aligns with existing patterns (inventory selection hash + schedule locks).
 **Alternatives considered:**
 - **Cache-only locks** (`Cache::lock(...)`) without persisted keys.
  - Rejected: harder to reason about after restarts; less observable.
 ### 4) Restore preview must be asynchronous
 **Decision:** Move restore preview generation (“Generate preview” in the wizard) into a queued job which persists preview outputs to the run record.
 **Rationale:**
 - Preview can require Graph calls and normalization work; it should never block an interactive request.
 **Alternatives considered:**
 - **Keep preview synchronous** and increase timeouts.
  - Rejected: timeouts, poor UX, and violates FR-001.
 ### 5) Notifications for progress visibility
 **Decision:** Use DB notifications for state transitions (queued/running/terminal) and keep a Run detail view as the primary progress surface in Phase 1.
 **Rationale:**
 - Inventory sync + backup schedule runs already use this pattern.
 - Survives page reloads and doesn’t require the user to keep the page open.
 **Alternatives considered:**
 - **Frontend polling only** (no DB notifications).
  - Rejected: weaker UX and weaker observability.
 ## Clarifications resolved
 - **SC-003 includes “canceled”** while Phase 1 explicitly has “no cancel”.
  - Resolution for Phase 1 planning: treat “canceled” as out-of-scope (Phase 2+) and map “aborted” (if present) into the `failed` bucket for SC accounting.
--- a/specs/049-backup-restore-job-orchestration/spec.md
+++ b/specs/049-backup-restore-job-orchestration/spec.md
@ -42,7 +42,21 @@ ### User Story 1 - Capture snapshot runs in background (Priority: P1)
 ---
-### User Story 2 - Restore runs in background with per-item results (Priority: P1)
+### User Story 2 - Backup set create/capture runs in background (Priority: P2)
 An admin can create a backup set and optionally start a capture/sync operation without the request doing heavy work.
 **Why this priority**: Creating backup sets is frequent and should not be coupled to long-running capture logic.
 **Independent Test**: Creating a backup set returns quickly and any capture/sync work appears as a run with progress.
 **Acceptance Scenarios**:
 1. **Given** an admin creates a backup set with capture enabled, **When** they submit, **Then** the backup set is created and a capture run is queued.
 ---
 ### User Story 3 - Restore runs in background with per-item results (Priority: P1)
 An admin can start a “restore to Intune” or “re-run restore” operation as a background run and later inspect item-level outcomes and errors.
@ -54,20 +68,7 @@ ### User Story 2 - Restore runs in background with per-item results (Priority: P
 1. **Given** an admin starts a restore, **When** they confirm the action, **Then** the UI queues a run and returns immediately (no long-running request).
 2. **Given** a restore run finishes with mixed outcomes, **When** the admin views the run details, **Then** they see succeeded/failed counts and a safe error summary per failed item.
-
+3. **Given** an admin executes a live restore, **When** the run is queued/executed, **Then** an auditable event is recorded that links to the run.
 ---
 ### User Story 3 - Backup set create/capture runs in background (Priority: P2)
 An admin can create a backup set and optionally start a capture/sync operation without the request doing heavy work.
 **Why this priority**: Creating backup sets is frequent and should not be coupled to long-running capture logic.
 **Independent Test**: Creating a backup set returns quickly and any capture/sync work appears as a run with progress.
 **Acceptance Scenarios**:
 1. **Given** an admin creates a backup set with capture enabled, **When** they submit, **Then** the backup set is created and a capture run is queued.
 ---
@ -82,6 +83,7 @@ ### User Story 4 - Dry-run/preview runs in background (Priority: P2)
 **Acceptance Scenarios**:
 1. **Given** an admin starts a preview run, **When** the run completes, **Then** the UI shows preview results without requiring re-execution.
 2. **Given** an admin starts a preview/dry-run, **When** the run executes, **Then** no write/change is performed against the external system.
 ### Edge Cases
@ -135,7 +137,9 @@ ### Functional Requirements
 - **FR-007 Safety rules**: Preview/dry-run MUST be safe (no writes). Live restore MUST remain guarded with explicit confirmation and an auditable trail consistent with existing safety practices.
- **FR-008 Resilience**: The system MUST handle external service throttling/outages gracefully, including retries with backoff when appropriate, and MUST end runs in a clear terminal state (failed/partial) rather than silently failing.
+- **FR-008 Resilience (Post-MVP / Phase 2)**: The system MUST handle external service throttling/outages gracefully, including retries with backoff when appropriate, and MUST end runs in a clear terminal state (failed/partial) rather than silently failing.
  *Note*: MVP/Phase 1 relies on existing retry behavior where present; standardized backoff + jitter hardening is scheduled post-MVP.
 - **FR-009 Safe logging & data minimization**: The system MUST NOT store secrets/tokens in Run Records, notifications, or error contexts. Error context MUST be limited to a defined, safe set of fields.
@ -151,6 +155,9 @@ ### Acceptance Checks
 - Item-level outcomes and safe error summaries are viewable after completion.
 - Run counts reflect persisted internal item results.
 - Preview/dry-run never performs writes.
 - Unauthorized users cannot start runs for a tenant they do not belong to.
 - Users cannot list/view run records across tenants.
 - Live restore creates an auditable event linked to the run.
 ### Key Entities *(include if feature involves data)*
@ -164,9 +171,11 @@ ### Measurable Outcomes
 - **SC-001**: For 95% of operation starts, the UI confirms “queued” within 2 seconds.
 - **SC-002**: Double-clicking an operation start results in at most one queued/running run for the same tenant + operation + target.
- **SC-003**: 99% of runs end in a clear terminal state (succeeded/failed/partial/canceled) with a human-readable summary.
+- **SC-003**: 99% of runs end in a clear terminal state (succeeded/failed/partial) with a human-readable summary.
 - **SC-004**: Admins can locate the latest run status for an operation in under 30 seconds without requiring access to system logs.
 *Note*: “canceled” is reserved for Phase 2+ (Phase 1 has no cancel support).
 ## Assumptions
 - This feature builds on the UI safety constraints from 048: admin pages must remain usable even when the external service API is unavailable.
--- a/specs/049-backup-restore-job-orchestration/tasks.md
+++ b/specs/049-backup-restore-job-orchestration/tasks.md
@ -0,0 +1,202 @@
 # Tasks: Backup/Restore Job Orchestration (049)
 **Input**: Design documents from `specs/049-backup-restore-job-orchestration/`
 **Prerequisites**: plan.md (required), spec.md (required), research.md, data-model.md, contracts/, quickstart.md
 **Tests**: REQUIRED (Pest) for these runtime behavior changes.
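 **MVP scope**: Strictly limited to **T001–T016 plus the blocking tenant-isolation/authorization tasks T042–T044 (US1 only)**. The **global progress widget (T037, listed under tasks Phase 7)** belongs to **product Phase 2** and is explicitly **NOT** part of the MVP.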
 ## Phase 1: Setup (Shared Infrastructure)
 - [x] T001 Verify queue + DB notifications prerequisites in config/queue.php and database/migrations/*notifications* (add missing migration if needed)
 - [x] T002 Confirm existing run tables and status enums used by RestoreRun in app/Support/RestoreRunStatus.php and database/migrations/2025_12_10_000150_create_restore_runs_table.php
 - [x] T003 [P] Add quickstart sanity commands for this feature in specs/049-backup-restore-job-orchestration/quickstart.md
 ---
 ## Phase 2: Foundational (Blocking Prerequisites)
 **⚠️ CRITICAL**: No user story work should begin until this phase is complete.
 - [x] T004 Add idempotency support to bulk_operation_runs via database/migrations/2026_01_11_120001_add_idempotency_key_to_bulk_operation_runs_table.php
 - [x] T005 Add idempotency support to restore_runs via database/migrations/2026_01_11_120002_add_idempotency_key_to_restore_runs_table.php
 - [x] T006 [P] Add casts/fillables for idempotency + timestamps in app/Models/BulkOperationRun.php and app/Models/RestoreRun.php
 - [x] T007 Implement idempotency key helpers in app/Support/RunIdempotency.php (build key, find active run, enforce reuse)
 - [x] T008 [P] Add a read-only Filament resource to inspect run details for BulkOperationRun in app/Filament/Resources/BulkOperationRunResource.php
 - [x] T009 [P] Add notification for run status transitions in app/Notifications/RunStatusChangedNotification.php (DB channel)
 - [x] T010 Add unit tests for RunIdempotency helpers in tests/Unit/RunIdempotencyTest.php
 **CRITICAL (must-fix before implementing any new run flows): Tenant isolation + authorization**
 - [x] T042 Add tenant-scoped authorization for run list/view/start across all run flows (BulkOperationRun + RestoreRun) using policies/resources and ensure every query is tenant-scoped (e.g., app/Filament/Resources/BulkOperationRunResource.php, app/Filament/Resources/RestoreRunResource.php, and each start action/page that creates runs)
 - [x] T043 [P] Add Pest feature tests that run list/view are tenant-scoped (cannot list/view another tenant’s runs) in tests/Feature/RunAuthorizationTenantIsolationTest.php
 - [x] T044 [P] Add Pest feature tests that unaffiliated users cannot start runs (capture snapshot / restore execute / preview / backup set capture) in tests/Feature/RunStartAuthorizationTest.php
 **Checkpoint**: Foundation ready (idempotency + run detail view + notifications + tenant-scoped authorization).
 ---
 ## Phase 3: User Story 1 - Capture snapshot runs in background (Priority: P1) 🎯 MVP
 **Goal**: Capturing a policy snapshot never blocks the UI; it creates/reuses a run record and processes in a queued job with visible progress.
 **Independent Test**: Trigger “Capture snapshot” on a policy; the request returns quickly and a BulkOperationRun transitions `queued → running → succeeded|failed|partial`, with details viewable.
 ### Tests (write first)
 - [x] T011 [P] [US1] Add Pest feature test that capture snapshot queues a job (no inline capture) in tests/Feature/PolicyCaptureSnapshotQueuedTest.php
 - [x] T012 [P] [US1] Add Pest feature test that double-click reuses the active run (idempotency) in tests/Feature/PolicyCaptureSnapshotIdempotencyTest.php
 ### Implementation
 - [x] T013 [US1] Create queued job to capture one policy snapshot in app/Jobs/CapturePolicySnapshotJob.php (updates BulkOperationRun counts + failures)
 - [x] T014 [US1] Update UI action to create/reuse run and dispatch job in app/Filament/Resources/PolicyResource/Pages/ViewPolicy.php
 - [x] T015 [P] [US1] Add linking from UI notifications to BulkOperationRunResource view page in app/Filament/Resources/BulkOperationRunResource.php
 - [x] T016 [US1] Ensure failures are safe/minimized (no secrets) when recording run failures in app/Services/BulkOperationService.php
 **Checkpoint**: User Story 1 is independently usable and testable.
 ---
 ## Phase 4: User Story 3 - Restore runs in background with per-item results (Priority: P1)
 **Goal**: Restore execution and re-run restore operate exclusively via queued jobs, with persisted per-item outcomes and safe error summaries visible in the run detail UI.
 **Independent Test**: Starting restore creates/reuses a RestoreRun in `queued` state, queues execution, and later shows item outcomes without relying on logs.
 ### Tests (write first)
 - [x] T017 [P] [US3] Add Pest feature test that restore execution reuses active run for identical (tenant+backup_set+scope) starts in tests/Feature/RestoreRunIdempotencyTest.php
 - [x] T018 [P] [US3] Extend existing restore job test to assert per-item outcome persistence in tests/Feature/ExecuteRestoreRunJobTest.php
 - [x] T045 [P] [US3] Add Pest feature test that live restore writes an audit event (run-id linked) in tests/Feature/RestoreAuditLoggingTest.php
 ### Implementation
 - [x] T019 [US3] Implement idempotency key computation for restore runs (tenant + operation + target + scope hash) in app/Support/RunIdempotency.php
 - [x] T020 [US3] Update restore run creation/execute flow to reuse active runs (no duplicates) in app/Filament/Resources/RestoreRunResource.php
 - [x] T021 [US3] Update app/Jobs/ExecuteRestoreRunJob.php to set started/finished timestamps and emit DB notifications (queued/running/terminal)
 - [x] T022 [US3] Persist deterministic per-item outcomes into restore_runs.results (keyed by backup_item_id) in app/Services/Intune/RestoreService.php
 - [x] T023 [US3] Derive total/succeeded/failed counts from persisted results and surface in RestoreRunResource view/table in app/Filament/Resources/RestoreRunResource.php
 - [x] T046 [US3] Ensure live restore execution emits an auditable event linked to the run (e.g., audit_logs FK or structured audit record) in app/Jobs/ExecuteRestoreRunJob.php and/or app/Services/Intune/RestoreService.php
 **Checkpoint**: Restore runs are job-only, idempotent, and observable with item outcomes.
 ---
 ## Phase 5: User Story 2 - Backup set create/capture runs in background (Priority: P2)
 **Goal**: Creating a backup set and adding policies to a backup set does not perform Graph-heavy snapshot capture inline; capture occurs in jobs with a run record.
 **Independent Test**: Creating a backup set returns quickly and produces a BulkOperationRun showing progress; adding policies via the picker also queues work.
 ### Tests (write first)
 - [ ] T024 [P] [US2] Add Pest feature test that backup set create does not run capture inline and instead queues a job in tests/Feature/BackupSetCreateCaptureQueuedTest.php
 - [ ] T025 [P] [US2] Add Pest feature test that “Add selected” in policy picker queues background work in tests/Feature/BackupSetPolicyPickerQueuesCaptureTest.php
 ### Implementation
 - [ ] T026 [US2] Refactor capture work out of BackupService::createBackupSet into separate methods in app/Services/Intune/BackupService.php
 - [ ] T027 [US2] Create queued job to capture backup set items in app/Jobs/CaptureBackupSetJob.php (uses BackupService; updates BulkOperationRun)
 - [ ] T028 [US2] Update backup set create flow to create backup_set record quickly and dispatch CaptureBackupSetJob in app/Filament/Resources/BackupSetResource.php
 - [ ] T029 [US2] Create queued job to add policies to a backup set (and capture foundations if requested) in app/Jobs/AddPoliciesToBackupSetJob.php
 - [ ] T030 [US2] Update bulk action in app/Livewire/BackupSetPolicyPickerTable.php to create/reuse BulkOperationRun and dispatch AddPoliciesToBackupSetJob
 **Checkpoint**: Backup set capture workloads are job-only and observable.
 ---
 ## Phase 6: User Story 4 - Dry-run/preview runs in background (Priority: P2)
 **Goal**: Restore preview generation is queued, persisted, and viewable without re-execution.
 **Independent Test**: Clicking “Generate preview” returns quickly; a queued RestoreRun performs the diff generation asynchronously and persists preview output that the UI can display.
 ### Tests (write first)
 - [ ] T031 [P] [US4] Add Pest feature test that preview generation queues a job (no inline RestoreDiffGenerator call) in tests/Feature/RestorePreviewQueuedTest.php
 - [ ] T032 [P] [US4] Add Pest feature test that preview results persist and are reusable in tests/Feature/RestorePreviewPersistenceTest.php
 - [ ] T047 [P] [US4] Add Pest feature test that preview/dry-run never performs writes (must be read-only) in tests/Feature/RestorePreviewReadOnlySafetyTest.php
 ### Implementation
 - [ ] T033 [US4] Create queued job to generate preview diffs and persist to restore_runs.preview + metadata in app/Jobs/GenerateRestorePreviewJob.php
 - [ ] T034 [US4] Update preview action in app/Filament/Resources/RestoreRunResource.php to create/reuse a dry-run RestoreRun and dispatch GenerateRestorePreviewJob
 - [ ] T035 [US4] Update restore run view component to read preview from the persisted run record in resources/views/filament/forms/components/restore-run-preview.blade.php
 - [ ] T036 [US4] Emit DB notifications for preview queued/running/completed/failed transitions in app/Jobs/GenerateRestorePreviewJob.php
 - [ ] T048 [US4] Enforce preview/dry-run read-only behavior: block write-capable operations and record a safe failure if a write would occur (in app/Jobs/GenerateRestorePreviewJob.php and/or restore diff generation service)
 **Checkpoint**: Preview is asynchronous, persisted, and visible.
 ---
 ## Phase 7: Phase 2 - Global Progress Widget (All Run Types)
 - [ ] T037 [P] Add a global progress widget for restore runs (Phase 2 requirement) by extending app/Livewire/BulkOperationProgress.php or adding a dedicated Livewire component in app/Livewire/RestoreRunProgress.php
 ---
 ## Phase 8: Polish & Cross-Cutting Concerns
 - [ ] T038 Ensure Graph throttling/backoff behavior is applied inside queued jobs (429/503) in app/Services/Intune/PolicySnapshotService.php and app/Services/Intune/RestoreService.php
 - [ ] T039 [P] Add/extend run status notification formatting to include safe error codes/contexts in app/Notifications/RunStatusChangedNotification.php
 - [ ] T040 Run formatter on modified files: vendor/bin/pint --dirty
 - [ ] T041 Run targeted tests for affected areas: tests/Feature/*Restore* tests/Feature/*BackupSet* tests/Feature/*Policy* (use php artisan test with filters)
 ---
 ## Dependencies & Execution Order
 ### Story order
 - Phase 1 → Phase 2 must complete first.
 - After Phase 2:
  - US1 and US3 can proceed in parallel.
  - US4 can proceed in parallel but may be easiest after US3 (shared RestoreRun patterns).
  - US2 can proceed independently after Phase 2.
 ### Dependency graph
 - Phase 1 scope: Setup → Foundational → { US1, US2, US3, US4 } → Polish
 - Including the Phase 2 widget (T037): Setup → Foundational → { US1, US2, US3, US4 } → Global Progress Widget → Polish
 - Suggested minimal MVP: Setup → Foundational → US1
 ---
 ## Parallel execution examples
 ### US1
 - In parallel: T011 (queues test), T012 (idempotency test)
 - In parallel: T013 (job), T014 (UI action update) after foundational tasks
 ### US2
 - In parallel: T024 (create queues test), T025 (picker queues test)
 - In parallel: T027 (job) and T029 (job) after BackupService refactor task T026
 ### US3
 - In parallel: T017 (idempotency test), T018 (job behavior test)
 - In parallel: T021 (job notifications) and T023 (UI view enhancements) once results format is defined
 ### US4
 - In parallel: T031 (queues test), T032 (persistence test)
 - In parallel: T033 (job) and T035 (view reads persisted preview) once run persistence shape is agreed
 ---
 ## Implementation strategy
 - MVP (fastest value): deliver US1 first (policy snapshot capture becomes queued + idempotent + observable).
 - Next: US3 + US4 to fully de-risk restore execution and preview.
 - Then: US2 to eliminate inline Graph work from backup set flows.
 ## Format validation
 All tasks above follow the required checklist format (`[x]` marks a completed task):
 `- [ ] T### [P?] [US#?] Description with file path`