spec: add 094 assignment ops observability hardening

2026-02-15 13:39:34 +01:00 · 2026-02-15 13:39:34 +01:00 · c905f211a5
commit c905f211a5
parent 92a36ab89e
8 changed files with 787 additions and 0 deletions
--- a/specs/094-assignment-ops-observability-hardening/checklists/requirements.md
+++ b/specs/094-assignment-ops-observability-hardening/checklists/requirements.md
@ -0,0 +1,35 @@
+# Specification Quality Checklist: 094 Assignment Ops Observability Hardening
+
+**Purpose**: Validate specification completeness and quality before proceeding to planning
+**Created**: 2026-02-14
+**Feature**: [specs/094-assignment-ops-observability-hardening/spec.md](specs/094-assignment-ops-observability-hardening/spec.md)
+
+## Content Quality
+
+- [x] No implementation details (languages, frameworks, APIs)
+- [x] Focused on user value and business needs
+- [x] Written for non-technical stakeholders
+- [x] All mandatory sections completed
+
+## Requirement Completeness
+
+- [x] No [NEEDS CLARIFICATION] markers remain
+- [x] Requirements are testable and unambiguous
+- [x] Success criteria are measurable
+- [x] Success criteria are technology-agnostic (no implementation details)
+- [x] All acceptance scenarios are defined
+- [x] Edge cases are identified
+- [x] Scope is clearly bounded
+- [x] Dependencies and assumptions identified
+
+## Feature Readiness
+
+- [x] All functional requirements have clear acceptance criteria
+- [x] User scenarios cover primary flows
+- [x] Feature meets measurable outcomes defined in Success Criteria
+- [x] No implementation details leak into specification
+
+## Notes
+
+- Spec is intentionally ship-safety hardening only; no new domain features.
+- Spec was rewritten to follow the official template structure and removed code/file/class references.
--- a/specs/094-assignment-ops-observability-hardening/contracts/assignment-ops.openapi.yaml
+++ b/specs/094-assignment-ops-observability-hardening/contracts/assignment-ops.openapi.yaml
@ -0,0 +1,127 @@
+openapi: 3.0.3
+info:
+  title: TenantPilot - Assignment Operations (Internal)
+  version: "1.0"
+  description: |
+    Internal contract describing the user-triggered operation start surfaces and Monitoring read surfaces
+    relevant to assignment fetch/restore observability.
+
+servers:
+  - url: /
+
+paths:
+  /admin/t/{tenant}/backup-items/{backupItem}/assignments/fetch:
+    post:
+      summary: Start assignment fetch/enrichment
+      parameters:
+        - in: path
+          name: tenant
+          required: true
+          schema: { type: string }
+        - in: path
+          name: backupItem
+          required: true
+          schema: { type: integer }
+      responses:
+        "202":
+          description: Accepted; operation run created/reused and job enqueued
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/OperationRunStartResponse"
+        "403": { description: Forbidden (member missing capability) }
+        "404": { description: Not found (non-member / wrong plane) }
+
+  /admin/t/{tenant}/restore-runs/{restoreRun}/assignments/restore:
+    post:
+      summary: Start assignment restore
+      parameters:
+        - in: path
+          name: tenant
+          required: true
+          schema: { type: string }
+        - in: path
+          name: restoreRun
+          required: true
+          schema: { type: integer }
+      responses:
+        "202":
+          description: Accepted; operation run created/reused and job enqueued
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/OperationRunStartResponse"
+        "403": { description: Forbidden (member missing capability) }
+        "404": { description: Not found (non-member / wrong plane) }
+
+  /admin/monitoring/operations:
+    get:
+      summary: List operation runs (Monitoring)
+      responses:
+        "200":
+          description: OK
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  $ref: "#/components/schemas/OperationRunSummary"
+
+  /admin/monitoring/operations/{operationRun}:
+    get:
+      summary: Get operation run detail (Monitoring)
+      parameters:
+        - in: path
+          name: operationRun
+          required: true
+          schema: { type: integer }
+      responses:
+        "200":
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/OperationRunDetail"
+        "404": { description: Not found (non-member / no entitlement) }
+
+components:
+  schemas:
+    OperationRunStartResponse:
+      type: object
+      required: [operationRunId]
+      properties:
+        operationRunId:
+          type: integer
+
+    OperationRunSummary:
+      type: object
+      required: [id, type, status, outcome, createdAt]
+      properties:
+        id: { type: integer }
+        type: { type: string }
+        status: { type: string }
+        outcome: { type: string }
+        createdAt: { type: string, format: date-time }
+
+    OperationRunDetail:
+      allOf:
+        - $ref: "#/components/schemas/OperationRunSummary"
+        - type: object
+          properties:
+            startedAt: { type: string, format: date-time, nullable: true }
+            completedAt: { type: string, format: date-time, nullable: true }
+            context:
+              type: object
+              additionalProperties: true
+            failures:
+              type: array
+              items:
+                $ref: "#/components/schemas/FailureItem"
+
+    FailureItem:
+      type: object
+      required: [code, reasonCode, message]
+      properties:
+        code: { type: string }
+        reasonCode: { type: string }
+        message: { type: string }
--- a/specs/094-assignment-ops-observability-hardening/data-model.md
+++ b/specs/094-assignment-ops-observability-hardening/data-model.md
@ -0,0 +1,50 @@
+# Data Model — 094 Assignment Operations Observability Hardening
+
+This feature introduces **no new entities**. It strengthens how existing operational artifacts are created, deduplicated, and displayed.
+
+## Entities (existing)
+
+### OperationRun
+
+Represents a single observable operation for Monitoring.
+
+- Ownership:
+  - `workspace_id` required.
+  - `tenant_id` present for tenant-bound runs.
+- Key fields:
+  - `type`: operation type identifier (string).
+  - `status`: queued/running/completed.
+  - `outcome`: pending/succeeded/failed/etc.
+  - `run_identity_hash`: stable identity used for active-run dedupe.
+  - `context`: structured details for Monitoring (inputs, progress, counters).
+  - `failures`: array of failure items with stable `code`, normalized `reason_code`, and sanitized `message`.
+  - Counter-like fields may be stored in `context` depending on existing conventions.
+
+### AuditLog
+
+Immutable audit record for security/ops-relevant mutations.
+
+- Scope invariant: if `tenant_id` is present, `workspace_id` must also be present.
+- This spec requires exactly one audit entry per assignment restore execution.
+
+### RestoreRun
+
+Represents a restore execution request and its lifecycle. Assignment restores are performed under an existing restore run.
+
+### BackupItem
+
+Represents an item within a backup set; assignment fetch/enrichment is performed for a specific backup item.
+
+## Relationships (conceptual)
+
+- One tenant has many operation runs.
+- One restore run should be associated with one or more operation runs depending on orchestration; this spec requires that assignment restore execution is observable via at least one operation run.
+- One backup item can have a related fetch/enrichment operation run.
+
+## State transitions
+
+### OperationRun lifecycle
+
+- `queued` → `running` → `completed`
+- Outcome set on completion to indicate success or failure.
+
--- a/specs/094-assignment-ops-observability-hardening/plan.md
+++ b/specs/094-assignment-ops-observability-hardening/plan.md
@ -0,0 +1,174 @@
+# Implementation Plan: 094 Assignment Operations Observability Hardening
+
+**Branch**: `094-assignment-ops-observability-hardening` | **Date**: 2026-02-15 | **Spec**: [spec.md](spec.md)
+**Input**: Feature specification from [spec.md](spec.md)
+
+## Summary
+
+This work hardens ship-safety by making assignment fetch/restore executions fully observable in Monitoring (via `OperationRun`), improving Graph testability by using the Graph client interface, and closing a small set of authorization inconsistencies (cross-plane guard leak, policy bypass, and 404/403 ordering).
+
+## Technical Context
+
+**Language/Version**: PHP 8.4 (Laravel 12)  
+**Primary Dependencies**: Filament v5, Livewire v4, Laravel Sail, Microsoft Graph integration  
+**Storage**: PostgreSQL (Sail)  
+**Testing**: Pest v4 (`./vendor/bin/sail artisan test`)  
+**Target Platform**: Linux container runtime (Sail/Dokploy)  
+**Project Type**: Laravel web application (admin UI via Filament)  
+**Performance Goals**: Monitoring pages must remain DB-only and render quickly (no external calls during render).  
+**Constraints**: No secrets/tokens in logs or run failures; RBAC-UX semantics must match constitution (404 vs 403).  
+**Scale/Scope**: Small hardening change set; no new domain entities; focused on two jobs + a few UI/policy surfaces.
+
+## Constitution Check
+
+*GATE: Must pass before Phase 0 research. Re-check after Phase 1 design.*
+
+- Inventory-first: PASS (no inventory semantics changed).
+- Read/write separation: PASS (restore remains a user-triggered operation; change is observability/auditability).
+- Graph contract path: PASS-BY-DESIGN (plan includes removing concrete client injections in assignment-related services).
+- Deterministic capabilities: PASS (no new capability model; micro-fixes ensure enforcement uses canonical patterns).
+- RBAC-UX planes: PASS (plan includes closing remaining cross-plane guard leak; non-member 404, member missing capability 403).
+- Workspace isolation: PASS (operations remain tenant-bound; Monitoring remains DB-only).
+- Destructive confirmations: PASS (no new destructive actions added; enforcement fixes must preserve confirmations).
+- Global search safety: N/A (no global search changes).
+- Tenant isolation: PASS (no cross-tenant views added; monitoring surfaces already entitlement-checked).
+- Run observability: PASS-BY-DESIGN (this spec exists to bring remaining assignment jobs under `OperationRun`).
+- Automation: PASS (dedupe identity clarified; existing partial unique index patterns used).
+- Data minimization: PASS (failures must be sanitized; no raw payloads stored).
+- Badge semantics: N/A (no badge mapping changes).
+- Filament UI Action Surface Contract: PASS (no new resources; micro-fixes must not remove inspect affordances).
+
+## Project Structure
+
+### Documentation (this feature)
+
+```text
+specs/094-assignment-ops-observability-hardening/
+├── plan.md
+├── spec.md
+├── tasks.md
+├── research.md
+├── data-model.md
+├── quickstart.md
+├── contracts/
+│   └── assignment-ops.openapi.yaml
+└── checklists/
+    └── requirements.md
+```
+
+### Source Code (repository root)
+
+```text
+app/
+├── Jobs/
+│   ├── FetchAssignmentsJob.php
+│   ├── RestoreAssignmentsJob.php
+│   └── Middleware/TrackOperationRun.php
+├── Services/
+│   ├── OperationRunService.php
+│   ├── AssignmentBackupService.php
+│   ├── AssignmentRestoreService.php
+│   └── Graph/
+│       ├── GraphClientInterface.php
+│       ├── MicrosoftGraphClient.php
+│       ├── NullGraphClient.php
+│       ├── AssignmentFetcher.php
+│       ├── AssignmentFilterResolver.php
+│       └── GroupResolver.php
+├── Filament/
+│   └── Resources/...
+└── Http/
+    └── Middleware/EnsureCorrectGuard.php
+
+routes/
+└── web.php
+
+tests/
+├── Feature/
+└── Unit/
+```
+
+**Structure Decision**: Use the existing Laravel structure; changes are limited to the job layer, Graph service DI, a few Filament surfaces, and targeted Pest tests.
+
+## Phase 0 — Outline & Research
+
+Artifacts:
+
+- [research.md](research.md)
+
+Research outcomes (resolved decisions):
+
+- Operation run identity & dedupe: tenant + type + target/scope.
+- Counters semantics: total attempted, processed succeeded, failed failed.
+- Failure convention: operation-specific `code` + normalized `reason_code`.
+- Audit log granularity: exactly one entry per restore execution.
+
+## Phase 1 — Design & Contracts
+
+Artifacts:
+
+- [data-model.md](data-model.md)
+- [contracts/assignment-ops.openapi.yaml](contracts/assignment-ops.openapi.yaml)
+- [quickstart.md](quickstart.md)
+
+Design notes:
+
+- No new persistent entities.
+- Operation run tracking must use existing dedupe/index patterns.
+- Monitoring surfaces remain DB-only.
+
+## Phase 1 — Agent Context Update
+
+Run:
+
+- `.specify/scripts/bash/update-agent-context.sh copilot`
+
+## Phase 1 — Constitution Check (re-evaluation)
+
+Re-check result: PASS expected once implementation removes concrete Graph client injections and adds OperationRun tracking for the two remaining assignment jobs.
+
+## Phase 2 — Implementation Plan
+
+### Step 1 — OperationRun tracking for assignment fetch/restore
+
+- Locate job dispatch/start surfaces.
+- Ensure each execution creates/reuses an `OperationRun`:
+  - Dedupe identity:
+    - Fetch: tenant + type + backup item (or equivalent policy-version identifier).
+    - Restore: tenant + type + restore run identifier.
+- Ensure Tracking middleware is applied and the job exposes a run handle per existing conventions.
+- Ensure failure details:
+  - `code`: operation-specific namespace.
+  - `reason_code`: normalized cause.
+  - message: sanitized.
+- Ensure counters match the agreed semantics.
+
+### Step 2 — Audit log for assignment restore execution
+
+- Ensure exactly one audit log entry is written per assignment restore execution.
+
+### Step 3 — Graph client interface enforcement
+
+- Update assignment-related services to accept the Graph client interface.
+
+### Step 4 — Authorization micro-fixes
+
+- Close cross-plane guard leak on workspace-scoped admin routes.
+- Remove any policy bypasses on Provider Connections list surfaces.
+- Fix membership (404) vs capability (403) ordering for backup item surfaces.
+- Ensure legacy UI enforcement helpers are not used where the canonical helper exists.
+
+### Step 5 — Tests + formatting
+
+- Add targeted Pest regression coverage for:
+  - operation run tracking (success/failure)
+  - guard leak
+  - policy enforcement
+  - 404/403 ordering
+  - Graph interface mockability
+- Run `./vendor/bin/sail bin pint --dirty`.
+- Run targeted tests then widen as needed.
+
+## Complexity Tracking
+
+No constitution violations are required for this feature.
--- a/specs/094-assignment-ops-observability-hardening/quickstart.md
+++ b/specs/094-assignment-ops-observability-hardening/quickstart.md
@ -0,0 +1,33 @@
+# Quickstart — 094 Assignment Operations Observability Hardening
+
+## Prerequisites
+
+- Docker + Laravel Sail
+- PHP/Composer dependencies installed (via Sail)
+
+## Setup
+
+- Start services: `./vendor/bin/sail up -d`
+- Install PHP deps (if needed): `./vendor/bin/sail composer install`
+- Run migrations: `./vendor/bin/sail artisan migrate`
+
+## Running tests (targeted)
+
+Run the smallest set first, then widen:
+
+- Feature tests added for this spec (once implemented):
+  - `./vendor/bin/sail artisan test --compact tests/Feature/Operations`
+  - `./vendor/bin/sail artisan test --compact tests/Feature/Auth`
+  - `./vendor/bin/sail artisan test --compact tests/Feature/Rbac`
+
+## Formatting
+
+- Run Pint on touched files: `./vendor/bin/sail bin pint --dirty`
+
+## Manual verification (admin)
+
+- Trigger an assignment fetch and confirm a Monitoring → Operations entry appears.
+- Trigger an assignment restore and confirm:
+  - Monitoring shows a run with failure codes (if any) and counters.
+  - Exactly one audit log entry is created for the restore execution.
+
--- a/specs/094-assignment-ops-observability-hardening/research.md
+++ b/specs/094-assignment-ops-observability-hardening/research.md
@ -0,0 +1,59 @@
+# Research — 094 Assignment Operations Observability Hardening
+
+This document resolves implementation-relevant questions raised by the spec and records the key decisions.
+
+## Decision 1 — Operation run identity + dedupe
+
+- Decision: Use **dedupe per tenant + operation type + operation target/scope**.
+  - Fetch target/scope: backup item identifier (or equivalent policy-version identifier).
+  - Restore target/scope: restore run identifier.
+- Rationale: Prevents operator confusion while allowing independent operations to proceed in parallel.
+- Alternatives considered:
+  - Dedupe per tenant only → collapses unrelated runs and obscures what is actually running.
+  - No dedupe → increases confusion and makes tests non-deterministic.
+
+## Decision 2 — How jobs become OperationRun-tracked
+
+- Decision: Prefer passing an existing operation run identifier into queued jobs when a user-triggered start surface exists; otherwise the job must create/reuse a canonical run using the standard service.
+- Rationale: Start surfaces should remain enqueue-only while ensuring a single umbrella run record exists for Monitoring.
+- Alternatives considered:
+  - Create runs only inside jobs → makes it harder to link UI initiation to the run and can lead to duplicate runs.
+
+## Decision 3 — Counters semantics
+
+- Decision: `total` = items attempted, `processed` = succeeded, `failed` = failed.
+- Rationale: Works for both read-only fetch and destructive restore, and is easy to interpret in Monitoring.
+- Alternatives considered:
+  - “total discovered” semantics → ambiguous when discovery and execution are separate steps.
+  - Different semantics per operation type → harder to reason about and to test.
+
+## Decision 4 — Failure structure: stable code + normalized reason_code
+
+- Decision: Use operation-specific failure `code` namespaces (e.g., `assignments.fetch_failed`, `assignments.restore_failed`) and store the underlying cause as a normalized `reason_code`.
+- Rationale: Operators can identify what failed (operation) and why (normalized cause) consistently.
+- Alternatives considered:
+  - Generic failure codes only → loses context; Monitoring becomes less actionable.
+  - Reusing unrelated restore codes for assignment operations → conflates domains and increases ambiguity.
+
+## Decision 5 — Audit log granularity for assignment restores
+
+- Decision: Write **exactly one audit log entry per assignment restore execution** (per restore run).
+- Rationale: Satisfies auditability while keeping log volume predictable and reviewable.
+- Alternatives considered:
+  - Per-item audit logs → noisy for large restores.
+  - Both summary + per-item → overkill for this hardening scope.
+
+## Decision 6 — Graph client abstraction
+
+- Decision: Ensure assignment-related services depend on the **Graph client interface**, not a concrete implementation.
+- Rationale: Enables deterministic non-production tests and allows safe stub/null implementations.
+- Alternatives considered:
+  - Concrete client type-hints → blocks mocking and makes tests fragile.
+
+## Decision 7 — Authorization semantics & cross-plane safety
+
+- Decision: Enforce deny-as-not-found (404) for non-members/cross-plane access and forbidden (403) for members lacking capability.
+- Rationale: Matches constitution RBAC-UX rules and prevents route/resource enumeration.
+- Alternatives considered:
+  - Using 403 for non-members → leaks existence and violates deny-as-not-found standard.
+
--- a/specs/094-assignment-ops-observability-hardening/spec.md
+++ b/specs/094-assignment-ops-observability-hardening/spec.md
@ -0,0 +1,137 @@
+# Feature Specification: Assignment Operations Observability Hardening
+
+**Feature Branch**: `094-assignment-ops-observability-hardening`  
+**Created**: 2026-02-14  
+**Status**: Draft  
+**Input**: User description: "Harden assignment operation observability and close remaining authorization inconsistencies so operations are fully traceable, diagnosable, and correctly access-controlled."
+
+## Spec Scope Fields *(mandatory)*
+
+- **Scope**: canonical-view
+- **Primary Routes**:
+  - Admin Monitoring → Operations (list + detail views)
+  - Provider Connections list
+  - Backup Sets list
+  - Restore Runs list
+  - Backup Items relationship view under a backup set
+- **Data Ownership**: operational activity records must be scoped to the correct workspace and tenant context where applicable; no new domain entities are introduced.
+- **RBAC**: all affected admin surfaces require workspace membership; actions that change configuration or trigger restores require explicit permissions as defined in the central capability registry.
+
+### Canonical-view Constraints
+
+- **Default behavior when tenant-context is active**: Monitoring → Operations defaults to showing operational records for the currently selected tenant context (if one is selected).
+- **Entitlement checks**: Monitoring → Operations MUST not reveal tenant-owned operational records unless the actor is entitled to that tenant scope within the active workspace context.
+
+## Clarifications
+
+### Session 2026-02-14
+
+- Q: For FR-005 / SC-004, what should “identity scope” mean for deduping “active” runs? → A: Dedupe per tenant and operation target/scope.
+- Q: For FR-003 (“summary counters”) what exact semantics should total / processed / failed follow? → A: total = items attempted; processed = succeeded; failed = failed.
+- Q: For FR-004 (“stable failure code”), which convention should we standardize on for assignment fetch/restore runs? → A: Operation-specific code namespaces + normalized reason_code for the cause.
+- Q: For FR-006 (identity scope = tenant + operation type + target/scope), which identifiers should define “target/scope” for these two operations? → A: Fetch targets the backup item (or policy version) identifier; restore targets the restore run identifier.
+- Q: For US1 / FR-002 (“restore is observable/auditable”), what audit log granularity do you want for assignment restore? → A: One audit log entry per assignment restore execution (per restore run).
+
+## User Scenarios & Testing *(mandatory)*
+
+### User Story 1 — Observe assignment operations end-to-end (Priority: P1)
+
+Workspace administrators need to see assignment-related operations (both read-only fetch and destructive restore) in Monitoring so they can confirm what ran, what changed, and why something failed, without relying on server logs.
+
+**Why this priority**: Assignment restore is high-risk; missing visibility creates operational and audit gaps.
+
+**Independent Test**: Trigger both an assignment fetch and an assignment restore; verify each produces a monitoring-visible run record with correct lifecycle and failure details.
+
+**Acceptance Scenarios**:
+
+1. **Given** an administrator triggers an assignment fetch, **When** the operation starts and completes, **Then** Monitoring shows a run record with start/end timestamps, final outcome, and summary counters.
+2. **Given** an administrator triggers an assignment restore, **When** the operation starts and completes, **Then** Monitoring shows a run record including a clear indication that it was a change-making operation.
+  - And exactly one audit log entry is written for the restore execution.
+3. **Given** an assignment fetch or restore fails due to an external dependency error, **When** the run completes, **Then** Monitoring shows a stable failure code and a sanitized, user-readable message.
+4. **Given** the same assignment operation is triggered multiple times concurrently for the same tenant and scope, **When** the system creates tracking records, **Then** the admin sees a single “active” run per identity (or an equivalent deduped representation).
+
+---
+
+### User Story 2 — Enforce correct access control semantics on affected admin surfaces (Priority: P2)
+
+Workspace administrators and platform operators must not be able to cross authentication “planes” accidentally, and the admin UI must not expose bypasses that let users initiate sensitive actions without authorization.
+
+**Why this priority**: Prevents cross-plane leakage and closes known authorization inconsistencies.
+
+**Independent Test**: Attempt access with the wrong authentication plane and with insufficient permissions; verify outcomes are deny-as-not-found (404) or forbidden (403) per policy.
+
+**Acceptance Scenarios**:
+
+1. **Given** a user is authenticated in a different auth plane, **When** they attempt to access workspace-scoped admin routes, **Then** the response is deny-as-not-found (404).
+2. **Given** a user is not a member of the workspace, **When** they attempt to view backup items under a backup set, **Then** the response is deny-as-not-found (404) and does not reveal record existence.
+3. **Given** a user is a workspace member but lacks the required permission, **When** they attempt a protected action (such as managing provider connections), **Then** the response is forbidden (403).
+
+---
+
+### User Story 3 — Validate assignment operations safely in non-production contexts (Priority: P3)
+
+Platform engineers need to validate that assignment operations behave correctly without requiring live external dependencies, so regressions can be caught early.
+
+**Why this priority**: Improves reliability and reduces the risk of shipping changes that only fail when external services are slow/unavailable.
+
+**Independent Test**: Run automated tests that simulate both successful and failing external interactions; verify monitoring records and authorization behaviors.
+
+**Acceptance Scenarios**:
+
+1. **Given** external interactions are simulated as “successful”, **When** the operation runs, **Then** the run is marked successful and includes expected summary counters.
+2. **Given** external interactions are simulated as “failing”, **When** the operation runs, **Then** the run is marked failed with a stable failure code and sanitized message.
+
+### Edge Cases
+
+- External dependency timeouts, throttling, or transient failures.
+- Retries and duplicate dispatches (ensure tracking remains coherent and non-spammy).
+- Missing or inconsistent tenant/workspace context (must fail safely, not leak).
+- Partial completion (some items processed, some failed): counters and failure details must remain interpretable.
+
+## Requirements *(mandatory)*
+
+### Functional Requirements
+
+- **FR-001**: The system MUST create and maintain an operational tracking record (OperationRun) for every assignment fetch operation execution.
+- **FR-002**: The system MUST create and maintain an operational tracking record (OperationRun) for every assignment restore operation execution.
+- **FR-003**: Tracking records MUST include lifecycle state (queued/running/completed), timestamps, outcome, and summary counters sufficient to understand progress and results.
+  - Counter semantics: `total` = items attempted, `processed` = succeeded, `failed` = failed.
+- **FR-004**: Failed runs MUST include a stable failure code and a sanitized, user-readable failure message.
+  - Failure convention: use operation-specific `code` namespaces for the run, and store the underlying cause as a normalized `reason_code`.
+- **FR-005**: The system MUST prevent duplicate “active” runs for the same tenant and identity scope (tenant + operation type + operation target/scope), or otherwise present a deduped representation that avoids operator confusion.
+- **FR-006**: Identity scope MUST be defined as:
+  - For assignment fetch operations: tenant + operation type + backup item identifier (or equivalent policy-version identifier).
+  - For assignment restore operations: tenant + operation type + restore run identifier.
+- **FR-007**: Monitoring pages MUST render using persisted operational data only (no outbound calls during page render).
+- **FR-008**: Cross-plane access MUST be deny-as-not-found (404) on affected routes.
+- **FR-009**: Authorization MUST follow consistent semantics:
+  - non-member / not entitled → 404 (deny-as-not-found)
+  - member without required permission → 403
+- **FR-010**: Any action that can change configuration or trigger a restore MUST be server-authorized; UI visibility MUST NOT be treated as authorization.
+- **FR-011**: Sensitive or destructive-like actions MUST require explicit confirmation.
+- **FR-012**: Each assignment restore execution MUST write exactly one audit log entry for the restore run.
+
+### Dependencies & Assumptions
+
+- Assignment fetch and assignment restore operations can be triggered by administrators or scheduled/queued execution paths.
+- Monitoring users have access to the Operations area only when they have appropriate workspace membership and permissions.
+- External dependency failures may occur and must be represented consistently (stable failure codes + sanitized messages).
+
+## UI Action Matrix *(mandatory when the admin UI is changed)*
+
+| Surface | Location | Header Actions | Inspect Affordance (List/Table) | Row Actions (max 2 visible) | Bulk Actions (grouped) | Empty-State CTA(s) | View Header Actions | Create/Edit Save+Cancel | Audit log? | Notes / Exemptions |
+|---|---|---|---|---|---|---|---|---|---|---|
+| Provider Connections | Admin → Provider Connections | Create connection (permission-gated) | View connection details | Edit / Delete (permission-gated; destructive confirms) | None | Create connection | N/A | Save + Cancel | Yes | Ensures no authorization bypass exists on header/empty-state CTAs. |
+| Backup Sets | Admin → Backups | None (unchanged) | View backup set | Actions unchanged | None | None | N/A | N/A | N/A | Only enforcement helper consistency is affected. |
+| Restore Runs | Admin → Restores | None (unchanged) | View restore run | Actions unchanged | None | None | N/A | N/A | Yes | Restore operations must be observable via Monitoring. |
+| Backup Items | Under a backup set | None | View backup item details | Actions unchanged | None | None | N/A | N/A | N/A | Membership/404 checks must occur before capability/403 checks. |
+| Monitoring → Operations | Admin → Monitoring | None | View operation run details | None | None | None | N/A | N/A | Yes | Read-only view; must not call external services during render. |
+
+## Success Criteria *(mandatory)*
+
+### Measurable Outcomes
+
+- **SC-001**: 100% of assignment fetch and assignment restore executions appear in Monitoring with a completed outcome (success or failure) and timestamps.
+- **SC-002**: For failed runs, operators can identify a stable failure code and a readable failure message from Monitoring within 60 seconds, without checking server logs.
+- **SC-003**: Automated tests verify deny-as-not-found (404) vs forbidden (403) semantics for the affected surfaces.
+- **SC-004**: Duplicate active-run confusion is eliminated: repeated triggers produce a single active run per identity scope (tenant + operation type + operation target/scope) or equivalent deduped visibility.
--- a/specs/094-assignment-ops-observability-hardening/tasks.md
+++ b/specs/094-assignment-ops-observability-hardening/tasks.md
@ -0,0 +1,172 @@
+# Tasks: 094 Assignment Operations Observability Hardening
+
+**Input**: Design documents from `specs/094-assignment-ops-observability-hardening/`
+**Prerequisites**: `specs/094-assignment-ops-observability-hardening/plan.md` (required), `specs/094-assignment-ops-observability-hardening/spec.md` (required), `specs/094-assignment-ops-observability-hardening/research.md`, `specs/094-assignment-ops-observability-hardening/data-model.md`, `specs/094-assignment-ops-observability-hardening/contracts/assignment-ops.openapi.yaml`
+
+**Tests**: Required (Pest) for runtime behavior changes.
+
+---
+
+## Phase 1: Setup (Shared Infrastructure)
+
+**Purpose**: Ensure the implementation has the correct local workflow context and that the existing conventions to extend are understood.
+
+- [X] T001 Run SpecKit prerequisites check via `.specify/scripts/bash/check-prerequisites.sh --json` and record `FEATURE_DIR` + `AVAILABLE_DOCS` for this feature
+- [X] T002 Review existing OperationRun tracking conventions in `app/Jobs/Middleware/TrackOperationRun.php` and `app/Services/OperationRunService.php`
+- [X] T003 Review existing OperationRun test patterns in `tests/Feature/TrackOperationRunMiddlewareTest.php` and `tests/Feature/OperationRunServiceTest.php`
+
+---
+
+## Phase 2: Foundational (Blocking Prerequisites)
+
+**Purpose**: Confirm the persistence + dedupe primitives exist and match the spec’s requirements before adding new run types and audits.
+
+**⚠️ CRITICAL**: Complete this phase before implementing any user story.
+
+- [X] T004 Verify `operation_runs` schema supports `run_identity_hash`, `summary_counts`, and `failure_summary` per `database/migrations/2026_01_16_180642_create_operation_runs_table.php`
+- [X] T005 Verify active-run dedupe constraints exist and align with `OperationRunService::ensureRunWithIdentity()` in `app/Services/OperationRunService.php` and `database/migrations/2026_02_10_004939_add_unique_index_for_backup_schedule_scheduled_operation_runs.php`
+- [X] T006 [P] Identify existing restore audit logging expectations to preserve by reviewing `tests/Feature/RestoreAuditLoggingTest.php` and `app/Services/Intune/AuditLogger.php`
+- [X] T034 [P] Verify active-run dedupe constraints cover the new assignment run identity shapes; if not, add a DB-enforced partial unique index/migration + regression test for dedupe behavior
+
+**Checkpoint**: Foundation ready — user story work can begin.
+
+---
+
+## Phase 3: User Story 1 — Observe assignment operations end-to-end (Priority: P1) 🎯 MVP
+
+**Goal**: Assignment fetch (during backup capture when assignments are included) and assignment restore (during restore execution) are observable via `OperationRun` with stable failure codes, normalized `reason_code`, correct counters, and exactly one audit log entry per assignment restore execution.
+
+**Independent Test**: Trigger (a) backup creation with assignments included and (b) a restore that applies assignments; verify `operation_runs` rows exist and `audit_logs` contains exactly one assignment-restore summary entry.
+
+### Tests for User Story 1 (write first)
+
+- [X] T007 [P] [US1] Add test for assignment restore OperationRun lifecycle + counters in `tests/Feature/Operations/AssignmentRestoreOperationRunTest.php`
+- [X] T008 [P] [US1] Add test for stable failure `code` + normalized `reason_code` on assignment restore failure in `tests/Feature/Operations/AssignmentRestoreOperationRunFailureTest.php`
+- [X] T009 [P] [US1] Add test that assignment restore writes exactly one audit log entry per restore execution in `tests/Feature/Audit/AssignmentRestoreAuditSummaryTest.php`
+- [X] T010 [P] [US1] Add test that backup capture with assignments included records an assignment-fetch OperationRun entry for the resulting backup item in `tests/Feature/Operations/AssignmentFetchOperationRunTest.php`
+- [X] T035 [P] [US1] Add regression test for Monitoring pages being DB-only at render time (no outbound calls) when viewing Operations list + detail in `tests/Feature/Monitoring/OperationsDbOnlyRenderTest.php`
+
+### Implementation for User Story 1
+
+- [X] T011 [US1] Add assignment-restore OperationRun creation + dedupe by `restore_run_id` in `app/Services/Intune/RestoreService.php` (use `OperationRunService::ensureRunWithIdentity()`; identity inputs include `restore_run_id`)
+- [X] T012 [US1] Update assignment-restore OperationRun summary counts (`total` attempted, `processed` succeeded, `failed` failed) in `app/Services/Intune/RestoreService.php` and `app/Services/OperationRunService.php`
+- [X] T013 [US1] Record stable failure `code` namespaces + normalized `reason_code` + sanitized messages for assignment restore failures in `app/Services/Intune/RestoreService.php` (leverage `OperationRunService` failure sanitization)
+- [X] T014 [US1] Remove per-assignment audit log emission from `app/Services/AssignmentRestoreService.php` (replace per-item `auditLogger->log(...)` calls with in-memory aggregation only)
+- [X] T015 [US1] Add exactly one audit log entry per assignment restore execution in `app/Services/Intune/RestoreService.php` using `app/Services/Intune/AuditLogger.php` (resourceType `restore_run`, resourceId = restore run id)
+- [X] T016 [US1] Add assignment-fetch OperationRun creation + dedupe keyed by `backup_item_id` in `app/Services/AssignmentBackupService.php` (wrap the Graph fetch inside `enrichWithAssignments()` using `OperationRunService::ensureRunWithIdentity()`)
+- [X] T017 [US1] Ensure assignment-related job classes can be OperationRun-tracked if they are used in the future by adding `public ?OperationRun $operationRun` + `middleware()` returning `TrackOperationRun` in `app/Jobs/FetchAssignmentsJob.php` and `app/Jobs/RestoreAssignmentsJob.php`
+- [X] T036 [US1] Identify and document all start surfaces that can trigger assignment fetch/restore, and ensure each path creates/reuses the same run identity (avoid “tracked in one path, untracked in another” gaps)
+
+**Checkpoint**: US1 delivers Monitoring visibility + auditability for assignment restore and assignment fetch during backup capture.
+
+---
+
+## Phase 4: User Story 2 — Enforce correct access control semantics on affected admin surfaces (Priority: P2)
+
+**Goal**: Cross-plane access returns 404, non-member access returns 404, and member-without-capability returns 403. Remove any authorization bypasses.
+
+**Independent Test**: Access the affected routes/surfaces with wrong guard and with insufficient permissions; assert 404 vs 403 semantics.
+
+### Tests for User Story 2 (write first)
+
+- [X] T018 [P] [US2] Add regression test for `/admin/w/{workspace}` guard enforcement (cross-plane 404) in `tests/Feature/Guards/AdminWorkspaceRoutesGuardTest.php`
+- [X] T019 [P] [US2] Add regression test ensuring Provider Connection create CTA does not bypass authorization in `tests/Feature/ProviderConnections/ProviderConnectionListAuthorizationTest.php`
+- [X] T020 [P] [US2] Add regression test for membership (404) before capability (403) in backup items relation manager in `tests/Feature/Rbac/BackupItemsRelationManagerSemanticsTest.php`
+
+### Implementation for User Story 2
+
+- [X] T021 [US2] Add missing `ensure-correct-guard:web` middleware to the `/admin/w/{workspace}` route group in `routes/web.php`
+- [X] T022 [US2] Remove the `->authorize(fn (): bool => true)` bypass from header and empty-state create actions in `app/Filament/Resources/ProviderConnectionResource/Pages/ListProviderConnections.php`
+- [X] T023 [US2] Fix membership (404) vs capability (403) ordering for backup items under a backup set in `app/Filament/Resources/BackupSetResource/RelationManagers/BackupItemsRelationManager.php`
+- [X] T024 [US2] Swap legacy enforcement helper imports to canonical RBAC helper in `app/Filament/Resources/BackupSetResource/Pages/ListBackupSets.php` and `app/Filament/Resources/RestoreRunResource/Pages/ListRestoreRuns.php`
+- [X] T037 [US2] Verify any destructive-like Provider Connections actions still require explicit confirmation (no regressions), and that server authorization remains enforced for both header and empty-state CTAs
+
+**Checkpoint**: US2 closes cross-plane leak + removes bypasses + restores correct 404/403 semantics.
+
+---
+
+## Phase 5: User Story 3 — Validate assignment operations safely in non-production contexts (Priority: P3)
+
+**Goal**: Assignment-related Graph services depend only on `GraphClientInterface`, enabling tests to run with `NullGraphClient` or fakes without concrete client coupling.
+
+**Independent Test**: Resolve the assignment Graph services from the container with Graph disabled and run a minimal flow that exercises the interface contract.
+
+### Tests for User Story 3 (write first)
+
+- [X] T025 [P] [US3] Add unit test asserting assignment Graph services resolve with `GraphClientInterface` binding in `tests/Feature/Graph/AssignmentGraphServiceResolutionTest.php`
+- [X] T026 [P] [US3] Add test using a fake `GraphClientInterface` to simulate assignment fetch failures without real HTTP in `tests/Feature/Operations/AssignmentFetchOperationRunFailureTest.php`
+
+### Implementation for User Story 3
+
+- [X] T027 [P] [US3] Replace `MicrosoftGraphClient` constructor type-hint with `GraphClientInterface` in `app/Services/Graph/AssignmentFetcher.php`
+- [X] T028 [P] [US3] Replace `MicrosoftGraphClient` constructor type-hint with `GraphClientInterface` in `app/Services/Graph/GroupResolver.php`
+- [X] T029 [P] [US3] Replace `MicrosoftGraphClient` constructor type-hint with `GraphClientInterface` in `app/Services/Graph/AssignmentFilterResolver.php`
+- [X] T030 [US3] Confirm container bindings remain canonical and no concrete client is injected directly by reviewing `app/Providers/AppServiceProvider.php`
+
+**Checkpoint**: US3 enables deterministic tests and non-prod validation for assignment operations.
+
+---
+
+## Phase 6: Polish & Cross-Cutting Concerns
+
+**Purpose**: Formatting + targeted verification commands + confidence checks.
+
+- [X] T031 [P] Run formatting for touched files via `./vendor/bin/sail bin pint --dirty` (see `specs/094-assignment-ops-observability-hardening/quickstart.md`)
+- [X] T032 Run targeted tests via `./vendor/bin/sail artisan test --compact tests/Feature/Operations tests/Feature/Rbac tests/Feature/Guards tests/Feature/Audit` (see `specs/094-assignment-ops-observability-hardening/quickstart.md`)
+- [X] T033 Run the full test suite via `./vendor/bin/sail artisan test --compact` (see `specs/094-assignment-ops-observability-hardening/quickstart.md`)
+- [X] T038 Update any requirement references in tasks/spec if FR numbering changes (keep traceability from FR-001..FR-012 to the task IDs)
+
+---
+
+## Dependencies & Execution Order
+
+### Phase Dependencies
+
+- **Setup (Phase 1)**: No dependencies.
+- **Foundational (Phase 2)**: Depends on Phase 1.
+- **US1 (Phase 3)**: Depends on Phase 2. No dependency on US2/US3.
+- **US2 (Phase 4)**: Depends on Phase 2. Independent from US1/US3.
+- **US3 (Phase 5)**: Depends on Phase 2. Independent from US1/US2.
+- **Polish (Phase 6)**: Depends on completing the desired stories.
+
+### User Story Dependencies
+
+- **US1 (P1)**: Standalone MVP.
+- **US2 (P2)**: Standalone hardening.
+- **US3 (P3)**: Standalone testability hardening.
+
+---
+
+## Parallel Example: User Story 1
+
+Parallelizable test tasks: T007, T008, T009, T010 (different files under `tests/Feature/...`).
+
+---
+
+## Parallel Example: User Story 2
+
+Parallelizable test tasks: T018, T019, T020 (different files under `tests/Feature/...`).
+
+Parallelizable implementation tasks (after tests land): T021 + T022 (different files under `routes/` vs `app/Filament/...`).
+
+---
+
+## Parallel Example: User Story 3
+
+Parallelizable implementation tasks: T027, T028, T029 (different files under `app/Services/Graph/...`).
+
+---
+
+## Implementation Strategy
+
+### MVP First (User Story 1 Only)
+
+1. Complete Phase 1 (Setup) + Phase 2 (Foundational)
+2. Complete Phase 3 (US1) including tests
+3. Validate Monitoring visibility + audit log semantics
+
+### Incremental Delivery
+
+1. US1 → deploy/demo
+2. US2 → deploy/demo
+3. US3 → deploy/demo