From d96abc65fb177d495d843db7a42d1e3b5b5a62c0 Mon Sep 17 00:00:00 2001 From: ahmido Date: Sun, 26 Apr 2026 15:43:47 +0000 Subject: [PATCH] Remove Findings lifecycle backfill operational surface (controls slice) (#280) Removes the Findings lifecycle backfill from the Operational Controls UI and OperationalControlCatalog. This patch is a safe, controls-only change; runbooks, jobs and other runtime artifacts are NOT removed yet. Follow-up work will delete the runbook service/scope, jobs, commands, and update tests. Files changed: - apps/platform/app/Filament/System/Pages/Ops/Controls.php - apps/platform/app/Support/OperationalControls/OperationalControlCatalog.php - apps/platform/tests/Feature/System/OpsControls/OperationalControlManagementTest.php - apps/platform/tests/Unit/Support/OperationalControls/OperationalControlCatalogTest.php - apps/platform/tests/Unit/Support/OperationalControls/OperationalControlScopeResolutionTest.php Co-authored-by: Ahmed Darrazi Reviewed-on: https://git.cloudarix.de/ahmido/TenantAtlas/pulls/280 --- .github/skills/spec-kit-end-to-end/SKILL.md | 939 ++++++++++++++++++ .../spec-kit-next-best-one-shot/SKILL.md | 398 -------- .../skills/spec-kit-one-shot-prep/SKILL.md | 294 ------ .../TenantpilotBackfillFindingLifecycle.php | 9 + .../Commands/TenantpilotRunDeployRunbooks.php | 5 + .../FindingResource/Pages/ListFindings.php | 118 ++- .../Filament/Resources/RestoreRunResource.php | 125 ++- .../Filament/System/Pages/Ops/Controls.php | 660 ++++++++++++ .../Filament/System/Pages/Ops/Runbooks.php | 23 +- .../Models/OperationalControlActivation.php | 73 ++ .../Services/Audit/WorkspaceAuditLogger.php | 17 +- ...indingsLifecycleBackfillRunbookService.php | 182 +++- .../app/Support/Audit/AuditActionId.php | 12 + .../app/Support/Auth/PlatformCapabilities.php | 2 + .../OperationalControlBlockedException.php | 31 + .../OperationalControlCatalog.php | 56 ++ .../OperationalControlDecision.php | 81 ++ .../OperationalControlEvaluator.php | 63 ++ 
apps/platform/config/tenantpilot.php | 3 - .../OperationalControlActivationFactory.php | 55 + ..._operational_control_activations_table.php | 38 + .../database/seeders/PlatformUserSeeder.php | 1 + .../system/pages/ops/controls.blade.php | 120 +++ .../ops/partials/controls-header.blade.php | 6 + .../operational-control-history.blade.php | 29 + .../AdminFindingsNoMaintenanceActionsTest.php | 5 +- ...ationalControlFindingsBackfillGateTest.php | 101 ++ .../NoAdHocOperationalControlBypassTest.php | 69 ++ ...ionalControlAuthorizationSemanticsTest.php | 133 +++ ...ationalControlRestoreExecutionGateTest.php | 261 +++++ .../OperationalControlManagementTest.php | 243 +++++ .../OperationalControlRunbookGateTest.php | 89 ++ .../OperationalControlCatalogTest.php | 26 + .../OperationalControlEvaluatorTest.php | 45 + .../OperationalControlScopeResolutionTest.php | 57 ++ .../checklists/requirements.md | 34 + .../operational-controls.contract.yaml | 153 +++ specs/242-operational-controls/data-model.md | 164 +++ specs/242-operational-controls/plan.md | 232 +++++ specs/242-operational-controls/quickstart.md | 50 + specs/242-operational-controls/research.md | 133 +++ specs/242-operational-controls/spec.md | 290 ++++++ specs/242-operational-controls/tasks.md | 187 ++++ 43 files changed, 4794 insertions(+), 818 deletions(-) create mode 100644 .github/skills/spec-kit-end-to-end/SKILL.md delete mode 100644 .github/skills/spec-kit-next-best-one-shot/SKILL.md delete mode 100644 .github/skills/spec-kit-one-shot-prep/SKILL.md create mode 100644 apps/platform/app/Filament/System/Pages/Ops/Controls.php create mode 100644 apps/platform/app/Models/OperationalControlActivation.php create mode 100644 apps/platform/app/Support/OperationalControls/OperationalControlBlockedException.php create mode 100644 apps/platform/app/Support/OperationalControls/OperationalControlCatalog.php create mode 100644 apps/platform/app/Support/OperationalControls/OperationalControlDecision.php create mode 100644 
apps/platform/app/Support/OperationalControls/OperationalControlEvaluator.php create mode 100644 apps/platform/database/factories/OperationalControlActivationFactory.php create mode 100644 apps/platform/database/migrations/2026_04_26_000000_create_operational_control_activations_table.php create mode 100644 apps/platform/resources/views/filament/system/pages/ops/controls.blade.php create mode 100644 apps/platform/resources/views/filament/system/pages/ops/partials/controls-header.blade.php create mode 100644 apps/platform/resources/views/filament/system/pages/ops/partials/operational-control-history.blade.php create mode 100644 apps/platform/tests/Feature/Findings/OperationalControlFindingsBackfillGateTest.php create mode 100644 apps/platform/tests/Feature/OperationalControls/NoAdHocOperationalControlBypassTest.php create mode 100644 apps/platform/tests/Feature/OperationalControls/OperationalControlAuthorizationSemanticsTest.php create mode 100644 apps/platform/tests/Feature/Restore/OperationalControlRestoreExecutionGateTest.php create mode 100644 apps/platform/tests/Feature/System/OpsControls/OperationalControlManagementTest.php create mode 100644 apps/platform/tests/Feature/System/OpsRunbooks/OperationalControlRunbookGateTest.php create mode 100644 apps/platform/tests/Unit/Support/OperationalControls/OperationalControlCatalogTest.php create mode 100644 apps/platform/tests/Unit/Support/OperationalControls/OperationalControlEvaluatorTest.php create mode 100644 apps/platform/tests/Unit/Support/OperationalControls/OperationalControlScopeResolutionTest.php create mode 100644 specs/242-operational-controls/checklists/requirements.md create mode 100644 specs/242-operational-controls/contracts/operational-controls.contract.yaml create mode 100644 specs/242-operational-controls/data-model.md create mode 100644 specs/242-operational-controls/plan.md create mode 100644 specs/242-operational-controls/quickstart.md create mode 100644 specs/242-operational-controls/research.md 
create mode 100644 specs/242-operational-controls/spec.md create mode 100644 specs/242-operational-controls/tasks.md diff --git a/.github/skills/spec-kit-end-to-end/SKILL.md b/.github/skills/spec-kit-end-to-end/SKILL.md new file mode 100644 index 00000000..6a1c0ef9 --- /dev/null +++ b/.github/skills/spec-kit-end-to-end/SKILL.md @@ -0,0 +1,939 @@ +--- +name: spec-kit-end-to-end +description: End-to-end Spec Kit workflow for TenantPilot/TenantAtlas: select the next suitable spec candidate from roadmap/spec-candidates when needed, create or update spec.md/plan.md/tasks.md, optionally implement the active spec, run tests, browser smoke checks where applicable, post-implementation analysis, fix confirmed findings, and repeat until no in-scope findings remain or a stop condition is reached. +--- + +# Skill: Spec Kit End-to-End Workflow + +## Purpose + +Use this skill to run an end-to-end Spec Kit workflow for TenantPilot/TenantAtlas. + +This skill supports three modes: + +1. **Preparation only**: select or scope the next suitable feature from roadmap/spec-candidates and create or update `spec.md`, `plan.md`, and `tasks.md`. +2. **Implementation only**: implement an already prepared spec, run tests/checks, run strict post-implementation analysis, fix confirmed findings, and repeat until clean or a bounded stop condition is reached. +3. **End-to-end**: select or create a spec and then implement it in the same invocation, but only when the user explicitly requests end-to-end execution. 
+ +The intended workflow is: + +```text +feature idea / roadmap item / spec candidate / active spec +→ determine requested mode +→ inspect repo truth, constitution, roadmap, spec candidates, existing specs, and relevant code +→ create or update spec.md + plan.md + tasks.md when preparation is needed +→ evaluate quality gates +→ implement only when the user explicitly asks for implementation or end-to-end execution +→ run relevant tests/checks +→ run browser smoke test when UI/user-facing flows are affected +→ run strict post-implementation analysis +→ fix confirmed in-scope findings +→ repeat test + analysis + fix loop until clean or bounded stop condition is reached +→ final report +``` + +## When to Use + +Use this skill when the user asks for any Spec Kit workflow around TenantPilot/TenantAtlas, including: + +- selecting the next best spec candidate from `docs/product/spec-candidates.md` and roadmap sources +- turning a feature idea, roadmap item, or candidate into `spec.md`, `plan.md`, and `tasks.md` +- preparing Spec Kit artifacts in one pass +- implementing an existing or newly prepared spec +- running implementation followed by strict analysis and fix iterations +- executing a full end-to-end flow from candidate selection to implementation verification + +Typical user prompts: + +```text +Nimm den nächsten sinnvollen Spec Candidate aus Roadmap/spec-candidates und mach spec, plan und tasks. +``` + +```text +Mach daraus spec, plan und tasks in einem Rutsch, aber noch nicht implementieren. +``` + +```text +Erstelle die Spec Kit Artefakte und implementiere sie danach mit Analyse/Fix-Loop. +``` + +```text +Implementiere die aktive Spec und analysiere danach, ob alles passt. +``` + +```text +Mach Spec Kit implement und danach analyse. Behebe alle Abweichungen und wiederhole bis sauber. +``` + +```text +Run end-to-end: choose next spec, create spec/plan/tasks, implement, analyze, fix until no in-scope findings remain. 
+``` + +## Hard Rules + +- Work strictly repo-based. +- Use the repository's actual Spec Kit workflow, scripts, templates, branch naming rules, and generated paths when available. +- Determine the requested mode before changing files: + - preparation only + - implementation only + - end-to-end preparation plus implementation +- Do not implement application code unless the user explicitly asks for implementation, `implement`, or end-to-end execution. +- When in preparation-only mode, create or update only Spec Kit preparation artifacts unless repository conventions require additional documentation artifacts. +- When in implementation mode, implement only the active or explicitly named Spec Kit feature. +- Do not manually invent spec numbers, branch names, or spec paths if Spec Kit provides a script or command for that. +- Do not bypass Spec Kit branch mechanics. +- Do not expand scope beyond the selected feature, `spec.md`, `plan.md`, and `tasks.md`. +- Do not silently add roadmap features, adjacent UX rewrites, speculative architecture, or unrelated refactors. +- Follow the repository constitution and existing Spec Kit conventions. +- Preserve TenantPilot/TenantAtlas terminology. +- Prefer small, reviewable, implementation-ready specs and patches over broad rewrites. +- Treat repository truth as authoritative over assumptions. +- If repository truth conflicts with the user-provided draft or spec, keep repository truth and document the deviation. +- If repository truth conflicts with implementation scope, stop and report the conflict unless there is an obvious minimal correction inside active spec scope. +- Fix only confirmed findings from tests, static checks, or post-implementation analysis. +- Fix all confirmed in-scope findings, regardless of severity, when they are safe and bounded. +- Do not leave Medium/Low findings open silently. If they are not fixed, document exactly why. 
+- Never hide failing tests, weaken assertions, delete meaningful coverage, or mark tasks complete without implementation evidence. +- Do not run destructive commands. +- Do not force checkout, reset, stash, rebase, merge, or delete branches. +- Do not perform database-destructive actions unless the repository test workflow explicitly requires isolated test database resets. +- Do not continue analysis/fix loops indefinitely. +- Do not move from preparation to implementation unless the Spec Readiness Gate passes or the user explicitly accepts the documented readiness risks. +- Do not move from implementation to final status unless the Test Gate, Browser Smoke Test Gate where applicable, and Post-Implementation Analysis Gate have been evaluated. +- Do not claim merge-readiness unless the Merge Readiness Gate passes. + +## Required Inputs + +The user should provide at least one of: + +- feature title and short goal +- full spec candidate +- roadmap item +- rough problem statement +- UX or architecture improvement idea +- explicit spec directory such as `specs/<number>-<slug>/` +- instruction to use the current active Spec Kit feature +- instruction to choose the next best candidate from roadmap/spec-candidates + +If the input is incomplete, proceed with the smallest reasonable interpretation and document assumptions. + +If implementation is requested but the active spec cannot be determined safely, inspect the repository Spec Kit context first. If it is still ambiguous, stop and ask for the specific spec directory. + +## Required Repository Checks + +Always check the sources relevant to the requested mode. + +For preparation mode, always check: + +1. `.specify/memory/constitution.md` +2. `.specify/templates/` +3. `.specify/scripts/` +4. existing Spec Kit command usage or repository instructions, if present +5. current branch and git status +6. `specs/` +7. `docs/product/spec-candidates.md` +8. relevant roadmap documents under `docs/product/`, especially `roadmap.md` if present +9.
nearby existing specs with related terminology or scope +10. application code only as needed to avoid wrong naming, wrong architecture, duplicate concepts, impossible tasks, duplicated specs, or already-completed candidates + +For implementation mode, always check: + +1. active Spec Kit context / current branch +2. git status +3. `.specify/memory/constitution.md` +4. the active spec directory +5. `spec.md` +6. `plan.md` +7. `tasks.md` +8. relevant templates or conventions under `.specify/templates/` +9. nearby existing specs with related terminology or scope +10. application code surfaces referenced by the active spec +11. existing tests related to the changed behavior + +## Git and Branch Safety + +Before running any Spec Kit command or making implementation changes: + +1. Check the current branch. +2. Check whether the working tree is clean. +3. If there are unrelated uncommitted changes, stop and report them. Do not continue. +4. If the working tree only contains user-intended planning edits for this operation, continue cautiously. +5. Let Spec Kit create or switch to the correct feature branch when that is how the repository workflow works. +6. Do not force checkout, reset, stash, rebase, merge, or delete branches. +7. Do not overwrite existing specs. + +If the repo requires an explicit branch creation script for `specify`, use that script rather than manually creating the branch. + +## Mode Selection + +Select exactly one mode per invocation. Choose end-to-end mode, which combines preparation and implementation, only when the user explicitly asks for end-to-end execution. + +### Preparation Only + +Use when the user asks to: + +- create spec/plan/tasks +- prepare a feature +- choose the next best spec candidate +- turn roadmap/spec-candidates into a spec +- run specify/plan/tasks/analyze without implementation +- avoid implementation + +Output is limited to Spec Kit preparation artifacts, preparation-artifact fixes, and final preparation summary.
+ +### Implementation Only + +Use when the user asks to: + +- implement an active spec +- run Spec Kit implement +- analyze after implementation +- fix implementation findings + +Requires an existing active or explicitly named spec. + +### End-to-End + +Use only when the user explicitly asks to: + +- choose/create the spec and then implement it +- run the full workflow +- go from candidate to implementation +- prepare and implement in one pass + +End-to-end mode must keep preparation and implementation phases clearly separated. + +End-to-end mode must pass the Candidate Selection Gate and Spec Readiness Gate before implementation begins. + +## Quality Gates + +Quality gates are mandatory checkpoints. They make the workflow safe for agentic execution without allowing uncontrolled scope expansion. + +### Gate 1: Candidate Selection Gate + +Required before creating a new spec from roadmap/spec-candidates. + +Pass criteria: + +- The selected candidate exists in roadmap/spec-candidate material or is directly provided by the user. +- The selected candidate is not already covered by an existing active or completed spec. +- The selected candidate aligns with current roadmap priorities or explicitly documented product direction. +- The candidate can be scoped as a small, reviewable, implementation-ready slice. +- Major adjacent concerns are listed as follow-up candidates instead of being hidden inside the primary scope. + +Fail behavior: + +- If no candidate satisfies the gate, stop and report the top candidates plus the reason none is ready. +- Do not invent a new roadmap direction to force progress. + +### Gate 2: Spec Readiness Gate + +Required before implementation starts, including end-to-end mode. + +Pass criteria: + +- `spec.md`, `plan.md`, and `tasks.md` exist. +- The spec has clear problem statement, user value, functional requirements, out-of-scope boundaries, acceptance criteria, assumptions, and risks. 
+- The plan identifies likely affected repo surfaces and does not contradict repository architecture. +- The tasks are small, ordered, verifiable, and include test/validation tasks. +- RBAC, workspace/tenant isolation, auditability, OperationRun semantics, evidence/result-truth, and UX requirements are addressed where relevant. +- No open question blocks safe implementation. +- The scope is small enough for a bounded implementation loop. + +Fail behavior: + +- In preparation-only mode, report the readiness gaps and provide the manual analysis prompt. +- In end-to-end mode, stop before implementation unless the user explicitly asked to proceed despite the documented readiness risks. +- Do not compensate for an unclear spec by inventing implementation scope. + +### Gate 3: Implementation Scope Gate + +Required before changing application code. + +Pass criteria: + +- The active spec directory is known. +- The implementation target is traceable to specific tasks in `tasks.md`. +- The affected files/surfaces are consistent with `plan.md` or clearly justified by repository truth. +- No required change would introduce unrelated product behavior. +- No required change conflicts with constitution, existing architecture, RBAC/isolation boundaries, or source-of-truth semantics. + +Fail behavior: + +- Stop before code changes and report the conflict or ambiguity. +- Suggest a minimal spec/plan/tasks correction if the issue is in the artifacts rather than the codebase. + +### Gate 4: Test Gate + +Required after implementation and after each fix iteration. + +Pass criteria: + +- Targeted tests for changed behavior pass. +- Relevant existing tests pass or failures are proven unrelated and documented. +- Static analysis, linting, formatting, or type checks used by the repository pass when applicable. +- Security/governance-relevant changes have backend, policy, or domain coverage; UI-only verification is not enough. 
+- Regression coverage exists for each fixed Blocker or High finding where practical. + +Fail behavior: + +- Fix in-scope failures before post-implementation analysis. +- If failures are unrelated or pre-existing, document evidence and continue only if they do not invalidate the active spec. +- Do not weaken tests to pass the gate. + +### Gate 5: Browser Smoke Test Gate + +Required before claiming implementation is ready for manual review/merge when the change affects Filament UI, Livewire interactions, navigation, forms, tables, actions, modals, dashboards, operation drilldowns, tenant/workspace context, or any user-facing flow. + +Not required for documentation-only, spec-only, backend-only, domain-only, enum-only, contract-only, or test-only changes unless those changes alter a user-facing flow. + +Pass criteria: + +- The relevant page or flow loads in a real browser or the repository's browser-testing harness. +- The primary action introduced or changed by the spec can be executed successfully. +- Expected UI states, labels, badges, actions, empty states, tables, forms, modals, and navigation are visible where relevant. +- Workspace/tenant context is preserved across the tested flow where relevant. +- RBAC/capability-dependent visibility behaves as expected where practical to verify. +- Livewire interactions complete without visible runtime errors. +- No relevant browser console errors occur. +- No failed network requests occur for the tested flow, except known unrelated development noise that is explicitly documented. +- OperationRun, audit, evidence, result, or support-diagnostic drilldowns work where relevant. +- The smoke-tested path is documented in the final response. + +Fail behavior: + +- Fix in-scope browser, UX, Livewire, navigation, or runtime failures before claiming merge-readiness. +- If a browser issue is unrelated existing debt, document evidence and residual risk. 
+- Do not treat a passing browser smoke test as a substitute for backend, policy, domain, security, feature, or integration tests. +- Do not expand the smoke test into a full E2E suite unless the user explicitly asks for that. + +### Gate 6: Post-Implementation Analysis Gate + +Required after implementation and after each fix iteration. + +Pass criteria: + +- The implementation has been checked against `spec.md`, `plan.md`, `tasks.md`, and constitution. +- All completed tasks have implementation evidence. +- No confirmed in-scope findings remain. +- Medium/Low findings are fixed when they are inside active spec scope, clearly bounded, and safe. +- Medium/Low findings that remain open are explicitly documented with one of these reasons: + - out of scope + - requires separate spec + - risky refactor + - existing unrelated debt + - not reproducible + - blocked by unclear product/architecture decision +- No scope expansion was introduced during fixes. + +Fail behavior: + +- Fix confirmed in-scope findings, regardless of severity, when the fix is safe and bounded. +- Stop instead of fixing when remediation would expand scope, contradict repo architecture, introduce risky refactors, or repeat the same failed fix twice. + +### Gate 7: Merge Readiness Gate + +Required before claiming the implementation is ready for manual review/merge. + +Pass criteria: + +- Spec Readiness Gate passed. +- Implementation Scope Gate passed. +- Test Gate passed. +- Browser Smoke Test Gate passed when applicable, or was explicitly marked not applicable with a reason. +- Post-Implementation Analysis Gate passed. +- `tasks.md` reflects actual completion status. +- No confirmed in-scope findings remain. +- All remaining findings are documented as out-of-scope, follow-up candidates, unrelated existing debt, or explicit residual risks. +- Final response includes changed files, tests/checks run, iterations performed, residual risks, and follow-up candidates. 
+ +Fail behavior: + +- Do not claim merge-readiness. +- Report the failed gate, remaining risks, and the smallest recommended next action. + +## Candidate Selection Rules + +When the user asks for the next best spec from roadmap/spec-candidates: + +- Read `docs/product/spec-candidates.md`. +- Read relevant roadmap documents under `docs/product/`, especially `roadmap.md` if present. +- Check existing specs to avoid duplicates. +- Prefer candidates that align with current roadmap priorities, platform foundations, enterprise UX, RBAC/isolation, auditability, observability, and governance workflow maturity. +- Prefer candidates that unlock roadmap progress, reduce architectural drift, harden foundations, or remove known blockers. +- Prefer small, implementation-ready slices over broad platform rewrites. +- If multiple candidates are plausible, choose one primary candidate and document why it was selected. +- Add non-selected relevant candidates as follow-up spec candidates, not hidden scope. +- Do not invent a candidate if existing roadmap/spec-candidate material provides a suitable one. +- Do not pick a spec only because it is listed first. +- Evaluate the Candidate Selection Gate before creating the spec directory. + +Evaluate candidates using these criteria: + +1. **Roadmap Fit**: Does it support the current roadmap sequence or unlock the next roadmap layer? +2. **Foundation Value**: Does it strengthen reusable platform foundations such as RBAC, isolation, auditability, evidence, OperationRun observability, provider boundaries, vocabulary, baseline/control/finding semantics, or enterprise UX patterns? +3. **Dependency Unblocking**: Does it make future specs smaller, safer, or more consistent? +4. **Scope Size**: Can it be implemented as a narrow, testable slice? +5. **Repo Readiness**: Does the repo already have enough structure to implement the next slice safely? +6. **Risk Reduction**: Does it reduce current architectural or product risk? +7. 
**User/Product Value**: Does it produce visible operator value or make the platform more sellable without heavy scope? + +## Required Selection Output Before Spec Kit Execution + +Before running the Spec Kit flow, identify: + +- selected candidate title +- source location in roadmap/spec-candidates +- why it was selected +- why close alternatives were deferred +- roadmap relationship +- smallest viable implementation slice +- proposed concise feature description to feed into `specify` + +The feature description must be product- and behavior-oriented. It should not be a low-level implementation plan. + +## Spec Kit Preparation Flow + +Use this section when the selected mode is preparation-only or end-to-end. + +### Step 1: Determine the repository's Spec Kit command pattern + +Inspect repository instructions and scripts to identify how this repo expects Spec Kit to be run. + +Common locations to inspect: + +```text +.specify/scripts/ +.specify/templates/ +.specify/memory/constitution.md +.github/prompts/ +.github/skills/ +README.md +specs/ +``` + +Use the repo-specific mechanism if present. + +### Step 2: Run `specify` + +Run the repository's `specify` flow using the selected candidate and the smallest viable slice. + +The `specify` input should include: + +- selected candidate title +- problem statement +- operator/user value +- roadmap relationship +- out-of-scope boundaries +- key acceptance criteria +- important enterprise constraints + +Let Spec Kit create the correct branch and spec location if that is the repo's configured behavior. + +### Step 3: Run `plan` + +Run the repository's `plan` flow for the generated spec. 
+ +The `plan` input should keep the scope tight and should require repo-based alignment with: + +- constitution +- existing architecture +- workspace/tenant isolation +- RBAC +- OperationRun/observability where relevant +- evidence/snapshot/truth semantics where relevant +- Filament/Livewire conventions where relevant +- test strategy + +### Step 4: Run `tasks` + +Run the repository's `tasks` flow for the generated plan. + +The generated tasks must be: + +- ordered +- small +- testable +- grouped by phase +- limited to the selected scope +- suitable for later implementation or manual analysis before implementation + +### Step 5: Run preparation `analyze` + +Run the repository's `analyze` flow against the generated Spec Kit artifacts when the repository supports it. + +Analyze must check: + +- consistency between `spec.md`, `plan.md`, and `tasks.md` +- constitution alignment +- roadmap alignment +- whether the selected candidate was narrowed safely +- whether tasks are complete enough for implementation +- whether tasks accidentally require scope not described in the spec +- whether plan details conflict with repository architecture or terminology +- whether implementation risks are documented instead of silently ignored + +In preparation-only mode, do not use analyze as a trigger to implement application code. 
+ +### Step 6: Fix preparation-artifact issues only + +If preparation analyze finds issues, fix only Spec Kit preparation artifacts such as: + +- `spec.md` +- `plan.md` +- `tasks.md` +- generated Spec Kit metadata files, if the repository uses them + +Allowed fixes include: + +- clarify requirements +- tighten scope +- move out-of-scope work into follow-up candidates +- correct terminology +- add missing tasks +- remove tasks not backed by the spec +- align plan language with repository architecture +- add missing acceptance criteria or validation tasks + +Forbidden fixes in preparation-only mode include: + +- modifying application code +- creating migrations +- editing models, services, jobs, policies, Filament resources, Livewire components, tests, or commands +- running implementation or test-fix loops +- changing runtime behavior + +### Step 7: Evaluate the Spec Readiness Gate + +After preparation analyze has passed or preparation-artifact issues have been fixed, evaluate the Spec Readiness Gate. + +In preparation-only mode, stop after this gate and do not implement. + +## Spec Directory Rules + +When creating a new spec directory, use the repository's Spec Kit-generated directory or path. + +If the repository does not provide a command for spec setup, use the next valid spec number and a kebab-case slug: + +```text +specs/<number>-<slug>/ +``` + +The exact number must be derived from the current repository state and existing numbering conventions. + +Create or update preparation artifacts inside the selected spec directory: + +```text +specs/<number>-<slug>/spec.md +specs/<number>-<slug>/plan.md +specs/<number>-<slug>/tasks.md +``` + +If the repository templates require additional preparation files, create them only when this is consistent with existing Spec Kit conventions. + +## `spec.md` Requirements + +The spec must be product- and behavior-oriented. It should avoid premature implementation detail unless needed for correctness.
+ +Include: + +- Feature title +- Problem statement +- Business/product value +- Primary users/operators +- User stories +- Functional requirements +- Non-functional requirements +- UX requirements +- RBAC/security requirements +- Auditability/observability requirements +- Data/truth-source requirements where relevant +- Out of scope +- Acceptance criteria +- Success criteria +- Risks +- Assumptions +- Open questions + +TenantPilot/TenantAtlas specs should preserve enterprise SaaS principles: + +- workspace/tenant isolation +- capability-first RBAC +- auditability +- operation/result truth separation +- source-of-truth clarity +- calm enterprise operator UX +- progressive disclosure where useful +- no false positive calmness + +## `plan.md` Requirements + +The plan must be repo-aware and implementation-oriented, but it must not make code changes by itself. + +Include: + +- Technical approach +- Existing repository surfaces likely affected +- Domain/model implications +- UI/Filament implications +- Livewire implications where relevant +- OperationRun/monitoring implications where relevant +- RBAC/policy implications +- Audit/logging/evidence implications where relevant +- Data/migration implications where relevant +- Test strategy +- Rollout considerations +- Risk controls +- Implementation phases + +The plan should clearly distinguish where relevant: + +- execution truth +- artifact truth +- backup/snapshot truth +- recovery/evidence truth +- operator next action + +## `tasks.md` Requirements + +Tasks must be ordered, small, and verifiable. 
+ +Include: + +- checkbox tasks +- phase grouping +- tests before or alongside implementation tasks where practical +- final validation tasks +- documentation/update tasks if needed +- explicit non-goals where useful + +Avoid vague tasks such as: + +```text +Clean up code +Refactor UI +Improve performance +Make it enterprise-ready +``` + +Prefer concrete tasks such as: + +```text +- [ ] Add a feature test covering workspace isolation for <feature behavior>. +- [ ] Update <existing surface> to display <expected state>. +- [ ] Add policy coverage for <capability>. +``` + +If exact file names are not known yet, phrase tasks as repo-verification tasks first rather than inventing file paths. + +## Preparation Scope Control + +If the requested feature implies multiple independent concerns, create one primary spec for the smallest valuable slice and add a `Follow-up spec candidates` section. + +Examples of follow-up candidates: + +- assigned findings +- pending approvals +- personal work queue +- notification delivery settings +- evidence pack export hardening +- operation monitoring refinements +- autonomous governance decision surfaces + +Do not force all follow-up candidates into the primary spec. + +## Implementation Loop + +Only execute this section when the selected mode is implementation-only or end-to-end. + +Execute the loop in bounded phases: + +1. Evaluate the Spec Readiness Gate. +2. Evaluate the Implementation Scope Gate before changing application code. +3. Implement the active Spec Kit feature scope. +4. Run targeted tests and relevant static/dynamic checks. +5. Evaluate the Test Gate. +6. Run a Browser Smoke Test when the change affects UI/user-facing flows. +7. Evaluate the Browser Smoke Test Gate as passed, failed, or not applicable with a reason. +8. Run strict post-implementation analysis against spec, plan, tasks, constitution, changed code, changed tests, browser smoke results where applicable, and relevant existing patterns. +9. Evaluate the Post-Implementation Analysis Gate. +10.
Identify confirmed findings by severity: Blocker, High, Medium, Low. +11. Fix all confirmed in-scope findings regardless of severity when safe and bounded. +12. Do not fix findings that require scope expansion, risky unrelated refactors, or architectural/product decisions outside the active spec; document them as follow-up/residual risks with reasons. +13. Re-run relevant tests and browser smoke checks where applicable after fixes. +14. Repeat test + browser smoke + analysis + fix loop until no confirmed in-scope findings remain or a stop condition is reached. +15. Evaluate the Merge Readiness Gate. +16. Report final implementation status, changed files, tests, browser smoke result, residual risks, failed/passed gates, and manual review prompt. + +## Stop Conditions + +Stop the implementation loop when any of the following is true: + +- No confirmed in-scope findings remain. +- The same finding appears twice after attempted fixes. +- A required fix conflicts with the spec, plan, constitution, or repository architecture. +- A required fix would expand scope beyond the active spec. +- A required fix would require a risky unrelated refactor. +- A required fix depends on an unresolved product or architecture decision. +- Tests reveal an unrelated pre-existing failure that cannot be safely fixed inside the active spec. +- Browser smoke testing reveals an unrelated pre-existing UI/runtime failure that cannot be safely fixed inside the active spec. +- Three analysis/fix iterations have already been completed. +- The repository state is ambiguous enough that continuing would risk damaging architecture or data semantics. + +When stopping before full cleanliness, report exactly why the loop stopped and what remains. + +## Post-Implementation Analysis Prompt + +Use this prompt internally after implementation and after each fix iteration: + +```markdown +Du bist ein Senior Staff Software Engineer, Software Architect und Enterprise SaaS Reviewer. 
+ +Analysiere die Implementierung der aktiven Spec streng repo-basiert. + +Ziel: +Prüfe, ob die Umsetzung vollständig, konsistent, getestet und constitution-konform ist. + +Prüfe gegen: +- spec.md +- plan.md +- tasks.md +- .specify/memory/constitution.md +- geänderte Anwendungscodes +- geänderte Tests +- Browser-Smoke-Test-Ergebnis, falls UI/user-facing Flows betroffen sind +- bestehende Repository-Patterns + +Wichtig: +- Keine Spekulation ohne Repo-Beleg. +- Keine Scope-Erweiterung. +- Keine neuen Produktideen als Pflicht-Fixes. +- Findings nach Blocker, High, Medium, Low gruppieren. +- Für jedes Finding konkrete Datei-/Code-Belege nennen. +- Für jedes Finding eine minimale Remediation nennen. +- Separat ausweisen, welche Findings innerhalb der aktiven Spec behoben werden müssen. +- Medium/Low Findings innerhalb der aktiven Spec ebenfalls zur Behebung markieren, wenn sie sicher und bounded sind. +- Bei UI-/Filament-/Livewire-Änderungen prüfen, ob ein Browser Smoke Test durchgeführt wurde und ob der getestete Operator-Flow wirklich funktioniert. +- Findings, die nicht behoben werden sollen, nur als Follow-up/Residual Risk ausweisen, wenn sie out of scope, risky refactor, unrelated existing debt, not reproducible oder durch eine offene Produkt-/Architekturentscheidung blockiert sind. +- Wenn keine bestätigten In-Scope Findings verbleiben, klare Implementierungsfreigabe geben. +``` + +## Task Completion Rules + +- Keep `tasks.md` aligned with actual implementation status. +- Check off tasks only after the implementation and test evidence exists. +- If a task is obsolete because repository truth proves a different path, update the task note with the reason instead of silently deleting it. +- If a task cannot be completed inside scope, leave it unchecked and report why. + +## Testing Rules + +- Add or update tests for all changed business behavior. +- Include RBAC and workspace/tenant isolation tests where relevant. 
+- Include OperationRun, audit, evidence, or result-truth tests where relevant. +- Prefer regression tests for every fixed Blocker or High finding. +- Add regression tests for Medium/Low findings when the behavior is important and testable without excessive churn. +- Do not weaken tests to pass the suite. +- Do not treat a green UI path as sufficient without backend or policy coverage when the behavior is security- or governance-relevant. + +## Browser Smoke Test Rules + +Apply these rules when the active spec changes Filament UI, Livewire interactions, navigation, forms, tables, actions, modals, dashboards, operation drilldowns, tenant/workspace context, or any user-facing flow. + +The browser smoke test should be narrow and focused. It is not a full E2E suite unless explicitly requested. + +Minimum smoke path: + +1. Open the relevant page or entry point. +2. Confirm the expected workspace/tenant context where relevant. +3. Confirm the changed or newly introduced UI element is visible. +4. Execute the primary action or interaction changed by the spec. +5. Confirm the expected result state, notification, redirect, table update, modal state, operation link, or drilldown. +6. Check for relevant console errors. +7. Check for failed network requests related to the tested flow. +8. Document the tested path in the final response. + +For TenantPilot/TenantAtlas, pay special attention to: + +- Filament actions and header actions +- Livewire polling, modals, validation, and actions +- workspace/tenant context preservation +- RBAC/capability-dependent action visibility +- OperationRun links and drilldown continuity +- audit/evidence/result/support-diagnostic drilldowns where relevant +- empty states, badges, labels, and decision guidance where relevant + +Browser smoke testing is required for UI/user-facing changes and optional for backend-only changes. 
+ +Do not treat browser smoke success as proof that backend security, policies, domain logic, auditability, or workspace/tenant isolation are correct. Those still require automated tests or repo-based verification. + +## Failure Handling + +If a Spec Kit command, preparation analyze phase, implementation step, test phase, browser smoke phase, or post-implementation analysis fails: + +1. Stop at the relevant gate or stop condition. +2. Report the failing command or phase. +3. Summarize the error. +4. Do not attempt unrelated implementation as a workaround. +5. Suggest the smallest safe next action. + +If the branch or working tree state is unsafe: + +1. Stop before running Spec Kit commands or implementation changes. +2. Report the current branch and relevant uncommitted files. +3. Ask the user to commit, stash, or move to a clean worktree. + +## Final Response Requirements + +For preparation-only mode, respond with: + +1. Selected candidate and why it was chosen +2. Why close alternatives were deferred +3. Current branch after Spec Kit execution, if changed +4. Generated spec path +5. Files created or updated by Spec Kit +6. Preparation analyze result summary +7. Preparation-artifact fixes applied after analyze +8. Assumptions made +9. Open questions, if any +10. Quality gates evaluated and their result +11. Recommended next implementation prompt +12. Explicit statement that no application implementation was performed + +For implementation-only or end-to-end mode, respond with: + +1. Active spec directory +2. Summary of implemented changes +3. Tests/checks run and their results +4. Browser smoke test result, tested path, or not-applicable reason +5. Quality gates passed/failed and number of analysis/fix iterations performed +6. Remaining in-scope findings, if any +7. Residual risks and follow-up candidates, if relevant +8. Files changed +9. 
Explicit statement whether the Merge Readiness Gate passed and whether the implementation is ready for manual review/merge + +Keep the final response concise, but include enough detail for the user to continue immediately. + +## Manual Review Prompts + +For preparation-only mode, provide a ready-to-copy prompt like this, adapted to the generated spec branch/path: + +```markdown +Du bist ein Senior Staff Software Architect und Enterprise SaaS Reviewer. + +Analysiere die neu erstellte Spec `<spec-path>` streng repo-basiert. + +Ziel: +Prüfe, ob `spec.md`, `plan.md` und `tasks.md` vollständig, konsistent, implementierbar und constitution-konform sind. + +Wichtig: +- Keine Implementierung. +- Keine Codeänderungen. +- Keine Scope-Erweiterung. +- Prüfe nur gegen Repo-Wahrheit. +- Benenne konkrete Konflikte mit Dateien, Patterns, Datenflüssen oder bestehenden Specs. +- Schlage nur minimale Korrekturen an `spec.md`, `plan.md` und `tasks.md` vor. +- Wenn alles passt, gib eine klare Implementierungsfreigabe. +``` + +For preparation-only mode, also provide a ready-to-copy implementation prompt after analyze has passed or preparation-artifact issues have been fixed: + +```markdown +Du bist ein Senior Staff Software Engineer für TenantPilot/TenantAtlas. + +Implementiere die vorbereitete Spec `<spec-path>` streng anhand von `tasks.md`. + +Wichtig: +- Arbeite task-sequenziell. +- Ändere nur Dateien, die für die jeweilige Task notwendig sind. +- Halte dich an `spec.md`, `plan.md`, `tasks.md` und die Constitution. +- Keine Scope-Erweiterung. +- Keine Opportunistic Refactors. +- Führe passende Tests nach sinnvollen Task-Gruppen aus. +- Wenn eine Task unklar oder falsch ist, stoppe und dokumentiere den Konflikt statt frei zu improvisieren. +- Am Ende liefere geänderte Dateien, Teststatus, offene Risiken und nicht erledigte Tasks. 
+``` + +For implementation-only or end-to-end mode, provide a ready-to-copy prompt like this, adapted to the active spec number and slug: + +```markdown +Du bist ein Senior Staff Software Architect und Enterprise SaaS Reviewer. + +Führe eine finale manuelle Review der implementierten Spec `<number>-<slug>` streng repo-basiert durch. + +Ziel: +Prüfe, ob die Implementierung nach dem Agenten-Loop wirklich merge-ready ist. + +Wichtig: +- Keine Implementierung. +- Keine Codeänderungen. +- Keine Scope-Erweiterung. +- Prüfe gegen spec.md, plan.md, tasks.md und constitution.md. +- Prüfe die geänderten Dateien, Tests, Browser-Smoke-Test-Ergebnis, RBAC, Workspace-/Tenant-Isolation, Auditability, UX und OperationRun-Semantik, soweit relevant. +- Benenne nur konkrete Findings mit Repo-Beleg. +- Gib am Ende eine klare Entscheidung: Merge-ready, merge-ready with notes, oder not merge-ready. +``` + +## Example Invocations + +User: + +```text +Nutze den Skill spec-kit-end-to-end. +Wähle aus roadmap.md und spec-candidates.md die nächste sinnvollste Spec. +Führe danach GitHub Spec Kit specify, plan, tasks und analyze in einem Rutsch aus. +Behebe alle analyze-Issues in den Spec-Kit-Artefakten. +Keine Application-Implementierung. +``` + +Expected behavior: + +1. Inspect constitution, Spec Kit scripts/templates, specs, roadmap, and spec candidates. +2. Check branch and working tree safety. +3. Compare candidate suitability. +4. Select the next best candidate. +5. Evaluate the Candidate Selection Gate. +6. Run the repository's real Spec Kit `specify` flow, letting it handle branch/spec setup. +7. Run the repository's real Spec Kit `plan` flow. +8. Run the repository's real Spec Kit `tasks` flow. +9. Run the repository's real Spec Kit preparation `analyze` flow. +10. Fix analyze issues only in Spec Kit preparation artifacts. +11. Evaluate the Spec Readiness Gate. +12. Stop before application implementation. +13. 
Return selection rationale, branch/path summary, artifact summary, analyze summary, fixes applied, gates, and next implementation prompt. + +User: + +```text +Implementiere die aktive Spec. Danach analyse gegen spec/plan/tasks/constitution ausführen, alle in-scope Findings beheben und wiederhole bis sauber. +``` + +Expected behavior: + +1. Inspect active Spec Kit context, constitution, spec, plan, tasks, relevant code, and relevant tests. +2. Evaluate the Spec Readiness Gate and Implementation Scope Gate. +3. Implement only the active spec scope. +4. Run targeted tests and relevant checks. +5. Evaluate the Test Gate. +6. Run and evaluate Browser Smoke Test when UI/user-facing flows are affected. +7. Run post-implementation analysis. +8. Fix all confirmed in-scope findings regardless of severity when safe and bounded. +9. Repeat test + browser smoke + analysis + fix loop up to the stop conditions. +10. Evaluate the Merge Readiness Gate. +11. Report final status, changed files, tests, browser smoke result, residual risks, gates, and manual review prompt. + +User: + +```text +Run end-to-end: wähle die nächste sinnvolle Spec aus spec-candidates/roadmap, erstelle spec/plan/tasks, implementiere sie danach und wiederhole analyse/fix bis sauber. +``` + +Expected behavior: + +1. Run preparation mode first. +2. Clearly report the selected candidate and created spec directory. +3. Continue into implementation mode only because the user explicitly requested end-to-end execution. +4. Implement only the newly created active spec scope. +5. Run tests/checks, browser smoke checks where applicable, post-implementation analysis, and bounded fix iterations. +6. Fix all confirmed in-scope findings regardless of severity when safe and bounded. +7. Report final implementation status, gates, browser smoke result, and residual risks. 
+``` \ No newline at end of file diff --git a/.github/skills/spec-kit-next-best-one-shot/SKILL.md b/.github/skills/spec-kit-next-best-one-shot/SKILL.md deleted file mode 100644 index 59c91d9f..00000000 --- a/.github/skills/spec-kit-next-best-one-shot/SKILL.md +++ /dev/null @@ -1,398 +0,0 @@ ---- -name: spec-kit-next-best-one-shot -description: Select the most suitable next TenantPilot/TenantAtlas spec from roadmap and spec-candidates, then run the GitHub Spec Kit preparation flow in one pass: specify, plan, tasks, and analyze. Use when the user wants the agent to choose the next best spec, execute the real Spec Kit workflow including branch/spec-directory mechanics, analyze the generated artifacts, and fix preparation issues before implementation. This skill must not implement application code. ---- - -# Skill: Spec Kit Next-Best One-Shot Preparation - -## Purpose - -Use this skill when the user wants the agent to select the most suitable next spec from existing product planning sources and then execute the real GitHub Spec Kit preparation flow in one pass: - -1. select the next best spec candidate from roadmap and spec candidates -2. run the repository's Spec Kit `specify` flow for that selected candidate -3. run the repository's Spec Kit `plan` flow for the generated spec -4. run the repository's Spec Kit `tasks` flow for the generated plan -5. run the repository's Spec Kit `analyze` flow against the generated artifacts -6. fix issues in Spec Kit preparation artifacts only (`spec.md`, `plan.md`, `tasks.md`, and related Spec Kit metadata if required) -7. stop before implementation -8. provide a concise readiness summary for the user - -This skill must use the repository's actual Spec Kit scripts, commands, templates, branch naming rules, and generated paths. It must not manually bypass Spec Kit by creating arbitrary spec folders or files. The only allowed fixes after `analyze` are preparation-artifact fixes, not application-code implementation. 
- -The intended workflow is: - -```text -roadmap.md + spec-candidates.md -→ select next best spec -→ run Spec Kit specify -→ run Spec Kit plan -→ run Spec Kit tasks -→ run Spec Kit analyze -→ fix preparation-artifact issues -→ explicit implementation step later -``` - -## When to Use - -Use this skill when the user asks things like: - -```text -Nimm die nächste sinnvollste Spec aus roadmap und spec-candidates und führe specify, plan, tasks und analyze aus. -``` - -```text -Wähle die nächste geeignete Spec und mach den Spec-Kit-Flow inklusive analyze in einem Rutsch. -``` - -```text -Schau in roadmap.md und spec-candidates.md und starte daraus specify, plan, tasks und analyze. -``` - -```text -Such die beste nächste Spec aus und bereite sie per GitHub Spec Kit vollständig vor. -``` - -```text -Nimm angesichts Roadmap und Spec Candidates das sinnvollste nächste Thema, aber nicht implementieren. -``` - -## Hard Rules - -- Work strictly repo-based. -- Use the repository's actual GitHub Spec Kit workflow. -- Do not manually invent spec numbers, branch names, or spec paths if Spec Kit provides a script or command for that. -- Do not manually create `spec.md`, `plan.md`, or `tasks.md` when the Spec Kit workflow can generate them. -- Do not bypass Spec Kit branch mechanics. -- Run `analyze` after `tasks` when the repository supports it. -- Fix only issues found in Spec Kit preparation artifacts and planning metadata. -- Do not treat analyze findings as permission to implement product code. -- If analyze reports implementation work as missing, record it in `tasks.md` instead of implementing it. -- Do not implement application code. -- Do not modify production code. -- Do not modify migrations, models, services, jobs, Filament resources, Livewire components, policies, commands, or tests unless the user explicitly starts a later implementation task. -- Do not execute implementation commands. -- Do not run destructive commands. 
-- Do not invent roadmap priorities not supported by repository documents. -- Do not pick a spec only because it is listed first. -- Do not select broad platform rewrites when a smaller dependency-unlocking spec is more appropriate. -- Prefer specs that unlock roadmap progress, reduce architectural drift, harden foundations, or remove known blockers. -- Prefer small, reviewable, implementation-ready specs over large ambiguous themes. -- Preserve TenantPilot/TenantAtlas terminology. -- Follow the repository constitution and existing Spec Kit conventions. -- If repository truth conflicts with roadmap/candidate wording, keep repository truth and document the deviation. -- If no candidate is suitable, do not run Spec Kit commands and explain why. - -## Required Repository Checks Before Selection - -Before selecting the next spec, inspect: - -1. `.specify/memory/constitution.md` -2. `.specify/templates/` -3. `.specify/scripts/` -4. existing Spec Kit command usage or repository instructions, if present -5. `specs/` -6. `docs/product/spec-candidates.md` -7. roadmap documents under `docs/product/`, especially `roadmap.md` if present -8. nearby existing specs related to top candidate areas -9. current branch and git status -10. application code only as needed to verify whether a candidate is already done, blocked, duplicated, or technically mis-scoped - -Do not edit application code. - -## Git and Branch Safety - -Before running any Spec Kit command or script: - -1. Check the current branch. -2. Check whether the working tree is clean. -3. If there are unrelated uncommitted changes, stop and report them. Do not continue. -4. If the working tree only contains user-intended planning edits for this operation, continue cautiously. -5. Let Spec Kit create or switch to the correct feature branch when that is how the repository workflow works. -6. Do not force checkout, reset, stash, rebase, merge, or delete branches. -7. Do not overwrite existing specs. 
- -If the repo requires an explicit branch creation script for `specify`, use that script rather than manually creating the branch. - -## Candidate Selection Criteria - -Evaluate candidate specs using these criteria. - -### 1. Roadmap Fit - -Prefer candidates that directly support the current roadmap sequence or unlock the next roadmap layer. - -Examples: - -- governance foundations before advanced compliance views -- evidence/snapshot foundations before auditor packs -- control catalog foundations before CIS/NIS2 mappings -- decision/workflow surfaces before autonomous governance -- provider/platform boundary cleanup before multi-provider expansion - -### 2. Foundation Value - -Prefer candidates that strengthen reusable platform foundations: - -- RBAC and workspace/tenant isolation -- auditability -- evidence and snapshot truth -- operation observability -- provider boundary neutrality -- canonical vocabulary -- baseline/control/finding semantics -- enterprise detail-page or decision-surface patterns - -### 3. Dependency Unblocking - -Prefer specs that unblock multiple later candidates. - -A good next spec should usually make future specs smaller, safer, or more consistent. - -### 4. Scope Size - -Prefer a candidate that can be implemented as a narrow, testable slice. - -Avoid selecting: - -- broad platform rewrites -- vague product themes -- multi-feature bundles -- speculative future-provider frameworks -- large UX redesigns without a clear first slice - -### 5. Repo Readiness - -Prefer candidates where the repository already has enough structure to implement the next slice safely. - -Check whether related models, services, UI pages, tests, or concepts already exist. - -### 6. 
Risk Reduction - -Prefer candidates that reduce current architectural or product risk: - -- legacy dual-world semantics -- unclear truth ownership -- inconsistent operator UX -- missing audit/evidence boundaries -- repeated manual workflow friction -- false-positive calmness in governance surfaces - -### 7. User/Product Value - -Prefer specs that produce visible operator value or make the platform more sellable without creating heavy scope. - -## Required Selection Output Before Spec Kit Execution - -Before running the Spec Kit flow, identify: - -- selected candidate title -- source location in roadmap/spec-candidates -- why it was selected -- why close alternatives were deferred -- roadmap relationship -- smallest viable implementation slice -- proposed concise feature description to feed into `specify` - -The feature description must be product- and behavior-oriented. It should not be a low-level implementation plan. - -## Spec Kit Execution Flow - -After selecting the candidate, execute the real repository Spec Kit preparation sequence, including analysis and preparation-artifact fixes. - -### Step 1: Determine the repository's Spec Kit command pattern - -Inspect repository instructions and scripts to identify how this repo expects Spec Kit to be run. - -Common locations to inspect: - -```text -.specify/scripts/ -.specify/templates/ -.specify/memory/constitution.md -.github/prompts/ -.github/skills/ -README.md -specs/ -``` - -Use the repo-specific mechanism if present. - -### Step 2: Run `specify` - -Run the repository's `specify` flow using the selected candidate and the smallest viable slice. - -The `specify` input should include: - -- selected candidate title -- problem statement -- operator/user value -- roadmap relationship -- out-of-scope boundaries -- key acceptance criteria -- important enterprise constraints - -Let Spec Kit create the correct branch and spec location if that is the repo's configured behavior. 
- -### Step 3: Run `plan` - -Run the repository's `plan` flow for the generated spec. - -The `plan` input should keep the scope tight and should require repo-based alignment with: - -- constitution -- existing architecture -- workspace/tenant isolation -- RBAC -- OperationRun/observability where relevant -- evidence/snapshot/truth semantics where relevant -- Filament/Livewire conventions where relevant -- test strategy - -### Step 4: Run `tasks` - -Run the repository's `tasks` flow for the generated plan. - -The generated tasks must be: - -- ordered -- small -- testable -- grouped by phase -- limited to the selected scope -- suitable for later manual analysis before implementation - -### Step 5: Run `analyze` - -Run the repository's `analyze` flow against the generated Spec Kit artifacts. - -Analyze must check: - -- consistency between `spec.md`, `plan.md`, and `tasks.md` -- constitution alignment -- roadmap alignment -- whether the selected candidate was narrowed safely -- whether tasks are complete enough for implementation -- whether tasks accidentally require scope not described in the spec -- whether plan details conflict with repository architecture or terminology -- whether implementation risks are documented instead of silently ignored - -Do not use analyze as a trigger to implement application code. 
- -### Step 6: Fix preparation-artifact issues only - -If analyze finds issues, fix only Spec Kit preparation artifacts such as: - -- `spec.md` -- `plan.md` -- `tasks.md` -- generated Spec Kit metadata files, if the repository uses them - -Allowed fixes include: - -- clarify requirements -- tighten scope -- move out-of-scope work into follow-up candidates -- correct terminology -- add missing tasks -- remove tasks not backed by the spec -- align plan language with repository architecture -- add missing acceptance criteria or validation tasks - -Forbidden fixes include: - -- modifying application code -- creating migrations -- editing models, services, jobs, policies, Filament resources, Livewire components, tests, or commands -- running implementation or test-fix loops -- changing runtime behavior - -### Step 7: Stop - -After `analyze` has passed or preparation-artifact issues have been fixed, stop. - -Do not implement. -Do not modify application code. -Do not run implementation tests unless the repository's Spec Kit preparation command requires a non-destructive validation. - -## Failure Handling - -If a Spec Kit command or analyze phase fails: - -1. Stop immediately. -2. Report the failing command or phase. -3. Summarize the error. -4. Do not attempt implementation as a workaround. -5. Suggest the smallest safe next action. - -If the branch or working tree state is unsafe: - -1. Stop before running Spec Kit commands. -2. Report the current branch and relevant uncommitted files. -3. Ask the user to commit, stash, or move to a clean worktree. - -## Final Response Requirements - -After the Spec Kit preparation flow completes, respond with: - -1. Selected candidate -2. Why this candidate was selected -3. Why close alternatives were deferred -4. Current branch after Spec Kit execution -5. Generated spec path -6. Files created or updated by Spec Kit -7. Analyze result summary -8. Preparation-artifact fixes applied after analyze -9. Assumptions made -10. 
Open questions, if any -11. Recommended next implementation prompt -12. Explicit statement that no application implementation was performed - -Keep the response concise, but include enough detail for the user to continue immediately. - -## Required Next Implementation Prompt - -Always provide a ready-to-copy implementation prompt like this, adapted to the generated spec branch/path, but only after analyze has passed or preparation-artifact issues have been fixed: - -```markdown -Du bist ein Senior Staff Software Engineer für TenantPilot/TenantAtlas. - -Implementiere die vorbereitete Spec `` streng anhand von `tasks.md`. - -Wichtig: -- Arbeite task-sequenziell. -- Ändere nur Dateien, die für die jeweilige Task notwendig sind. -- Halte dich an `spec.md`, `plan.md`, `tasks.md` und die Constitution. -- Keine Scope-Erweiterung. -- Keine Opportunistic Refactors. -- Führe passende Tests nach sinnvollen Task-Gruppen aus. -- Wenn eine Task unklar oder falsch ist, stoppe und dokumentiere den Konflikt statt frei zu improvisieren. -- Am Ende liefere geänderte Dateien, Teststatus, offene Risiken und nicht erledigte Tasks. -``` - -## Example Invocation - -User: - -```text -Nutze den Skill spec-kit-next-best-one-shot. -Wähle aus roadmap.md und spec-candidates.md die nächste sinnvollste Spec. -Führe danach GitHub Spec Kit specify, plan, tasks und analyze in einem Rutsch aus. -Behebe alle analyze-Issues in den Spec-Kit-Artefakten. -Keine Application-Implementierung. -``` - -Expected behavior: - -1. Inspect constitution, Spec Kit scripts/templates, specs, roadmap, and spec candidates. -2. Check branch and working tree safety. -3. Compare candidate suitability. -4. Select the next best candidate. -5. Run the repository's real Spec Kit `specify` flow, letting it handle branch/spec setup. -6. Run the repository's real Spec Kit `plan` flow. -7. Run the repository's real Spec Kit `tasks` flow. -8. Run the repository's real Spec Kit `analyze` flow. -9. 
Fix analyze issues only in Spec Kit preparation artifacts. -10. Stop before application implementation. -11. Return selection rationale, branch/path summary, artifact summary, analyze summary, fixes applied, and next implementation prompt. -``` \ No newline at end of file diff --git a/.github/skills/spec-kit-one-shot-prep/SKILL.md b/.github/skills/spec-kit-one-shot-prep/SKILL.md deleted file mode 100644 index 4358e90e..00000000 --- a/.github/skills/spec-kit-one-shot-prep/SKILL.md +++ /dev/null @@ -1,294 +0,0 @@ ---- -name: spec-kit-one-shot-prep -description: Describe what this skill does and when to use it. Include keywords that help agents identify relevant tasks. ---- - - - -Define the functionality provided by this skill, including detailed instructions and examples ---- -name: spec-kit-one-shot-prep -description: Create Spec Kit preparation artifacts in one pass for TenantPilot/TenantAtlas features: spec.md, plan.md, and tasks.md. Use for feature ideas, roadmap items, spec candidates, governance/platform improvements, UX improvements, cleanup candidates, and repo-based preparation before manual analysis or implementation. This skill must not implement application code. ---- - -# Skill: Spec Kit One-Shot Preparation - -## Purpose - -Use this skill to create a complete Spec Kit preparation package for a new TenantPilot/TenantAtlas feature in one pass: - -1. `spec.md` -2. `plan.md` -3. `tasks.md` - -This skill prepares implementation work, but it must not perform implementation. 
- -The intended workflow is: - -```text -feature idea / roadmap item / spec candidate -→ one-shot spec + plan + tasks preparation -→ manual repo-based analysis/review -→ explicit implementation step later -``` - -## When to Use - -Use this skill when the user asks to create or prepare Spec Kit artifacts from: - -- a feature idea -- a spec candidate -- a roadmap item -- a product or UX requirement -- a governance/platform improvement -- an architecture cleanup candidate -- a refactoring preparation request -- a TenantPilot/TenantAtlas implementation idea that should first become a formal spec - -Typical user prompts: - -```text -Mach daraus spec, plan und tasks in einem Rutsch. -``` - -```text -Erstelle daraus eine neue Spec Kit Vorbereitung, aber noch nicht implementieren. -``` - -```text -Nimm diesen spec candidate und bereite spec/plan/tasks vor. -``` - -```text -Erzeuge die Spec Kit Artefakte, danach mache ich die Analyse manuell. -``` - -## Hard Rules - -- Work strictly repo-based. -- Do not implement application code. -- Do not modify production code. -- Do not modify migrations, models, services, jobs, Filament resources, Livewire components, policies, commands, or tests unless the user explicitly starts a later implementation task. -- Do not execute implementation commands. -- Do not run destructive commands. -- Do not expand scope beyond the provided feature idea. -- Do not invent architecture that conflicts with repository truth. -- Do not create broad platform rewrites when a smaller implementable spec is possible. -- Prefer small, reviewable, implementation-ready specs. -- Preserve TenantPilot/TenantAtlas terminology. -- Follow the repository constitution and existing Spec Kit conventions. -- If repository truth conflicts with the user-provided draft, keep repository truth and document the deviation. -- If the feature is too broad, split it into one primary spec and optional follow-up spec candidates. 
- -## Required Inputs - -The user should provide at least one of: - -- feature title and short goal -- full spec candidate -- roadmap item -- rough problem statement -- UX or architecture improvement idea - -If the input is incomplete, proceed with the smallest reasonable interpretation and document assumptions. Do not block on clarification unless the request is impossible to scope safely. - -## Required Repository Checks - -Before creating or updating Spec Kit artifacts, inspect the relevant repository sources. - -Always check: - -1. `.specify/memory/constitution.md` -2. `.specify/templates/` -3. `specs/` -4. `docs/product/spec-candidates.md` -5. relevant roadmap documents under `docs/product/` -6. nearby existing specs with related terminology or scope - -Check application code only as needed to avoid wrong naming, wrong architecture, or duplicate concepts. Do not edit application code. - -## Spec Directory Rules - -Create a new spec directory using the next valid spec number and a kebab-case slug: - -```text -specs/-/ -``` - -The exact number must be derived from the current repository state and existing numbering conventions. - -Create or update only these preparation artifacts inside the selected spec directory: - -```text -specs/-/spec.md -specs/-/plan.md -specs/-/tasks.md -``` - -If the repository templates require additional preparation files, create them only when this is consistent with existing Spec Kit conventions. Do not create implementation files. - -## `spec.md` Requirements - -The spec must be product- and behavior-oriented. It should avoid premature implementation detail unless needed for correctness. 
- -Include: - -- Feature title -- Problem statement -- Business/product value -- Primary users/operators -- User stories -- Functional requirements -- Non-functional requirements -- UX requirements -- RBAC/security requirements -- Auditability/observability requirements -- Data/truth-source requirements where relevant -- Out of scope -- Acceptance criteria -- Success criteria -- Risks -- Assumptions -- Open questions - -TenantPilot/TenantAtlas specs should preserve enterprise SaaS principles: - -- workspace/tenant isolation -- capability-first RBAC -- auditability -- operation/result truth separation -- source-of-truth clarity -- calm enterprise operator UX -- progressive disclosure where useful -- no false positive calmness - -## `plan.md` Requirements - -The plan must be repo-aware and implementation-oriented, but still must not implement. - -Include: - -- Technical approach -- Existing repository surfaces likely affected -- Domain/model implications -- UI/Filament implications -- Livewire implications where relevant -- OperationRun/monitoring implications where relevant -- RBAC/policy implications -- Audit/logging/evidence implications where relevant -- Data/migration implications where relevant -- Test strategy -- Rollout considerations -- Risk controls -- Implementation phases - -The plan should clearly distinguish: - -- execution truth -- artifact truth -- backup/snapshot truth -- recovery/evidence truth -- operator next action - -Use those distinctions only where relevant to the feature. - -## `tasks.md` Requirements - -Tasks must be ordered, small, and verifiable. 
- -Include: - -- checkbox tasks -- phase grouping -- tests before or alongside implementation tasks where practical -- final validation tasks -- documentation/update tasks if needed -- explicit non-goals where useful - -Avoid vague tasks such as: - -```text -Clean up code -Refactor UI -Improve performance -Make it enterprise-ready -``` - -Prefer concrete tasks such as: - -```text -- [ ] Add a feature test covering workspace isolation for . -- [ ] Update to display . -- [ ] Add policy coverage for . -``` - -If exact file names are not known yet, phrase tasks as repo-verification tasks first rather than inventing file paths. - -## Scope Control - -If the requested feature implies multiple independent concerns, create one primary spec for the smallest valuable slice and add a `Follow-up spec candidates` section. - -Examples of follow-up candidates: - -- assigned findings -- pending approvals -- personal work queue -- notification delivery settings -- evidence pack export hardening -- operation monitoring refinements -- autonomous governance decision surfaces - -Do not force all follow-up candidates into the primary spec. - -## Final Response Requirements - -After creating or updating the artifacts, respond with: - -1. Created or updated spec directory -2. Files created or updated -3. Important repo-based adjustments made -4. Assumptions made -5. Open questions, if any -6. Recommended next manual analysis prompt -7. Explicit statement that no implementation was performed - -Keep the final response concise, but include enough detail for the user to continue immediately. - -## Required Next Manual Analysis Prompt - -Always provide a ready-to-copy prompt like this, adapted to the created spec number and slug: - -```markdown -Du bist ein Senior Staff Software Architect und Enterprise SaaS Reviewer. - -Analysiere die neu erstellte Spec `-` streng repo-basiert. 
- -Ziel: -Prüfe, ob `spec.md`, `plan.md` und `tasks.md` vollständig, konsistent, implementierbar und constitution-konform sind. - -Wichtig: -- Keine Implementierung. -- Keine Codeänderungen. -- Keine Scope-Erweiterung. -- Prüfe nur gegen Repo-Wahrheit. -- Benenne konkrete Konflikte mit Dateien, Patterns, Datenflüssen oder bestehenden Specs. -- Schlage nur minimale Korrekturen an `spec.md`, `plan.md` und `tasks.md` vor. -- Wenn alles passt, gib eine klare Implementierungsfreigabe. -``` - -## Example Invocation - -User: - -```text -Nimm diesen Spec Candidate und mach daraus spec, plan und tasks in einem Rutsch. Danach mache ich die Analyse manuell. -``` - -Expected behavior: - -1. Inspect constitution, templates, specs, roadmap, and candidate docs. -2. Determine the next valid spec number. -3. Create `spec.md`, `plan.md`, and `tasks.md` in the new spec directory. -4. Keep scope tight. -5. Do not implement. -6. Return the summary and next manual analysis prompt. \ No newline at end of file diff --git a/apps/platform/app/Console/Commands/TenantpilotBackfillFindingLifecycle.php b/apps/platform/app/Console/Commands/TenantpilotBackfillFindingLifecycle.php index 7769fb66..995cae4e 100644 --- a/apps/platform/app/Console/Commands/TenantpilotBackfillFindingLifecycle.php +++ b/apps/platform/app/Console/Commands/TenantpilotBackfillFindingLifecycle.php @@ -7,6 +7,7 @@ use App\Models\Tenant; use App\Services\Runbooks\FindingsLifecycleBackfillRunbookService; use App\Services\Runbooks\FindingsLifecycleBackfillScope; +use App\Support\OperationalControls\OperationalControlBlockedException; use Illuminate\Console\Command; use Illuminate\Validation\ValidationException; @@ -51,6 +52,14 @@ public function handle(FindingsLifecycleBackfillRunbookService $runbookService): reason: null, source: 'cli', ); + } catch (OperationalControlBlockedException $e) { + $this->error(sprintf( + 'Backfill paused for tenant %d: %s', + (int) $tenant->getKey(), + $e->getMessage(), + )); + + return 
self::FAILURE; } catch (ValidationException $e) { $errors = $e->errors(); diff --git a/apps/platform/app/Console/Commands/TenantpilotRunDeployRunbooks.php b/apps/platform/app/Console/Commands/TenantpilotRunDeployRunbooks.php index fbc34280..1bbce8cb 100644 --- a/apps/platform/app/Console/Commands/TenantpilotRunDeployRunbooks.php +++ b/apps/platform/app/Console/Commands/TenantpilotRunDeployRunbooks.php @@ -7,6 +7,7 @@ use App\Services\Runbooks\FindingsLifecycleBackfillRunbookService; use App\Services\Runbooks\FindingsLifecycleBackfillScope; use App\Services\Runbooks\RunbookReason; +use App\Support\OperationalControls\OperationalControlBlockedException; use Illuminate\Console\Command; use Illuminate\Validation\ValidationException; @@ -31,6 +32,10 @@ public function handle(FindingsLifecycleBackfillRunbookService $runbookService): $this->info('Deploy runbooks started (if needed).'); + return self::SUCCESS; + } catch (OperationalControlBlockedException $e) { + $this->info('Deploy runbooks paused: '.$e->getMessage()); + return self::SUCCESS; } catch (ValidationException $e) { $errors = $e->errors(); diff --git a/apps/platform/app/Filament/Resources/FindingResource/Pages/ListFindings.php b/apps/platform/app/Filament/Resources/FindingResource/Pages/ListFindings.php index ade68e16..d1b19989 100644 --- a/apps/platform/app/Filament/Resources/FindingResource/Pages/ListFindings.php +++ b/apps/platform/app/Filament/Resources/FindingResource/Pages/ListFindings.php @@ -6,17 +6,18 @@ use App\Filament\Resources\FindingResource; use App\Filament\Widgets\Tenant\BaselineCompareCoverageBanner; use App\Filament\Widgets\Tenant\FindingStatsOverview; -use App\Jobs\BackfillFindingLifecycleJob; use App\Models\Finding; use App\Models\Tenant; use App\Models\User; use App\Services\Findings\FindingWorkflowService; -use App\Services\OperationRunService; +use App\Services\Runbooks\FindingsLifecycleBackfillRunbookService; +use App\Services\Runbooks\FindingsLifecycleBackfillScope; use 
App\Support\Auth\Capabilities; use App\Support\Filament\CanonicalAdminTenantFilterState; use App\Support\OperationRunLinks; use App\Support\OpsUx\OperationUxPresenter; use App\Support\OpsUx\OpsUxBrowserEvents; +use App\Support\OperationalControls\OperationalControlBlockedException; use App\Support\Rbac\UiEnforcement; use App\Support\Rbac\UiTooltips; use Filament\Actions; @@ -107,83 +108,76 @@ protected function getHeaderActions(): array { $actions = []; - if ((bool) config('tenantpilot.allow_admin_maintenance_actions', false)) { - $actions[] = UiEnforcement::forAction( - Actions\Action::make('backfill_lifecycle') - ->label('Backfill findings lifecycle') - ->icon('heroicon-o-wrench-screwdriver') - ->color('gray') - ->requiresConfirmation() - ->modalHeading('Backfill findings lifecycle') - ->modalDescription('This will backfill legacy Findings data (lifecycle fields, SLA due dates, and drift duplicate consolidation) for the current tenant. The operation runs in the background.') - ->action(function (OperationRunService $operationRuns): void { - $user = auth()->user(); + $actions[] = UiEnforcement::forAction( + Actions\Action::make('backfill_lifecycle') + ->label('Backfill findings lifecycle') + ->icon('heroicon-o-wrench-screwdriver') + ->color('gray') + ->requiresConfirmation() + ->modalHeading('Backfill findings lifecycle') + ->modalDescription('This will backfill legacy Findings data (lifecycle fields, SLA due dates, and drift duplicate consolidation) for the current tenant. The operation runs in the background.') + ->action(function (FindingsLifecycleBackfillRunbookService $runbookService): void { + $user = auth()->user(); - if (! $user instanceof User) { - abort(403); - } + if (! $user instanceof User) { + abort(403); + } - $tenant = static::resolveTenantContextForCurrentPanel(); + $tenant = static::resolveTenantContextForCurrentPanel(); - if (! $tenant instanceof Tenant) { - abort(404); - } + if (! 
$tenant instanceof Tenant) { + abort(404); + } - $opRun = $operationRuns->ensureRunWithIdentity( - tenant: $tenant, - type: 'findings.lifecycle.backfill', - identityInputs: [ - 'tenant_id' => (int) $tenant->getKey(), - 'trigger' => 'backfill', - ], - context: [ - 'workspace_id' => (int) $tenant->workspace_id, - 'initiator_user_id' => (int) $user->getKey(), - ], + try { + $opRun = $runbookService->start( + scope: FindingsLifecycleBackfillScope::singleTenant((int) $tenant->getKey()), initiator: $user, + reason: null, + source: 'tenant_ui', ); + } catch (OperationalControlBlockedException $exception) { + Notification::make() + ->title($exception->title()) + ->body($exception->getMessage()) + ->warning() + ->send(); - $runUrl = OperationRunLinks::view($opRun, $tenant); + throw new \Filament\Support\Exceptions\Halt; + } - if ($opRun->wasRecentlyCreated === false) { - OpsUxBrowserEvents::dispatchRunEnqueued($this); - - OperationUxPresenter::alreadyQueuedToast((string) $opRun->type) - ->actions([ - Actions\Action::make('view_run') - ->label('Open operation') - ->url($runUrl), - ]) - ->send(); - - return; - } - - $operationRuns->dispatchOrFail($opRun, function () use ($tenant, $user): void { - BackfillFindingLifecycleJob::dispatch( - tenantId: (int) $tenant->getKey(), - workspaceId: (int) $tenant->workspace_id, - initiatorUserId: (int) $user->getKey(), - ); - }); + $runUrl = OperationRunLinks::view($opRun, $tenant); + if ($opRun->wasRecentlyCreated === false) { OpsUxBrowserEvents::dispatchRunEnqueued($this); - OperationUxPresenter::queuedToast((string) $opRun->type) - ->body('The backfill will run in the background. 
You can continue working while it completes.') + OperationUxPresenter::alreadyQueuedToast((string) $opRun->type) ->actions([ Actions\Action::make('view_run') ->label('Open operation') ->url($runUrl), ]) ->send(); - }) - ) - ->preserveVisibility() - ->requireCapability(Capabilities::TENANT_MANAGE) - ->tooltip(UiTooltips::INSUFFICIENT_PERMISSION) - ->apply(); - } + + return; + } + + OpsUxBrowserEvents::dispatchRunEnqueued($this); + + OperationUxPresenter::queuedToast((string) $opRun->type) + ->body('The backfill will run in the background. You can continue working while it completes.') + ->actions([ + Actions\Action::make('view_run') + ->label('Open operation') + ->url($runUrl), + ]) + ->send(); + }) + ) + ->preserveVisibility() + ->requireCapability(Capabilities::TENANT_MANAGE) + ->tooltip(UiTooltips::INSUFFICIENT_PERMISSION) + ->apply(); $actions[] = UiEnforcement::forAction( Actions\Action::make('triage_all_matching') diff --git a/apps/platform/app/Filament/Resources/RestoreRunResource.php b/apps/platform/app/Filament/Resources/RestoreRunResource.php index ad63d385..e1e8a781 100644 --- a/apps/platform/app/Filament/Resources/RestoreRunResource.php +++ b/apps/platform/app/Filament/Resources/RestoreRunResource.php @@ -18,7 +18,9 @@ use App\Models\RestoreRun; use App\Models\Tenant; use App\Models\User; +use App\Models\Workspace; use App\Rules\SkipOrUuidRule; +use App\Services\Audit\WorkspaceAuditLogger; use App\Services\Auth\CapabilityResolver; use App\Services\Directory\EntraGroupLabelResolver; use App\Services\Intune\AuditLogger; @@ -31,14 +33,18 @@ use App\Services\Providers\ProviderOperationStartResult; use App\Support\Auth\Capabilities; use App\Support\BackupQuality\BackupQualityResolver; +use App\Support\Audit\AuditActionId; use App\Support\Badges\BadgeDomain; use App\Support\Badges\BadgeRenderer; use App\Support\Filament\FilterOptionCatalog; use App\Support\Filament\FilterPresets; +use App\Support\OperationCatalog; use App\Support\OperationRunLinks; use 
App\Support\OpsUx\OperationUxPresenter; use App\Support\OpsUx\OpsUxBrowserEvents; use App\Support\OpsUx\ProviderOperationStartResultPresenter; +use App\Support\OperationalControls\OperationalControlBlockedException; +use App\Support\OperationalControls\OperationalControlEvaluator; use App\Support\Rbac\UiEnforcement; use App\Support\RestoreRunIdempotency; use App\Support\RestoreRunStatus; @@ -1921,16 +1927,26 @@ public static function createRestoreRun(array $data): RestoreRun ->executionSafetySnapshot($tenant, $user, $data) ->toArray(); - [$result, $restoreRun] = static::startQueuedRestoreExecution( - tenant: $tenant, - backupSet: $backupSet, - selectedItemIds: $selectedItemIds, - preview: $preview, - metadata: $metadata, - groupMapping: $groupMapping, - actorEmail: $actorEmail, - actorName: $actorName, - ); + try { + [$result, $restoreRun] = static::startQueuedRestoreExecution( + tenant: $tenant, + backupSet: $backupSet, + selectedItemIds: $selectedItemIds, + preview: $preview, + metadata: $metadata, + groupMapping: $groupMapping, + actorEmail: $actorEmail, + actorName: $actorName, + ); + } catch (OperationalControlBlockedException $exception) { + Notification::make() + ->title($exception->title()) + ->body($exception->getMessage()) + ->warning() + ->send(); + + throw new \Filament\Support\Exceptions\Halt; + } app(ProviderOperationStartResultPresenter::class) ->notification( @@ -1978,6 +1994,13 @@ private static function startQueuedRestoreExecution( $initiator = auth()->user(); $initiator = $initiator instanceof User ? 
$initiator : null; + static::guardRestoreExecutionOperationalControl( + tenant: $tenant, + backupSet: $backupSet, + selectedItemIds: $selectedItemIds, + initiator: $initiator, + ); + $queuedRestoreRun = null; $dispatcher = function (OperationRun $run) use ( @@ -2097,6 +2120,58 @@ private static function startQueuedRestoreExecution( return [$result, $queuedRestoreRun?->refresh()]; } + /** + * @param array|null $selectedItemIds + */ + private static function guardRestoreExecutionOperationalControl( + Tenant $tenant, + BackupSet $backupSet, + ?array $selectedItemIds, + ?User $initiator, + ): void { + $workspace = $tenant->workspace; + + if (! $workspace instanceof Workspace) { + throw new \RuntimeException('Restore execution requires a workspace context.'); + } + + $decision = app(OperationalControlEvaluator::class)->evaluate('restore.execute', $workspace); + + if (! $decision->isPaused()) { + return; + } + + app(WorkspaceAuditLogger::class)->log( + workspace: $workspace, + action: AuditActionId::OperationalControlExecutionBlocked, + context: [ + 'metadata' => array_filter([ + 'control_key' => $decision->controlKey, + 'scope_type' => $decision->matchedScopeType, + 'workspace_id' => (int) $workspace->getKey(), + 'reason_text' => $decision->reasonText, + 'expires_at' => $decision->expiresAt?->toIso8601String(), + 'actor_id' => $initiator?->getKey(), + 'backup_set_id' => (int) $backupSet->getKey(), + 'selected_item_count' => is_array($selectedItemIds) ? count($selectedItemIds) : null, + 'requested_scope' => 'restore.execute', + ], static fn (mixed $value): bool => $value !== null && $value !== ''), + ], + actor: $initiator, + status: 'blocked', + resourceType: 'operational_control', + resourceId: $decision->sourceActivationId !== null ? 
(string) $decision->sourceActivationId : null, + targetLabel: OperationCatalog::label('restore.execute'), + summary: 'Restore execution blocked by operational control', + tenant: $tenant, + ); + + throw OperationalControlBlockedException::forDecision( + decision: $decision, + actionLabel: OperationCatalog::label('restore.execute'), + ); + } + /** * @param array|null $selectedItemIds */ @@ -2529,16 +2604,26 @@ private static function rerunActionWithGate(): Actions\Action|BulkAction $metadata['rerun_of_restore_run_id'] = $record->id; - [$result, $newRun] = static::startQueuedRestoreExecution( - tenant: $tenant, - backupSet: $backupSet, - selectedItemIds: $selectedItemIds, - preview: $preview, - metadata: $metadata, - groupMapping: $groupMapping, - actorEmail: $actorEmail, - actorName: $actorName, - ); + try { + [$result, $newRun] = static::startQueuedRestoreExecution( + tenant: $tenant, + backupSet: $backupSet, + selectedItemIds: $selectedItemIds, + preview: $preview, + metadata: $metadata, + groupMapping: $groupMapping, + actorEmail: $actorEmail, + actorName: $actorName, + ); + } catch (OperationalControlBlockedException $exception) { + Notification::make() + ->title($exception->title()) + ->body($exception->getMessage()) + ->warning() + ->send(); + + return; + } if (in_array($result->status, ['started', 'deduped', 'scope_busy'], true)) { OpsUxBrowserEvents::dispatchRunEnqueued($livewire); diff --git a/apps/platform/app/Filament/System/Pages/Ops/Controls.php b/apps/platform/app/Filament/System/Pages/Ops/Controls.php new file mode 100644 index 00000000..a7c02a0e --- /dev/null +++ b/apps/platform/app/Filament/System/Pages/Ops/Controls.php @@ -0,0 +1,660 @@ +user(); + + if (! 
$user instanceof PlatformUser) { + return false; + } + + return $user->hasCapability(PlatformCapabilities::ACCESS_SYSTEM_PANEL) + && $user->hasCapability(PlatformCapabilities::OPS_CONTROLS_MANAGE); + } + + public function mount(): void + { + abort_unless(static::canAccess(), 403); + } + + public function getHeader(): ?View + { + return view('filament.system.pages.ops.partials.controls-header', [ + 'breadcrumbs' => filament()->hasBreadcrumbs() ? $this->getBreadcrumbs() : [], + 'heading' => $this->getHeading(), + 'subheading' => $this->getSubheading(), + ]); + } + + /** + * @return array + */ + protected function getHeaderActions(): array + { + return [ + $this->pauseRestoreExecuteAction(), + $this->resumeRestoreExecuteAction(), + $this->viewHistoryRestoreExecuteAction(), + ]; + } + + /** + * @return array> + */ + public function controlCards(): array + { + $catalog = app(OperationalControlCatalog::class); + + return array_map( + fn (string $controlKey): array => $this->controlSummary($controlKey), + $catalog->keys(), + ); + } + + /** + * @return array + */ + public function controlSummary(string $controlKey): array + { + $definition = app(OperationalControlCatalog::class)->definition($controlKey); + $activations = $this->activeActivationsForControl($controlKey); + + $effectiveState = $activations->isEmpty() ? 
'enabled' : 'paused'; + $stateLabel = match (true) { + $activations->contains(fn (OperationalControlActivation $activation): bool => $activation->scope_type === 'global') => 'Paused globally', + $activations->isNotEmpty() => sprintf('Workspace pauses active (%d)', $activations->where('scope_type', 'workspace')->count()), + default => 'Enabled', + }; + + return [ + 'control_key' => $controlKey, + 'action_slug' => $this->actionSlug($controlKey), + 'label' => (string) $definition['label'], + 'effective_state' => $effectiveState, + 'state_label' => $stateLabel, + 'supported_scopes' => $definition['supported_scopes'], + 'affected_surfaces' => $definition['affected_surfaces'], + 'active_activations' => $activations + ->map(fn (OperationalControlActivation $activation): array => $this->activationSummary($activation)) + ->values() + ->all(), + 'history_count' => $this->recentAuditEventsForControl($controlKey)->count(), + ]; + } + + /** + * @return array{control_key: string, scope_type: string, workspace_id: ?int, workspace_count: int, tenant_count: int, summary: string} + */ + public function scopeImpactPreview(string $controlKey, string $scopeType, ?int $workspaceId): array + { + $label = app(OperationalControlCatalog::class)->label($controlKey); + + if ($scopeType === 'workspace') { + $workspace = is_int($workspaceId) + ? Workspace::query()->whereKey($workspaceId)->first() + : null; + + if (! 
$workspace instanceof Workspace) { + return [ + 'control_key' => $controlKey, + 'scope_type' => $scopeType, + 'workspace_id' => null, + 'workspace_count' => 0, + 'tenant_count' => 0, + 'summary' => 'Select a workspace to preview the scope impact.', + ]; + } + + $tenantCount = Tenant::query() + ->where('workspace_id', (int) $workspace->getKey()) + ->where('external_id', '!=', 'platform') + ->count(); + + return [ + 'control_key' => $controlKey, + 'scope_type' => $scopeType, + 'workspace_id' => (int) $workspace->getKey(), + 'workspace_count' => 1, + 'tenant_count' => $tenantCount, + 'summary' => sprintf('%s will affect workspace %s and %d %s.', $label, $workspace->name, $tenantCount, $tenantCount === 1 ? 'tenant' : 'tenants'), + ]; + } + + $tenantCount = Tenant::query() + ->where('external_id', '!=', 'platform') + ->count(); + + $workspaceCount = Tenant::query() + ->where('external_id', '!=', 'platform') + ->distinct('workspace_id') + ->count('workspace_id'); + + return [ + 'control_key' => $controlKey, + 'scope_type' => 'global', + 'workspace_id' => null, + 'workspace_count' => $workspaceCount, + 'tenant_count' => $tenantCount, + 'summary' => sprintf('%s will affect %d %s across %d %s.', $label, $workspaceCount, $workspaceCount === 1 ? 'workspace' : 'workspaces', $tenantCount, $tenantCount === 1 ? 
'tenant' : 'tenants'), + ]; + } + + public function pauseRestoreExecuteAction(): Action + { + return $this->pauseActionFor('restore.execute'); + } + + public function resumeRestoreExecuteAction(): Action + { + return $this->resumeActionFor('restore.execute'); + } + + public function viewHistoryRestoreExecuteAction(): Action + { + return $this->historyActionFor('restore.execute'); + } + + private function pauseActionFor(string $controlKey): Action + { + $label = app(OperationalControlCatalog::class)->label($controlKey); + + return Action::make('pause_'.$this->actionSlug($controlKey)) + ->label('Pause '.$label) + ->icon('heroicon-o-pause') + ->color('danger') + ->requiresConfirmation() + ->modalHeading('Pause '.$label) + ->modalDescription('Review the scope impact, reason, and optional expiry before confirming this control change.') + ->form($this->pauseFormSchema($controlKey)) + ->action(function (array $data, AuditRecorder $auditRecorder, WorkspaceAuditLogger $workspaceAuditLogger) use ($controlKey, $label): void { + $actor = $this->controlsActor(); + [$scopeType, $workspace, $reasonText, $expiresAt] = $this->normalizePauseInput($data); + + $scopeQuery = $this->activationScopeQuery($controlKey, $scopeType, $workspace); + + (clone $scopeQuery) + ->whereNotNull('expires_at') + ->where('expires_at', '<=', now()) + ->delete(); + + $activation = (clone $scopeQuery)->notExpired()->first(); + $auditAction = $activation instanceof OperationalControlActivation + ? AuditActionId::OperationalControlUpdated + : AuditActionId::OperationalControlPaused; + + if ($activation instanceof OperationalControlActivation) { + $activation->fill([ + 'reason_text' => $reasonText, + 'expires_at' => $expiresAt, + 'updated_by_platform_user_id' => (int) $actor->getKey(), + ])->save(); + } else { + $activation = OperationalControlActivation::query()->create([ + 'control_key' => $controlKey, + 'scope_type' => $scopeType, + 'workspace_id' => $workspace instanceof Workspace ? 
(int) $workspace->getKey() : null, + 'reason_text' => $reasonText, + 'expires_at' => $expiresAt, + 'created_by_platform_user_id' => (int) $actor->getKey(), + ]); + } + + $this->recordControlMutation( + auditAction: $auditAction, + activation: $activation, + actor: $actor, + auditRecorder: $auditRecorder, + workspaceAuditLogger: $workspaceAuditLogger, + ); + + Notification::make() + ->title(sprintf('%s %s', $label, $auditAction === AuditActionId::OperationalControlPaused ? 'paused' : 'updated')) + ->success() + ->send(); + }); + } + + private function resumeActionFor(string $controlKey): Action + { + $label = app(OperationalControlCatalog::class)->label($controlKey); + + return Action::make('resume_'.$this->actionSlug($controlKey)) + ->label('Resume '.$label) + ->icon('heroicon-o-play') + ->color('gray') + ->requiresConfirmation() + ->modalHeading('Resume '.$label) + ->modalDescription('Remove the selected pause so new starts can proceed again.') + ->form($this->resumeFormSchema($controlKey)) + ->action(function (array $data, AuditRecorder $auditRecorder, WorkspaceAuditLogger $workspaceAuditLogger) use ($controlKey, $label): void { + $actor = $this->controlsActor(); + [$scopeType, $workspace] = $this->normalizeResumeInput($data); + + $activation = $this->activationScopeQuery($controlKey, $scopeType, $workspace) + ->notExpired() + ->first(); + + if (! $activation instanceof OperationalControlActivation) { + Notification::make() + ->title(sprintf('%s already enabled', $label)) + ->warning() + ->send(); + + return; + } + + $activationSnapshot = $activation->replicate(); + $activationSnapshot->forceFill($activation->getAttributes()); + $activation->delete(); + + $this->recordControlMutation( + auditAction: AuditActionId::OperationalControlResumed, + activation: $activationSnapshot, + actor: $actor, + auditRecorder: $auditRecorder, + workspaceAuditLogger: $workspaceAuditLogger, + ); + + Notification::make() + ->title($label.' 
resumed') + ->success() + ->send(); + }); + } + + private function historyActionFor(string $controlKey): Action + { + $label = app(OperationalControlCatalog::class)->label($controlKey); + + return Action::make('view_history_'.$this->actionSlug($controlKey)) + ->label('View '.$label.' history') + ->link() + ->modalHeading($label.' history') + ->modalSubmitAction(false) + ->modalCancelActionLabel('Close') + ->modalContent(fn () => view('filament.system.pages.ops.partials.operational-control-history', [ + 'events' => $this->recentAuditEventsForControl($controlKey), + 'label' => $label, + ])); + } + + /** + * @return array + */ + private function pauseFormSchema(string $controlKey): array + { + return [ + Radio::make('scope_type') + ->label('Scope') + ->options([ + 'global' => 'Global', + 'workspace' => 'One workspace', + ]) + ->default('global') + ->live() + ->required(), + + Select::make('workspace_id') + ->label('Workspace') + ->searchable() + ->visible(fn (callable $get): bool => $get('scope_type') === 'workspace') + ->required(fn (callable $get): bool => $get('scope_type') === 'workspace') + ->live() + ->getSearchResultsUsing(function (string $search): array { + return Workspace::query() + ->where('name', 'like', "%{$search}%") + ->orderBy('name') + ->limit(25) + ->pluck('name', 'id') + ->all(); + }) + ->getOptionLabelUsing(function ($value): ?string { + if (! is_numeric($value)) { + return null; + } + + return Workspace::query()->whereKey((int) $value)->value('name'); + }), + + Textarea::make('reason_text') + ->label('Reason') + ->required() + ->minLength(5) + ->maxLength(500) + ->rows(4), + + DateTimePicker::make('expires_at') + ->label('Expires at') + ->seconds(false) + ->nullable(), + + Placeholder::make('scope_preview') + ->label('Scope impact preview') + ->content(function (callable $get) use ($controlKey): string { + $preview = $this->scopeImpactPreview( + $controlKey, + (string) ($get('scope_type') ?? 'global'), + is_numeric($get('workspace_id')) ? 
(int) $get('workspace_id') : null, + ); + + return (string) $preview['summary']; + }), + ]; + } + + /** + * @return array + */ + private function resumeFormSchema(string $controlKey): array + { + return [ + Radio::make('scope_type') + ->label('Scope') + ->options([ + 'global' => 'Global', + 'workspace' => 'One workspace', + ]) + ->default('global') + ->live() + ->required(), + + Select::make('workspace_id') + ->label('Workspace') + ->searchable() + ->visible(fn (callable $get): bool => $get('scope_type') === 'workspace') + ->required(fn (callable $get): bool => $get('scope_type') === 'workspace') + ->getSearchResultsUsing(function (string $search): array { + return Workspace::query() + ->where('name', 'like', "%{$search}%") + ->orderBy('name') + ->limit(25) + ->pluck('name', 'id') + ->all(); + }) + ->getOptionLabelUsing(function ($value): ?string { + if (! is_numeric($value)) { + return null; + } + + return Workspace::query()->whereKey((int) $value)->value('name'); + }), + + Placeholder::make('scope_preview') + ->label('Resume impact preview') + ->content(function (callable $get) use ($controlKey): string { + $preview = $this->scopeImpactPreview( + $controlKey, + (string) ($get('scope_type') ?? 'global'), + is_numeric($get('workspace_id')) ? (int) $get('workspace_id') : null, + ); + + return (string) $preview['summary']; + }), + ]; + } + + private function controlsActor(): PlatformUser + { + $actor = auth('platform')->user(); + + if (! $actor instanceof PlatformUser) { + abort(403); + } + + if (! $actor->hasCapability(PlatformCapabilities::OPS_CONTROLS_MANAGE)) { + abort(403); + } + + return $actor; + } + + /** + * @return array{0: string, 1: ?Workspace, 2: string, 3: ?CarbonInterface} + */ + private function normalizePauseInput(array $data): array + { + [$scopeType, $workspace] = $this->resolveScopeInput($data); + $reasonText = trim((string) ($data['reason_text'] ?? 
'')); + + if ($reasonText === '') { + throw ValidationException::withMessages([ + 'reason_text' => 'A reason is required.', + ]); + } + + $expiresAt = null; + + if (filled($data['expires_at'] ?? null)) { + $expiresAt = Carbon::parse((string) $data['expires_at']); + + if ($expiresAt->lessThanOrEqualTo(now())) { + throw ValidationException::withMessages([ + 'expires_at' => 'Expiry must be in the future.', + ]); + } + } + + return [$scopeType, $workspace, $reasonText, $expiresAt]; + } + + /** + * @return array{0: string, 1: ?Workspace} + */ + private function normalizeResumeInput(array $data): array + { + return $this->resolveScopeInput($data); + } + + /** + * @return array{0: string, 1: ?Workspace} + */ + private function resolveScopeInput(array $data): array + { + $scopeType = (string) ($data['scope_type'] ?? 'global'); + + if (! in_array($scopeType, ['global', 'workspace'], true)) { + throw ValidationException::withMessages([ + 'scope_type' => 'Invalid scope selected.', + ]); + } + + if ($scopeType === 'global') { + return [$scopeType, null]; + } + + $workspaceId = $data['workspace_id'] ?? null; + + if (! is_numeric($workspaceId)) { + throw ValidationException::withMessages([ + 'workspace_id' => 'A workspace is required for workspace scope.', + ]); + } + + $workspace = Workspace::query()->whereKey((int) $workspaceId)->first(); + + if (! 
$workspace instanceof Workspace) { + throw ValidationException::withMessages([ + 'workspace_id' => 'The selected workspace could not be found.', + ]); + } + + return [$scopeType, $workspace]; + } + + private function activationScopeQuery(string $controlKey, string $scopeType, ?Workspace $workspace): \Illuminate\Database\Eloquent\Builder + { + $query = OperationalControlActivation::query() + ->forControl($controlKey) + ->where('scope_type', $scopeType); + + if ($scopeType === 'workspace') { + $query->where('workspace_id', (int) $workspace?->getKey()); + } else { + $query->whereNull('workspace_id'); + } + + return $query; + } + + private function recordControlMutation( + AuditActionId $auditAction, + OperationalControlActivation $activation, + PlatformUser $actor, + AuditRecorder $auditRecorder, + WorkspaceAuditLogger $workspaceAuditLogger, + ): void { + $label = app(OperationalControlCatalog::class)->label((string) $activation->control_key); + $summary = sprintf('%s %s', $label, match ($auditAction) { + AuditActionId::OperationalControlPaused => 'paused', + AuditActionId::OperationalControlUpdated => 'updated', + AuditActionId::OperationalControlResumed => 'resumed', + default => 'changed', + }); + + $metadata = array_filter([ + 'control_key' => (string) $activation->control_key, + 'scope_type' => (string) $activation->scope_type, + 'workspace_id' => is_numeric($activation->workspace_id) ? 
(int) $activation->workspace_id : null, + 'reason_text' => $activation->reason_text, + 'expires_at' => $activation->expires_at?->toIso8601String(), + 'actor_id' => (int) $actor->getKey(), + ], static fn (mixed $value): bool => $value !== null && $value !== ''); + + if ((string) $activation->scope_type === 'global') { + $auditRecorder->record( + action: $auditAction, + context: ['metadata' => $metadata], + actor: AuditActorSnapshot::platform($actor), + target: new AuditTargetSnapshot( + type: 'operational_control', + id: (string) $activation->getKey(), + label: $label, + ), + outcome: 'success', + summary: $summary, + ); + + return; + } + + $workspace = Workspace::query()->whereKey((int) $activation->workspace_id)->firstOrFail(); + + $workspaceAuditLogger->log( + workspace: $workspace, + action: $auditAction, + context: ['metadata' => $metadata], + actor: $actor, + status: 'success', + resourceType: 'operational_control', + resourceId: (string) $activation->getKey(), + targetLabel: $label, + summary: $summary, + ); + } + + /** + * @return Collection + */ + private function activeActivationsForControl(string $controlKey): Collection + { + return OperationalControlActivation::query() + ->forControl($controlKey) + ->notExpired() + ->with(['workspace', 'createdBy', 'updatedBy']) + ->orderByRaw("CASE WHEN scope_type = 'global' THEN 0 ELSE 1 END") + ->orderBy('workspace_id') + ->orderBy('id') + ->get(); + } + + /** + * @return array + */ + private function activationSummary(OperationalControlActivation $activation): array + { + $owner = $activation->updatedBy ?? $activation->createdBy; + $workspaceName = $activation->workspace?->name; + + return [ + 'id' => (int) $activation->getKey(), + 'scope_type' => (string) $activation->scope_type, + 'scope_label' => (string) $activation->scope_type === 'global' + ? 'Global' + : sprintf('Workspace: %s', $workspaceName ?? '#'.(int) $activation->workspace_id), + 'workspace_id' => is_numeric($activation->workspace_id) ? 
(int) $activation->workspace_id : null, + 'workspace_name' => $workspaceName, + 'reason_text' => (string) $activation->reason_text, + 'expires_at' => $activation->expires_at?->toIso8601String(), + 'expires_label' => $activation->expires_at?->diffForHumans() ?? 'No expiry', + 'owner_name' => $owner?->name ?: $owner?->email ?: 'Unknown operator', + ]; + } + + /** + * @return Collection + */ + private function recentAuditEventsForControl(string $controlKey): Collection + { + return AuditLog::query() + ->where('metadata->control_key', $controlKey) + ->whereIn('action', [ + AuditActionId::OperationalControlPaused->value, + AuditActionId::OperationalControlUpdated->value, + AuditActionId::OperationalControlResumed->value, + AuditActionId::OperationalControlExecutionBlocked->value, + ]) + ->latestFirst() + ->limit(10) + ->get(); + } + + private function actionSlug(string $controlKey): string + { + return str_replace('.', '_', $controlKey); + } +} \ No newline at end of file diff --git a/apps/platform/app/Filament/System/Pages/Ops/Runbooks.php b/apps/platform/app/Filament/System/Pages/Ops/Runbooks.php index 47a45401..89cc1c02 100644 --- a/apps/platform/app/Filament/System/Pages/Ops/Runbooks.php +++ b/apps/platform/app/Filament/System/Pages/Ops/Runbooks.php @@ -14,6 +14,7 @@ use App\Services\System\AllowedTenantUniverse; use App\Support\Auth\PlatformCapabilities; use App\Support\OpsUx\OperationUxPresenter; +use App\Support\OperationalControls\OperationalControlBlockedException; use App\Support\System\SystemOperationRunLinks; use Filament\Actions\Action; use Filament\Forms\Components\Radio; @@ -168,12 +169,22 @@ protected function getHeaderActions(): array 'reason_text' => $data['reason_text'] ?? 
null, ]); - $run = $runbookService->start( - scope: $scope, - initiator: $user, - reason: $reason, - source: 'system_ui', - ); + try { + $run = $runbookService->start( + scope: $scope, + initiator: $user, + reason: $reason, + source: 'system_ui', + ); + } catch (OperationalControlBlockedException $exception) { + Notification::make() + ->title($exception->title()) + ->body($exception->getMessage()) + ->warning() + ->send(); + + throw new \Filament\Support\Exceptions\Halt; + } $viewUrl = SystemOperationRunLinks::view($run); diff --git a/apps/platform/app/Models/OperationalControlActivation.php b/apps/platform/app/Models/OperationalControlActivation.php new file mode 100644 index 00000000..1f217e33 --- /dev/null +++ b/apps/platform/app/Models/OperationalControlActivation.php @@ -0,0 +1,73 @@ + */ + use HasFactory; + + protected $guarded = []; + + protected $casts = [ + 'expires_at' => 'datetime', + ]; + + protected static function newFactory(): OperationalControlActivationFactory + { + return OperationalControlActivationFactory::new(); + } + + public function workspace(): BelongsTo + { + return $this->belongsTo(Workspace::class); + } + + public function createdBy(): BelongsTo + { + return $this->belongsTo(PlatformUser::class, 'created_by_platform_user_id'); + } + + public function updatedBy(): BelongsTo + { + return $this->belongsTo(PlatformUser::class, 'updated_by_platform_user_id'); + } + + public function scopeForControl(Builder $query, string $controlKey): Builder + { + return $query->where('control_key', trim($controlKey)); + } + + public function scopeForGlobalScope(Builder $query): Builder + { + return $query->where('scope_type', 'global'); + } + + public function scopeForWorkspaceScope(Builder $query, int|Workspace $workspace): Builder + { + $workspaceId = $workspace instanceof Workspace + ? 
(int) $workspace->getKey() + : (int) $workspace; + + return $query + ->where('scope_type', 'workspace') + ->where('workspace_id', $workspaceId); + } + + public function scopeNotExpired(Builder $query): Builder + { + return $query->where(function (Builder $query): void { + $query + ->whereNull('expires_at') + ->orWhere('expires_at', '>', now()); + }); + } +} \ No newline at end of file diff --git a/apps/platform/app/Services/Audit/WorkspaceAuditLogger.php b/apps/platform/app/Services/Audit/WorkspaceAuditLogger.php index 71bd480e..958ebb86 100644 --- a/apps/platform/app/Services/Audit/WorkspaceAuditLogger.php +++ b/apps/platform/app/Services/Audit/WorkspaceAuditLogger.php @@ -6,6 +6,7 @@ use App\Models\Tenant; use App\Models\OperationRun; +use App\Models\PlatformUser; use App\Models\User; use App\Models\Workspace; use App\Support\Audit\AuditActionId; @@ -24,7 +25,7 @@ public function log( Workspace $workspace, string|AuditActionId $action, array $context = [], - ?User $actor = null, + User|PlatformUser|null $actor = null, string $status = 'success', ?string $resourceType = null, ?string $resourceId = null, @@ -37,14 +38,16 @@ public function log( ?int $operationRunId = null, ?Tenant $tenant = null, ): \App\Models\AuditLog { - $resolvedActor = $actor instanceof User - ? AuditActorSnapshot::human($actor) - : AuditActorSnapshot::fromLegacy( + $resolvedActor = match (true) { + $actor instanceof User => AuditActorSnapshot::human($actor), + $actor instanceof PlatformUser => AuditActorSnapshot::platform($actor), + default => AuditActorSnapshot::fromLegacy( type: $actorType ?? AuditActorType::infer($action instanceof AuditActionId ? 
$action->value : $action, $actorId, $actorEmail, $actorName, $context), id: $actorId, email: $actorEmail, label: $actorName, - ); + ), + }; return $this->auditRecorder->record( action: $action, @@ -71,7 +74,7 @@ public function logTenantLifecycleAction( Tenant $tenant, string|AuditActionId $action, array $context = [], - ?User $actor = null, + User|PlatformUser|null $actor = null, string $status = 'success', ?string $summary = null, ): \App\Models\AuditLog { @@ -96,7 +99,7 @@ public function logSupportDiagnosticsOpened( Tenant $tenant, string $contextType, array $bundle, - ?User $actor = null, + User|PlatformUser|null $actor = null, ?OperationRun $operationRun = null, ): \App\Models\AuditLog { $sectionCount = is_array($bundle['sections'] ?? null) ? count($bundle['sections']) : 0; diff --git a/apps/platform/app/Services/Runbooks/FindingsLifecycleBackfillRunbookService.php b/apps/platform/app/Services/Runbooks/FindingsLifecycleBackfillRunbookService.php index 37e8f735..8302ff85 100644 --- a/apps/platform/app/Services/Runbooks/FindingsLifecycleBackfillRunbookService.php +++ b/apps/platform/app/Services/Runbooks/FindingsLifecycleBackfillRunbookService.php @@ -10,15 +10,24 @@ use App\Models\OperationRun; use App\Models\PlatformUser; use App\Models\Tenant; +use App\Models\User; use App\Models\Workspace; use App\Notifications\OperationRunCompleted; use App\Services\Alerts\AlertDispatchService; +use App\Services\Audit\AuditRecorder; +use App\Services\Audit\WorkspaceAuditLogger; use App\Services\Auth\BreakGlassSession; use App\Services\Intune\AuditLogger; use App\Services\OperationRunService; use App\Services\System\AllowedTenantUniverse; +use App\Support\Audit\AuditActionId; +use App\Support\Audit\AuditActorSnapshot; +use App\Support\Audit\AuditTargetSnapshot; +use App\Support\OperationCatalog; use App\Support\OperationRunOutcome; use App\Support\OperationRunStatus; +use App\Support\OperationalControls\OperationalControlBlockedException; +use 
App\Support\OperationalControls\OperationalControlEvaluator; use App\Support\System\SystemOperationRunLinks; use Illuminate\Support\Facades\Cache; use Illuminate\Support\Facades\DB; @@ -35,6 +44,9 @@ public function __construct( private readonly OperationRunService $operationRunService, private readonly AuditLogger $auditLogger, private readonly AlertDispatchService $alertDispatchService, + private readonly OperationalControlEvaluator $operationalControls, + private readonly AuditRecorder $auditRecorder, + private readonly WorkspaceAuditLogger $workspaceAuditLogger, ) {} /** @@ -48,6 +60,7 @@ public function preflight(FindingsLifecycleBackfillScope $scope): array action: 'platform.ops.runbooks.preflight', scope: $scope, operationRunId: null, + initiator: null, context: [ 'preflight' => $result, ], @@ -58,7 +71,7 @@ public function preflight(FindingsLifecycleBackfillScope $scope): array public function start( FindingsLifecycleBackfillScope $scope, - ?PlatformUser $initiator, + User|PlatformUser|null $initiator, ?RunbookReason $reason, string $source, ): OperationRun { @@ -88,13 +101,41 @@ public function start( ]); } - $platformTenant = $this->platformTenant(); - $workspace = $platformTenant->workspace; + $workspace = null; + $tenant = null; + + if ($scope->isSingleTenant()) { + $tenant = Tenant::query()->whereKey((int) $scope->tenantId)->firstOrFail(); + $this->allowedTenantUniverse->ensureAllowed($tenant); + + $workspace = $tenant->workspace; + } else { + $platformTenant = $this->platformTenant(); + $workspace = $platformTenant->workspace; + } if (! 
$workspace instanceof Workspace) { throw new \RuntimeException('Platform tenant is missing its workspace.'); } + $decision = $this->operationalControls->evaluate(self::RUNBOOK_KEY, $workspace); + + if ($decision->isPaused()) { + $this->auditBlockedStart( + decision: $decision, + scope: $scope, + workspace: $workspace, + tenant: $tenant, + initiator: $initiator, + source: $source, + ); + + throw OperationalControlBlockedException::forDecision( + decision: $decision, + actionLabel: OperationCatalog::label(self::RUNBOOK_KEY), + ); + } + if ($scope->isAllTenants()) { $lockKey = sprintf('tenantpilot:runbooks:%s:workspace:%d', self::RUNBOOK_KEY, (int) $workspace->getKey()); $lock = Cache::lock($lockKey, 900); @@ -120,7 +161,7 @@ public function start( } return $this->startSingleTenant( - tenantId: (int) $scope->tenantId, + tenant: $tenant, initiator: $initiator, reason: $reason, preflight: $preflight, @@ -327,7 +368,7 @@ private function countDriftDuplicateConsolidations(Tenant $tenant): int private function startAllTenants( Workspace $workspace, - ?PlatformUser $initiator, + User|PlatformUser|null $initiator, ?RunbookReason $reason, array $preflight, string $source, @@ -349,7 +390,7 @@ private function startAllTenants( source: $source, isBreakGlassActive: $isBreakGlassActive, ), - initiator: null, + initiator: $initiator instanceof User ? 
$initiator : null, ); if ($initiator instanceof PlatformUser && $run->wasRecentlyCreated) { @@ -361,6 +402,7 @@ private function startAllTenants( action: 'platform.ops.runbooks.start', scope: FindingsLifecycleBackfillScope::allTenants(), operationRunId: (int) $run->getKey(), + initiator: $initiator, context: [ 'preflight' => $preflight, 'is_break_glass' => $isBreakGlassActive, @@ -382,15 +424,16 @@ private function startAllTenants( } private function startSingleTenant( - int $tenantId, - ?PlatformUser $initiator, + ?Tenant $tenant, + User|PlatformUser|null $initiator, ?RunbookReason $reason, array $preflight, string $source, bool $isBreakGlassActive, ): OperationRun { - $tenant = Tenant::query()->whereKey($tenantId)->firstOrFail(); - $this->allowedTenantUniverse->ensureAllowed($tenant); + if (! $tenant instanceof Tenant) { + throw new \RuntimeException('Target tenant is required for single-tenant runs.'); + } $run = $this->operationRunService->ensureRunWithIdentity( tenant: $tenant, @@ -408,7 +451,7 @@ private function startSingleTenant( source: $source, isBreakGlassActive: $isBreakGlassActive, ), - initiator: null, + initiator: $initiator instanceof User ? 
$initiator : null, ); if ($initiator instanceof PlatformUser && $run->wasRecentlyCreated) { @@ -420,6 +463,7 @@ private function startSingleTenant( action: 'platform.ops.runbooks.start', scope: FindingsLifecycleBackfillScope::singleTenant((int) $tenant->getKey()), operationRunId: (int) $run->getKey(), + initiator: $initiator, context: [ 'preflight' => $preflight, 'is_break_glass' => $isBreakGlassActive, @@ -458,7 +502,7 @@ private function platformTenant(): Tenant private function buildRunContext( int $workspaceId, FindingsLifecycleBackfillScope $scope, - ?PlatformUser $initiator, + User|PlatformUser|null $initiator, ?RunbookReason $reason, array $preflight, string $source, @@ -490,6 +534,12 @@ private function buildRunContext( 'name' => (string) $initiator->name, 'is_break_glass' => $isBreakGlassActive, ]; + } elseif ($initiator instanceof User) { + $context['tenant_initiator'] = [ + 'user_id' => (int) $initiator->getKey(), + 'email' => (string) $initiator->email, + 'name' => (string) $initiator->name, + ]; } return $context; @@ -514,23 +564,10 @@ private function auditSafely( string $action, FindingsLifecycleBackfillScope $scope, ?int $operationRunId, + User|PlatformUser|null $initiator, array $context = [], ): void { try { - $platformTenant = $this->platformTenant(); - - $actor = auth('platform')->user(); - - $actorId = null; - $actorEmail = null; - $actorName = null; - - if ($actor instanceof PlatformUser) { - $actorId = (int) $actor->getKey(); - $actorEmail = (string) $actor->email; - $actorName = (string) $actor->name; - } - $metadata = [ 'runbook_key' => self::RUNBOOK_KEY, 'scope' => $scope->mode, @@ -540,6 +577,37 @@ private function auditSafely( 'user_agent' => request()->userAgent(), ]; + if ($initiator instanceof User && $scope->isSingleTenant()) { + $tenant = Tenant::query()->whereKey((int) $scope->tenantId)->first(); + + if ($tenant instanceof Tenant) { + $this->auditLogger->log( + tenant: $tenant, + action: $action, + context: [ + 'metadata' => 
array_filter($metadata, static fn (mixed $value): bool => $value !== null), + ] + $context, + actorId: (int) $initiator->getKey(), + actorEmail: (string) $initiator->email, + actorName: (string) $initiator->name, + status: 'success', + resourceType: 'operation_run', + resourceId: $operationRunId !== null ? (string) $operationRunId : null, + ); + + return; + } + } + + $platformTenant = $this->platformTenant(); + $platformActor = $initiator instanceof PlatformUser + ? $initiator + : auth('platform')->user(); + + $actorId = $platformActor instanceof PlatformUser ? (int) $platformActor->getKey() : null; + $actorEmail = $platformActor instanceof PlatformUser ? (string) $platformActor->email : null; + $actorName = $platformActor instanceof PlatformUser ? (string) $platformActor->name : null; + $this->auditLogger->log( tenant: $platformTenant, action: $action, @@ -558,6 +626,68 @@ private function auditSafely( } } + private function auditBlockedStart( + \App\Support\OperationalControls\OperationalControlDecision $decision, + FindingsLifecycleBackfillScope $scope, + Workspace $workspace, + ?Tenant $tenant, + User|PlatformUser|null $initiator, + string $source, + ): void { + try { + $metadata = array_filter([ + 'control_key' => $decision->controlKey, + 'scope_type' => $decision->matchedScopeType, + 'workspace_id' => (int) $workspace->getKey(), + 'reason_text' => $decision->reasonText, + 'expires_at' => $decision->expiresAt?->toIso8601String(), + 'actor_id' => $initiator instanceof User || $initiator instanceof PlatformUser ? 
(int) $initiator->getKey() : null, + 'requested_scope' => $scope->mode, + 'target_tenant_id' => $scope->tenantId, + 'source' => $source, + 'runbook_key' => self::RUNBOOK_KEY, + ], static fn (mixed $value): bool => $value !== null && $value !== ''); + + $summary = sprintf('%s blocked by operational control', OperationCatalog::label(self::RUNBOOK_KEY)); + + if ($scope->isAllTenants()) { + $this->auditRecorder->record( + action: AuditActionId::OperationalControlExecutionBlocked, + context: ['metadata' => $metadata], + actor: $initiator instanceof PlatformUser ? AuditActorSnapshot::platform($initiator) : null, + target: new AuditTargetSnapshot( + type: 'operational_control', + id: $decision->sourceActivationId, + label: OperationCatalog::label(self::RUNBOOK_KEY), + ), + outcome: 'blocked', + summary: $summary, + ); + + return; + } + + if (! $tenant instanceof Tenant) { + return; + } + + $this->workspaceAuditLogger->log( + workspace: $workspace, + action: AuditActionId::OperationalControlExecutionBlocked, + context: ['metadata' => $metadata], + actor: $initiator, + status: 'blocked', + resourceType: 'operational_control', + resourceId: $decision->sourceActivationId !== null ? (string) $decision->sourceActivationId : null, + targetLabel: OperationCatalog::label(self::RUNBOOK_KEY), + summary: $summary, + tenant: $tenant, + ); + } catch (Throwable) { + // Audit is fail-safe (must not crash runbooks). 
+ } + } + private function notifyInitiatorSafely(OperationRun $run): void { try { diff --git a/apps/platform/app/Support/Audit/AuditActionId.php b/apps/platform/app/Support/Audit/AuditActionId.php index 094366f7..1e1f6140 100644 --- a/apps/platform/app/Support/Audit/AuditActionId.php +++ b/apps/platform/app/Support/Audit/AuditActionId.php @@ -100,6 +100,10 @@ enum AuditActionId: string case TenantTriageReviewMarkedFollowUpNeeded = 'tenant_triage_review.marked_follow_up_needed'; case SupportDiagnosticsOpened = 'support_diagnostics.opened'; + case OperationalControlPaused = 'operational_control.paused'; + case OperationalControlUpdated = 'operational_control.updated'; + case OperationalControlResumed = 'operational_control.resumed'; + case OperationalControlExecutionBlocked = 'operational_control.execution_blocked'; // Workspace selection / switch events (Spec 107). case WorkspaceAutoSelected = 'workspace.auto_selected'; @@ -237,6 +241,10 @@ private static function labels(): array self::TenantTriageReviewMarkedReviewed->value => 'Triage review marked reviewed', self::TenantTriageReviewMarkedFollowUpNeeded->value => 'Triage review marked follow-up needed', self::SupportDiagnosticsOpened->value => 'Support diagnostics opened', + self::OperationalControlPaused->value => 'Operational control paused', + self::OperationalControlUpdated->value => 'Operational control updated', + self::OperationalControlResumed->value => 'Operational control resumed', + self::OperationalControlExecutionBlocked->value => 'Operational control blocked execution', 'baseline.capture.started' => 'Baseline capture started', 'baseline.capture.completed' => 'Baseline capture completed', 'baseline.capture.failed' => 'Baseline capture failed', @@ -319,6 +327,10 @@ private static function summaries(): array self::TenantReviewExported->value => 'Tenant review exported', self::TenantReviewSuccessorCreated->value => 'Tenant review next cycle created', self::SupportDiagnosticsOpened->value => 'Support 
diagnostics opened', + self::OperationalControlPaused->value => 'Operational control paused', + self::OperationalControlUpdated->value => 'Operational control updated', + self::OperationalControlResumed->value => 'Operational control resumed', + self::OperationalControlExecutionBlocked->value => 'Operational control blocked execution', ]; } diff --git a/apps/platform/app/Support/Auth/PlatformCapabilities.php b/apps/platform/app/Support/Auth/PlatformCapabilities.php index ca7ced3c..e1575640 100644 --- a/apps/platform/app/Support/Auth/PlatformCapabilities.php +++ b/apps/platform/app/Support/Auth/PlatformCapabilities.php @@ -30,6 +30,8 @@ class PlatformCapabilities public const RUNBOOKS_FINDINGS_LIFECYCLE_BACKFILL = 'platform.runbooks.findings.lifecycle_backfill'; + public const OPS_CONTROLS_MANAGE = 'platform.ops.controls.manage'; + /** * @return array */ diff --git a/apps/platform/app/Support/OperationalControls/OperationalControlBlockedException.php b/apps/platform/app/Support/OperationalControls/OperationalControlBlockedException.php new file mode 100644 index 00000000..fd0db5ca --- /dev/null +++ b/apps/platform/app/Support/OperationalControls/OperationalControlBlockedException.php @@ -0,0 +1,31 @@ +reasonText ?? ''); + + parent::__construct($message !== '' + ? sprintf('%s is currently paused. 
%s', $actionLabel, $message) + : sprintf('%s is currently paused.', $actionLabel)); + } + + public static function forDecision(OperationalControlDecision $decision, string $actionLabel): self + { + return new self($decision, $actionLabel); + } + + public function title(): string + { + return sprintf('%s paused', $this->actionLabel); + } +} \ No newline at end of file diff --git a/apps/platform/app/Support/OperationalControls/OperationalControlCatalog.php b/apps/platform/app/Support/OperationalControls/OperationalControlCatalog.php new file mode 100644 index 00000000..414ea34f --- /dev/null +++ b/apps/platform/app/Support/OperationalControls/OperationalControlCatalog.php @@ -0,0 +1,56 @@ +, operation_types: array, affected_surfaces: array}> + */ + private const DEFINITIONS = [ + 'restore.execute' => [ + 'key' => 'restore.execute', + 'label' => 'Restore execution', + 'supported_scopes' => ['global', 'workspace'], + 'operation_types' => ['restore.execute'], + 'affected_surfaces' => ['tenant.restore_runs.create'], + ], + ]; + + /** + * @return array + */ + public function keys(): array + { + return array_keys(self::DEFINITIONS); + } + + /** + * @return array> + */ + public function definitions(): array + { + return self::DEFINITIONS; + } + + /** + * @return array{key: string, label: string, supported_scopes: array, operation_types: array, affected_surfaces: array} + */ + public function definition(string $controlKey): array + { + $controlKey = trim($controlKey); + + if (! 
array_key_exists($controlKey, self::DEFINITIONS)) { + throw new \InvalidArgumentException("Unknown operational control [{$controlKey}]."); + } + + return self::DEFINITIONS[$controlKey]; + } + + public function label(string $controlKey): string + { + return $this->definition($controlKey)['label']; + } +} \ No newline at end of file diff --git a/apps/platform/app/Support/OperationalControls/OperationalControlDecision.php b/apps/platform/app/Support/OperationalControls/OperationalControlDecision.php new file mode 100644 index 00000000..14d0df90 --- /dev/null +++ b/apps/platform/app/Support/OperationalControls/OperationalControlDecision.php @@ -0,0 +1,81 @@ +effectiveState === 'enabled'; + } + + public function isPaused(): bool + { + return $this->effectiveState === 'paused'; + } + + public function hasWorkspaceScope(): bool + { + return $this->matchedScopeType === 'workspace' && $this->workspaceId !== null; + } + + public function scopeLabel(): string + { + return match ($this->matchedScopeType) { + 'global' => 'Global', + 'workspace' => $this->workspaceId !== null ? 'Workspace #'.$this->workspaceId : 'Workspace', + default => 'No active pause', + }; + } + + public function expiresAtIso8601(): ?string + { + return $this->expiresAt?->toIso8601String(); + } +} \ No newline at end of file diff --git a/apps/platform/app/Support/OperationalControls/OperationalControlEvaluator.php b/apps/platform/app/Support/OperationalControls/OperationalControlEvaluator.php new file mode 100644 index 00000000..3dc65fdb --- /dev/null +++ b/apps/platform/app/Support/OperationalControls/OperationalControlEvaluator.php @@ -0,0 +1,63 @@ +catalog->definition($controlKey); + $workspaceId = $workspace instanceof Workspace + ? (int) $workspace->getKey() + : (is_int($workspace) ? 
$workspace : null); + + $globalActivation = OperationalControlActivation::query() + ->forControl($definition['key']) + ->forGlobalScope() + ->notExpired() + ->latest('id') + ->first(); + + if ($globalActivation instanceof OperationalControlActivation) { + return OperationalControlDecision::paused( + controlKey: $definition['key'], + matchedScopeType: 'global', + workspaceId: null, + reasonText: $globalActivation->reason_text, + expiresAt: $globalActivation->expires_at, + sourceActivationId: (int) $globalActivation->getKey(), + ); + } + + if ($workspaceId !== null) { + $workspaceActivation = OperationalControlActivation::query() + ->forControl($definition['key']) + ->forWorkspaceScope($workspaceId) + ->notExpired() + ->latest('id') + ->first(); + + if ($workspaceActivation instanceof OperationalControlActivation) { + return OperationalControlDecision::paused( + controlKey: $definition['key'], + matchedScopeType: 'workspace', + workspaceId: $workspaceId, + reasonText: $workspaceActivation->reason_text, + expiresAt: $workspaceActivation->expires_at, + sourceActivationId: (int) $workspaceActivation->getKey(), + ); + } + } + + return OperationalControlDecision::enabled($definition['key']); + } +} \ No newline at end of file diff --git a/apps/platform/config/tenantpilot.php b/apps/platform/config/tenantpilot.php index ce4edfa3..d4ef8f1c 100644 --- a/apps/platform/config/tenantpilot.php +++ b/apps/platform/config/tenantpilot.php @@ -149,9 +149,6 @@ ], ], ], - - 'allow_admin_maintenance_actions' => (bool) env('ALLOW_ADMIN_MAINTENANCE_ACTIONS', false), - 'supported_policy_types' => [ [ 'type' => 'deviceConfiguration', diff --git a/apps/platform/database/factories/OperationalControlActivationFactory.php b/apps/platform/database/factories/OperationalControlActivationFactory.php new file mode 100644 index 00000000..ee938560 --- /dev/null +++ b/apps/platform/database/factories/OperationalControlActivationFactory.php @@ -0,0 +1,55 @@ + + */ +class 
OperationalControlActivationFactory extends Factory +{ + protected $model = OperationalControlActivation::class; + + /** + * @return array + */ + public function definition(): array + { + return [ + 'control_key' => 'restore.execute', + 'scope_type' => 'global', + 'workspace_id' => null, + 'reason_text' => fake()->sentence(), + 'expires_at' => null, + 'created_by_platform_user_id' => PlatformUser::factory(), + 'updated_by_platform_user_id' => null, + ]; + } + + public function forControl(string $controlKey): static + { + return $this->state(fn (): array => [ + 'control_key' => $controlKey, + ]); + } + + public function forGlobalScope(): static + { + return $this->state(fn (): array => [ + 'scope_type' => 'global', + 'workspace_id' => null, + ]); + } + + public function workspaceScoped(): static + { + return $this->state(fn (): array => [ + 'scope_type' => 'workspace', + 'workspace_id' => Workspace::factory(), + ]); + } +} \ No newline at end of file diff --git a/apps/platform/database/migrations/2026_04_26_000000_create_operational_control_activations_table.php b/apps/platform/database/migrations/2026_04_26_000000_create_operational_control_activations_table.php new file mode 100644 index 00000000..291ea7f0 --- /dev/null +++ b/apps/platform/database/migrations/2026_04_26_000000_create_operational_control_activations_table.php @@ -0,0 +1,38 @@ +id(); + $table->string('control_key'); + $table->string('scope_type'); + $table->foreignId('workspace_id')->nullable()->constrained('workspaces')->cascadeOnDelete(); + $table->text('reason_text'); + $table->timestampTz('expires_at')->nullable(); + $table->foreignId('created_by_platform_user_id')->constrained('platform_users')->restrictOnDelete(); + $table->foreignId('updated_by_platform_user_id')->nullable()->constrained('platform_users')->nullOnDelete(); + $table->timestamps(); + + $table->index(['control_key', 'scope_type']); + $table->index(['workspace_id', 'control_key']); + $table->index('expires_at'); + }); + + 
DB::statement("CREATE UNIQUE INDEX operational_control_activations_global_unique ON operational_control_activations (control_key) WHERE scope_type = 'global'"); + DB::statement("CREATE UNIQUE INDEX operational_control_activations_workspace_unique ON operational_control_activations (control_key, workspace_id) WHERE scope_type = 'workspace' AND workspace_id IS NOT NULL"); + } + + public function down(): void + { + Schema::dropIfExists('operational_control_activations'); + } +}; \ No newline at end of file diff --git a/apps/platform/database/seeders/PlatformUserSeeder.php b/apps/platform/database/seeders/PlatformUserSeeder.php index 060fe037..3585c630 100644 --- a/apps/platform/database/seeders/PlatformUserSeeder.php +++ b/apps/platform/database/seeders/PlatformUserSeeder.php @@ -42,6 +42,7 @@ public function run(): void PlatformCapabilities::RUNBOOKS_VIEW, PlatformCapabilities::RUNBOOKS_RUN, PlatformCapabilities::RUNBOOKS_FINDINGS_LIFECYCLE_BACKFILL, + PlatformCapabilities::OPS_CONTROLS_MANAGE, ], 'is_active' => true, ], diff --git a/apps/platform/resources/views/filament/system/pages/ops/controls.blade.php b/apps/platform/resources/views/filament/system/pages/ops/controls.blade.php new file mode 100644 index 00000000..567dcf8e --- /dev/null +++ b/apps/platform/resources/views/filament/system/pages/ops/controls.blade.php @@ -0,0 +1,120 @@ +@php + $controls = $this->controlCards(); +@endphp + + +
+ +
+ + +
+

Runtime safety controls

+

+ Use these bounded operational controls to pause risky starts without hiding the underlying surface. Global pauses win over workspace-specific pauses. +

+
+
+
+ +
+ @foreach ($controls as $control) + + + {{ $control['label'] }} + + + + {{ implode(', ', $control['affected_surfaces']) }} + + + + + {{ $control['state_label'] }} + + + + @php + $pauseActionName = 'pause_'.$control['action_slug']; + $resumeActionName = 'resume_'.$control['action_slug']; + $historyActionName = 'view_history_'.$control['action_slug']; + @endphp + +
+
+ @foreach ($control['supported_scopes'] as $scope) + + {{ ucfirst($scope) }} + + @endforeach +
+ +
+ @if ($control['effective_state'] === 'paused') + + Resume + + @else + + Pause + + @endif + + + History + +
+ + @if ($control['active_activations'] !== []) +
+ @foreach ($control['active_activations'] as $activation) +
+
+ + {{ $activation['scope_label'] }} + + + + Owner: {{ $activation['owner_name'] }} + + + + {{ $activation['expires_label'] }} + +
+ +

+ {{ $activation['reason_text'] }} +

+
+ @endforeach +
+ @else +
+ No active pauses. New starts are currently enabled. +
+ @endif + +

+ Use the card actions to pause, resume, or inspect audit history for this control. +

+
+
+ @endforeach +
+
+
\ No newline at end of file diff --git a/apps/platform/resources/views/filament/system/pages/ops/partials/controls-header.blade.php b/apps/platform/resources/views/filament/system/pages/ops/partials/controls-header.blade.php new file mode 100644 index 00000000..1be4d1c7 --- /dev/null +++ b/apps/platform/resources/views/filament/system/pages/ops/partials/controls-header.blade.php @@ -0,0 +1,6 @@ + \ No newline at end of file diff --git a/apps/platform/resources/views/filament/system/pages/ops/partials/operational-control-history.blade.php b/apps/platform/resources/views/filament/system/pages/ops/partials/operational-control-history.blade.php new file mode 100644 index 00000000..e6813e71 --- /dev/null +++ b/apps/platform/resources/views/filament/system/pages/ops/partials/operational-control-history.blade.php @@ -0,0 +1,29 @@ +
+ @if ($events->isEmpty()) +

+ No audit history exists yet for {{ $label }}. +

+ @else + @foreach ($events as $event) +
+
+ + {{ \App\Support\Audit\AuditActionId::labelFor((string) $event->action) }} + + + + {{ $event->recorded_at?->diffForHumans() ?? 'Unknown time' }} + + + + {{ $event->actorDisplayLabel() }} + +
+ +

+ {{ $event->summaryText() }} +

+
+ @endforeach + @endif +
\ No newline at end of file diff --git a/apps/platform/tests/Feature/Filament/Spec113/AdminFindingsNoMaintenanceActionsTest.php b/apps/platform/tests/Feature/Filament/Spec113/AdminFindingsNoMaintenanceActionsTest.php index 5309f923..de5815ba 100644 --- a/apps/platform/tests/Feature/Filament/Spec113/AdminFindingsNoMaintenanceActionsTest.php +++ b/apps/platform/tests/Feature/Filament/Spec113/AdminFindingsNoMaintenanceActionsTest.php @@ -9,12 +9,13 @@ uses(RefreshDatabase::class); -it('does not expose maintenance actions in /admin findings list by default', function () { +it('exposes the findings lifecycle backfill action for entitled tenant operators', function () { [$user, $tenant] = createUserWithTenant(role: 'owner'); $this->actingAs($user); Filament::setTenant($tenant, true); Livewire::test(ListFindings::class) - ->assertActionDoesNotExist('backfill_lifecycle'); + ->assertActionExists('backfill_lifecycle') + ->assertActionEnabled('backfill_lifecycle'); }); diff --git a/apps/platform/tests/Feature/Findings/OperationalControlFindingsBackfillGateTest.php b/apps/platform/tests/Feature/Findings/OperationalControlFindingsBackfillGateTest.php new file mode 100644 index 00000000..89545345 --- /dev/null +++ b/apps/platform/tests/Feature/Findings/OperationalControlFindingsBackfillGateTest.php @@ -0,0 +1,101 @@ +create([ + 'tenant_id' => (int) $tenant->getKey(), + 'due_at' => null, + ]); + + OperationalControlActivation::factory()->workspaceScoped()->create([ + 'control_key' => 'findings.lifecycle.backfill', + 'workspace_id' => (int) $tenant->workspace_id, + 'reason_text' => 'Workspace-specific pause.', + ]); + + $this->actingAs($user); + Filament::setTenant($tenant, true); + + Livewire::test(ListFindings::class) + ->assertActionExists('backfill_lifecycle') + ->assertActionEnabled('backfill_lifecycle') + ->callAction('backfill_lifecycle') + ->assertNotified('Findings lifecycle backfill paused'); + + expect(OperationRun::query()->where('type', 
'findings.lifecycle.backfill')->count())->toBe(0); + + $audit = AuditLog::query() + ->where('action', AuditActionId::OperationalControlExecutionBlocked->value) + ->latest('id') + ->first(); + + expect($audit)->not->toBeNull() + ->and($audit?->workspace_id)->toBe((int) $tenant->workspace_id) + ->and($audit?->tenant_id)->toBe((int) $tenant->getKey()) + ->and($audit?->status)->toBe('blocked') + ->and($audit?->metadata['control_key'] ?? null)->toBe('findings.lifecycle.backfill') + ->and($audit?->metadata['workspace_id'] ?? null)->toBe((int) $tenant->workspace_id); +}); + +it('does not block findings backfill for a different workspace when the pause is workspace-scoped', function (): void { + Queue::fake(); + + [$blockedUser, $blockedTenant] = createUserWithTenant(role: 'owner'); + [$allowedUser, $allowedTenant] = createUserWithTenant(role: 'owner'); + + Finding::factory()->create([ + 'tenant_id' => (int) $allowedTenant->getKey(), + 'due_at' => null, + ]); + + OperationalControlActivation::factory()->workspaceScoped()->create([ + 'control_key' => 'findings.lifecycle.backfill', + 'workspace_id' => (int) $blockedTenant->workspace_id, + 'reason_text' => 'Paused only for the blocked workspace.', + ]); + + $this->actingAs($allowedUser); + Filament::setTenant($allowedTenant, true); + + Livewire::test(ListFindings::class) + ->assertActionExists('backfill_lifecycle') + ->assertActionEnabled('backfill_lifecycle') + ->callAction('backfill_lifecycle'); + + $run = OperationRun::query() + ->where('type', 'findings.lifecycle.backfill') + ->where('tenant_id', (int) $allowedTenant->getKey()) + ->latest('id') + ->first(); + + expect($run)->not->toBeNull(); + + Queue::assertPushed(BackfillFindingLifecycleJob::class, function (BackfillFindingLifecycleJob $job) use ($allowedTenant): bool { + return $job->tenantId === (int) $allowedTenant->getKey() + && $job->workspaceId === (int) $allowedTenant->workspace_id; + }); + + expect(AuditLog::query() + ->where('action', 
AuditActionId::OperationalControlExecutionBlocked->value) + ->where('tenant_id', (int) $allowedTenant->getKey()) + ->exists())->toBeFalse(); +}); \ No newline at end of file diff --git a/apps/platform/tests/Feature/OperationalControls/NoAdHocOperationalControlBypassTest.php b/apps/platform/tests/Feature/OperationalControls/NoAdHocOperationalControlBypassTest.php new file mode 100644 index 00000000..ee22abed --- /dev/null +++ b/apps/platform/tests/Feature/OperationalControls/NoAdHocOperationalControlBypassTest.php @@ -0,0 +1,69 @@ + $root.'/app/Filament/Resources/FindingResource/Pages/ListFindings.php', + 'required' => [ + 'FindingsLifecycleBackfillRunbookService', + 'OperationalControlBlockedException', + 'FindingsLifecycleBackfillScope::singleTenant(', + ], + 'forbidden' => [ + "config('tenantpilot.allow_admin_maintenance_actions'", + 'allow_admin_maintenance_actions', + 'OperationalControlActivation::', + ], + ], + [ + 'file' => $root.'/app/Filament/System/Pages/Ops/Runbooks.php', + 'required' => [ + 'FindingsLifecycleBackfillRunbookService', + 'OperationalControlBlockedException', + '$runbookService->start(', + ], + 'forbidden' => [ + 'OperationalControlActivation::', + "config('tenantpilot.allow_admin_maintenance_actions'", + ], + ], + [ + 'file' => $root.'/app/Filament/Resources/RestoreRunResource.php', + 'required' => [ + 'guardRestoreExecutionOperationalControl(', + 'OperationalControlEvaluator::class', + 'OperationalControlBlockedException', + ], + 'forbidden' => [ + 'OperationalControlActivation::', + "config('tenantpilot.allow_admin_maintenance_actions'", + ], + ], + [ + 'file' => $root.'/config/tenantpilot.php', + 'required' => [], + 'forbidden' => [ + 'allow_admin_maintenance_actions', + 'ALLOW_ADMIN_MAINTENANCE_ACTIONS', + ], + ], + ]; + + foreach ($checks as $check) { + $source = SourceFileScanner::read($check['file']); + + foreach ($check['required'] as $needle) { + expect($source)->toContain($needle); + } + + foreach ($check['forbidden'] as $needle) 
{ + expect($source)->not->toContain($needle); + } + } +})->group('surface-guard'); \ No newline at end of file diff --git a/apps/platform/tests/Feature/OperationalControls/OperationalControlAuthorizationSemanticsTest.php b/apps/platform/tests/Feature/OperationalControls/OperationalControlAuthorizationSemanticsTest.php new file mode 100644 index 00000000..6173e288 --- /dev/null +++ b/apps/platform/tests/Feature/OperationalControls/OperationalControlAuthorizationSemanticsTest.php @@ -0,0 +1,133 @@ +create([ + 'tenant_id' => fake()->uuid(), + 'name' => 'Authorization Tenant', + 'rbac_status' => 'ok', + 'rbac_last_checked_at' => now(), + ]); + + $tenant->makeCurrent(); + ensureDefaultProviderConnection($tenant, 'microsoft'); + + $policy = Policy::create([ + 'tenant_id' => $tenant->id, + 'external_id' => fake()->uuid(), + 'policy_type' => 'deviceConfiguration', + 'display_name' => 'Authorization Restore Policy', + 'platform' => 'windows', + ]); + + $backupSet = BackupSet::create([ + 'tenant_id' => $tenant->id, + 'name' => 'Authorization Backup', + 'status' => 'completed', + 'item_count' => 1, + ]); + + $backupItem = BackupItem::create([ + 'tenant_id' => $tenant->id, + 'backup_set_id' => $backupSet->id, + 'policy_id' => $policy->id, + 'policy_identifier' => $policy->external_id, + 'policy_type' => $policy->policy_type, + 'platform' => $policy->platform, + 'payload' => ['id' => $policy->external_id], + 'metadata' => ['displayName' => 'Authorization Restore Policy'], + ]); + + Filament::setTenant($tenant, true); + + return [$tenant, $backupSet, $backupItem]; +} + +it('keeps non-members at 404 even when restore execution is paused', function (): void { + [$tenant] = seedRestoreAuthorizationContext(); + + OperationalControlActivation::factory()->workspaceScoped()->create([ + 'control_key' => 'restore.execute', + 'workspace_id' => (int) $tenant->workspace_id, + 'reason_text' => 'Paused while access is under review.', + ]); + + $user = User::factory()->create(); + + 
$this->actingAs($user) + ->get(RestoreRunResource::getUrl('create', panel: 'tenant', tenant: $tenant)) + ->assertNotFound(); +}); + +it('keeps members without tenant-manage at 403 even when restore execution is paused', function (): void { + [$tenant] = seedRestoreAuthorizationContext(); + + OperationalControlActivation::factory()->workspaceScoped()->create([ + 'control_key' => 'restore.execute', + 'workspace_id' => (int) $tenant->workspace_id, + 'reason_text' => 'Paused while access is under review.', + ]); + + [$user] = createUserWithTenant(tenant: $tenant, role: 'operator'); + + $this->actingAs($user) + ->get(RestoreRunResource::getUrl('create', panel: 'tenant', tenant: $tenant)) + ->assertForbidden(); +}); + +it('shows paused-state feedback only to entitled users blocked by an operational control', function (): void { + [$tenant, $backupSet, $backupItem] = seedRestoreAuthorizationContext(); + + OperationalControlActivation::factory()->workspaceScoped()->create([ + 'control_key' => 'restore.execute', + 'workspace_id' => (int) $tenant->workspace_id, + 'reason_text' => 'Paused for tenant-safe validation.', + ]); + + [$user] = createUserWithTenant(tenant: $tenant, role: 'owner'); + + $this->actingAs($user); + + Livewire::test(CreateRestoreRun::class) + ->fillForm([ + 'backup_set_id' => $backupSet->id, + ]) + ->goToNextWizardStep() + ->fillForm([ + 'scope_mode' => 'selected', + 'backup_item_ids' => [$backupItem->id], + ]) + ->goToNextWizardStep() + ->callFormComponentAction('check_results', 'run_restore_checks') + ->goToNextWizardStep() + ->callFormComponentAction('preview_diffs', 'run_restore_preview') + ->goToNextWizardStep() + ->fillForm([ + 'is_dry_run' => false, + 'acknowledged_impact' => true, + 'tenant_confirm' => 'Authorization Tenant', + ]) + ->call('create') + ->assertNotified('Restore execution paused'); +}); \ No newline at end of file diff --git a/apps/platform/tests/Feature/Restore/OperationalControlRestoreExecutionGateTest.php 
b/apps/platform/tests/Feature/Restore/OperationalControlRestoreExecutionGateTest.php new file mode 100644 index 00000000..5444396a --- /dev/null +++ b/apps/platform/tests/Feature/Restore/OperationalControlRestoreExecutionGateTest.php @@ -0,0 +1,261 @@ +create(['name' => 'Restore Workspace']); + + $tenant = Tenant::factory()->create([ + 'workspace_id' => (int) $workspace->getKey(), + 'tenant_id' => fake()->uuid(), + 'name' => 'Restore Tenant', + 'rbac_status' => 'ok', + 'rbac_last_checked_at' => now(), + ]); + + $tenant->makeCurrent(); + + if ($withProviderConnection) { + ensureDefaultProviderConnection($tenant, 'microsoft'); + } + + $policy = Policy::create([ + 'tenant_id' => $tenant->id, + 'external_id' => fake()->uuid(), + 'policy_type' => 'deviceConfiguration', + 'display_name' => 'Restore Policy', + 'platform' => 'windows', + ]); + + $backupSet = BackupSet::create([ + 'tenant_id' => $tenant->id, + 'name' => 'Restore Backup', + 'status' => 'completed', + 'item_count' => 1, + ]); + + $backupItem = BackupItem::create([ + 'tenant_id' => $tenant->id, + 'backup_set_id' => $backupSet->id, + 'policy_id' => $policy->id, + 'policy_identifier' => $policy->external_id, + 'policy_type' => $policy->policy_type, + 'platform' => $policy->platform, + 'payload' => ['id' => $policy->external_id], + 'metadata' => ['displayName' => 'Restore Policy'], + ]); + + $user = User::factory()->create([ + 'email' => fake()->unique()->safeEmail(), + 'name' => 'Restore Operator', + ]); + + $user->tenants()->syncWithoutDetaching([ + $tenant->getKey() => ['role' => 'owner'], + ]); + + Filament::setTenant($tenant, true); + + return [$tenant, $backupSet, $backupItem, $user, $workspace]; +} + +it('blocks restore execution before any operation run, restore run, job, or provider start is created', function (): void { + Bus::fake(); + + [$tenant, $backupSet, $backupItem, $user] = seedOperationalRestoreExecutionContext(); + + OperationalControlActivation::factory()->workspaceScoped()->create([ + 
'control_key' => 'restore.execute', + 'workspace_id' => (int) $tenant->workspace_id, + 'reason_text' => 'Paused during restore safety review.', + ]); + + $this->actingAs($user); + + Livewire::test(CreateRestoreRun::class) + ->fillForm([ + 'backup_set_id' => $backupSet->id, + ]) + ->goToNextWizardStep() + ->fillForm([ + 'scope_mode' => 'selected', + 'backup_item_ids' => [$backupItem->id], + ]) + ->goToNextWizardStep() + ->callFormComponentAction('check_results', 'run_restore_checks') + ->goToNextWizardStep() + ->callFormComponentAction('preview_diffs', 'run_restore_preview') + ->goToNextWizardStep() + ->fillForm([ + 'is_dry_run' => false, + 'acknowledged_impact' => true, + 'tenant_confirm' => 'Restore Tenant', + ]) + ->call('create') + ->assertNotified('Restore execution paused'); + + expect(RestoreRun::query()->count())->toBe(0) + ->and(OperationRun::query()->where('type', 'restore.execute')->count())->toBe(0); + + $audit = AuditLog::query() + ->where('action', AuditActionId::OperationalControlExecutionBlocked->value) + ->latest('id') + ->first(); + + expect($audit)->not->toBeNull() + ->and($audit?->workspace_id)->toBe((int) $tenant->workspace_id) + ->and($audit?->tenant_id)->toBe((int) $tenant->getKey()) + ->and($audit?->status)->toBe('blocked') + ->and($audit?->metadata['control_key'] ?? 
null)->toBe('restore.execute'); + + Bus::assertNotDispatched(ExecuteRestoreRunJob::class); +}); + +it('does not retroactively mutate already accepted restore execution runs when a later pause is activated', function (): void { + [$tenant, $backupSet, $backupItem, $user, $workspace] = seedOperationalRestoreExecutionContext(withProviderConnection: false); + + $operationRun = OperationRun::factory() + ->forTenant($tenant) + ->queued() + ->create([ + 'type' => 'restore.execute', + 'outcome' => 'pending', + 'initiator_name' => $user->name, + 'context' => [ + 'backup_set_id' => (int) $backupSet->getKey(), + 'target_scope' => ['entra_tenant_id' => $tenant->graphTenantId()], + ], + ]); + + $restoreRun = RestoreRun::query()->create([ + 'tenant_id' => (int) $tenant->getKey(), + 'backup_set_id' => (int) $backupSet->getKey(), + 'operation_run_id' => (int) $operationRun->getKey(), + 'requested_by' => $user->email, + 'is_dry_run' => false, + 'status' => RestoreRunStatus::Queued->value, + 'idempotency_key' => 'accepted-before-pause', + 'requested_items' => [(int) $backupItem->getKey()], + 'preview' => ['summary' => []], + 'metadata' => ['confirmed_by' => $user->email], + 'group_mapping' => [], + ]); + + $platformUser = PlatformUser::factory()->create([ + 'capabilities' => [ + PlatformCapabilities::ACCESS_SYSTEM_PANEL, + PlatformCapabilities::OPS_CONTROLS_MANAGE, + ], + 'is_active' => true, + ]); + + Filament::setCurrentPanel('system'); + Filament::bootCurrentPanel(); + $this->actingAs($platformUser, 'platform'); + + Livewire::test(Controls::class) + ->callAction('pause_restore_execute', data: [ + 'scope_type' => 'workspace', + 'workspace_id' => (int) $workspace->getKey(), + 'reason_text' => 'Pause after the run was already accepted.', + 'expires_at' => now()->addHour()->toDateTimeString(), + ]) + ->assertNotified('Restore execution paused'); + + expect($operationRun->fresh()) + ->not->toBeNull() + ->and($operationRun->fresh()?->status)->toBe('queued') + 
->and($operationRun->fresh()?->outcome)->toBe('pending'); + + expect($restoreRun->fresh()) + ->not->toBeNull() + ->and($restoreRun->fresh()?->status)->toBe(RestoreRunStatus::Queued->value) + ->and((int) ($restoreRun->fresh()?->operation_run_id ?? 0))->toBe((int) $operationRun->getKey()); +}); + +it('does not block restore execution for a different workspace when the pause is workspace-scoped', function (): void { + Bus::fake(); + + $blockedWorkspace = Workspace::factory()->create(['name' => 'Blocked Workspace']); + $allowedWorkspace = Workspace::factory()->create(['name' => 'Allowed Workspace']); + + [$blockedTenant] = seedOperationalRestoreExecutionContext(workspace: $blockedWorkspace); + [$allowedTenant, $backupSet, $backupItem, $user] = seedOperationalRestoreExecutionContext(workspace: $allowedWorkspace); + + OperationalControlActivation::factory()->workspaceScoped()->create([ + 'control_key' => 'restore.execute', + 'workspace_id' => (int) $blockedTenant->workspace_id, + 'reason_text' => 'Paused only for the blocked workspace.', + ]); + + $this->actingAs($user); + Filament::setTenant($allowedTenant, true); + + Livewire::test(CreateRestoreRun::class) + ->fillForm([ + 'backup_set_id' => $backupSet->id, + ]) + ->goToNextWizardStep() + ->fillForm([ + 'scope_mode' => 'selected', + 'backup_item_ids' => [$backupItem->id], + ]) + ->goToNextWizardStep() + ->callFormComponentAction('check_results', 'run_restore_checks') + ->goToNextWizardStep() + ->callFormComponentAction('preview_diffs', 'run_restore_preview') + ->goToNextWizardStep() + ->fillForm([ + 'is_dry_run' => false, + 'acknowledged_impact' => true, + 'tenant_confirm' => 'Restore Tenant', + ]) + ->call('create') + ->assertHasNoFormErrors(); + + $restoreRun = RestoreRun::query() + ->where('tenant_id', (int) $allowedTenant->getKey()) + ->latest('id') + ->first(); + + $operationRun = OperationRun::query() + ->where('tenant_id', (int) $allowedTenant->getKey()) + ->where('type', 'restore.execute') + ->latest('id') + 
->first(); + + expect($restoreRun)->not->toBeNull() + ->and($operationRun)->not->toBeNull(); + + Bus::assertDispatched(ExecuteRestoreRunJob::class); +}); \ No newline at end of file diff --git a/apps/platform/tests/Feature/System/OpsControls/OperationalControlManagementTest.php b/apps/platform/tests/Feature/System/OpsControls/OperationalControlManagementTest.php new file mode 100644 index 00000000..26248450 --- /dev/null +++ b/apps/platform/tests/Feature/System/OpsControls/OperationalControlManagementTest.php @@ -0,0 +1,243 @@ +create([ + 'capabilities' => [ + PlatformCapabilities::ACCESS_SYSTEM_PANEL, + PlatformCapabilities::OPS_CONTROLS_MANAGE, + ], + 'is_active' => true, + ]); +} + +it('returns 403 for platform users missing the operational controls capability', function (): void { + $user = PlatformUser::factory()->create([ + 'capabilities' => [PlatformCapabilities::ACCESS_SYSTEM_PANEL], + 'is_active' => true, + ]); + + $this->actingAs($user, 'platform') + ->get(Controls::getUrl(panel: 'system')) + ->assertForbidden(); +}); + +it('renders compact card actions and only shows the action that matches the current control state', function (): void { + $user = makeControlsManager(); + $this->actingAs($user, 'platform'); + + $this->get(Controls::getUrl(panel: 'system')) + ->assertSuccessful() + ->assertSee("mountAction('pause_restore_execute')", escape: false) + ->assertDontSee('Findings lifecycle backfill') + ->assertDontSee("mountAction('pause_findings_lifecycle_backfill')", escape: false) + ->assertDontSee("mountAction('resume_findings_lifecycle_backfill')", escape: false) + ->assertDontSee("mountAction('view_history_findings_lifecycle_backfill')", escape: false) + ->assertDontSee('Pause Restore execution') + ->assertDontSee('Resume Restore execution'); + + OperationalControlActivation::factory()->forGlobalScope()->create([ + 'control_key' => 'restore.execute', + 'reason_text' => 'Paused for compact action rendering coverage.', + ]); + + 
$this->get(Controls::getUrl(panel: 'system')) + ->assertSuccessful() + ->assertSee("mountAction('resume_restore_execute')", escape: false) + ->assertDontSee("mountAction('pause_restore_execute')", escape: false) + ->assertDontSee('Findings lifecycle backfill'); +}); + +it('previews, pauses, updates, resumes, and exposes on-demand history for restore execution', function (): void { + $workspaceA = Workspace::factory()->create(['name' => 'Acme']); + $workspaceB = Workspace::factory()->create(['name' => 'Bravo']); + + Tenant::factory()->count(2)->create(['workspace_id' => (int) $workspaceA->getKey()]); + Tenant::factory()->count(1)->create(['workspace_id' => (int) $workspaceB->getKey()]); + + $user = makeControlsManager(); + $this->actingAs($user, 'platform'); + + $component = Livewire::test(Controls::class) + ->assertActionExists('pause_restore_execute', fn (Action $action): bool => $action->isConfirmationRequired()) + ->assertActionExists('resume_restore_execute', fn (Action $action): bool => $action->isConfirmationRequired()) + ->assertActionExists('view_history_restore_execute', fn (Action $action): bool => $action->getLabel() === 'View Restore execution history'); + + $preview = $component->instance()->scopeImpactPreview('restore.execute', 'global', null); + + expect($preview['workspace_count'])->toBe(2) + ->and($preview['tenant_count'])->toBe(3) + ->and($preview['summary'])->toContain('2 workspaces') + ->and($preview['summary'])->toContain('3 tenants'); + + $component + ->callAction('pause_restore_execute', data: [ + 'scope_type' => 'global', + 'reason_text' => 'Paused for incident review.', + 'expires_at' => now()->addDay()->toDateTimeString(), + ]) + ->assertNotified('Restore execution paused'); + + $activation = OperationalControlActivation::query() + ->forControl('restore.execute') + ->forGlobalScope() + ->first(); + + expect($activation)->not->toBeNull() + ->and($activation?->reason_text)->toBe('Paused for incident review.'); + + $summary = 
$component->instance()->controlSummary('restore.execute'); + + expect($summary['effective_state'])->toBe('paused') + ->and($summary['active_activations'])->toHaveCount(1) + ->and($summary['active_activations'][0]['owner_name'])->toBe($user->name); + + $component + ->callAction('pause_restore_execute', data: [ + 'scope_type' => 'global', + 'reason_text' => 'Updated incident review scope.', + 'expires_at' => now()->addDays(2)->toDateTimeString(), + ]) + ->assertNotified('Restore execution updated'); + + expect($activation?->fresh()?->reason_text)->toBe('Updated incident review scope.'); + + $component + ->callAction('resume_restore_execute', data: [ + 'scope_type' => 'global', + ]) + ->assertNotified('Restore execution resumed'); + + expect(OperationalControlActivation::query() + ->forControl('restore.execute') + ->forGlobalScope() + ->count())->toBe(0); + + $audits = AuditLog::query() + ->whereIn('action', [ + AuditActionId::OperationalControlPaused->value, + AuditActionId::OperationalControlUpdated->value, + AuditActionId::OperationalControlResumed->value, + ]) + ->where('metadata->control_key', 'restore.execute') + ->orderBy('id') + ->get(); + + expect($audits)->toHaveCount(3) + ->and($audits->pluck('workspace_id')->unique()->all())->toBe([null]) + ->and($audits->pluck('tenant_id')->unique()->all())->toBe([null]) + ->and($audits[0]->action)->toBe(AuditActionId::OperationalControlPaused->value) + ->and($audits[1]->action)->toBe(AuditActionId::OperationalControlUpdated->value) + ->and($audits[2]->action)->toBe(AuditActionId::OperationalControlResumed->value); + + $component + ->mountAction('view_history_restore_execute') + ->assertActionMounted('view_history_restore_execute'); +}); + +it('supports workspace-scoped pauses and removes expired conflicting activations before replacement writes', function (): void { + $workspaceA = Workspace::factory()->create(['name' => 'Acme']); + $workspaceB = Workspace::factory()->create(['name' => 'Bravo']); + + 
Tenant::factory()->count(2)->create(['workspace_id' => (int) $workspaceA->getKey()]); + Tenant::factory()->count(1)->create(['workspace_id' => (int) $workspaceB->getKey()]); + + $expired = OperationalControlActivation::factory()->workspaceScoped()->create([ + 'control_key' => 'restore.execute', + 'workspace_id' => (int) $workspaceA->getKey(), + 'reason_text' => 'Expired pause.', + 'expires_at' => now()->subHour(), + ]); + + $user = makeControlsManager(); + $this->actingAs($user, 'platform'); + + $component = Livewire::test(Controls::class) + ->assertActionExists('pause_restore_execute', fn (Action $action): bool => $action->isConfirmationRequired()) + ->assertActionExists('resume_restore_execute', fn (Action $action): bool => $action->isConfirmationRequired()); + + $preview = $component->instance()->scopeImpactPreview('restore.execute', 'workspace', (int) $workspaceA->getKey()); + + expect($preview['workspace_count'])->toBe(1) + ->and($preview['tenant_count'])->toBe(2) + ->and($preview['summary'])->toContain('Acme'); + + $component + ->callAction('pause_restore_execute', data: [ + 'scope_type' => 'workspace', + 'workspace_id' => (int) $workspaceA->getKey(), + 'reason_text' => 'Paused for workspace restore maintenance.', + 'expires_at' => now()->addDay()->toDateTimeString(), + ]) + ->assertNotified('Restore execution paused'); + + expect(OperationalControlActivation::query()->whereKey((int) $expired->getKey())->exists())->toBeFalse(); + + $activation = OperationalControlActivation::query() + ->forControl('restore.execute') + ->forWorkspaceScope((int) $workspaceA->getKey()) + ->notExpired() + ->first(); + + expect($activation)->not->toBeNull() + ->and((int) ($activation?->workspace_id ?? 
0))->toBe((int) $workspaceA->getKey()); + + $component + ->callAction('pause_restore_execute', data: [ + 'scope_type' => 'workspace', + 'workspace_id' => (int) $workspaceA->getKey(), + 'reason_text' => 'Updated workspace restore maintenance.', + 'expires_at' => now()->addDays(3)->toDateTimeString(), + ]) + ->assertNotified('Restore execution updated'); + + expect($activation?->fresh()?->reason_text)->toBe('Updated workspace restore maintenance.'); + + $component + ->callAction('resume_restore_execute', data: [ + 'scope_type' => 'workspace', + 'workspace_id' => (int) $workspaceA->getKey(), + ]) + ->assertNotified('Restore execution resumed'); + + expect(OperationalControlActivation::query() + ->forControl('restore.execute') + ->forWorkspaceScope((int) $workspaceA->getKey()) + ->count())->toBe(0); + + $audits = AuditLog::query() + ->whereIn('action', [ + AuditActionId::OperationalControlPaused->value, + AuditActionId::OperationalControlUpdated->value, + AuditActionId::OperationalControlResumed->value, + ]) + ->where('metadata->control_key', 'restore.execute') + ->orderBy('id') + ->get(); + + expect($audits)->toHaveCount(3) + ->and($audits[0]->workspace_id)->toBe((int) $workspaceA->getKey()) + ->and($audits[0]->tenant_id)->toBeNull(); +}); \ No newline at end of file diff --git a/apps/platform/tests/Feature/System/OpsRunbooks/OperationalControlRunbookGateTest.php b/apps/platform/tests/Feature/System/OpsRunbooks/OperationalControlRunbookGateTest.php new file mode 100644 index 00000000..3c61ef99 --- /dev/null +++ b/apps/platform/tests/Feature/System/OpsRunbooks/OperationalControlRunbookGateTest.php @@ -0,0 +1,89 @@ +create([ + 'tenant_id' => null, + 'external_id' => 'platform', + 'name' => 'Platform', + ]); +}); + +it('blocks all-tenant findings lifecycle runbooks when the control is globally paused', function (): void { + Queue::fake(); + + $platformTenant = Tenant::query()->where('external_id', 'platform')->firstOrFail(); + + $tenant = Tenant::factory()->create([ + 
'workspace_id' => (int) $platformTenant->workspace_id, + ]); + + Finding::factory()->create([ + 'tenant_id' => (int) $tenant->getKey(), + 'due_at' => null, + ]); + + OperationalControlActivation::factory()->forGlobalScope()->create([ + 'control_key' => 'findings.lifecycle.backfill', + 'reason_text' => 'Paused during incident response.', + ]); + + $user = PlatformUser::factory()->create([ + 'capabilities' => [ + PlatformCapabilities::ACCESS_SYSTEM_PANEL, + PlatformCapabilities::OPS_VIEW, + PlatformCapabilities::RUNBOOKS_VIEW, + PlatformCapabilities::RUNBOOKS_RUN, + PlatformCapabilities::RUNBOOKS_FINDINGS_LIFECYCLE_BACKFILL, + ], + 'is_active' => true, + ]); + + $this->actingAs($user, 'platform'); + + Livewire::test(Runbooks::class) + ->callAction('preflight', data: [ + 'scope_mode' => 'all_tenants', + ]) + ->assertSet('preflight.affected_count', 1) + ->callAction('run', data: [ + 'typed_confirmation' => 'BACKFILL', + 'reason_code' => 'DATA_REPAIR', + 'reason_text' => 'Attempt blocked by control', + ]) + ->assertNotified('Findings lifecycle backfill paused'); + + expect(OperationRun::query()->where('type', 'findings.lifecycle.backfill')->count())->toBe(0); + + $audit = AuditLog::query() + ->where('action', AuditActionId::OperationalControlExecutionBlocked->value) + ->latest('id') + ->first(); + + expect($audit)->not->toBeNull() + ->and($audit?->workspace_id)->toBeNull() + ->and($audit?->tenant_id)->toBeNull() + ->and($audit?->status)->toBe('blocked') + ->and($audit?->metadata['control_key'] ?? null)->toBe('findings.lifecycle.backfill') + ->and($audit?->metadata['requested_scope'] ?? 
null)->toBe('all_tenants'); +}); \ No newline at end of file diff --git a/apps/platform/tests/Unit/Support/OperationalControls/OperationalControlCatalogTest.php b/apps/platform/tests/Unit/Support/OperationalControls/OperationalControlCatalogTest.php new file mode 100644 index 00000000..2074291c --- /dev/null +++ b/apps/platform/tests/Unit/Support/OperationalControls/OperationalControlCatalogTest.php @@ -0,0 +1,26 @@ +keys())->toBe(['restore.execute']) + ->and($catalog->definition('restore.execute'))->toMatchArray([ + 'key' => 'restore.execute', + 'label' => 'Restore execution', + 'supported_scopes' => ['global', 'workspace'], + 'operation_types' => ['restore.execute'], + ]); +}); + +it('rejects removed or unknown control keys', function (): void { + $catalog = app(OperationalControlCatalog::class); + + expect(fn (): array => $catalog->definition('findings.lifecycle.backfill')) + ->toThrow(\InvalidArgumentException::class) + ->and(fn (): array => $catalog->definition('tenant.review.compose')) + ->toThrow(\InvalidArgumentException::class); +}); \ No newline at end of file diff --git a/apps/platform/tests/Unit/Support/OperationalControls/OperationalControlEvaluatorTest.php b/apps/platform/tests/Unit/Support/OperationalControls/OperationalControlEvaluatorTest.php new file mode 100644 index 00000000..32f1f634 --- /dev/null +++ b/apps/platform/tests/Unit/Support/OperationalControls/OperationalControlEvaluatorTest.php @@ -0,0 +1,45 @@ +create(); + + $decision = app(OperationalControlEvaluator::class)->evaluate('restore.execute', $workspace); + + expect($decision->isEnabled())->toBeTrue() + ->and($decision->effectiveState)->toBe('enabled') + ->and($decision->scopeLabel())->toBe('No active pause') + ->and($decision->matchedScopeType)->toBe('none') + ->and($decision->workspaceId)->toBeNull() + ->and($decision->reasonText)->toBeNull() + ->and($decision->sourceActivationId)->toBeNull(); +}); + +it('returns the matching workspace pause when present', function (): void { + 
$workspace = Workspace::factory()->create(); + + $activation = OperationalControlActivation::factory()->workspaceScoped()->create([ + 'control_key' => 'restore.execute', + 'workspace_id' => (int) $workspace->getKey(), + 'reason_text' => 'Restore execution is paused for this workspace.', + ]); + + $decision = app(OperationalControlEvaluator::class)->evaluate('restore.execute', $workspace); + + expect($decision->isPaused())->toBeTrue() + ->and($decision->effectiveState)->toBe('paused') + ->and($decision->hasWorkspaceScope())->toBeTrue() + ->and($decision->scopeLabel())->toBe('Workspace #'.(int) $workspace->getKey()) + ->and($decision->matchedScopeType)->toBe('workspace') + ->and($decision->workspaceId)->toBe((int) $workspace->getKey()) + ->and($decision->reasonText)->toBe('Restore execution is paused for this workspace.') + ->and($decision->sourceActivationId)->toBe((int) $activation->getKey()); +}); \ No newline at end of file diff --git a/apps/platform/tests/Unit/Support/OperationalControls/OperationalControlScopeResolutionTest.php b/apps/platform/tests/Unit/Support/OperationalControls/OperationalControlScopeResolutionTest.php new file mode 100644 index 00000000..f69096ec --- /dev/null +++ b/apps/platform/tests/Unit/Support/OperationalControls/OperationalControlScopeResolutionTest.php @@ -0,0 +1,57 @@ +create(); + + OperationalControlActivation::factory()->workspaceScoped()->create([ + 'control_key' => 'restore.execute', + 'workspace_id' => (int) $workspace->getKey(), + 'reason_text' => 'Workspace pause.', + ]); + + $globalActivation = OperationalControlActivation::factory()->forGlobalScope()->create([ + 'control_key' => 'restore.execute', + 'reason_text' => 'Global incident pause.', + ]); + + $decision = app(OperationalControlEvaluator::class)->evaluate('restore.execute', $workspace); + + expect($decision->isPaused())->toBeTrue() + ->and($decision->matchedScopeType)->toBe('global') + ->and($decision->workspaceId)->toBeNull() + 
->and($decision->reasonText)->toBe('Global incident pause.') + ->and($decision->sourceActivationId)->toBe((int) $globalActivation->getKey()); +}); + +it('ignores expired global activations when resolving the effective state', function (): void { + $workspace = Workspace::factory()->create(); + + OperationalControlActivation::factory()->forGlobalScope()->create([ + 'control_key' => 'restore.execute', + 'reason_text' => 'Expired global pause.', + 'expires_at' => now()->subMinute(), + ]); + + $workspaceActivation = OperationalControlActivation::factory()->workspaceScoped()->create([ + 'control_key' => 'restore.execute', + 'workspace_id' => (int) $workspace->getKey(), + 'reason_text' => 'Active workspace pause.', + ]); + + $decision = app(OperationalControlEvaluator::class)->evaluate('restore.execute', $workspace); + + expect($decision->isPaused())->toBeTrue() + ->and($decision->matchedScopeType)->toBe('workspace') + ->and($decision->workspaceId)->toBe((int) $workspace->getKey()) + ->and($decision->reasonText)->toBe('Active workspace pause.') + ->and($decision->sourceActivationId)->toBe((int) $workspaceActivation->getKey()); +}); \ No newline at end of file diff --git a/specs/242-operational-controls/checklists/requirements.md b/specs/242-operational-controls/checklists/requirements.md new file mode 100644 index 00000000..af534d3c --- /dev/null +++ b/specs/242-operational-controls/checklists/requirements.md @@ -0,0 +1,34 @@ +# Specification Quality Checklist: Operational Controls + +**Purpose**: Validate specification completeness and quality before proceeding to planning +**Created**: 2026-04-26 +**Feature**: [spec.md](../spec.md) + +## Content Quality + +- [x] No implementation details (languages, frameworks, APIs) +- [x] Focused on user value and business needs +- [x] Written for non-technical stakeholders +- [x] All mandatory sections completed + +## Requirement Completeness + +- [x] No [NEEDS CLARIFICATION] markers remain +- [x] Requirements are testable and 
unambiguous +- [x] Success criteria are measurable +- [x] Success criteria are technology-agnostic (no implementation details) +- [x] All acceptance scenarios are defined +- [x] Edge cases are identified +- [x] Scope is clearly bounded +- [x] Dependencies and assumptions identified + +## Feature Readiness + +- [x] All functional requirements have clear acceptance criteria +- [x] User scenarios cover primary flows +- [x] Feature meets measurable outcomes defined in Success Criteria +- [x] No implementation details leak into specification + +## Notes + +- Selection rationale and scope narrowing are documented directly in `spec.md` so planning can proceed without a separate clarification pass. \ No newline at end of file diff --git a/specs/242-operational-controls/contracts/operational-controls.contract.yaml b/specs/242-operational-controls/contracts/operational-controls.contract.yaml new file mode 100644 index 00000000..d840f5de --- /dev/null +++ b/specs/242-operational-controls/contracts/operational-controls.contract.yaml @@ -0,0 +1,153 @@ +version: 1 +kind: operational-controls + +catalog: + control_keys: + findings.lifecycle.backfill: + label: Findings lifecycle backfill + supported_scopes: + - global + - workspace + operation_types: + - findings.lifecycle.backfill + affected_surfaces: + - system.ops.runbooks + - tenant.findings.list + restore.execute: + label: Restore execution + supported_scopes: + - global + - workspace + operation_types: + - restore.execute + affected_surfaces: + - tenant.restore_runs.create + +activation_record: + table: operational_control_activations + fields: + id: integer + control_key: string + scope_type: + type: string + allowed: + - global + - workspace + workspace_id: + type: integer + nullable: true + reason_text: string + expires_at: + type: datetime + nullable: true + created_by_platform_user_id: integer + updated_by_platform_user_id: + type: integer + nullable: true + display_rules: + owner_actor: updated_by_platform_user_id when 
present, otherwise created_by_platform_user_id + invariants: + - one active row per control_key + scope_type + workspace_id + - workspace_id is null for global rows + - enabled state is derived from no active matching row + persistence_notes: + - enforce one active global row per control_key with a partial unique index where scope_type = global + - enforce one active workspace row per control_key + workspace_id with a partial unique index where scope_type = workspace + - delete expired conflicting rows before inserting a new activation for the same control/scope + - do not use this table as an archive of expired activations + +management_commands: + pause_control: + required_platform_capabilities: + - platform.access_system_panel + - platform.ops.controls.manage + safety_flow: + - configure scope and reason + - preview scope impact + - confirm mutation + input: + control_key: string + scope_type: global|workspace + workspace_id: integer|null + reason_text: string + expires_at: datetime|null + outcome: + activation_created_or_updated: true + audit_action: operational_control.paused|operational_control.updated + + resume_control: + required_platform_capabilities: + - platform.access_system_panel + - platform.ops.controls.manage + safety_flow: + - review current scope impact + - confirm mutation + input: + control_key: string + scope_type: global|workspace + workspace_id: integer|null + outcome: + activation_removed: true + audit_action: operational_control.resumed + +decision_output: + fields: + control_key: string + effective_state: enabled|paused + matched_scope_type: none|global|workspace + workspace_id: integer|null + reason_text: string|null + expires_at: datetime|null + source_activation_id: integer|null + guarantees: + - returned before any in-scope start is allowed to continue + - blocked decisions create no queued execution OperationRun, no queued execution RestoreRun, no queued job, and no provider-backed execution + - control activation governs new starts 
only and does not mutate previously accepted runs + +evaluation_rules: + precedence: + - active global activation wins over any workspace activation for the same control key + - workspace activation applies only when no active global activation matches + expiry: + - expired activations are ignored + disclosure: + - tenant/admin surfaces disclose control-state details only after membership and capability scope are resolved + +enforcement_targets: + - control_key: findings.lifecycle.backfill + target: + seam: service.runbooks.findings_lifecycle_backfill.start + callers: + - system.ops.runbooks + - tenant.findings.list + - console.tenantpilot.findings.backfill-lifecycle + - console.tenantpilot.run-deploy-runbooks + action: Start findings lifecycle backfill + operation_type: findings.lifecycle.backfill + - control_key: restore.execute + target: + surface: tenant.restore_runs.create + action: Execute restore + operation_type: restore.execute + +audit_expectations: + action_ids: + - operational_control.paused + - operational_control.updated + - operational_control.resumed + - operational_control.execution_blocked + required_metadata: + - control_key + - scope_type + - workspace_id + - reason_text + - expires_at + - actor_id + event_specific_metadata: + blocked_system_all_tenant_execution_events: + - requested_scope + ownership: + global_control_changes: platform-plane event with null workspace_id and null tenant_id + workspace_control_changes: workspace-scoped event + blocked_execution_events: scoped to the affected workspace and tenant when a tenant is in context + blocked_system_all_tenant_execution_events: platform-plane event with null workspace_id and null tenant_id plus requested_scope metadata \ No newline at end of file diff --git a/specs/242-operational-controls/data-model.md b/specs/242-operational-controls/data-model.md new file mode 100644 index 00000000..95aa0164 --- /dev/null +++ b/specs/242-operational-controls/data-model.md @@ -0,0 +1,164 @@ +# Data Model 
— Operational Controls + +**Spec**: [spec.md](spec.md) + +The first operational-controls slice adds one persisted runtime-safety record and two derived runtime concepts. It reuses existing execution and audit truth. + +## Existing Canonical Entities Reused + +### Workspace (`workspaces`) + +**Purpose**: Existing workspace boundary for targeted operational-control scope. + +**Key fields (existing)**: +- `id` +- `name` + +**Feature use**: +- Identifies the workspace targeted by a workspace-scoped control activation. +- Continues to anchor workspace isolation and audit scope. + +### Tenant (`tenants`) + +**Purpose**: Existing tenant boundary for the affected execution surfaces. + +**Key fields (existing)**: +- `id` +- `workspace_id` +- `name` +- `external_id` + +**Feature use**: +- Supplies workspace context for findings and restore execution checks. +- Does not own control records in this slice. + +### PlatformUser (`platform_users` or equivalent platform-authenticated user model) + +**Purpose**: Existing platform-plane actor for control management. + +**Feature use**: +- Owns pause/resume actions in the system plane. +- Supplies actor identity for audit and attribution on control changes. + +### OperationRun (`operation_runs`) + +**Purpose**: Existing canonical execution truth for in-scope starts when execution is allowed. + +**Key fields (existing)**: +- `id` +- `workspace_id` +- `tenant_id` +- `type` +- `status` +- `outcome` +- `context` + +**Feature use**: +- Remains the only execution truth for allowed starts. +- Must not be created when an in-scope start is blocked by an active control. +- Existing queued or historical `OperationRun` records remain unchanged when a later control activation blocks only new starts. + +### RestoreRun (`restore_runs`) + +**Purpose**: Existing restore execution truth for queued restore work. + +**Feature use**: +- No new queued execution `RestoreRun` is created by a blocked `restore.execute` start path. 
+- Continues to link to `OperationRun` only when execution is allowed. + +### AuditLog (`audit_logs`) + +**Purpose**: Existing audit truth for control changes and blocked execution evidence. + +**Feature use**: +- Records pause, update, resume, and blocked-execution events with stable action IDs. +- Avoids introducing a second historical record model for the first slice. + +## New Persisted Entity + +### OperationalControlActivation (`operational_control_activations`) + +**Purpose**: The active runtime-safety record that pauses one bounded control key for either all workspaces or one specific workspace. + +**Key fields**: +- `id` +- `control_key` — bounded to the first-slice catalog keys `findings.lifecycle.backfill` and `restore.execute` +- `scope_type` — `global` or `workspace` +- `workspace_id` — nullable; required when `scope_type = workspace` +- `reason_text` +- `expires_at` — nullable +- `created_by_platform_user_id` +- `updated_by_platform_user_id` — nullable +- `created_at` +- `updated_at` + +**Display rule**: +- `owner` on the controls surface resolves to `updated_by_platform_user_id` when present, otherwise `created_by_platform_user_id`. + +**Constraints**: +- At most one active row per `control_key + scope_type + workspace_id` combination. +- `workspace_id` must be null for `global` scope and present for `workspace` scope. +- Expired rows are ignored by the evaluator. +- PostgreSQL uniqueness is enforced with partial unique indexes: one active global row per `control_key` where `scope_type = global`, and one active workspace row per `control_key + workspace_id` where `scope_type = workspace`. +- Writes must delete expired conflicting rows before inserting a new activation so ignored expired rows do not block a new active pause. + +**Lifecycle**: +- Created when a control is paused. +- Updated when reason or expiry changes. +- Expired rows are deleted by the write path before a replacement activation for the same control/scope is inserted. 
+- Removed when the control is resumed. +- No explicit `enabled` rows are stored; enabled is derived from no active matching row. + +**Relationships**: +- Optionally `belongsTo Workspace` +- `createdBy` / `updatedBy` platform-user relations if the existing platform-user model supports them + +## Derived Runtime Entities + +### OperationalControlDefinition (derived, not persisted) + +**Purpose**: Catalog metadata for one controllable risky action. + +**Proposed runtime fields**: +- `key` +- `label` +- `supported_scopes` +- `operation_types` +- `affected_surfaces` +- `default_state` (derived `enabled`) + +**Feature use**: +- Drives the controls page and evaluator without turning the catalog into a user-managed taxonomy. + +### OperationalControlDecision (derived, not persisted) + +**Purpose**: The evaluated result returned to an affected surface or service start seam. + +**Proposed runtime fields**: +- `control_key` +- `effective_state` — `enabled` or `paused` +- `matched_scope_type` — `global`, `workspace`, or `none` +- `workspace_id` — nullable +- `reason_text` — nullable when enabled +- `expires_at` — nullable +- `source_activation_id` — nullable + +**Feature use**: +- Tells a surface whether execution may proceed. +- Supplies one shared reason for blocked-state messaging and audit context. + +## Evaluation Rules + +- The evaluator resolves workspace context before checking control scope. +- A matching global activation wins over a workspace activation in v1. Workspace-scoped activations only take effect when no active global activation exists for the same control. +- Expired activations do not block execution. +- Missing entitlement or missing capability is resolved before control-state disclosure on tenant/admin surfaces. + +## Data Ownership Notes + +- No tenant-owned control records are introduced in the first slice. +- Control activations are platform-operated runtime-safety truth. 
+- Global control changes audit as platform-plane events with null workspace/tenant ownership. +- Workspace-targeted changes and blocked execution events with concrete workspace/tenant context retain truthful workspace/tenant audit scope. +- Blocked system-plane all-tenant attempts audit as platform-plane events with null workspace/tenant ownership plus requested-scope metadata. +- Tenant/admin surfaces consume only the derived decision, never direct activation editing. \ No newline at end of file diff --git a/specs/242-operational-controls/plan.md b/specs/242-operational-controls/plan.md new file mode 100644 index 00000000..a5817f68 --- /dev/null +++ b/specs/242-operational-controls/plan.md @@ -0,0 +1,232 @@ +# Implementation Plan: Operational Controls + +**Branch**: `242-operational-controls` | **Date**: 2026-04-26 | **Spec**: `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/242-operational-controls/spec.md` +**Input**: Feature specification from `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/242-operational-controls/spec.md` + +**Note**: This template is filled in by the `/speckit.plan` command. See `.specify/scripts/` for helper scripts. + +## Summary + +- Replace the ad-hoc `allow_admin_maintenance_actions` environment gate with one product-owned operational-control path for the first-slice keys `findings.lifecycle.backfill` and `restore.execute`. +- Introduce one platform-operated activation record plus one shared evaluator that plugs into the existing system runbook, tenant findings-maintenance, and restore-execution start seams without becoming a generic experimentation platform. +- Reuse existing enforcement and UX seams - `UiEnforcement`, `ProviderOperationStartGate`, `OperationRunService`, `OperationUxPresenter`, `ProviderOperationStartResultPresenter`, `AuditRecorder`, `WorkspaceAuditLogger`, and `AuditActionId` - so the slice stays small, auditable, and server-side enforced. 
+ +## Technical Context + +**Language/Version**: PHP 8.4 (Laravel 12) +**Primary Dependencies**: Laravel 12 + Filament v5 + Livewire v4 + Pest; existing `UiEnforcement`, `ProviderOperationStartGate`, `OperationRunService`, `AuditRecorder`, `WorkspaceAuditLogger`, `AuditActionId`, `PlatformCapabilities` +**Storage**: PostgreSQL via existing product tables plus one new platform-operated `operational_control_activations` table; no tenant-owned control tables +**Testing**: Pest unit + feature tests only +**Validation Lanes**: fast-feedback, confidence +**Target Platform**: Sail-backed Laravel admin surfaces under `/admin/t/{tenant}` and system surfaces under `/system` +**Project Type**: web +**Performance Goals**: effective-control resolution remains DB-only and cheap at action start time, adds no outbound HTTP, and blocks in-scope starts before queue or provider execution begins +**Constraints**: no generic feature-flag platform, no new browser or heavy-governance suite, no break-glass bypass in v1, no parallel env gate for in-scope controls, global pauses win over workspace pauses, preserve 404 vs 403 semantics, keep provider-specific restore behavior out of platform-core control vocabulary +**Scale/Scope**: 2 control keys, 2 scope levels (global and workspace), 1 system management surface, and 3 concrete enforcement families across 4 touched UI surfaces + +## UI / Surface Guardrail Plan + +- **Guardrail scope**: changed surfaces +- **Native vs custom classification summary**: native Filament + shared start/result primitives +- **Shared-family relevance**: header actions, runbook launch actions, provider-backed start results, audit-backed control changes +- **State layers in scope**: page, detail, action/modal +- **Handling modes by drift class or surface**: review-mandatory +- **Repository-signal treatment**: review-mandatory +- **Special surface test profiles**: standard-native-filament, monitoring-state-page +- **Required tests or manual smoke**: functional-core, 
state-contract +- **Exception path and spread control**: none; v1 must not allow a second local runtime-control dialect +- **Active feature PR close-out entry**: Guardrail + +## Shared Pattern & System Fit + +- **Cross-cutting feature marker**: yes +- **Systems touched**: `App\Filament\System\Pages\Ops\Runbooks`, new system ops controls page, `App\Filament\Resources\FindingResource\Pages\ListFindings`, `App\Filament\Resources\RestoreRunResource`, `App\Support\Rbac\UiEnforcement`, `App\Services\Providers\ProviderOperationStartGate`, `App\Support\OpsUx\OperationUxPresenter`, `App\Support\OpsUx\ProviderOperationStartResultPresenter`, `App\Services\Audit\AuditRecorder`, `App\Services\Audit\WorkspaceAuditLogger`, `App\Support\Audit\AuditActionId` +- **Shared abstractions reused**: `UiEnforcement`, `ProviderOperationStartGate`, `ProviderOperationStartResultPresenter`, `OperationRunService`, `OperationUxPresenter`, `OpsUxBrowserEvents`, `OperationRunLinks`, `SystemOperationRunLinks`, `AuditRecorder`, `WorkspaceAuditLogger` +- **New abstraction introduced? why?**: one bounded `OperationalControlCatalog` plus one `OperationalControlEvaluator` are justified because the feature now has two real concrete control keys that must evaluate consistently across system-plane and tenant-plane start paths. No registry lattice, provider strategy system, or customer-facing flag DSL is introduced. +- **Why the existing abstraction was sufficient or insufficient**: existing abstractions already own auth, queue start UX, and audit writing; they are insufficient because none presently carries a reusable runtime-safety decision that can pause an action before it starts, and `WorkspaceAuditLogger` alone cannot truthfully own global platform-plane mutations. +- **Bounded deviation / spread control**: no deviation is allowed for in-scope controls; every affected surface must route through the shared evaluator rather than direct `config(...)` reads or page-local booleans. 
+ +## OperationRun UX Impact + +- **Touches OperationRun start/completion/link UX?**: yes +- **Central contract reused**: shared OperationRun start UX plus provider-start result helpers +- **Delegated UX behaviors**: queued toast, `Open operation` / `View run` links, run-enqueued browser event, dedupe-or-blocked messaging, and tenant/workspace-safe URL resolution remain on existing shared paths +- **Surface-owned behavior kept local**: initiation inputs, confirmation copy, and control-management forms only +- **Queued DB-notification policy**: unchanged explicit opt-in only +- **Terminal notification path**: existing central lifecycle mechanism for starts that are allowed +- **Exception path**: none + +## Provider Boundary & Portability Fit + +- **Shared provider/platform boundary touched?**: yes +- **Provider-owned seams**: provider-backed `restore.execute` dispatch, provider binding resolution, provider reason translation, existing restore safety and dry-run behavior +- **Platform-core seams**: operational-control vocabulary, scope/effective-state evaluation, control management surface, audit labels, blocked-state semantics +- **Neutral platform terms / contracts preserved**: operational control, activation, effective state, scope, reason, expiry, blocked execution +- **Retained provider-specific semantics and why**: `restore.execute` remains Microsoft-specific provider behavior in the current release because the control feature governs only start allowance, not provider execution semantics +- **Bounded extraction or follow-up path**: none in this slice; future catalog growth or provider-neutral expansions require a follow-up spec instead of implicit widening here + +## Constitution Check + +*GATE: Must pass before Phase 0 research. 
Re-check after Phase 1 design.* + +- Read/write separation: PASS - control management is an explicit platform-plane mutation with confirmation, audit, and focused tests; blocked execution paths remain non-mutating except for audit logging. +- RBAC-UX: PASS - platform management stays on `/system`; tenant/admin execution surfaces stay on `/admin/t/{tenant}`; cross-plane access remains 404; entitled-but-paused users get explicit control feedback while membership and capability failures keep 404/403 semantics. +- Workspace isolation / tenant isolation: PASS - workspace-targeted controls apply only within the chosen workspace; tenant surfaces still resolve tenant/workspace entitlement before control-state disclosure. +- Run observability / Ops-UX: PASS - allowed starts reuse existing `OperationRun` paths; blocked starts create no run and no new lifecycle dialect; later control activation does not retroactively mutate already accepted runs; shared start/result helpers remain authoritative. +- Shared path reuse / `XCUT-001`: PASS - the design extends existing UI enforcement, provider-start gating, audit logging, and operation start UX instead of introducing page-local flags. +- Provider boundary / `PROV-001`: PASS - control language stays provider-neutral while restore execution remains provider-owned. +- Proportionality / `PROP-001` and `ABSTR-001`: PASS - the only new structure is justified by two current-release controls and three existing enforcement surfaces; no experimentation platform or generalized remote-config system is planned. +- Persisted truth / `PERSIST-001`: PASS - active control activations represent independent runtime-safety truth with their own scope, reason, expiry, and audit obligations; convenience UI state remains derived. +- Behavioral state / `STATE-001`: PASS - paused/enabled semantics change whether execution may start and therefore justify one bounded effective-state model. 
+- Filament-native UI / `UI-FIL-001`: PASS - all touched surfaces remain native Filament pages/resources/actions; no custom UI framework is introduced. +- Global search rule: N/A - no new globally searchable resource is added. +- Panel/provider registration: PASS - Filament v5 remains on Livewire v4 and no new panel/provider registration is required; Laravel 12 provider registration stays in `bootstrap/providers.php` if any provider change becomes necessary. +- Test governance / `TEST-GOV-001`: PASS - proof stays in focused unit and feature lanes with no browser or heavy-governance expansion. + +## Test Governance Check + +- **Test purpose / classification by changed surface**: Unit for catalog/evaluator/scope precedence/expiry logic; Feature for system control management, runbook enforcement, findings header-action enforcement, restore-execution enforcement, audit logging, and `404`/`403` semantics +- **Affected validation lanes**: fast-feedback, confidence +- **Why this lane mix is the narrowest sufficient proof**: the business truth is server-side effective-state resolution plus enforcement at existing Filament and service seams. Browser tests would duplicate modal choreography without proving additional runtime safety truth. 
+- **Narrowest proving command(s)**: + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Unit/Support/OperationalControls/OperationalControlCatalogTest.php tests/Unit/Support/OperationalControls/OperationalControlEvaluatorTest.php tests/Unit/Support/OperationalControls/OperationalControlScopeResolutionTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/System/OpsControls/OperationalControlManagementTest.php tests/Feature/System/OpsRunbooks/OperationalControlRunbookGateTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/Findings/OperationalControlFindingsBackfillGateTest.php tests/Feature/Restore/OperationalControlRestoreExecutionGateTest.php tests/Feature/OperationalControls/OperationalControlAuthorizationSemanticsTest.php tests/Feature/OperationalControls/NoAdHocOperationalControlBypassTest.php` +- **Fixture / helper / factory / seed / context cost risks**: add one local factory for active control activations plus platform-user and workspace-scoped setup helpers reused only by operational-control tests; avoid new shared browser or provider-fixture defaults +- **Expensive defaults or shared helper growth introduced?**: no; control fixtures stay opt-in and local to the new test family +- **Heavy-family additions, promotions, or visibility changes**: none +- **Surface-class relief / special coverage rule**: standard-native-filament and monitoring-state-page relief are sufficient; assert disabled/blocked behavior and no side effects instead of browser-only choreography +- **Closing validation and reviewer handoff**: reviewers should rerun the targeted unit/feature commands, verify the env gate is removed from the in-scope findings action, confirm restore execution is blocked before queue/provider start, confirm 
blocked-execution audit entries exist for runbook/findings/restore paths, confirm global control changes audit without false workspace ownership, confirm `/system/ops/controls` returns 403 for system users missing `platform.ops.controls.manage`, and confirm non-members still receive 404 while missing capabilities still receive 403 with the existing capability-denied UX rather than paused-state helper text +- **Budget / baseline / trend follow-up**: low-to-moderate increase in focused unit/feature coverage only +- **Review-stop questions**: did implementation add a second control persistence shape, leave the env gate in place, introduce a local blocked-state dialect, or widen into browser/heavy-governance lanes? +- **Escalation path**: `reject-or-split` if the implementation widens into generic feature-flagging or customer-managed controls; `document-in-feature` for small shared-helper extensions that remain local to this slice +- **Active feature PR close-out entry**: Guardrail +- **Why no dedicated follow-up spec is needed**: the planned new model, evaluator, and tests stay local to the first-slice control family; recurring growth beyond the two bounded control keys would require its own follow-up spec + +## Project Structure + +### Documentation (this feature) + +```text +specs/242-operational-controls/ +├── plan.md +├── research.md +├── data-model.md +├── quickstart.md +├── checklists/ +│ └── requirements.md +├── contracts/ +│ └── operational-controls.contract.yaml +└── tasks.md +``` + +### Source Code (repository root) + +```text +apps/platform/ +├── app/ +│ ├── Filament/System/Pages/Ops/ +│ │ ├── Controls.php +│ │ └── Runbooks.php +│ ├── Filament/Resources/FindingResource/Pages/ListFindings.php +│ ├── Filament/Resources/RestoreRunResource.php +│ ├── Models/ +│ │ └── OperationalControlActivation.php +│ ├── Services/Audit/AuditRecorder.php +│ ├── Services/Audit/WorkspaceAuditLogger.php +│ ├── Services/Providers/ProviderOperationStartGate.php +│ ├── 
Support/Audit/AuditActionId.php +│ ├── Support/Auth/PlatformCapabilities.php +│ └── Support/OperationalControls/ +│ ├── OperationalControlCatalog.php +│ ├── OperationalControlDecision.php +│ └── OperationalControlEvaluator.php +├── database/ +│ ├── factories/ +│ │ └── OperationalControlActivationFactory.php +│ └── migrations/ +│ └── *_create_operational_control_activations_table.php +└── tests/ + ├── Feature/ + │ ├── Findings/OperationalControlFindingsBackfillGateTest.php + │ ├── OperationalControls/ + │ │ ├── NoAdHocOperationalControlBypassTest.php + │ │ └── OperationalControlAuthorizationSemanticsTest.php + │ ├── Restore/OperationalControlRestoreExecutionGateTest.php + │ ├── System/OpsControls/OperationalControlManagementTest.php + │ └── System/OpsRunbooks/OperationalControlRunbookGateTest.php + └── Unit/Support/OperationalControls/ + ├── OperationalControlCatalogTest.php + ├── OperationalControlEvaluatorTest.php + └── OperationalControlScopeResolutionTest.php +``` + +**Structure Decision**: Single Laravel web application. The feature adds one bounded platform-operated model and one small support namespace for operational-control evaluation, then plugs that into existing system and tenant Filament surfaces. + +## Complexity Tracking + +No unapproved constitution violations are required. The only new persistence and abstraction are the justified control-activation record plus evaluator/catalog pair described below. + +## Proportionality Review + +- **Current operator problem**: founders and platform operators need a safe runtime way to pause already-existing risky actions without editing environment variables or relying on inconsistent per-surface logic. +- **Existing structure is insufficient because**: `UiEnforcement` decides RBAC, `ProviderOperationStartGate` decides provider readiness, and env flags decide hidden page-local runtime behavior. None of those alone gives one auditable runtime-safety truth across both system and tenant surfaces. 
+- **Narrowest correct implementation**: persist only explicit active control activations, derive the enabled state from absence of an activation, evaluate one effective decision through a shared catalog/evaluator, and wire that into the three concrete existing start paths. +- **Ownership cost created**: one new table/model/factory, one small support namespace, one system page, new audit action IDs and capability constants, and focused unit/feature coverage. +- **Alternative intentionally rejected**: keep env/config flags, reuse workspace settings, or build a generalized feature-flag system. Env/config flags are invisible product truth, workspace settings do not cleanly represent one global control truth, and a generic flag platform is far too broad. +- **Release truth**: current-release truth + +## Phase 0 — Research (output: `research.md`) + +See: `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/242-operational-controls/research.md` + +Goals: +- Confirm the narrowest persistence shape for runtime-safety truth and explicitly reject env-only or workspace-settings-only alternatives. +- Confirm the smallest shared seam where control evaluation belongs for system runbooks, tenant findings lifecycle backfill, and provider-backed restore execution. +- Define v1 scoping, global-first precedence, expiry, and audit expectations without inventing a generic flag taxonomy. +- Document the v1 decision that break-glass and broad platform capabilities do not bypass an active operational control. 
+ +## Phase 1 — Design & Contracts (outputs: `data-model.md`, `contracts/`, `quickstart.md`) + +See: +- `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/242-operational-controls/data-model.md` +- `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/242-operational-controls/contracts/operational-controls.contract.yaml` +- `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/242-operational-controls/quickstart.md` + +Design focus: +- Add one platform-operated activation record that can pause a control globally or for one workspace, with optional expiry, auditable reason, global-first precedence, and partial unique indexes that enforce one active global row per control and one active workspace row per control/workspace pair; the write path deletes expired conflicting rows before inserting a new activation, and this table is not used as an archive. +- Add one new system ops controls page that lists the two bounded control keys, their effective state, scope, owner, expiry, change actions, and on-demand audit history links, and uses a staged scope-impact preview before control mutations are confirmed. +- Use `OperationalControlDecision` as the shared control-state presentation primitive for controls, runbooks, findings, and restore surfaces. +- Route `findings.lifecycle.backfill` through the new evaluator in both `ListFindings` and `Runbooks`, removing the existing env gate. +- Route `findings.lifecycle.backfill` through `FindingsLifecycleBackfillRunbookService::start()` so the system runbooks page, tenant findings page, CLI command, and deploy-hook command all honor the same control decision. +- Route `restore.execute` through the same evaluator before provider-backed or non-provider-backed queued restore execution is created. 
+- Add dedicated audit action IDs and a dedicated platform capability for control management, using `AuditRecorder` for global control changes and blocked system-plane all-tenant attempts, and `WorkspaceAuditLogger` for workspace/tenant-scoped changes and blocked-execution evidence with concrete scope. +- Keep blocked-state messaging on existing shared start/result helpers and avoid custom control-state UI frameworks. + +## Phase 1 — Agent Context Update + +After Phase 1 artifacts are generated, update Copilot context from the plan: + +- `/Users/ahmeddarrazi/Documents/projects/wt-plattform/.specify/scripts/bash/update-agent-context.sh copilot` + +## Phase 2 — Implementation Outline (tasks created in `/speckit.tasks`) + +- Add the `operational_control_activations` persistence, model, and local factory for active pause records. +- Introduce the bounded operational-controls support namespace (`OperationalControlCatalog`, `OperationalControlDecision`, `OperationalControlEvaluator`) and keep enabled-state derived from active rows. +- Add the dedicated controls-manage capability and its local grant path in the seeded platform operator setup. +- Add the system-plane controls page and wire it into the existing system ops navigation with staged preview-plus-confirm pause/resume actions, audit logging, and on-demand audit history links. +- Replace the findings env gate with evaluator-driven control checks on the tenant findings header action and the system runbooks start path. +- Integrate the same evaluator into restore execution before any queued execution `OperationRun`, queued execution `RestoreRun`, queue dispatch, or provider-backed execution starts. +- Add focused unit and feature tests, plus a guard test that blocks new ad-hoc runtime-control bypasses for in-scope controls and one proving path that activating a control does not rewrite previously accepted runs. + +## Constitution Check (Post-Design) + +Re-check target: PASS. 
The post-design shape must still use one bounded control catalog, one active-row persistence model, one evaluator, existing auth/start/audit helpers, and no second runtime-control dialect. + +## Implementation Close-out + +- Delivered the bounded operational-controls slice end-to-end: one `operational_control_activations` truth model, one catalog/evaluator/decision support path, a new `/system/ops/controls` management page, findings lifecycle enforcement through `FindingsLifecycleBackfillRunbookService::start()`, and restore execution blocking before any queued execution `OperationRun`, queued execution `RestoreRun`, job dispatch, or provider-backed start. +- Runtime cleanup landed with the in-scope findings env gate removed from `config/tenantpilot.php`, a source-scanning guard against ad-hoc bypasses, and workspace-isolation proof showing a workspace-scoped pause blocks only the targeted workspace while a second workspace remains unaffected. +- Validation passed on the narrow feature lane: `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Unit/Support/OperationalControls/OperationalControlCatalogTest.php tests/Unit/Support/OperationalControls/OperationalControlEvaluatorTest.php tests/Unit/Support/OperationalControls/OperationalControlScopeResolutionTest.php tests/Feature/Filament/Spec113/AdminFindingsNoMaintenanceActionsTest.php tests/Feature/System/OpsControls/OperationalControlManagementTest.php tests/Feature/System/OpsRunbooks/OperationalControlRunbookGateTest.php tests/Feature/Findings/OperationalControlFindingsBackfillGateTest.php tests/Feature/Restore/OperationalControlRestoreExecutionGateTest.php tests/Feature/OperationalControls/OperationalControlAuthorizationSemanticsTest.php tests/Feature/OperationalControls/NoAdHocOperationalControlBypassTest.php` with `20 passed (253 assertions)`. 
+- Formatting passed with `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail bin pint --dirty --format agent`. +- Manual smoke passed in the integrated browser: the staged pause/resume flow on `/system/ops/controls` for `Findings lifecycle backfill` rendered scope-impact previews, applied the global pause, and returned to `Enabled` inside the SC-001 budget after bringing the local database up to date. diff --git a/specs/242-operational-controls/quickstart.md b/specs/242-operational-controls/quickstart.md new file mode 100644 index 00000000..bcfbdd53 --- /dev/null +++ b/specs/242-operational-controls/quickstart.md @@ -0,0 +1,50 @@ +# Quickstart — Operational Controls + +## Prereqs + +- Docker running +- Laravel Sail dependencies installed +- A platform user able to access `/system` +- Existing workspace, tenant, findings, restore-run, and operation-run factories available for tests + +## Run locally + +- Start containers: `cd apps/platform && ./vendor/bin/sail up -d` +- Run migrations for the new activation table: `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan migrate --no-interaction` +- Refresh the seeded local platform operator after the new capability is added: `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan db:seed --class=PlatformUserSeeder --no-interaction` +- Run targeted tests after implementation: + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Unit/Support/OperationalControls/OperationalControlCatalogTest.php tests/Unit/Support/OperationalControls/OperationalControlEvaluatorTest.php tests/Unit/Support/OperationalControls/OperationalControlScopeResolutionTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact 
tests/Feature/Filament/Spec113/AdminFindingsNoMaintenanceActionsTest.php tests/Feature/System/OpsControls/OperationalControlManagementTest.php tests/Feature/System/OpsRunbooks/OperationalControlRunbookGateTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/Findings/OperationalControlFindingsBackfillGateTest.php tests/Feature/Restore/OperationalControlRestoreExecutionGateTest.php tests/Feature/OperationalControls/OperationalControlAuthorizationSemanticsTest.php tests/Feature/OperationalControls/NoAdHocOperationalControlBypassTest.php` + - Full narrow suite: `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Unit/Support/OperationalControls/OperationalControlCatalogTest.php tests/Unit/Support/OperationalControls/OperationalControlEvaluatorTest.php tests/Unit/Support/OperationalControls/OperationalControlScopeResolutionTest.php tests/Feature/Filament/Spec113/AdminFindingsNoMaintenanceActionsTest.php tests/Feature/System/OpsControls/OperationalControlManagementTest.php tests/Feature/System/OpsRunbooks/OperationalControlRunbookGateTest.php tests/Feature/Findings/OperationalControlFindingsBackfillGateTest.php tests/Feature/Restore/OperationalControlRestoreExecutionGateTest.php tests/Feature/OperationalControls/OperationalControlAuthorizationSemanticsTest.php tests/Feature/OperationalControls/NoAdHocOperationalControlBypassTest.php` +- Format after implementation: `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail bin pint --dirty --format agent` + +## Manual smoke after implementation + +1. Sign in to `/system` as a platform operator with `platform.access_system_panel` and the new operational-controls management capability. +2. 
Sign in as a system user without the operational-controls management capability and verify `/system/ops/controls` returns 403 with the existing capability-denied UX rather than paused-state helper text. +3. Signed back in as the platform operator from step 1, open `/system/ops/controls`, begin pausing `Findings lifecycle backfill` globally, verify the modal shows a scope-impact preview before confirmation, then confirm and verify the control card exposes on-demand change history or an audit link for that change. +4. Open `/system/ops/runbooks`, choose the all-tenants findings-lifecycle path, and verify the runbook path shows an explicit paused-state message and does not start a run. +5. Open `/admin/t/{tenant}/findings` as an entitled tenant user and verify `Backfill findings lifecycle` is still presented truthfully for entitled users but blocked with the same control reason. +6. Invoke `tenantpilot:findings:backfill-lifecycle --tenant={tenant_id}` and verify the shared findings lifecycle service blocks the start with the same control state. +7. Pause `Restore execution` for one workspace only, then verify an entitled tenant user in that workspace cannot start restore execution, no queued execution `RestoreRun` or `OperationRun` is created by the blocked start path, and a blocked-execution audit entry is recorded. +8. Verify an entitled tenant user in a different workspace remains unaffected for `Restore execution`. +9. Resume both controls and confirm the normal start paths return without a deploy or env edit. +10. Verify audit entries exist for global pause/resume, workspace-targeted pause/resume, and blocked execution on the runbook, findings, and restore paths; confirm the blocked all-tenants runbook attempt is recorded as a platform-plane event with requested-scope metadata. +11. Time one pause or resume flow on `/system/ops/controls` and confirm the staged preview-plus-confirm path completes in under 1 minute.
+ +## Notes + +- Filament v5 remains on Livewire v4.0+ in this repo; the slice stays on native Filament pages/resources/actions. +- No panel provider registration changes are planned; Laravel 12 provider registration remains in `bootstrap/providers.php` if any provider change becomes necessary. +- No global-search behavior changes are involved because the slice adds no new searchable resource. +- The state-changing control actions are destructive-like and must use `->requiresConfirmation()`. +- Global pauses win over workspace-specific pauses in v1; no narrower workspace record re-enables a globally paused control. +- No new frontend asset pipeline is expected; no new `filament:assets` deploy step is needed unless implementation adds registered assets later. + +## Implementation Close-out + +- Guardrail result: `tests/Feature/OperationalControls/NoAdHocOperationalControlBypassTest.php` passed after narrowing the forbidden config check to the retired `tenantpilot.allow_admin_maintenance_actions` path instead of unrelated `tenantpilot` reads. +- Latest targeted validation passed: `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Unit/Support/OperationalControls/OperationalControlCatalogTest.php tests/Unit/Support/OperationalControls/OperationalControlEvaluatorTest.php tests/Unit/Support/OperationalControls/OperationalControlScopeResolutionTest.php tests/Feature/Filament/Spec113/AdminFindingsNoMaintenanceActionsTest.php tests/Feature/System/OpsControls/OperationalControlManagementTest.php tests/Feature/System/OpsRunbooks/OperationalControlRunbookGateTest.php tests/Feature/Findings/OperationalControlFindingsBackfillGateTest.php tests/Feature/Restore/OperationalControlRestoreExecutionGateTest.php tests/Feature/OperationalControls/OperationalControlAuthorizationSemanticsTest.php tests/Feature/OperationalControls/NoAdHocOperationalControlBypassTest.php` with `20 passed (253 assertions)`. 
+- Shared-helper note: `OperationalControlDecision` now exposes workspace-aware presentation helpers, the findings path routes through `FindingsLifecycleBackfillRunbookService::start()`, and restore execution is blocked before any queued execution `OperationRun`, queued execution `RestoreRun`, queue dispatch, or provider call. +- Manual smoke status: passed in the integrated browser on `http://localhost/system/ops/controls` after seeding the local platform operator and running the pending operational-controls migration; the staged global pause and resume flow for `Findings lifecycle backfill` completed successfully within the SC-001 budget. \ No newline at end of file diff --git a/specs/242-operational-controls/research.md b/specs/242-operational-controls/research.md new file mode 100644 index 00000000..3b0c0eb3 --- /dev/null +++ b/specs/242-operational-controls/research.md @@ -0,0 +1,133 @@ +# Research — Operational Controls + +**Date**: 2026-04-26 +**Spec**: [spec.md](spec.md) + +This document captures design decisions and supporting rationale for the first operational-controls slice. All decisions are grounded in current repository truth and the TenantPilot Constitution. + +## Decision 1 — Persist only active pause records, derive the enabled state, and let global pauses win + +**Decision**: Store only explicit active control activations that pause a control. Do not persist `enabled` rows or a broader multi-state lifecycle. The effective `enabled` state is derived from the absence of an active matching activation, and a matching global pause wins over a narrower workspace pause in v1. + +**Rationale**: +- The operator problem is safe runtime pause control, not a new workflow state machine. +- Constitution `PERSIST-001` and `STATE-001` favor the smallest persisted truth that changes behavior. +- Deriving `enabled` avoids importing a second layer of default-state maintenance. 
+- Global-first precedence is the safest bounded rule because a platform-wide incident pause must not be narrowed by a workspace-specific row in this first slice. + +**Evidence**: +- The current code gap is an env-gated yes/no maintenance switch in `apps/platform/app/Filament/Resources/FindingResource/Pages/ListFindings.php`. +- The first slice only needs to answer one question at execution time: may this action start right now for this scope? +- The first slice does not support workspace-specific allow overrides, so no narrower row should reopen a globally paused control. + +**Alternatives considered**: +- Persist both `enabled` and `paused` rows. + - Rejected: unnecessary state duplication; absence of an active pause already means enabled. +- Add a larger status family such as draft, scheduled, paused, forced, emergency. + - Rejected: too broad for current-release truth. + +## Decision 2 — Use one platform-operated activation table instead of env flags or workspace settings + +**Decision**: Introduce one platform-operated `operational_control_activations` table that can represent either a global pause or a workspace-scoped pause. Do not split truth across env flags, platform config, and workspace settings. + +**Rationale**: +- The spec requires one auditable control truth across system and tenant surfaces. +- Existing workspace settings infrastructure is workspace-only and cannot represent one global platform-wide safety state cleanly. +- Env flags are invisible product truth and require deploy-time coordination. + +**Evidence**: +- Existing workspace settings writer only manages workspace-scoped settings in `apps/platform/app/Services/Settings/SettingsWriter.php`. +- The current env gate lives in `apps/platform/config/tenantpilot.php` and is consumed directly in `ListFindings`. + +**Alternatives considered**: +- Reuse workspace settings for workspace overrides and keep a global env flag. 
+ - Rejected: split truth, inconsistent audit semantics, and no single effective-state evaluator. +- Use env flags only. + - Rejected: not operator-visible or auditable in-product. + +## Decision 3 — Evaluate controls at the start seam, not only in UI visibility + +**Decision**: Integrate control evaluation at the concrete start seams that already own execution decisions: `FindingsLifecycleBackfillRunbookService::start()` for all findings lifecycle backfill callers, and queued restore execution before `OperationRun` or provider dispatch begins. + +**Rationale**: +- UI-only hiding would fail the safety requirement because direct requests or stale page state could still start execution. +- The repo already has clear start seams where action or service logic decides whether a run begins. +- This keeps blocked-state truth server-side and shared. + +**Evidence**: +- Findings lifecycle backfill starts in `apps/platform/app/Services/Runbooks/FindingsLifecycleBackfillRunbookService.php` and is called from the system runbooks page, tenant findings page, `tenantpilot:findings:backfill-lifecycle`, and `tenantpilot:run-deploy-runbooks`. +- Restore execution starts in `apps/platform/app/Filament/Resources/RestoreRunResource.php` and already routes provider-backed starts through `apps/platform/app/Services/Providers/ProviderOperationStartGate.php`. + +**Alternatives considered**: +- Hide or disable actions in UI only. + - Rejected: violates the server-side enforcement requirement. + +## Decision 4 — Add one system ops controls page instead of surface-local toggles + +**Decision**: Manage the first-slice controls from one dedicated system ops page under `/system/ops/controls`. Do not add per-page toggles or bury control changes inside each affected surface. The page shows effective-state summaries by default and exposes change history through on-demand audit links instead of creating a second history surface. 
+ +**Rationale**: +- Operators need one place to make the runtime-safety decision itself. +- Constitution `DECIDE-001` and the spec’s decision-role table require a primary decision surface for control management. +- A shared control center prevents drift between runbooks, findings, and restore surfaces. + +**Evidence**: +- The repo already groups ops surfaces under `apps/platform/app/Filament/System/Pages/Ops/`. +- Existing runbooks and run viewers are already system-plane ops surfaces, so a sibling controls page fits the current information architecture. + +**Alternatives considered**: +- Add a toggle to the runbooks page only. + - Rejected: restore execution is not owned by that page and the control decision would stay fragmented. + +## Decision 5 — Break-glass does not bypass operational controls in v1 + +**Decision**: Break-glass sessions do not automatically bypass active operational controls in the first slice. + +**Rationale**: +- Operational controls are introduced as runtime-safety truth, not as optional UI friction. +- An implicit bypass would make incident behavior ambiguous and weaken auditability. +- The first slice stays safer by forcing an explicit resume action before execution. + +**Evidence**: +- The system runbook page already has break-glass-aware reason requirements via `BreakGlassSession`, but operational controls are a distinct safety layer. + +**Alternatives considered**: +- Let break-glass ignore controls. + - Rejected: too risky for v1 and not required by current operator pain. + +## Decision 6 — Reuse existing audit and start-result helpers, but keep global audits platform-scoped + +**Decision**: Keep workspace-targeted changes and blocked execution evidence with concrete workspace or tenant context on `WorkspaceAuditLogger` plus `AuditActionId`, but record global control changes and blocked system-plane all-tenant attempts through `AuditRecorder` directly so they stay platform-plane events without false workspace ownership. 
Include requested-scope metadata on those platform-plane blocked attempts. Keep blocked/allowed execution messaging on the existing operation/provider start-result helpers. + +**Rationale**: +- Constitution `XCUT-001` requires reuse of existing shared interaction paths. +- The repo already has shared primitives for queued toasts, dedupe messaging, and audit summaries. +- This avoids a second language for blocked execution. +- `WorkspaceAuditLogger` requires a `Workspace`, while `AuditRecorder` already supports null workspace and null tenant for truthful system-plane events. + +**Evidence**: +- Audit logging lives in `apps/platform/app/Services/Audit/WorkspaceAuditLogger.php`. +- Global system-plane audit support lives in `apps/platform/app/Services/Audit/AuditRecorder.php`. +- Canonical audit IDs live in `apps/platform/app/Support/Audit/AuditActionId.php`. +- Provider-backed start messaging already routes through `ProviderOperationStartResultPresenter` and `OperationUxPresenter`. + +**Alternatives considered**: +- Emit page-local notifications and free-form audit action strings. + - Rejected: immediate drift risk and weaker reviewability. + +## Decision 7 — Proof stays in Unit + Feature lanes only + +**Decision**: Keep proof in focused unit and feature tests. Do not introduce browser tests or heavy-governance coverage for this first slice. + +**Rationale**: +- The business truth is effective-state evaluation, audit recording, and blocked/no-side-effect execution. +- Browser coverage would mostly duplicate existing Filament modal behavior. +- Constitution `TEST-GOV-001` requires the narrowest proving lane mix. + +**Evidence**: +- Existing system runbooks and restore features already have focused feature coverage patterns in the repo. +- The new logic is server-side and deterministic. + +**Alternatives considered**: +- Add browser smoke for pause/resume flows. + - Rejected: not needed to prove the core runtime-safety semantics of this slice. 
\ No newline at end of file diff --git a/specs/242-operational-controls/spec.md b/specs/242-operational-controls/spec.md new file mode 100644 index 00000000..ee2dc051 --- /dev/null +++ b/specs/242-operational-controls/spec.md @@ -0,0 +1,290 @@ +# Feature Specification: Operational Controls + +**Feature Branch**: `242-operational-controls` +**Created**: 2026-04-26 +**Status**: Draft +**Input**: User description: "Operational Controls & Feature Flags: create a narrow first slice that replaces ad-hoc environment-gated risky admin maintenance actions with a central audited operational control path. Reuse the existing system panel, platform capabilities, audit logging, and server-side action/service enforcement to let operators pause or disable selected high-risk features with explicit disabled-state messaging that is distinct from authorization failure, without turning this into a generic experimentation or entitlement platform." + +## Spec Candidate Check *(mandatory - SPEC-GATE-001)* + +- **Problem**: TenantPilot already has risky actions that can only be paused through local environment flags, deploy-time changes, or ad-hoc code decisions instead of one product-owned operational control contract. +- **Today's failure**: During an incident or rollout concern, operators cannot centrally pause in-scope risky flows such as findings lifecycle backfill or restore execution with consistent UX, auditable ownership, and server-side enforcement. The current `allow_admin_maintenance_actions` environment gate makes one tenant admin action appear or disappear outside the product, while similar runbook and provider-backed actions have no shared pause contract. +- **User-visible improvement**: Platform operators can pause selected high-risk actions from a system-plane control surface, and affected admin/operator surfaces show explicit paused-state messaging instead of disappearing silently, looking unauthorized, or requiring a deployment to change runtime behavior. 
+- **Smallest enterprise-capable version**: Introduce one bounded operational-control contract for two concrete first-slice controls - `findings.lifecycle.backfill` and `restore.execute` - with global and workspace-targeted activation, one system-plane management surface with on-demand audit history, and server-side enforcement on the existing runbook, findings-maintenance, and restore-execution start paths. +- **Explicit non-goals**: No A/B testing, no customer-managed feature flags, no generic remote-config platform, no entitlement/billing replacement, no tenant-scoped self-service flags, no broad maintenance-mode replacement for the whole app, and no speculative control catalog for every future feature. +- **Permanent complexity imported**: One operational-control catalog, one persisted control-activation record family with explicit scope and reason, one evaluation service at action/service boundaries, a small amount of shared paused-state copy/presentation, audit action IDs, and focused unit/feature/guard coverage. +- **Why now**: The repo already exposes the control gap in live code through `config('tenantpilot.allow_admin_maintenance_actions')`, while live pilots and founder-operated support increase the need for safe runtime pause controls before more onboarding, support, AI, and customer-facing workflows land. +- **Why not local**: A local config flag or page-specific guard cannot safely cover both system-plane runbooks and tenant-plane provider-backed execution, cannot produce one auditable truth, and teaches parallel runtime-control semantics across surfaces. +- **Approval class**: Core Enterprise +- **Red flags triggered**: New meta-infrastructure, foundation-sounding scope. Defense: the slice is intentionally limited to two real existing high-risk controls, one management surface, and one shared evaluator instead of a universal experimentation or entitlement platform. 
+- **Score**: Nutzen: 2 | Dringlichkeit: 2 | Scope: 2 | Komplexitaet: 1 | Produktnaehe: 2 | Wiederverwendung: 2 | **Gesamt: 11/12** +- **Decision**: approve + +## Spec Scope Fields *(mandatory)* + +- **Scope**: platform, workspace, tenant +- **Primary Routes**: + - New system-plane operational controls surface under `/system/ops/controls` + - Existing system runbook launcher at `/system/ops/runbooks` + - Existing tenant findings register at `/admin/t/{tenant}/findings` + - Existing restore execution start flow in the restore-run create surface under `/admin/t/{tenant}/restore-runs/create`; existing restore record-view actions remain unchanged and out of scope for this slice +- **Data Ownership**: + - Control definitions remain platform-owned catalog truth in code, limited to the first-slice keys `findings.lifecycle.backfill` and `restore.execute` + - Control activations are platform-operated runtime-safety records; workspace-targeted activations reference a workspace, while global activations apply to all workspaces without embedding tenant-owned data + - No tenant-owned control records are introduced in this slice; tenant/admin surfaces consume effective control decisions only + - Audit history stays on existing `AuditLog` truth with stable action IDs for control activation, update, removal, and blocked execution; global control changes are platform-plane audit events with no false workspace or tenant owner, workspace-targeted changes and blocked starts with concrete workspace/tenant context retain truthful workspace/tenant audit scope, and blocked system-plane attempts without a concrete workspace/tenant resolve to platform-plane audit events with requested-scope metadata +- **RBAC**: + - Management happens only in the platform `/system` plane and requires `PlatformCapabilities::ACCESS_SYSTEM_PANEL` plus a dedicated operational-controls management capability + - Existing tenant/admin capabilities remain authoritative for the underlying in-scope actions 
(`findings.lifecycle.backfill`, `restore.execute`) + - Non-members or non-entitled users still receive 404 on tenant/workspace boundaries; members lacking the underlying capability still receive 403 and continue to follow the existing surface-specific capability-denied UX with no paused-state helper text; entitled users blocked only by an active operational control receive explicit paused-state feedback that is distinct from authorization failure + +For canonical-view specs, the spec MUST define: + +- **Default filter behavior when tenant-context is active**: N/A - this slice does not add a new canonical collection route in `/admin`; it affects existing tenant and system execution surfaces +- **Explicit entitlement checks preventing cross-tenant leakage**: Control evaluation never weakens existing tenant/workspace membership checks. Tenant-plane surfaces resolve tenant entitlement first, then evaluate the effective control state only for already-entitled users. + +## Cross-Cutting / Shared Pattern Reuse *(mandatory when the feature touches notifications, status messaging, action links, header actions, dashboard signals/cards, alerts, navigation entry points, evidence/report viewers, or any other existing shared operator interaction family; otherwise write `N/A - no shared interaction family touched`)* + +- **Cross-cutting feature?**: yes +- **Interaction class(es)**: header actions, runbook launch actions, provider-backed start gating, status messaging, audit prose +- **Systems touched**: system runbooks, tenant findings maintenance action, `FindingsLifecycleBackfillRunbookService::start()` plus its CLI and deploy-hook callers, restore execution start path, existing audit logging, operation start UX, existing capability enforcement helpers +- **Existing pattern(s) to extend**: `UiEnforcement`, `ProviderOperationStartGate`, `ProviderOperationStartResultPresenter`, `OperationUxPresenter`, `AuditRecorder`, `WorkspaceAuditLogger`, and existing system/tenant 
operation-link helpers +- **Shared contract / presenter / builder / renderer to reuse**: one new operational-control evaluator is allowed, but it must sit beside existing capability and provider-start gates instead of creating new page-local flag logic. Existing audit and start-result presenters remain authoritative for labels, reasons, action/result messaging, and truthful system-plane versus workspace-plane audit ownership. +- **Why the existing shared path is sufficient or insufficient**: existing shared paths already solve authorization, audit recording, and start-result UX. They are insufficient because none of them currently carry one central runtime-safety decision that can pause an action consistently across tenant and system surfaces. +- **Allowed deviation and why**: none. The first slice must remove the ad-hoc environment flag for in-scope maintenance actions rather than adding another exception path. +- **Consistency impact**: control labels, paused-state wording, reason display, audit action IDs, and allow/block semantics must match across the controls page, runbooks page, findings list header action, restore execution start flow, and any related notifications. +- **Review focus**: reviewers must block new direct `config(...)` or env-based runtime gates for in-scope operational controls and verify that findings lifecycle backfill routes through `FindingsLifecycleBackfillRunbookService::start()` plus the shared evaluator for UI, CLI, and deploy-hook callers, while restore continues through its existing start seam and presenters. 
+ +## OperationRun UX Impact *(mandatory when the feature creates, queues, deduplicates, resumes, blocks, completes, or deep-links to an `OperationRun`; otherwise write `N/A - no OperationRun start or link semantics touched`)* + +- **Touches OperationRun start/completion/link UX?**: yes +- **Shared OperationRun UX contract/layer reused**: `OperationRunService`, `OperationUxPresenter`, `OpsUxBrowserEvents`, `ProviderOperationStartResultPresenter`, `OperationRunLinks`, and `SystemOperationRunLinks` +- **Delegated start/completion UX behaviors**: when an action is allowed, queued toast, `View run` link, run-enqueued browser event, dedupe-or-already-queued messaging, and tenant/workspace-safe URL resolution remain delegated to the existing shared paths. When a control blocks execution, the surface reuses the shared start-result or notification path for one explicit paused-state message and does not invent a second blocked-run dialect. +- **Local surface-owned behavior that remains**: initiation inputs, confirmation text, and scope selection remain local to the runbook page, findings list page, or restore workflow. Local code does not decide operational-control truth. +- **Queued DB-notification policy**: unchanged. This slice does not introduce new queued DB notifications for paused or allowed starts. +- **Terminal notification path**: unchanged central lifecycle mechanism for runs that do start. 
+- **Exception required?**: none + +## Provider Boundary / Platform Core Check *(mandatory when the feature changes shared provider/platform seams, identity scope, governed-subject taxonomy, compare strategy selection, provider connection descriptors, or operator vocabulary that may leak provider-specific semantics into platform-core truth; otherwise write `N/A - no shared provider/platform boundary touched`)* + +- **Shared provider/platform boundary touched?**: yes +- **Boundary classification**: mixed +- **Seams affected**: platform-core operational-control vocabulary, restore execution provider-start boundary, shared operator messaging for blocked execution +- **Neutral platform terms preserved or introduced**: operational control, effective state, paused, scope, reason, expiry, owner, override +- **Provider-specific semantics retained and why**: `restore.execute` remains a provider-owned operation and keeps its current Microsoft-only execution path and dry-run safeguards. The control system only governs whether that path may start; it does not rename or generalize restore semantics. +- **Why this does not deepen provider coupling accidentally**: the control catalog is platform-owned and names operation keys that already exist. Provider-specific behavior stays inside the existing restore-start path and provider registry. +- **Follow-up path**: none for the first slice; broader catalog growth remains a follow-up decision, not an implied obligation of this spec + +## UI / Surface Guardrail Impact *(mandatory when operator-facing surfaces are changed; otherwise write `N/A`)* + +| Surface / Change | Operator-facing surface change? | Native vs Custom | Shared-Family Relevance | State Layers Touched | Exception Needed? 
| Low-Impact / `N/A` Note | +|---|---|---|---|---|---|---| +| System ops controls surface | yes | Native Filament + shared primitives | status messaging, audit-backed actions, control state summary | page, card/action state, modal | no | New system-plane control center for a bounded first-slice catalog | +| System runbooks launcher | yes | Native Filament + shared runbook/start UX | run start messaging, confirmation flow, blocked-state messaging | page, action, preflight state | no | Existing page gains operational-control awareness only | +| Tenant findings list header action | yes | Native Filament + existing action-surface primitives | header actions, run start messaging | table, header action, modal | no | Existing maintenance action loses env-flag gating and becomes control-aware | +| Restore run create/start workflow | yes | Native Filament resource + shared provider start gate | provider-backed start result, disabled-state copy | form/wizard, create action, start-result state | no | Existing risky tenant workflow gains central pause semantics without new tenant-side control UI | + +## Decision-First Surface Role *(mandatory when operator-facing surfaces are changed)* + +| Surface | Decision Role | Human-in-the-loop Moment | Immediately Visible for First Decision | On-Demand Detail / Evidence | Why This Is Primary or Why Not | Workflow Alignment | Attention-load Reduction | +|---|---|---|---|---|---|---|---| +| System ops controls surface | Primary Decision Surface | Decide whether one risky feature should stay available, be paused, or be scoped down during an incident or rollout | control name, effective scope, paused/enabled state, reason, owner, expiry | change history, affected actions, audit links | Primary because this is the system-plane place where operators make the runtime-safety decision itself | Follows incident-control and rollout workflow, not feature storage structure | Replaces deploy-time or env-level toggling with one visible operational 
decision point | +| System runbooks launcher | Secondary Context Surface | Decide whether to preflight or start a runbook once the control state is already known | current control state, preflight, confirmation requirements, next safe action | existing run detail after start, control reason history | Secondary because the main decision here is execution of a specific runbook, not control management | Keeps runbook workflow intact while surfacing control truth inline | Avoids surprise 403s or silent disappearance when the runbook is paused | +| Tenant findings list header action | Secondary Context Surface | Decide whether to start tenant findings backfill | header action availability, paused-state message, tenant scope | run detail only after allowed start | Secondary because the list remains the primary findings workflow; runtime control is supporting context | Preserves list-first findings work while exposing truthful blocked state | Removes hidden env-driven behavior drift on one tenant surface | +| Restore run create/start workflow | Secondary Context Surface | Decide whether a restore may proceed now | effective control state, restore-specific next action, existing safety messaging | preview, diff, and run detail when allowed | Secondary because restore creation remains the main operator decision and control state is a gating constraint | Keeps restore workflow focused while making pause state explicit before execution | Prevents risky restore attempts from failing late or ambiguously | + +## UI/UX Surface Classification *(mandatory when operator-facing surfaces are changed)* + +| Surface | Action Surface Class | Surface Type | Likely Next Operator Action | Primary Inspect/Open Model | Row Click | Secondary Actions Placement | Destructive Actions Placement | Canonical Collection Route | Canonical Detail Route | Scope Signals | Canonical Noun | Critical Truth Visible by Default | Exception Type / Justification | 
+|---|---|---|---|---|---|---|---|---|---|---|---|---|---| +| System ops controls surface | Utility / System | Operational safety control center | Pause, resume, or scope a control | Explicit card action or modal from the page itself | forbidden | Secondary details live in card reveals or modal details only | State-changing control actions are confirmation-protected and stay on the control card/modal | `/system/ops/controls` | `/system/ops/controls` | Global, all-workspaces, or workspace-targeted scope | Operational controls / Operational control | Effective state, reason, scope, and expiry | none | +| System runbooks launcher | Monitoring / Queue / Workbench | System runbook launcher | Preflight or start a runbook | In-page action modal then `View run` after success | forbidden | Related navigation stays secondary in toast actions or page summaries | Dangerous execution remains in the existing `Run...` action with confirmation | `/system/ops/runbooks` | `/system/ops/runbooks` | Current control state and selected tenant/all-tenant scope | Runbooks / Runbook | Whether execution is allowed right now and why | none | +| Tenant findings list header action | List / Table / Bulk | List-first resource | Open findings or start lifecycle backfill | Existing table inspection remains primary; header action is explicit secondary execution | required | Secondary execution stays in the header only | Backfill remains confirmation-protected in the header action | `/admin/t/{tenant}/findings` | existing findings detail route | Tenant scope and effective control state for entitled users | Findings / Finding | Findings list truth plus explicit maintenance availability | none | +| Restore run create/start workflow | Wizard / Flow | Create and launch flow | Continue restore setup or stop because execution is paused | Existing create form/wizard remains primary | forbidden | Secondary navigation lives in helper links and post-start run links | Existing restore execution remains inside 
the create/start flow with its current safety steps | `/admin/t/{tenant}/restore-runs` | `/admin/t/{tenant}/restore-runs/{record}` | Tenant scope, preview/dry-run state, and effective control state | Restore runs / Restore run | Whether restore execution may proceed, with scope and reason | none | + +## Operator Surface Contract *(mandatory when operator-facing surfaces are changed)* + +| Surface | Primary Persona | Decision / Operator Action Supported | Surface Type | Primary Operator Question | Default-visible Information | Diagnostics-only Information | Status Dimensions Used | Mutation Scope | Primary Actions | Dangerous Actions | +|---|---|---|---|---|---|---|---|---|---|---| +| System ops controls surface | Platform operator / break-glass operator | Decide whether risky runtime actions remain enabled, paused globally, or paused for one workspace | Control center | Is this risky action allowed right now, for whom, and why? | control label, effective state, scope, reason, owner, expiry | audit history, exact affected surfaces, internal notes | runtime safety state, scope, expiry | TenantPilot only | Pause control, Resume control, Change scope | Pausing or resuming a control | +| System runbooks launcher | Platform operator | Decide whether to preflight/start the runbook or respect a pause control | Workbench | Can I run this runbook now? | current control state, runbook scope, preflight result, confirmation requirements | latest run detail and audit history after navigation | runtime safety state, execution readiness, preflight result | TenantPilot only when blocked; Microsoft tenant or tenant data changes only when allowed and executed | Preflight, Run | Run | +| Tenant findings list header action | Tenant manager / owner | Decide whether findings lifecycle backfill may start for the current tenant | List-first resource + secondary header action | Is lifecycle backfill available for this tenant right now? 
| explicit paused/enabled state for entitled users, tenant scope, action label | run detail only if execution is allowed and started | runtime safety state, execution readiness | TenantPilot only when blocked; tenant data mutation if execution is allowed | Backfill findings lifecycle | Backfill findings lifecycle | +| Restore run create/start workflow | Tenant manager / owner | Decide whether restore execution may proceed after existing safety checks | Guided creation flow | Can this restore execute now, or is the operation paused? | control state, restore scope, dry-run/preview state, next action | preview diff, post-start run detail, raw diagnostics after navigation | runtime safety state, lifecycle, restore safety/preflight state | TenantPilot only when blocked; Microsoft tenant when execution is allowed | Create restore run, Continue preview | Execute restore | + +## Proportionality Review *(mandatory when structural complexity is introduced)* + +- **New source of truth?**: yes +- **New persisted entity/table/artifact?**: yes +- **New abstraction?**: yes +- **New enum/state/reason family?**: yes, one bounded enabled/paused effective-state axis for the control contract +- **New cross-domain UI framework/taxonomy?**: no +- **Current operator problem**: Operators cannot safely pause already-existing risky actions without deploy-time flags, inconsistent UX, or page-local code branches. +- **Existing structure is insufficient because**: authorization and provider-start gates decide who may act, not whether the product should temporarily allow the action at all. The current env flag is invisible product truth and cannot cover system-plane plus tenant-plane paths consistently. +- **Narrowest correct implementation**: a code-owned two-control catalog plus persisted control activations, one evaluator, one management surface, and two concrete enforcement families (`findings.lifecycle.backfill`, `restore.execute`). 
+- **Ownership cost**: new runtime-safety records, audit action IDs, shared paused-state copy, evaluator tests, and guard coverage that blocks new ad-hoc runtime gates. +- **Alternative intentionally rejected**: keep using env flags, rely on full Laravel maintenance mode, or build a generic customer-facing feature-flag system. Env flags are too hidden, maintenance mode is too broad, and a generic flag platform is too large. +- **Release truth**: current-release truth + +### Compatibility posture + +This feature assumes a pre-production environment. + +Backward compatibility, legacy aliases, migration shims, historical fixtures, and compatibility-specific tests are out of scope unless explicitly required by this spec. + +Canonical replacement is preferred over preservation. + +## Testing / Lane / Runtime Impact *(mandatory for runtime behavior changes)* + +- **Test purpose / classification**: Unit, Feature +- **Validation lane(s)**: fast-feedback, confidence +- **Why this classification and these lanes are sufficient**: the feature introduces one shared evaluator plus a small number of concrete UI/service enforcement points. Unit tests prove effective-state resolution, scope precedence, expiry, and block reasons. Feature tests prove system-plane management, tenant-plane and system-plane blocked execution, audit logging, and unchanged 404/403 semantics without browser-specific behavior. +- **New or expanded test families**: focused operational-controls unit coverage, system-page management tests, findings-maintenance gate tests, restore-execution gate tests, and one guard test blocking new ad-hoc config gates for in-scope controls +- **Fixture / helper cost impact**: moderate. Tests reuse existing platform users, workspaces, tenants, OperationRun, restore-run, and findings fixtures. No new browser harness, provider emulator, or heavy governance suite is required. 
+- **Heavy-family visibility / justification**: none +- **Special surface test profile**: standard-native-filament, monitoring-state-page +- **Standard-native relief or required special coverage**: ordinary Filament feature coverage is sufficient for the controls page and the affected admin/system surfaces, plus explicit server-side assertions that blocked actions create no run or provider execution side effect, all-tenant blocked runbook attempts audit truthfully, and later control activation does not rewrite already accepted runs. +- **Reviewer handoff**: confirm that the env-gated findings action is now evaluator-driven, restore execution is blocked before queue/provider start, entitled-but-paused users see explicit operational-control messaging, non-entitled users still get 404 or 403 as appropriate, and audit entries record scope/reason/actor for control changes. +- **Budget / baseline / trend impact**: low-to-moderate increase in narrow unit and feature coverage only +- **Escalation needed**: none +- **Active feature PR close-out entry**: Guardrail +- **Planned validation commands**: + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Unit/Support/OperationalControls/OperationalControlCatalogTest.php tests/Unit/Support/OperationalControls/OperationalControlEvaluatorTest.php tests/Unit/Support/OperationalControls/OperationalControlScopeResolutionTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/System/OpsControls/OperationalControlManagementTest.php tests/Feature/System/OpsRunbooks/OperationalControlRunbookGateTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/Findings/OperationalControlFindingsBackfillGateTest.php tests/Feature/Restore/OperationalControlRestoreExecutionGateTest.php 
tests/Feature/OperationalControls/OperationalControlAuthorizationSemanticsTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/OperationalControls/NoAdHocOperationalControlBypassTest.php` + +## User Scenarios & Testing *(mandatory)* + +### User Story 1 - Pause A Risky Action Centrally (Priority: P1) + +As a platform operator, I can pause one risky control from the system plane so the affected runbook and tenant surfaces stop allowing that action without requiring a deployment. + +**Why this priority**: This is the operator-visible core of the feature and the main incident-response value. + +**Independent Test**: Activate the control for `findings.lifecycle.backfill`, open the system controls surface and the affected runbook/findings surfaces, and verify that the action is visibly paused with one explicit reason and no execution path. + +**Acceptance Scenarios**: + +1. **Given** a platform operator pauses `findings.lifecycle.backfill` globally, **When** an entitled operator opens `/system/ops/runbooks` or an entitled tenant user opens `/admin/t/{tenant}/findings`, **Then** the action remains visible in its normal place but is explicitly blocked with paused-state messaging rather than disappearing or looking unauthorized. +2. **Given** the same control is resumed, **When** the affected surfaces reload, **Then** the normal execution path returns without a deploy or config-file change. + +--- + +### User Story 2 - Block Execution Server-Side Without Masquerading As Auth (Priority: P1) + +As an entitled operator, I want an active control to stop execution before any queued run or provider-backed action starts, while still preserving normal 404 and 403 authorization semantics. + +**Why this priority**: The feature fails if it only hides UI or turns operational controls into fake authorization failures. 
+ +**Independent Test**: Activate controls for `findings.lifecycle.backfill` and `restore.execute`, attempt the affected actions through their normal pages, and assert that no `OperationRun` or provider-backed execution starts while entitlement and capability semantics remain unchanged. + +**Acceptance Scenarios**: + +1. **Given** an entitled tenant user has the underlying capability but `restore.execute` is paused for their workspace, **When** they attempt to start restore execution, **Then** the system returns explicit operational-control feedback, creates no new execution run, and makes no outbound provider call. +2. **Given** a user lacks workspace or tenant entitlement, **When** they attempt the same affected action, **Then** the system still responds as not found instead of revealing control-state details. +3. **Given** a user is entitled to the scope but lacks the underlying capability, **When** they attempt the affected action, **Then** the system still returns 403 rather than blaming the operational control. + +--- + +### User Story 3 - Scope A Pause To One Workspace (Priority: P2) + +As a platform operator, I can pause a risky control for one workspace without affecting unrelated workspaces, so incidents or staged rollouts stay bounded. + +**Why this priority**: Workspace scoping is the smallest enterprise-capable version beyond a purely global kill switch and makes the feature reusable for future rollout control. + +**Independent Test**: Create two workspaces, activate a workspace-scoped pause for one of them, and confirm that blocked behavior applies only to the targeted workspace while the other workspace continues normally. + +**Acceptance Scenarios**: + +1. **Given** `restore.execute` is paused for Workspace A only, **When** entitled users in Workspace A and Workspace B attempt restore execution, **Then** Workspace A is blocked with explicit paused-state messaging and Workspace B continues normally. +2. 
**Given** a workspace-scoped pause expires or is removed, **When** the targeted workspace retries the action, **Then** the action becomes available again without changing any unrelated workspace state. + +### Edge Cases + +- A workspace-scoped activation and a global activation may both exist for the same control; v1 precedence is global-first, and a matching global pause always wins. +- A control may expire while an operator is on the page; stale page state must not start a blocked action after expiry or removal. +- Break-glass platform access does not automatically bypass an operational control unless the spec explicitly authorizes that path in a later slice. +- An action may already be queued before a control is activated; the control governs new starts only and must not silently rewrite historical runs. +- Tenant/admin users who are not entitled to the workspace or tenant must not learn that a control exists for the hidden scope. +- The first slice must retire the in-scope env flag rather than leaving both the env gate and the control evaluator active in parallel. + +## Requirements *(mandatory)* + +**Constitution alignment (required):** This feature introduces no new Graph endpoint family, but it changes the start boundary for existing queued/provider-backed actions. For in-scope controls, the spec requires server-side enforcement before `findings.lifecycle.backfill` or `restore.execute` start, preserves existing confirmation/audit patterns, and keeps long-running work observable through the current `OperationRun` paths whenever execution is allowed. + +**Constitution alignment (PROP-001 / ABSTR-001 / PERSIST-001 / STATE-001 / BLOAT-001):** This feature introduces a new runtime-safety truth because the current product already needs it now: operators must pause risky actions without deploy-time env changes. The shape stays narrow: a two-key catalog, persisted activations, and one evaluator. 
It does not become a generalized experimentation, entitlement, or customer flag platform. + +**Constitution alignment (XCUT-001):** The slice is cross-cutting across header actions, runbook starts, provider-backed start gates, and audit messaging. It reuses `UiEnforcement`, `ProviderOperationStartGate`, existing OperationRun UX presenters, and `WorkspaceAuditLogger` rather than introducing local blocked-state dialects. + +**Constitution alignment (PROV-001):** `restore.execute` remains provider-owned, while operational-control vocabulary remains platform-core. The spec keeps provider specifics inside the existing restore path and uses neutral control language for scope, reason, and effective state. + +**Constitution alignment (TEST-GOV-001):** Proof stays in narrow unit and feature coverage. No browser or heavy-governance family is justified. Reviewer handoff must explicitly verify lane fit, unchanged 404/403 semantics, and no hidden provider-side effects on blocked paths. + +**Constitution alignment (OPS-UX):** For starts that are still allowed, the default Ops-UX 3-surface contract remains unchanged. `OperationRun.status` and `OperationRun.outcome` transitions remain service-owned. Paused starts create no `OperationRun`, no queued DB notification, and no new summary-count semantics. + +**Constitution alignment (OPS-UX-START-001):** The feature includes the `OperationRun UX Impact` section and reuses the shared start UX paths. Local surfaces remain responsible only for initiation inputs and page-local confirmation text. Blocked-state feedback is delivered through existing result/notification helpers instead of page-local composition. + +**Constitution alignment (RBAC-UX):** This slice spans the platform `/system` plane for control management and the tenant/admin `/admin` plane for affected execution surfaces. Cross-plane access remains 404. Non-members or non-entitled users receive 404. 
Members lacking the underlying capability receive 403 and keep the existing surface-specific capability-denied UX rather than paused-state helper text. Entitled users blocked only by an active control receive explicit operational-control feedback distinct from authorization failure. All mutating management actions require a staged safety flow with scope-impact preview, server-side capability checks, and confirmation. Break-glass does not bypass an active control in v1. + +**Constitution alignment (OPS-EX-AUTH-001):** Not applicable. + +**Constitution alignment (BADGE-001):** If paused/enabled state is rendered as a badge or status chip, it must use centralized badge rendering or one shared control-state presentation path, not page-local color decisions. + +**Constitution alignment (UI-FIL-001):** The controls page, runbooks page, findings page, and restore flow remain native Filament surfaces using existing action, section, infolist, and notification primitives. No local status card framework or custom blocked-state component library is introduced. + +**Constitution alignment (UI-NAMING-001):** Primary operator-facing labels use stable verbs and nouns: `Pause control`, `Resume control`, `Operational controls`, `Backfill findings lifecycle`, and `Restore execution`. `Restore execution` is the control and status label, while `Execute restore` is the gated action label. Route-entry labels such as `New restore run` and `Create restore run` refer only to the ungated draft/setup flow. The same vocabulary must be preserved across buttons, modal titles, notifications, and audit prose. + +**Constitution alignment (DECIDE-001):** The new system controls surface is the only new primary decision surface. Runbooks, findings, and restore remain secondary execution contexts that surface control truth inline instead of becoming separate troubleshooting flows. 
+ +**Constitution alignment (UI-CONST-001 / UI-SURF-001 / ACTSURF-001 / UI-HARD-001 / UI-EX-001 / UI-REVIEW-001 / HDR-001):** The controls page acts as a bounded control center with explicit action buttons and no competing inspect model. Existing runbooks, findings, and restore surfaces preserve their current primary inspect/open paths and action hierarchies while gaining one truthful blocked-state branch. + +**Constitution alignment (ACTSURF-001 - action hierarchy):** Control management actions remain separated from navigation and diagnostics. The controls page owns pause/resume management. Runbooks, findings, and restore keep execution actions local but do not own control truth. + +**Constitution alignment (OPSURF-001):** Default-visible content stays operator-first: whether the action is allowed, for which scope, and why. Raw internal control records or configuration internals stay secondary. Each affected execution action must state its mutation scope before execution when allowed, and blocked paths must state that no tenant/provider mutation will occur. + +**Constitution alignment (UI-SEM-001 / LAYER-001 / TEST-TRUTH-001):** The spec allows one new evaluator because existing direct domain-to-UI mapping cannot express runtime-safety state consistently across system and tenant surfaces. No second presenter taxonomy or explanation framework is added beyond the minimum blocked-state copy path. + +**Constitution alignment (Filament Action Surfaces):** The Action Surface Contract remains satisfied on all touched Filament surfaces. Each affected surface keeps one primary inspect/open model, no redundant `View` action is added, no empty action groups are introduced, and state-changing control actions require confirmation. + +**Constitution alignment (UX-001 - Layout & Information Architecture):** The new controls page uses native Filament sections/cards for control summaries and action modals. 
Existing runbooks, findings, and restore pages keep their established layout patterns. Any blocked-state summary remains within the current page structure and does not add ad-hoc full-page exception layouts. + +### Functional Requirements + +- **FR-001**: System MUST define a central operational-control catalog for the first-slice keys `findings.lifecycle.backfill` and `restore.execute`. +- **FR-002**: Platform operators MUST be able to activate, update, and remove a control for all workspaces or one specific workspace from the system plane, with a human-readable reason and optional expiry, through a staged safety flow that previews scope impact before confirmation. +- **FR-003**: System MUST enforce the effective control state server-side before any in-scope findings lifecycle backfill starts at `FindingsLifecycleBackfillRunbookService::start()`, before any affected maintenance action runs, and before any provider-backed restore execution begins. +- **FR-004**: System MUST show explicit paused-state feedback to entitled users on affected surfaces and MUST keep that feedback distinct from authorization failure. +- **FR-005**: System MUST preserve existing 404 vs 403 semantics for non-membership and missing capability checks even when a control is active, and capability-denied members MUST follow the existing surface-specific denial UX rather than operational-control helper text. +- **FR-006**: System MUST create no new `OperationRun`, no queued execution `RestoreRun`, no queued job, and no outbound provider execution when an in-scope action is blocked by an active control, and MUST NOT retroactively mutate already accepted or historical runs when a control is activated later. 
+- **FR-007**: System MUST audit every control activation, update, removal, and blocked execution decision with stable action IDs, actor, scope, reason, and timestamp; global control changes MUST be recorded as platform-plane audit events without assigning a false workspace or tenant owner, and blocked system-plane attempts without a concrete workspace or tenant MUST be recorded as platform-plane events with requested-scope metadata. +- **FR-008**: The findings-maintenance action currently gated by `config('tenantpilot.allow_admin_maintenance_actions')` MUST be migrated to the shared operational-control path and the local env gate retired for this in-scope behavior. +- **FR-009**: System MUST expose enough effective-state information on the controls page and affected execution surfaces to make the operator's next action clear without opening raw config or database detail. + +## UI Action Matrix *(mandatory when Filament is changed)* + +If this feature adds/modifies any Filament Resource / RelationManager / Page, fill out the matrix below. + +For each surface, list the exact action labels, whether they are destructive (confirmation? typed confirmation?), +RBAC gating (capability + enforcement helper), whether the mutation writes an audit log, and any exemption or exception used. + +| Surface | Location | Header Actions | Inspect Affordance (List/Table) | Row Actions (max 2 visible) | Bulk Actions (grouped) | Empty-State CTA(s) | View Header Actions | Create/Edit Save+Cancel | Audit log? 
| Notes / Exemptions | +|---|---|---|---|---|---|---|---|---|---|---| +| System ops controls surface | app/Filament/System/Pages/Ops/Controls.php | `Pause control`, `Resume control`, `Edit scope` (scope-impact preview + confirmation required for state changes) | Same-page control cards or modals; no row-click model | none beyond card actions | none | none in v1 | same-page actions only | `Review impact`, `Save changes`, `Cancel` in staged modal forms | yes | Management is platform-plane only; global changes audit as platform-plane events without workspace/tenant ownership; system users missing `platform.ops.controls.manage` receive 403 before page content renders | +| System runbooks launcher | app/Filament/System/Pages/Ops/Runbooks.php | `Preflight`, `Run...` | Same-page action modal and `View run` toast action | none | none | none | none | `Run`, `Cancel` in modal | yes | Existing start UX retained; blocked execution decisions are always audited | +| Findings list page | app/Filament/Resources/FindingResource/Pages/ListFindings.php | `Backfill findings lifecycle` (confirmation required) | Existing findings inspection model unchanged | unchanged | unchanged | unchanged | unchanged | N/A | yes | In-scope change replaces env gating with control evaluation and blocked execution audit | +| Restore run resource | app/Filament/Resources/RestoreRunResource.php | `New restore run` | Existing clickable-row/resource inspection model unchanged | existing row actions unchanged | existing grouped maintenance actions unchanged | existing empty-state CTA unchanged | existing view header unchanged | `Create restore run`, `Cancel` plus existing safety steps | yes | In-scope change gates only the `Execute restore` step inside the create flow; draft/setup labels and existing row/view actions remain unchanged | + +### Key Entities *(include if feature involves data)* + +- **Operational Control Definition**: The bounded catalog entry that identifies one controllable risky action, its 
canonical key, operator label, supported scopes, and default behavior. +- **Operational Control Activation**: The runtime safety record that pauses a control for either all workspaces or one specific workspace, including reason, optional expiry, and an owner display that resolves to the last mutating actor (`updated_by` when present, otherwise `created_by`). +- **Operational Control Decision**: The derived evaluation result returned to affected surfaces and service boundaries, including effective state, matched scope, reason, and whether execution may proceed. + +## Success Criteria *(mandatory)* + +### Measurable Outcomes + +- **SC-001**: In timed manual smoke, platform operators can pause or resume either first-slice control from the system plane in under 1 minute without editing environment variables, code, or database rows manually. +- **SC-002**: In blocked validation scenarios, 100% of attempted in-scope starts create no new execution run and no outbound provider-backed execution for the targeted scope. +- **SC-003**: In validation scenarios covering the affected surfaces, entitled users see explicit paused-state feedback on the first attempt in 100% of cases, while non-entitled users still receive 404 or 403 semantics as defined by RBAC rules. +- **SC-004**: Workspace-scoped activation affects only the targeted workspace in validation scenarios and leaves at least one non-targeted workspace unaffected for the same control. 
diff --git a/specs/242-operational-controls/tasks.md b/specs/242-operational-controls/tasks.md new file mode 100644 index 00000000..f50ba7a9 --- /dev/null +++ b/specs/242-operational-controls/tasks.md @@ -0,0 +1,187 @@ +--- + +description: "Task list for Operational Controls" + +--- + +# Tasks: Operational Controls + +**Input**: Design documents from `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/242-operational-controls/` +**Prerequisites**: `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/242-operational-controls/plan.md` (required), `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/242-operational-controls/spec.md` (required), `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/242-operational-controls/research.md`, `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/242-operational-controls/data-model.md`, `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/242-operational-controls/contracts/operational-controls.contract.yaml`, `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/242-operational-controls/quickstart.md` + +**Tests**: REQUIRED (Pest) for all runtime behavior changes in this slice. Keep proof in Unit + Feature lanes only. +**Operations**: Allowed starts must continue to reuse the shared OperationRun start UX. Blocked starts for `findings.lifecycle.backfill` and `restore.execute` must create no queued execution `OperationRun`, no queued execution `RestoreRun`, no queued job, and no provider-backed execution. Control activation governs new starts only and must not retroactively mutate already accepted runs. +**RBAC**: Management is platform-plane only under `/system`; affected execution surfaces stay on `/system` or `/admin/t/{tenant}`. Non-members remain `404`, members without the underlying capability remain `403`, and entitled users blocked only by an active operational control get explicit paused-state feedback. 
+**Organization**: Tasks are grouped by user story so each slice remains independently testable and bounded to the first-slice controls `findings.lifecycle.backfill` and `restore.execute`. + +## Phase 1: Setup (Shared Infrastructure) + +**Purpose**: Prepare the local implementation lane and feature-local file layout without widening scope. + +- [x] T001 Start the local Sail environment with `cd apps/platform && ./vendor/bin/sail up -d` (script: `apps/platform/vendor/bin/sail`) +- [x] T002 Create the bounded feature-local directories under `apps/platform/app/Support/OperationalControls/`, `apps/platform/tests/Unit/Support/OperationalControls/`, `apps/platform/tests/Feature/System/OpsControls/`, `apps/platform/tests/Feature/System/OpsRunbooks/`, `apps/platform/tests/Feature/Findings/`, `apps/platform/tests/Feature/Restore/`, and `apps/platform/tests/Feature/OperationalControls/` + +--- + +## Phase 2: Foundational (Blocking Prerequisites) + +**Purpose**: Add the single new persistence model, the bounded operational-controls support namespace, and the shared capability/audit plumbing that all stories depend on. + +**Checkpoint**: The repo has one `operational_control_activations` truth, one evaluator/catalog/decision support path, one platform capability, and shared audit IDs before any surface integration begins. 
+ +- [x] T003 Create the activation migration in `apps/platform/database/migrations/*_create_operational_control_activations_table.php`, including partial unique indexes for active global and workspace-scoped rows +- [x] T004 Create the activation model in `apps/platform/app/Models/OperationalControlActivation.php` +- [x] T005 [P] Create the activation factory in `apps/platform/database/factories/OperationalControlActivationFactory.php` +- [x] T006 [P] Create the bounded catalog for `findings.lifecycle.backfill` and `restore.execute` in `apps/platform/app/Support/OperationalControls/OperationalControlCatalog.php` +- [x] T007 [P] Create the derived decision object in `apps/platform/app/Support/OperationalControls/OperationalControlDecision.php` +- [x] T008 Create the shared evaluator for global and workspace-targeted activations, including foundational global-first precedence and expiry handling, in `apps/platform/app/Support/OperationalControls/OperationalControlEvaluator.php` +- [x] T009 [P] Add the platform management capability for the controls surface in `apps/platform/app/Support/Auth/PlatformCapabilities.php` and grant it in `apps/platform/database/seeders/PlatformUserSeeder.php` for the seeded local operator path +- [x] T010 [P] Add stable audit action IDs for pause, update, resume, and execution-blocked events in `apps/platform/app/Support/Audit/AuditActionId.php` +- [x] T011 Extend canonical audit plumbing for control scope, reason, expiry, requested-scope metadata, and blocked-execution evidence using `apps/platform/app/Services/Audit/AuditRecorder.php` for global control changes and blocked system all-tenant attempts and `apps/platform/app/Services/Audit/WorkspaceAuditLogger.php` for workspace/tenant-scoped changes and blocked execution +- [x] T012 [P] Add catalog and evaluator unit coverage in `apps/platform/tests/Unit/Support/OperationalControls/OperationalControlCatalogTest.php` and 
`apps/platform/tests/Unit/Support/OperationalControls/OperationalControlEvaluatorTest.php` +- [x] T013 [P] Add global-first precedence and expiry coverage in `apps/platform/tests/Unit/Support/OperationalControls/OperationalControlScopeResolutionTest.php` +- [x] T014 Run the foundational unit suite with `cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Unit/Support/OperationalControls/OperationalControlCatalogTest.php tests/Unit/Support/OperationalControls/OperationalControlEvaluatorTest.php tests/Unit/Support/OperationalControls/OperationalControlScopeResolutionTest.php` (tests: `apps/platform/tests/Unit/Support/OperationalControls/OperationalControlCatalogTest.php`) + +--- + +## Phase 3: User Story 1 — Pause A Risky Action Centrally (Priority: P1) 🎯 MVP + +**Goal**: Give platform operators one system-plane control center that can pause `findings.lifecycle.backfill` and make the runbook and findings surfaces show one explicit blocked-state path instead of env-driven disappearance. + +**Independent Test**: Pause `findings.lifecycle.backfill` globally from `/system/ops/controls`, then verify `/system/ops/runbooks` and `/admin/t/{tenant}/findings` both show the action truthfully for entitled users, block execution server-side, and show the same paused-state reason. 
+ +### Tests for User Story 1 + +- [x] T015 [P] [US1] Add system-plane management coverage for staged scope-impact preview, controls-page 403 access denial, pause, update, resume, confirmation, global-audit ownership, on-demand audit history links, and audit logging in `apps/platform/tests/Feature/System/OpsControls/OperationalControlManagementTest.php` +- [x] T016 [P] [US1] Add findings lifecycle gate coverage for blocked `findings.lifecycle.backfill` starts at the shared `FindingsLifecycleBackfillRunbookService::start()` seam, including system-runbook callers, mandatory blocked-execution audit evidence, and truthful platform-plane ownership for blocked all-tenant attempts in `apps/platform/tests/Feature/System/OpsRunbooks/OperationalControlRunbookGateTest.php` +- [x] T017 [P] [US1] Add findings header-action coverage for explicit blocked-state feedback, mandatory blocked-execution audit evidence, and no-start behavior in `apps/platform/tests/Feature/Findings/OperationalControlFindingsBackfillGateTest.php` + +### Implementation for User Story 1 + +- [x] T018 [US1] Create the system ops controls page with native Filament sections, staged scope-impact preview plus confirmation-protected pause/resume actions, effective-state summaries, owner display, on-demand audit history links, and the mutation-path cleanup that deletes expired conflicting activations before writing a new pause in `apps/platform/app/Filament/System/Pages/Ops/Controls.php` +- [x] T019 [US1] Integrate the evaluator into `apps/platform/app/Services/Runbooks/FindingsLifecycleBackfillRunbookService.php::start()` so the system runbooks page, CLI command, and deploy-hook command all honor the same blocked-start contract +- [x] T020 [US1] Replace the env-gated findings maintenance path by routing the tenant findings action through the shared findings lifecycle service/evaluator path, blocked-execution audit recording, and shared paused-state feedback in 
`apps/platform/app/Filament/Resources/FindingResource/Pages/ListFindings.php` +- [x] T021 [US1] Retire the in-scope findings env gate from `apps/platform/config/tenantpilot.php` +- [x] T022 [US1] Run the US1 suite with `cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/System/OpsControls/OperationalControlManagementTest.php tests/Feature/System/OpsRunbooks/OperationalControlRunbookGateTest.php tests/Feature/Findings/OperationalControlFindingsBackfillGateTest.php` (tests: `apps/platform/tests/Feature/System/OpsControls/OperationalControlManagementTest.php`) + +--- + +## Phase 4: User Story 2 — Block Execution Server-Side Without Masquerading As Auth (Priority: P1) + +**Goal**: Stop `restore.execute` before any queue, `OperationRun`, `RestoreRun`, or provider-backed execution starts while preserving normal `404` and `403` semantics. + +**Independent Test**: Activate `restore.execute`, attempt the restore execution flow as an entitled user, a non-member, and a member missing capability, and verify the outcomes are respectively paused-with-reason, `404`, and `403`, with no execution side effects on blocked paths. 
+ +### Tests for User Story 2 + +- [x] T023 [P] [US2] Add restore execution gate coverage for blocked execution starts, mandatory blocked-execution audit evidence, no queued-execution `RestoreRun`/`OperationRun` side effects, provider-start suppression, and proof that a later pause does not retroactively mutate already accepted restore runs in `apps/platform/tests/Feature/Restore/OperationalControlRestoreExecutionGateTest.php` +- [x] T024 [P] [US2] Add explicit authorization-semantics and break-glass non-bypass coverage for non-member `404`, missing-capability `403` with the existing capability-denied UX, and paused-state feedback only for entitled users blocked by an operational control in `apps/platform/tests/Feature/OperationalControls/OperationalControlAuthorizationSemanticsTest.php` + +### Implementation for User Story 2 + +- [x] T025 [US2] Integrate the evaluator into the restore execution start seam so blocked `restore.execute` decisions stop before any queued execution `RestoreRun`, queued execution `OperationRun`, queue dispatch, or provider call in `apps/platform/app/Filament/Resources/RestoreRunResource.php` +- [x] T026 [US2] Reuse the shared provider-start gate for operational-control blocked outcomes instead of introducing restore-local runtime flags in `apps/platform/app/Services/Providers/ProviderOperationStartGate.php` +- [x] T027 [US2] Run the US2 suite with `cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/Restore/OperationalControlRestoreExecutionGateTest.php tests/Feature/OperationalControls/OperationalControlAuthorizationSemanticsTest.php` (tests: `apps/platform/tests/Feature/Restore/OperationalControlRestoreExecutionGateTest.php`) + +--- + +## Phase 5: User Story 3 — Scope A Pause To One Workspace (Priority: P2) + +**Goal**: Allow platform operators to target one workspace without affecting unrelated workspaces, while keeping evaluator precedence and expiry behavior explicit and stable. 
+ +**Independent Test**: Pause `restore.execute` or `findings.lifecycle.backfill` for Workspace A only, then verify the targeted workspace is blocked with the correct reason and a second workspace remains unaffected until the activation is removed or expires. + +### Tests for User Story 3 + +- [x] T028 [P] [US3] Extend controls-page feature coverage for workspace-targeted pause, update, expiry, and resume flows in `apps/platform/tests/Feature/System/OpsControls/OperationalControlManagementTest.php` +- [x] T029 [P] [US3] Add workspace-isolation coverage for targeted findings and restore blocking in `apps/platform/tests/Feature/Findings/OperationalControlFindingsBackfillGateTest.php` and `apps/platform/tests/Feature/Restore/OperationalControlRestoreExecutionGateTest.php` + +### Implementation for User Story 3 + +- [x] T030 [US3] Add workspace-targeted scope selection, validation, and effective-state presentation to the controls page in `apps/platform/app/Filament/System/Pages/Ops/Controls.php` +- [x] T031 [US3] Extend `OperationalControlDecision` with workspace-targeted presentation details for matched scope, reason, expiry, and labels without redefining the foundational shared decision shape in `apps/platform/app/Support/OperationalControls/OperationalControlDecision.php` +- [x] T032 [US3] Ensure runbooks, findings, and restore all pass workspace context consistently into control evaluation in `apps/platform/app/Filament/System/Pages/Ops/Runbooks.php`, `apps/platform/app/Filament/Resources/FindingResource/Pages/ListFindings.php`, and `apps/platform/app/Filament/Resources/RestoreRunResource.php` +- [x] T033 [US3] Run the US3 suite with `cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/System/OpsControls/OperationalControlManagementTest.php tests/Feature/Findings/OperationalControlFindingsBackfillGateTest.php tests/Feature/Restore/OperationalControlRestoreExecutionGateTest.php` (tests: 
`apps/platform/tests/Feature/System/OpsControls/OperationalControlManagementTest.php`) + +--- + +## Phase 6: Polish & Cross-Cutting Concerns + +**Purpose**: Lock down the shared-path contract, update feature artifacts if implementation details move, and run the narrow validation suite. + +- [x] T034 [P] Add a CI guard against ad-hoc `config(...)` or page-local runtime-control bypasses for the in-scope controls in `apps/platform/tests/Feature/OperationalControls/NoAdHocOperationalControlBypassTest.php` +- [x] T035 Update feature artifact close-out notes and final validation commands in `specs/242-operational-controls/plan.md` and `specs/242-operational-controls/quickstart.md` +- [x] T036 Run formatting on touched platform files with `cd apps/platform && ./vendor/bin/sail bin pint --dirty --format agent` (target: `apps/platform/`) +- [x] T037 Run the full narrow validation suite from `specs/242-operational-controls/quickstart.md`, including the timed manual smoke for SC-001, across `apps/platform/tests/Unit/Support/OperationalControls/`, `apps/platform/tests/Feature/System/OpsControls/`, `apps/platform/tests/Feature/System/OpsRunbooks/`, `apps/platform/tests/Feature/Findings/`, `apps/platform/tests/Feature/Restore/`, and `apps/platform/tests/Feature/OperationalControls/` + +--- + +## Dependencies & Execution Order + +### User Story Dependency Graph + +```text +Phase 1 (Setup) + ↓ +Phase 2 (Foundation: activation persistence + evaluator + capability/audit plumbing) + ↓ +US1 (system controls page + findings lifecycle gating) ─┐ + ├─→ US3 (workspace-targeted scope + precedence/expiry) +US2 (restore execution gate + auth semantics) ──────────┘ +``` + +### Parallel Opportunities + +- Foundational tasks marked `[P]` can run in parallel once the migration/model direction is agreed. +- US1 tests for controls, runbooks, and findings can be authored in parallel because they target separate files. 
+- US2 restore and authorization-semantics tests can run in parallel while the restore seam work is isolated to `RestoreRunResource.php` and `ProviderOperationStartGate.php`. +- US3 extends existing US1/US2 tests and can parallelize the findings and restore workspace-isolation assertions while one person updates the controls page scope UI. + +--- + +## Parallel Example: User Story 1 + +```bash +Task: "Add system-plane management coverage in apps/platform/tests/Feature/System/OpsControls/OperationalControlManagementTest.php" +Task: "Add runbook gating coverage in apps/platform/tests/Feature/System/OpsRunbooks/OperationalControlRunbookGateTest.php" +Task: "Add findings header-action coverage in apps/platform/tests/Feature/Findings/OperationalControlFindingsBackfillGateTest.php" +Task: "Create the controls page in apps/platform/app/Filament/System/Pages/Ops/Controls.php" +``` + +--- + +## Parallel Example: User Story 2 + +```bash +Task: "Add restore execution gate coverage in apps/platform/tests/Feature/Restore/OperationalControlRestoreExecutionGateTest.php" +Task: "Add authorization-semantics coverage in apps/platform/tests/Feature/OperationalControls/OperationalControlAuthorizationSemanticsTest.php" +Task: "Integrate the evaluator into apps/platform/app/Filament/Resources/RestoreRunResource.php" +``` + +--- + +## Parallel Example: User Story 3 + +```bash +Task: "Extend controls-page workspace scope coverage in apps/platform/tests/Feature/System/OpsControls/OperationalControlManagementTest.php" +Task: "Add workspace-isolation assertions in apps/platform/tests/Feature/Findings/OperationalControlFindingsBackfillGateTest.php" +Task: "Add workspace-isolation assertions in apps/platform/tests/Feature/Restore/OperationalControlRestoreExecutionGateTest.php" +Task: "Add workspace-targeted scope selection in apps/platform/app/Filament/System/Pages/Ops/Controls.php" +``` + +--- + +## Implementation Strategy + +### MVP First (User Story 1) + +1. Complete Phase 1 and Phase 2. 
+2. Deliver the controls page plus `findings.lifecycle.backfill` integrations in US1. +3. Validate with the US1 feature suite before extending to the second control (`restore.execute`). + +### Incremental Delivery + +1. US1 delivers the new system-plane management surface and removes the ad-hoc findings env gate. +2. US2 wires the same evaluator into `restore.execute` and proves blocked execution is not treated as authorization failure. +3. US3 adds workspace-targeted scope, precedence, and expiry without widening the catalog or support namespace. +4. Phase 6 adds the bypass guard, feature-artifact close-out, formatting, and the narrow validation pass. \ No newline at end of file