From ff3392892baac02ed070c9c11f5f3b89e487989e Mon Sep 17 00:00:00 2001 From: ahmido Date: Mon, 27 Apr 2026 21:18:37 +0000 Subject: [PATCH] Merge 248-private-ai-policy-foundation into dev (#288) Automated PR: merge branch 248-private-ai-policy-foundation into dev (created by Copilot) Co-authored-by: Ahmed Darrazi Reviewed-on: https://git.cloudarix.de/ahmido/TenantAtlas/pulls/288 --- .../Pages/Settings/WorkspaceSettings.php | 76 + .../Filament/System/Pages/Ops/Controls.php | 69 +- .../app/Support/Ai/AiDataClassification.php | 27 + .../Ai/AiDecisionAuditMetadataFactory.php | 39 + .../app/Support/Ai/AiDecisionReasonCode.php | 18 + .../app/Support/Ai/AiExecutionDecision.php | 37 + .../app/Support/Ai/AiExecutionRequest.php | 28 + apps/platform/app/Support/Ai/AiPolicyMode.php | 43 + .../app/Support/Ai/AiProviderClass.php | 19 + .../app/Support/Ai/AiUseCaseCatalog.php | 126 + .../Ai/GovernedAiExecutionBoundary.php | 181 + .../app/Support/Audit/AuditActionId.php | 3 + .../OperationalControlCatalog.php | 7 + .../ContextualHelpResolver.php | 38 + .../app/Support/Settings/SettingsRegistry.php | 10 + .../SupportDiagnosticBundleBuilder.php | 34 + .../Guards/NoDirectAiProviderBypassTest.php | 49 + ...ionalControlAuthorizationSemanticsTest.php | 49 + .../WorkspaceAiPolicySettingsTest.php | 66 + .../WorkspaceSettingsAuditTest.php | 73 + .../WorkspaceSettingsManageTest.php | 18 + ...WorkspaceSettingsNonMemberNotFoundTest.php | 9 + .../WorkspaceSettingsViewOnlyTest.php | 25 +- .../AiExecutionOperationalControlTest.php | 109 + .../Support/Ai/AiApprovedSourceInputsTest.php | 54 + .../Ai/AiDecisionAuditMetadataTest.php | 67 + .../Unit/Support/Ai/AiUseCaseCatalogTest.php | 48 + .../Ai/GovernedAiExecutionBoundaryTest.php | 172 + .../OperationalControlCatalogTest.php | 8 +- docs/product/implementation-ledger.md | 273 ++ docs/product/principles.md | 46 + docs/product/spec-candidates.md | 4013 ++--------------- .../checklists/requirements.md | 57 + .../plan.md | 214 +- .../spec.md | 308 +- 
.../tasks.md | 195 +- .../checklists/requirements.md | 57 + .../private-ai-governance.openapi.yaml | 277 ++ .../data-model.md | 209 + .../248-private-ai-policy-foundation/plan.md | 282 ++ .../quickstart.md | 76 + .../research.md | 142 + .../248-private-ai-policy-foundation/spec.md | 348 ++ .../248-private-ai-policy-foundation/tasks.md | 194 + 44 files changed, 4357 insertions(+), 3836 deletions(-) create mode 100644 apps/platform/app/Support/Ai/AiDataClassification.php create mode 100644 apps/platform/app/Support/Ai/AiDecisionAuditMetadataFactory.php create mode 100644 apps/platform/app/Support/Ai/AiDecisionReasonCode.php create mode 100644 apps/platform/app/Support/Ai/AiExecutionDecision.php create mode 100644 apps/platform/app/Support/Ai/AiExecutionRequest.php create mode 100644 apps/platform/app/Support/Ai/AiPolicyMode.php create mode 100644 apps/platform/app/Support/Ai/AiProviderClass.php create mode 100644 apps/platform/app/Support/Ai/AiUseCaseCatalog.php create mode 100644 apps/platform/app/Support/Ai/GovernedAiExecutionBoundary.php create mode 100644 apps/platform/tests/Feature/Guards/NoDirectAiProviderBypassTest.php create mode 100644 apps/platform/tests/Feature/SettingsFoundation/WorkspaceAiPolicySettingsTest.php create mode 100644 apps/platform/tests/Feature/System/OpsControls/AiExecutionOperationalControlTest.php create mode 100644 apps/platform/tests/Unit/Support/Ai/AiApprovedSourceInputsTest.php create mode 100644 apps/platform/tests/Unit/Support/Ai/AiDecisionAuditMetadataTest.php create mode 100644 apps/platform/tests/Unit/Support/Ai/AiUseCaseCatalogTest.php create mode 100644 apps/platform/tests/Unit/Support/Ai/GovernedAiExecutionBoundaryTest.php create mode 100644 docs/product/implementation-ledger.md create mode 100644 specs/043-cross-tenant-compare-and-promotion/checklists/requirements.md create mode 100644 specs/248-private-ai-policy-foundation/checklists/requirements.md create mode 100644 
specs/248-private-ai-policy-foundation/contracts/private-ai-governance.openapi.yaml create mode 100644 specs/248-private-ai-policy-foundation/data-model.md create mode 100644 specs/248-private-ai-policy-foundation/plan.md create mode 100644 specs/248-private-ai-policy-foundation/quickstart.md create mode 100644 specs/248-private-ai-policy-foundation/research.md create mode 100644 specs/248-private-ai-policy-foundation/spec.md create mode 100644 specs/248-private-ai-policy-foundation/tasks.md diff --git a/apps/platform/app/Filament/Pages/Settings/WorkspaceSettings.php b/apps/platform/app/Filament/Pages/Settings/WorkspaceSettings.php index 8eec6584..062d2616 100644 --- a/apps/platform/app/Filament/Pages/Settings/WorkspaceSettings.php +++ b/apps/platform/app/Filament/Pages/Settings/WorkspaceSettings.php @@ -7,6 +7,8 @@ use App\Models\User; use App\Models\Workspace; use App\Models\WorkspaceSetting; +use App\Support\Ai\AiPolicyMode; +use App\Support\Ai\AiUseCaseCatalog; use App\Services\Auth\WorkspaceCapabilityResolver; use App\Services\Entitlements\WorkspaceEntitlementResolver; use App\Services\Entitlements\WorkspacePlanProfileCatalog; @@ -22,6 +24,7 @@ use BackedEnum; use Filament\Actions\Action; use Filament\Forms\Components\KeyValue; +use Filament\Forms\Components\Placeholder; use Filament\Forms\Components\Select; use Filament\Forms\Components\Textarea; use Filament\Forms\Components\TextInput; @@ -54,6 +57,7 @@ class WorkspaceSettings extends Page * @var array */ private const SETTING_FIELDS = [ + 'ai_policy_mode' => ['domain' => 'ai', 'key' => 'policy_mode', 'type' => 'string'], 'backup_retention_keep_last_default' => ['domain' => 'backup', 'key' => 'retention_keep_last_default', 'type' => 'int'], 'backup_retention_min_floor' => ['domain' => 'backup', 'key' => 'retention_min_floor', 'type' => 'int'], 'drift_severity_mapping' => ['domain' => 'drift', 'key' => 'severity_mapping', 'type' => 'json'], @@ -248,6 +252,27 @@ public function content(Schema $schema): Schema 
->disabled(fn (): bool => ! $this->currentUserCanManage()) ->helperText(fn (): string => $this->reviewPackGenerationReasonHelperText()), ]), + Section::make('Workspace AI policy') + ->description($this->sectionDescription('ai', 'Control whether the workspace disables AI entirely or allows approved internal-only drafts on private-only infrastructure.')) + ->schema([ + Select::make('ai_policy_mode') + ->label('AI posture') + ->options(AiPolicyMode::optionLabels()) + ->placeholder('Unset (uses default)') + ->native(false) + ->disabled(fn (): bool => ! $this->currentUserCanManage()) + ->helperText(fn (): string => $this->aiPolicyModeHelperText()) + ->hintAction($this->makeResetAction('ai_policy_mode')), + Placeholder::make('ai_approved_use_cases') + ->label('Approved use cases') + ->content(fn (): string => $this->aiApprovedUseCasesText()), + Placeholder::make('ai_allowed_provider_classes') + ->label('Allowed provider classes') + ->content(fn (): string => $this->aiAllowedProviderClassesText()), + Placeholder::make('ai_blocked_data_classifications') + ->label('Blocked data classifications') + ->content(fn (): string => $this->aiBlockedDataClassificationsText()), + ]), Section::make('Backup settings') ->description($this->sectionDescription('backup', 'Workspace defaults used when a schedule has no explicit value.')) ->schema([ @@ -793,6 +818,57 @@ private function reviewPackGenerationReasonHelperText(): string ); } + private function aiPolicyModeHelperText(): string + { + $resolved = $this->resolvedSettings['ai_policy_mode'] ?? null; + + if (! is_array($resolved)) { + return ''; + } + + $mode = AiPolicyMode::tryFrom((string) ($resolved['value'] ?? AiPolicyMode::Disabled->value)) + ?? AiPolicyMode::Disabled; + + $prefix = ! $this->hasWorkspaceOverride('ai_policy_mode') + ? sprintf('Effective posture: %s. Source: %s.', $mode->label(), $this->sourceLabel((string) ($resolved['source'] ?? 
'system_default'))) + : sprintf('Effective posture: %s.', $mode->label()); + + return sprintf('%s %s', $prefix, $mode->summary()); + } + + private function aiApprovedUseCasesText(): string + { + return implode('; ', app(AiUseCaseCatalog::class)->labels()).'.'; + } + + private function aiAllowedProviderClassesText(): string + { + $labels = app(AiUseCaseCatalog::class)->allowedProviderClassLabelsForMode($this->effectiveAiPolicyMode()); + + if ($labels === []) { + return 'No provider classes are allowed while AI is disabled.'; + } + + return implode(', ', $labels).'.'; + } + + private function aiBlockedDataClassificationsText(): string + { + return implode(', ', app(AiUseCaseCatalog::class)->blockedDataClassificationLabels()).'.'; + } + + private function effectiveAiPolicyMode(): AiPolicyMode + { + $resolved = $this->resolvedSettings['ai_policy_mode'] ?? null; + + if (! is_array($resolved)) { + return AiPolicyMode::Disabled; + } + + return AiPolicyMode::tryFrom((string) ($resolved['value'] ?? AiPolicyMode::Disabled->value)) + ?? 
AiPolicyMode::Disabled; + } + private function entitlementReasonHelperText(string $valueField, string $key): string { $decision = $this->entitlementDecision($key); diff --git a/apps/platform/app/Filament/System/Pages/Ops/Controls.php b/apps/platform/app/Filament/System/Pages/Ops/Controls.php index a7c02a0e..a5f39efa 100644 --- a/apps/platform/app/Filament/System/Pages/Ops/Controls.php +++ b/apps/platform/app/Filament/System/Pages/Ops/Controls.php @@ -80,6 +80,9 @@ protected function getHeaderActions(): array $this->pauseRestoreExecuteAction(), $this->resumeRestoreExecuteAction(), $this->viewHistoryRestoreExecuteAction(), + $this->pauseAiExecutionAction(), + $this->resumeAiExecutionAction(), + $this->viewHistoryAiExecutionAction(), ]; } @@ -199,6 +202,21 @@ public function viewHistoryRestoreExecuteAction(): Action return $this->historyActionFor('restore.execute'); } + public function pauseAiExecutionAction(): Action + { + return $this->pauseActionFor('ai.execution'); + } + + public function resumeAiExecutionAction(): Action + { + return $this->resumeActionFor('ai.execution'); + } + + public function viewHistoryAiExecutionAction(): Action + { + return $this->historyActionFor('ai.execution'); + } + private function pauseActionFor(string $controlKey): Action { $label = app(OperationalControlCatalog::class)->label($controlKey); @@ -213,7 +231,7 @@ private function pauseActionFor(string $controlKey): Action ->form($this->pauseFormSchema($controlKey)) ->action(function (array $data, AuditRecorder $auditRecorder, WorkspaceAuditLogger $workspaceAuditLogger) use ($controlKey, $label): void { $actor = $this->controlsActor(); - [$scopeType, $workspace, $reasonText, $expiresAt] = $this->normalizePauseInput($data); + [$scopeType, $workspace, $reasonText, $expiresAt] = $this->normalizePauseInput($controlKey, $data); $scopeQuery = $this->activationScopeQuery($controlKey, $scopeType, $workspace); @@ -273,7 +291,7 @@ private function resumeActionFor(string $controlKey): Action 
->form($this->resumeFormSchema($controlKey)) ->action(function (array $data, AuditRecorder $auditRecorder, WorkspaceAuditLogger $workspaceAuditLogger) use ($controlKey, $label): void { $actor = $this->controlsActor(); - [$scopeType, $workspace] = $this->normalizeResumeInput($data); + [$scopeType, $workspace] = $this->normalizeResumeInput($controlKey, $data); $activation = $this->activationScopeQuery($controlKey, $scopeType, $workspace) ->notExpired() @@ -331,11 +349,8 @@ private function pauseFormSchema(string $controlKey): array return [ Radio::make('scope_type') ->label('Scope') - ->options([ - 'global' => 'Global', - 'workspace' => 'One workspace', - ]) - ->default('global') + ->options($this->scopeOptions($controlKey)) + ->default($this->defaultScopeFor($controlKey)) ->live() ->required(), @@ -395,11 +410,8 @@ private function resumeFormSchema(string $controlKey): array return [ Radio::make('scope_type') ->label('Scope') - ->options([ - 'global' => 'Global', - 'workspace' => 'One workspace', - ]) - ->default('global') + ->options($this->scopeOptions($controlKey)) + ->default($this->defaultScopeFor($controlKey)) ->live() ->required(), @@ -456,9 +468,9 @@ private function controlsActor(): PlatformUser /** * @return array{0: string, 1: ?Workspace, 2: string, 3: ?CarbonInterface} */ - private function normalizePauseInput(array $data): array + private function normalizePauseInput(string $controlKey, array $data): array { - [$scopeType, $workspace] = $this->resolveScopeInput($data); + [$scopeType, $workspace] = $this->resolveScopeInput($controlKey, $data); $reasonText = trim((string) ($data['reason_text'] ?? 
'')); if ($reasonText === '') { @@ -485,19 +497,20 @@ private function normalizePauseInput(array $data): array /** * @return array{0: string, 1: ?Workspace} */ - private function normalizeResumeInput(array $data): array + private function normalizeResumeInput(string $controlKey, array $data): array { - return $this->resolveScopeInput($data); + return $this->resolveScopeInput($controlKey, $data); } /** * @return array{0: string, 1: ?Workspace} */ - private function resolveScopeInput(array $data): array + private function resolveScopeInput(string $controlKey, array $data): array { $scopeType = (string) ($data['scope_type'] ?? 'global'); + $supportedScopes = app(OperationalControlCatalog::class)->definition($controlKey)['supported_scopes'] ?? ['global']; - if (! in_array($scopeType, ['global', 'workspace'], true)) { + if (! in_array($scopeType, $supportedScopes, true)) { throw ValidationException::withMessages([ 'scope_type' => 'Invalid scope selected.', ]); @@ -526,6 +539,26 @@ private function resolveScopeInput(array $data): array return [$scopeType, $workspace]; } + /** + * @return array + */ + private function scopeOptions(string $controlKey): array + { + $supportedScopes = app(OperationalControlCatalog::class)->definition($controlKey)['supported_scopes']; + + return Arr::only([ + 'global' => 'Global', + 'workspace' => 'One workspace', + ], $supportedScopes); + } + + private function defaultScopeFor(string $controlKey): string + { + $supportedScopes = app(OperationalControlCatalog::class)->definition($controlKey)['supported_scopes']; + + return $supportedScopes[0] ?? 
'global'; + } + private function activationScopeQuery(string $controlKey, string $scopeType, ?Workspace $workspace): \Illuminate\Database\Eloquent\Builder { $query = OperationalControlActivation::query() diff --git a/apps/platform/app/Support/Ai/AiDataClassification.php b/apps/platform/app/Support/Ai/AiDataClassification.php new file mode 100644 index 00000000..f9ce3eef --- /dev/null +++ b/apps/platform/app/Support/Ai/AiDataClassification.php @@ -0,0 +1,27 @@ + 'Product knowledge', + self::OperationalMetadata => 'Operational metadata', + self::RedactedSupportSummary => 'Redacted support summary', + self::PersonalData => 'Personal data', + self::CustomerConfidential => 'Customer confidential', + self::RawProviderPayload => 'Raw provider payload', + }; + } +} \ No newline at end of file diff --git a/apps/platform/app/Support/Ai/AiDecisionAuditMetadataFactory.php b/apps/platform/app/Support/Ai/AiDecisionAuditMetadataFactory.php new file mode 100644 index 00000000..ba5ed5c3 --- /dev/null +++ b/apps/platform/app/Support/Ai/AiDecisionAuditMetadataFactory.php @@ -0,0 +1,39 @@ + + */ + public function make(AiExecutionRequest $request, AiExecutionDecision $decision): array + { + return array_filter([ + 'use_case_key' => $decision->useCaseKey, + 'decision_outcome' => $decision->outcome, + 'decision_reason' => $decision->reasonCode->value, + 'workspace_ai_policy_mode' => $decision->workspaceAiPolicyMode, + 'requested_provider_class' => $decision->requestedProviderClass, + 'data_classifications' => $decision->dataClassifications, + 'source_family' => $decision->sourceFamily, + 'workspace_id' => $request->workspace?->getKey(), + 'tenant_id' => $request->tenant?->getKey(), + 'context_fingerprint' => $this->normalizedFingerprint($request->contextFingerprint), + 'matched_operational_control_scope' => $decision->matchedOperationalControlScope, + ], static fn (mixed $value): bool => $value !== null); + } + + private function normalizedFingerprint(?string $contextFingerprint): 
?string + { + if (! is_string($contextFingerprint)) { + return null; + } + + $normalized = trim($contextFingerprint); + + return $normalized === '' ? null : $normalized; + } +} \ No newline at end of file diff --git a/apps/platform/app/Support/Ai/AiDecisionReasonCode.php b/apps/platform/app/Support/Ai/AiDecisionReasonCode.php new file mode 100644 index 00000000..68318359 --- /dev/null +++ b/apps/platform/app/Support/Ai/AiDecisionReasonCode.php @@ -0,0 +1,18 @@ + $dataClassifications + * @param array $auditMetadata + */ + public function __construct( + public string $outcome, + public AiDecisionReasonCode $reasonCode, + public string $workspaceAiPolicyMode, + public ?string $matchedOperationalControlScope, + public string $useCaseKey, + public string $requestedProviderClass, + public array $dataClassifications, + public string $sourceFamily, + public AuditActionId $auditAction, + public array $auditMetadata, + ) {} + + public function isAllowed(): bool + { + return $this->outcome === 'allowed'; + } + + public function isBlocked(): bool + { + return $this->outcome === 'blocked'; + } +} \ No newline at end of file diff --git a/apps/platform/app/Support/Ai/AiExecutionRequest.php b/apps/platform/app/Support/Ai/AiExecutionRequest.php new file mode 100644 index 00000000..747c6d74 --- /dev/null +++ b/apps/platform/app/Support/Ai/AiExecutionRequest.php @@ -0,0 +1,28 @@ + $dataClassifications + */ + public function __construct( + public ?Workspace $workspace, + public ?Tenant $tenant, + public User|PlatformUser|null $actor, + public string $useCaseKey, + public string $requestedProviderClass, + public array $dataClassifications, + public string $sourceFamily, + public ?string $callerSurface = null, + public ?string $contextFingerprint = null, + ) {} +} \ No newline at end of file diff --git a/apps/platform/app/Support/Ai/AiPolicyMode.php b/apps/platform/app/Support/Ai/AiPolicyMode.php new file mode 100644 index 00000000..8bebe561 --- /dev/null +++ 
b/apps/platform/app/Support/Ai/AiPolicyMode.php @@ -0,0 +1,43 @@ + 'Disabled', + self::PrivateOnly => 'Private only', + }; + } + + public function summary(): string + { + return match ($this) { + self::Disabled => 'No AI execution is allowed for this workspace.', + self::PrivateOnly => 'Only approved internal drafts may use private-only AI for approved use cases.', + }; + } + + /** + * @return array + */ + public static function optionLabels(): array + { + return array_reduce( + self::cases(), + static function (array $labels, self $mode): array { + $labels[$mode->value] = $mode->label(); + + return $labels; + }, + [], + ); + } +} \ No newline at end of file diff --git a/apps/platform/app/Support/Ai/AiProviderClass.php b/apps/platform/app/Support/Ai/AiProviderClass.php new file mode 100644 index 00000000..02ed4d9f --- /dev/null +++ b/apps/platform/app/Support/Ai/AiProviderClass.php @@ -0,0 +1,19 @@ + 'Local private', + self::ExternalPublic => 'External public', + }; + } +} \ No newline at end of file diff --git a/apps/platform/app/Support/Ai/AiUseCaseCatalog.php b/apps/platform/app/Support/Ai/AiUseCaseCatalog.php new file mode 100644 index 00000000..79537eb4 --- /dev/null +++ b/apps/platform/app/Support/Ai/AiUseCaseCatalog.php @@ -0,0 +1,126 @@ +, + * allowed_data_classifications: list, + * source_family: string, + * tenant_context_permitted: bool + * }> + */ + private const USE_CASES = [ + 'product_knowledge.answer_draft' => [ + 'key' => 'product_knowledge.answer_draft', + 'label' => 'Product knowledge answer draft', + 'future_consumer' => 'ContextualHelpResolver', + 'visibility' => 'internal_only_draft', + 'allowed_provider_classes' => [AiProviderClass::LocalPrivate->value], + 'allowed_data_classifications' => [ + AiDataClassification::ProductKnowledge->value, + AiDataClassification::OperationalMetadata->value, + ], + 'source_family' => 'product_knowledge', + 'tenant_context_permitted' => false, + ], + 'support_diagnostics.summary_draft' => [ + 'key' => 
'support_diagnostics.summary_draft', + 'label' => 'Support diagnostics summary draft', + 'future_consumer' => 'SupportDiagnosticBundleBuilder', + 'visibility' => 'internal_only_draft', + 'allowed_provider_classes' => [AiProviderClass::LocalPrivate->value], + 'allowed_data_classifications' => [AiDataClassification::RedactedSupportSummary->value], + 'source_family' => 'support_diagnostics', + 'tenant_context_permitted' => true, + ], + ]; + + /** + * @return list, + * allowed_data_classifications: list, + * source_family: string, + * tenant_context_permitted: bool + * }> + */ + public function all(): array + { + return array_values(self::USE_CASES); + } + + /** + * @return array{ + * key: string, + * label: string, + * future_consumer: string, + * visibility: string, + * allowed_provider_classes: list, + * allowed_data_classifications: list, + * source_family: string, + * tenant_context_permitted: bool + * }|null + */ + public function find(string $key): ?array + { + return self::USE_CASES[$key] ?? 
null; + } + + /** + * @return list + */ + public function labels(): array + { + return array_map( + static fn (array $definition): string => $definition['label'], + $this->all(), + ); + } + + /** + * @return list + */ + public function allowedProviderClassLabelsForMode(AiPolicyMode $mode): array + { + if ($mode === AiPolicyMode::Disabled) { + return []; + } + + $labels = []; + + foreach ($this->all() as $definition) { + foreach ($definition['allowed_provider_classes'] as $providerClass) { + $labels[$providerClass] = AiProviderClass::from($providerClass)->label(); + } + } + + return array_values($labels); + } + + /** + * @return list + */ + public function blockedDataClassificationLabels(): array + { + return array_map( + static fn (AiDataClassification $classification): string => $classification->label(), + [ + AiDataClassification::PersonalData, + AiDataClassification::CustomerConfidential, + AiDataClassification::RawProviderPayload, + ], + ); + } +} \ No newline at end of file diff --git a/apps/platform/app/Support/Ai/GovernedAiExecutionBoundary.php b/apps/platform/app/Support/Ai/GovernedAiExecutionBoundary.php new file mode 100644 index 00000000..81485956 --- /dev/null +++ b/apps/platform/app/Support/Ai/GovernedAiExecutionBoundary.php @@ -0,0 +1,181 @@ +decisionFor($request); + $metadata = $this->auditMetadataFactory->make($request, $decision); + + $decision = new AiExecutionDecision( + outcome: $decision->outcome, + reasonCode: $decision->reasonCode, + workspaceAiPolicyMode: $decision->workspaceAiPolicyMode, + matchedOperationalControlScope: $decision->matchedOperationalControlScope, + useCaseKey: $decision->useCaseKey, + requestedProviderClass: $decision->requestedProviderClass, + dataClassifications: $decision->dataClassifications, + sourceFamily: $decision->sourceFamily, + auditAction: $decision->auditAction, + auditMetadata: $metadata, + ); + + if ($request->workspace !== null) { + $definition = $this->useCaseCatalog->find($request->useCaseKey); + + 
$this->workspaceAuditLogger->log( + workspace: $request->workspace, + action: $decision->auditAction, + context: ['metadata' => $decision->auditMetadata], + actor: $request->actor, + status: $decision->isAllowed() ? 'success' : 'blocked', + resourceType: 'ai_use_case', + resourceId: $request->useCaseKey, + targetLabel: $definition['label'] ?? $request->useCaseKey, + summary: 'AI execution decision evaluated', + tenant: $request->tenant, + ); + } + + return $decision; + } + + private function decisionFor(AiExecutionRequest $request): AiExecutionDecision + { + if ($request->workspace === null) { + return $this->blockedDecision( + request: $request, + reasonCode: AiDecisionReasonCode::MissingWorkspaceContext, + workspaceAiPolicyMode: AiPolicyMode::Disabled->value, + ); + } + + if ($request->tenant !== null && (int) $request->tenant->workspace_id !== (int) $request->workspace->getKey()) { + return $this->blockedDecision( + request: $request, + reasonCode: AiDecisionReasonCode::TenantOutsideWorkspace, + workspaceAiPolicyMode: AiPolicyMode::Disabled->value, + ); + } + + $controlDecision = $this->operationalControls->evaluate('ai.execution', $request->workspace); + + if ($controlDecision->isPaused()) { + return $this->blockedDecision( + request: $request, + reasonCode: AiDecisionReasonCode::OperationalControlPaused, + workspaceAiPolicyMode: $this->resolvedPolicyMode($request), + matchedOperationalControlScope: $controlDecision->matchedScopeType, + ); + } + + $policyMode = $this->resolvedPolicyMode($request); + + if ($policyMode === AiPolicyMode::Disabled->value) { + return $this->blockedDecision( + request: $request, + reasonCode: AiDecisionReasonCode::WorkspacePolicyDisabled, + workspaceAiPolicyMode: $policyMode, + ); + } + + $definition = $this->useCaseCatalog->find($request->useCaseKey); + + if ($definition === null) { + return $this->blockedDecision( + request: $request, + reasonCode: AiDecisionReasonCode::UnregisteredUseCase, + workspaceAiPolicyMode: $policyMode, + 
); + } + + if ($definition['source_family'] !== $request->sourceFamily) { + return $this->blockedDecision( + request: $request, + reasonCode: AiDecisionReasonCode::SourceFamilyMismatch, + workspaceAiPolicyMode: $policyMode, + ); + } + + if (! in_array($request->requestedProviderClass, $definition['allowed_provider_classes'], true)) { + return $this->blockedDecision( + request: $request, + reasonCode: AiDecisionReasonCode::ProviderClassBlocked, + workspaceAiPolicyMode: $policyMode, + ); + } + + foreach ($request->dataClassifications as $classification) { + if (! in_array($classification, $definition['allowed_data_classifications'], true)) { + return $this->blockedDecision( + request: $request, + reasonCode: AiDecisionReasonCode::DataClassificationBlocked, + workspaceAiPolicyMode: $policyMode, + ); + } + } + + return new AiExecutionDecision( + outcome: 'allowed', + reasonCode: AiDecisionReasonCode::Allowed, + workspaceAiPolicyMode: $policyMode, + matchedOperationalControlScope: null, + useCaseKey: $request->useCaseKey, + requestedProviderClass: $request->requestedProviderClass, + dataClassifications: $request->dataClassifications, + sourceFamily: $request->sourceFamily, + auditAction: AuditActionId::AiExecutionDecisionEvaluated, + auditMetadata: [], + ); + } + + private function resolvedPolicyMode(AiExecutionRequest $request): string + { + if ($request->workspace === null) { + return AiPolicyMode::Disabled->value; + } + + $resolved = $this->settingsResolver->resolveValue($request->workspace, 'ai', 'policy_mode'); + + return is_string($resolved) && $resolved !== '' + ? 
$resolved + : AiPolicyMode::Disabled->value; + } + + private function blockedDecision( + AiExecutionRequest $request, + AiDecisionReasonCode $reasonCode, + string $workspaceAiPolicyMode, + ?string $matchedOperationalControlScope = null, + ): AiExecutionDecision { + return new AiExecutionDecision( + outcome: 'blocked', + reasonCode: $reasonCode, + workspaceAiPolicyMode: $workspaceAiPolicyMode, + matchedOperationalControlScope: $matchedOperationalControlScope, + useCaseKey: $request->useCaseKey, + requestedProviderClass: $request->requestedProviderClass, + dataClassifications: $request->dataClassifications, + sourceFamily: $request->sourceFamily, + auditAction: AuditActionId::AiExecutionDecisionEvaluated, + auditMetadata: [], + ); + } +} \ No newline at end of file diff --git a/apps/platform/app/Support/Audit/AuditActionId.php b/apps/platform/app/Support/Audit/AuditActionId.php index 9f8fde32..85d9ce06 100644 --- a/apps/platform/app/Support/Audit/AuditActionId.php +++ b/apps/platform/app/Support/Audit/AuditActionId.php @@ -101,6 +101,7 @@ enum AuditActionId: string case SupportDiagnosticsOpened = 'support_diagnostics.opened'; case SupportRequestCreated = 'support_request.created'; + case AiExecutionDecisionEvaluated = 'ai_execution.decision_evaluated'; case OperationalControlPaused = 'operational_control.paused'; case OperationalControlUpdated = 'operational_control.updated'; case OperationalControlResumed = 'operational_control.resumed'; @@ -243,6 +244,7 @@ private static function labels(): array self::TenantTriageReviewMarkedFollowUpNeeded->value => 'Triage review marked follow-up needed', self::SupportDiagnosticsOpened->value => 'Support diagnostics opened', self::SupportRequestCreated->value => 'Support request created', + self::AiExecutionDecisionEvaluated->value => 'AI execution decision evaluated', self::OperationalControlPaused->value => 'Operational control paused', self::OperationalControlUpdated->value => 'Operational control updated', 
self::OperationalControlResumed->value => 'Operational control resumed', @@ -330,6 +332,7 @@ private static function summaries(): array self::TenantReviewSuccessorCreated->value => 'Tenant review next cycle created', self::SupportDiagnosticsOpened->value => 'Support diagnostics opened', self::SupportRequestCreated->value => 'Support request created', + self::AiExecutionDecisionEvaluated->value => 'AI execution decision evaluated', self::OperationalControlPaused->value => 'Operational control paused', self::OperationalControlUpdated->value => 'Operational control updated', self::OperationalControlResumed->value => 'Operational control resumed', diff --git a/apps/platform/app/Support/OperationalControls/OperationalControlCatalog.php b/apps/platform/app/Support/OperationalControls/OperationalControlCatalog.php index 414ea34f..c50d6e94 100644 --- a/apps/platform/app/Support/OperationalControls/OperationalControlCatalog.php +++ b/apps/platform/app/Support/OperationalControls/OperationalControlCatalog.php @@ -17,6 +17,13 @@ final class OperationalControlCatalog 'operation_types' => ['restore.execute'], 'affected_surfaces' => ['tenant.restore_runs.create'], ], + 'ai.execution' => [ + 'key' => 'ai.execution', + 'label' => 'AI execution', + 'supported_scopes' => ['global'], + 'operation_types' => ['ai.execution'], + 'affected_surfaces' => ['governed_ai.execution'], + ], ]; /** diff --git a/apps/platform/app/Support/ProductKnowledge/ContextualHelpResolver.php b/apps/platform/app/Support/ProductKnowledge/ContextualHelpResolver.php index 4f0e41a4..320ac80e 100644 --- a/apps/platform/app/Support/ProductKnowledge/ContextualHelpResolver.php +++ b/apps/platform/app/Support/ProductKnowledge/ContextualHelpResolver.php @@ -6,6 +6,7 @@ use App\Models\ProviderConnection; use App\Models\Tenant; +use App\Support\Ai\AiDataClassification; use App\Support\Governance\PlatformVocabularyGlossary; use App\Support\Links\RequiredPermissionsLinks; use App\Support\ReasonTranslation\ReasonPresenter; 
@@ -147,6 +148,43 @@ public function knowledgeSource(): array return $this->catalog->knowledgeSource(); } + /** + * @return array{ + * use_case_key: string, + * source_family: string, + * data_classifications: list, + * operational_metadata: array{version: int, topic_count: int}, + * topics: list, + * headline: string, + * short_explanation: string, + * troubleshooting_steps: list, + * safe_next_action: string, + * glossary_terms: list, + * docs_links: list + * }> + * } + */ + public function aiProductKnowledgeAnswerDraftSource(): array + { + $source = $this->knowledgeSource(); + + return [ + 'use_case_key' => 'product_knowledge.answer_draft', + 'source_family' => 'product_knowledge', + 'data_classifications' => [ + AiDataClassification::ProductKnowledge->value, + AiDataClassification::OperationalMetadata->value, + ], + 'operational_metadata' => [ + 'version' => (int) $source['version'], + 'topic_count' => (int) $source['topic_count'], + ], + 'topics' => $source['topics'], + ]; + } + /** * @param array|null $verificationReport */ diff --git a/apps/platform/app/Support/Settings/SettingsRegistry.php b/apps/platform/app/Support/Settings/SettingsRegistry.php index fffc8780..694a930e 100644 --- a/apps/platform/app/Support/Settings/SettingsRegistry.php +++ b/apps/platform/app/Support/Settings/SettingsRegistry.php @@ -4,6 +4,7 @@ namespace App\Support\Settings; +use App\Support\Ai\AiPolicyMode; use App\Models\Finding; use App\Services\Entitlements\WorkspacePlanProfileCatalog; @@ -18,6 +19,15 @@ public function __construct() { $this->definitions = []; + $this->register(new SettingDefinition( + domain: 'ai', + key: 'policy_mode', + type: 'string', + systemDefault: AiPolicyMode::Disabled->value, + rules: ['required', 'string', 'in:disabled,private_only'], + normalizer: static fn (mixed $value): string => strtolower(trim((string) $value)), + )); + $this->register(new SettingDefinition( domain: 'backup', key: 'retention_keep_last_default', diff --git 
a/apps/platform/app/Support/SupportDiagnostics/SupportDiagnosticBundleBuilder.php b/apps/platform/app/Support/SupportDiagnostics/SupportDiagnosticBundleBuilder.php index ade8c1b6..0ab64eac 100644 --- a/apps/platform/app/Support/SupportDiagnostics/SupportDiagnosticBundleBuilder.php +++ b/apps/platform/app/Support/SupportDiagnostics/SupportDiagnosticBundleBuilder.php @@ -19,6 +19,7 @@ use App\Models\TenantReview; use App\Models\User; use App\Models\Workspace; +use App\Support\Ai\AiDataClassification; use App\Support\Navigation\RelatedNavigationResolver; use App\Support\OperationRunLinks; use App\Support\OpsUx\GovernanceRunDiagnosticSummaryBuilder; @@ -133,6 +134,39 @@ public function forOperationRun(OperationRun $run, ?User $actor = null): array ); } + /** + * @return array{ + * use_case_key: string, + * source_family: string, + * data_classifications: list, + * summary: array{ + * headline: string, + * dominant_issue: string, + * freshness_state: string, + * completeness_note: ?string, + * redaction_note: string, + * generated_from: string + * }, + * redaction: array{mode: string, markers: list}, + * notes: list + * } + */ + public function aiSupportDiagnosticsSummaryDraftSource(Tenant $tenant, ?User $actor = null): array + { + $bundle = $this->forTenant($tenant, $actor); + + return [ + 'use_case_key' => 'support_diagnostics.summary_draft', + 'source_family' => 'support_diagnostics', + 'data_classifications' => [ + AiDataClassification::RedactedSupportSummary->value, + ], + 'summary' => $bundle['summary'], + 'redaction' => $bundle['redaction'], + 'notes' => $bundle['notes'], + ]; + } + /** * @param list> $sections * @return array diff --git a/apps/platform/tests/Feature/Guards/NoDirectAiProviderBypassTest.php b/apps/platform/tests/Feature/Guards/NoDirectAiProviderBypassTest.php new file mode 100644 index 00000000..79a3fddb --- /dev/null +++ b/apps/platform/tests/Feature/Guards/NoDirectAiProviderBypassTest.php @@ -0,0 +1,49 @@ +map(fn (\SplFileInfo $file): string => 
str_replace($root.'/', '', $file->getPathname())) + ->filter(fn (string $relativePath): bool => str_starts_with($relativePath, 'Support/Ai/') + || $relativePath === 'Support/ProductKnowledge/ContextualHelpResolver.php' + || $relativePath === 'Support/SupportDiagnostics/SupportDiagnosticBundleBuilder.php') + ->values(); + + $patterns = [ + 'outbound_http' => '/\bHttp::/', + 'guzzle_client' => '/\bnew\s+Client\b/', + 'curl_runtime' => '/\bcurl_/i', + 'openai_vendor' => '/\bOpenAI\b/i', + 'anthropic_vendor' => '/\bAnthropic\b/i', + 'gemini_vendor' => '/\bGemini\b/i', + 'openrouter_vendor' => '/\bOpenRouter\b/i', + 'chat_completions_runtime' => '/\bChatCompletion\b/i', + ]; + + $hits = []; + + foreach ($files as $relativePath) { + $contents = file_get_contents($root.'/'.$relativePath); + + if (! is_string($contents) || $contents === '') { + continue; + } + + $lines = preg_split('/\R/', $contents) ?: []; + + foreach ($patterns as $label => $pattern) { + foreach ($lines as $index => $line) { + if (preg_match($pattern, $line) === 1) { + $hits[] = $relativePath.':'.($index + 1).' 
['.$label.'] '.trim($line); + } + } + } + } + + expect($hits)->toBeEmpty("AI governance surfaces must stay vendor-neutral and must not perform outbound provider runtime calls directly:\n".implode("\n", $hits)); +}); \ No newline at end of file diff --git a/apps/platform/tests/Feature/OperationalControls/OperationalControlAuthorizationSemanticsTest.php b/apps/platform/tests/Feature/OperationalControls/OperationalControlAuthorizationSemanticsTest.php index 6173e288..20461d5b 100644 --- a/apps/platform/tests/Feature/OperationalControls/OperationalControlAuthorizationSemanticsTest.php +++ b/apps/platform/tests/Feature/OperationalControls/OperationalControlAuthorizationSemanticsTest.php @@ -2,14 +2,17 @@ declare(strict_types=1); +use App\Filament\System\Pages\Ops\Controls; use App\Filament\Resources\RestoreRunResource; use App\Filament\Resources\RestoreRunResource\Pages\CreateRestoreRun; use App\Models\BackupItem; use App\Models\BackupSet; use App\Models\OperationalControlActivation; +use App\Models\PlatformUser; use App\Models\Policy; use App\Models\Tenant; use App\Models\User; +use App\Support\Auth\PlatformCapabilities; use Filament\Facades\Filament; use Illuminate\Foundation\Testing\RefreshDatabase; use Livewire\Livewire; @@ -130,4 +133,50 @@ function seedRestoreAuthorizationContext(): array ]) ->call('create') ->assertNotified('Restore execution paused'); +}); + +it('forbids ai execution controls for platform users missing system panel access', function (): void { + $user = PlatformUser::factory()->create([ + 'capabilities' => [ + PlatformCapabilities::OPS_CONTROLS_MANAGE, + ], + 'is_active' => true, + ]); + + $this->actingAs($user, 'platform') + ->get(Controls::getUrl(panel: 'system')) + ->assertForbidden(); +}); + +it('forbids ai execution controls for platform users missing ops controls manage', function (): void { + $user = PlatformUser::factory()->create([ + 'capabilities' => [ + PlatformCapabilities::ACCESS_SYSTEM_PANEL, + ], + 'is_active' => true, + ]); + + 
$this->actingAs($user, 'platform') + ->get(Controls::getUrl(panel: 'system')) + ->assertForbidden(); +}); + +it('shows ai execution controls only to platform users with the existing system control capabilities', function (): void { + $user = PlatformUser::factory()->create([ + 'capabilities' => [ + PlatformCapabilities::ACCESS_SYSTEM_PANEL, + PlatformCapabilities::OPS_CONTROLS_MANAGE, + ], + 'is_active' => true, + ]); + + $this->actingAs($user, 'platform') + ->get(Controls::getUrl(panel: 'system')) + ->assertSuccessful() + ->assertSee('AI execution'); + + Livewire::actingAs($user, 'platform') + ->test(Controls::class) + ->assertActionVisible('pause_ai_execution') + ->assertActionVisible('resume_ai_execution'); }); \ No newline at end of file diff --git a/apps/platform/tests/Feature/SettingsFoundation/WorkspaceAiPolicySettingsTest.php b/apps/platform/tests/Feature/SettingsFoundation/WorkspaceAiPolicySettingsTest.php new file mode 100644 index 00000000..e92f1271 --- /dev/null +++ b/apps/platform/tests/Feature/SettingsFoundation/WorkspaceAiPolicySettingsTest.php @@ -0,0 +1,66 @@ +create(); + $user = User::factory()->create(); + + WorkspaceMembership::factory()->create([ + 'workspace_id' => (int) $workspace->getKey(), + 'user_id' => (int) $user->getKey(), + 'role' => 'manager', + ]); + + session()->put(WorkspaceContext::SESSION_KEY, (int) $workspace->getKey()); + + return [$workspace, $user]; +} + +it('renders the workspace ai policy section and lets managers save and reset the ai posture', function (): void { + [$workspace, $user] = workspaceAiPolicyManager(); + + $this->actingAs($user) + ->get(WorkspaceSettings::getUrl(panel: 'admin')) + ->assertSuccessful() + ->assertSee('Workspace AI policy') + ->assertSee('Disabled') + ->assertSee('Private only') + ->assertSee('Approved use cases') + ->assertSee('Blocked data classifications'); + + expect(app(SettingsResolver::class)->resolveValue($workspace, 'ai', 'policy_mode')) + ->toBe('disabled'); + + $component = 
Livewire::actingAs($user) + ->test(WorkspaceSettings::class) + ->assertSet('data.ai_policy_mode', null) + ->set('data.ai_policy_mode', 'private_only') + ->callAction('save') + ->assertHasNoErrors() + ->assertSet('data.ai_policy_mode', 'private_only'); + + expect(app(SettingsResolver::class)->resolveValue($workspace, 'ai', 'policy_mode')) + ->toBe('private_only'); + + $component + ->mountFormComponentAction('ai_policy_mode', 'reset_ai_policy_mode', [], 'content') + ->callMountedFormComponentAction() + ->assertHasNoErrors() + ->assertSet('data.ai_policy_mode', null); + + expect(app(SettingsResolver::class)->resolveValue($workspace, 'ai', 'policy_mode')) + ->toBe('disabled'); +}); \ No newline at end of file diff --git a/apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsAuditTest.php b/apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsAuditTest.php index ba5d05f5..27d22755 100644 --- a/apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsAuditTest.php +++ b/apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsAuditTest.php @@ -79,3 +79,76 @@ ->and(data_get($audit?->metadata, 'before_value'))->toBe(48) ->and(data_get($audit?->metadata, 'after_value'))->toBe(30); }); + +it('writes a workspace-scoped audit entry when ai policy mode is updated', function (): void { + $workspace = Workspace::factory()->create(); + $user = User::factory()->create(); + + WorkspaceMembership::factory()->create([ + 'workspace_id' => (int) $workspace->getKey(), + 'user_id' => (int) $user->getKey(), + 'role' => 'manager', + ]); + + app(SettingsWriter::class)->updateWorkspaceSetting( + actor: $user, + workspace: $workspace, + domain: 'ai', + key: 'policy_mode', + value: 'private_only', + ); + + $audit = AuditLog::query()->latest('id')->first(); + + expect($audit)->not->toBeNull() + ->and($audit?->workspace_id)->toBe((int) $workspace->getKey()) + ->and($audit?->tenant_id)->toBeNull() + 
->and($audit?->action)->toBe(AuditActionId::WorkspaceSettingUpdated->value) + ->and(data_get($audit?->metadata, 'domain'))->toBe('ai') + ->and(data_get($audit?->metadata, 'key'))->toBe('policy_mode') + ->and(data_get($audit?->metadata, 'scope'))->toBe('workspace') + ->and(data_get($audit?->metadata, 'before_value'))->toBeNull() + ->and(data_get($audit?->metadata, 'after_value'))->toBe('private_only'); +}); + +it('writes a workspace-scoped audit entry when ai policy mode is reset', function (): void { + $workspace = Workspace::factory()->create(); + $user = User::factory()->create(); + + WorkspaceMembership::factory()->create([ + 'workspace_id' => (int) $workspace->getKey(), + 'user_id' => (int) $user->getKey(), + 'role' => 'manager', + ]); + + $writer = app(SettingsWriter::class); + + $writer->updateWorkspaceSetting( + actor: $user, + workspace: $workspace, + domain: 'ai', + key: 'policy_mode', + value: 'private_only', + ); + + $writer->resetWorkspaceSetting( + actor: $user, + workspace: $workspace, + domain: 'ai', + key: 'policy_mode', + ); + + $audit = AuditLog::query() + ->where('action', AuditActionId::WorkspaceSettingReset->value) + ->latest('id') + ->first(); + + expect($audit)->not->toBeNull() + ->and($audit?->workspace_id)->toBe((int) $workspace->getKey()) + ->and($audit?->tenant_id)->toBeNull() + ->and(data_get($audit?->metadata, 'domain'))->toBe('ai') + ->and(data_get($audit?->metadata, 'key'))->toBe('policy_mode') + ->and(data_get($audit?->metadata, 'scope'))->toBe('workspace') + ->and(data_get($audit?->metadata, 'before_value'))->toBe('private_only') + ->and(data_get($audit?->metadata, 'after_value'))->toBe('disabled'); +}); diff --git a/apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsManageTest.php b/apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsManageTest.php index 5448cc6d..b1c5f25e 100644 --- a/apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsManageTest.php +++ 
b/apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsManageTest.php @@ -44,6 +44,7 @@ function workspaceManagerUser(): array $component = Livewire::actingAs($user) ->test(WorkspaceSettings::class) + ->assertSet('data.ai_policy_mode', null) ->assertSet('data.backup_retention_keep_last_default', null) ->assertSet('data.backup_retention_min_floor', null) ->assertSet('data.drift_severity_mapping', []) @@ -58,6 +59,7 @@ function workspaceManagerUser(): array ->assertSet('data.findings_sla_low', null) ->assertSet('data.operations_operation_run_retention_days', null) ->assertSet('data.operations_stuck_run_threshold_minutes', null) + ->set('data.ai_policy_mode', 'private_only') ->set('data.backup_retention_keep_last_default', 55) ->set('data.backup_retention_min_floor', 12) ->set('data.drift_severity_mapping', ['drift' => 'critical']) @@ -74,6 +76,7 @@ function workspaceManagerUser(): array ->set('data.operations_stuck_run_threshold_minutes', 60) ->callAction('save') ->assertHasNoErrors() + ->assertSet('data.ai_policy_mode', 'private_only') ->assertSet('data.backup_retention_keep_last_default', 55) ->assertSet('data.backup_retention_min_floor', 12) ->assertSet('data.baseline_severity_missing_policy', 'critical') @@ -97,6 +100,9 @@ function workspaceManagerUser(): array expect(app(SettingsResolver::class)->resolveValue($workspace, 'backup', 'retention_keep_last_default')) ->toBe(55); + expect(app(SettingsResolver::class)->resolveValue($workspace, 'ai', 'policy_mode')) + ->toBe('private_only'); + expect(app(SettingsResolver::class)->resolveValue($workspace, 'backup', 'retention_min_floor')) ->toBe(12); @@ -142,6 +148,18 @@ function workspaceManagerUser(): array ->where('key', 'retention_keep_last_default') ->exists())->toBeFalse(); + $component + ->mountFormComponentAction('ai_policy_mode', 'reset_ai_policy_mode', [], 'content') + ->callMountedFormComponentAction() + ->assertHasNoErrors() + ->assertSet('data.ai_policy_mode', null); + + 
expect(WorkspaceSetting::query() + ->where('workspace_id', (int) $workspace->getKey()) + ->where('domain', 'ai') + ->where('key', 'policy_mode') + ->exists())->toBeFalse(); + $component ->mountFormComponentAction('operations_operation_run_retention_days', 'reset_operations_operation_run_retention_days', [], 'content') ->callMountedFormComponentAction() diff --git a/apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsNonMemberNotFoundTest.php b/apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsNonMemberNotFoundTest.php index f9034d44..3a7a6453 100644 --- a/apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsNonMemberNotFoundTest.php +++ b/apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsNonMemberNotFoundTest.php @@ -5,6 +5,7 @@ use App\Filament\Pages\Settings\WorkspaceSettings; use App\Models\User; use App\Models\Workspace; +use App\Models\WorkspaceSetting; use App\Support\Workspaces\WorkspaceContext; use Livewire\Livewire; @@ -12,6 +13,14 @@ $workspace = Workspace::factory()->create(); $user = User::factory()->create(); + WorkspaceSetting::factory()->create([ + 'workspace_id' => (int) $workspace->getKey(), + 'domain' => 'ai', + 'key' => 'policy_mode', + 'value' => 'private_only', + 'updated_by_user_id' => null, + ]); + session()->put(WorkspaceContext::SESSION_KEY, (int) $workspace->getKey()); $this->actingAs($user) diff --git a/apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsViewOnlyTest.php b/apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsViewOnlyTest.php index d3e67041..bf028bc6 100644 --- a/apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsViewOnlyTest.php +++ b/apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsViewOnlyTest.php @@ -30,6 +30,14 @@ 'updated_by_user_id' => null, ]); + WorkspaceSetting::factory()->create([ + 'workspace_id' => (int) $workspace->getKey(), + 'domain' => 'ai', + 'key' => 'policy_mode', + 'value' => 'private_only', + 
'updated_by_user_id' => null, + ]); + session()->put(WorkspaceContext::SESSION_KEY, (int) $workspace->getKey()); $this->actingAs($user) @@ -38,6 +46,7 @@ Livewire::actingAs($user) ->test(WorkspaceSettings::class) + ->assertSet('data.ai_policy_mode', 'private_only') ->assertSet('data.backup_retention_keep_last_default', 27) ->assertSet('data.backup_retention_min_floor', null) ->assertSet('data.drift_severity_mapping', []) @@ -56,6 +65,8 @@ ->assertActionDisabled('save') ->assertFormComponentActionVisible('backup_retention_keep_last_default', 'reset_backup_retention_keep_last_default', [], 'content') ->assertFormComponentActionDisabled('backup_retention_keep_last_default', 'reset_backup_retention_keep_last_default', [], 'content') + ->assertFormComponentActionVisible('ai_policy_mode', 'reset_ai_policy_mode', [], 'content') + ->assertFormComponentActionDisabled('ai_policy_mode', 'reset_ai_policy_mode', [], 'content') ->assertFormComponentActionVisible('backup_retention_min_floor', 'reset_backup_retention_min_floor', [], 'content') ->assertFormComponentActionDisabled('backup_retention_min_floor', 'reset_backup_retention_min_floor', [], 'content') ->assertFormComponentActionVisible('drift_severity_mapping', 'reset_drift_severity_mapping', [], 'content') @@ -75,6 +86,11 @@ ->call('save') ->assertStatus(403); + Livewire::actingAs($user) + ->test(WorkspaceSettings::class) + ->call('resetSetting', 'ai_policy_mode') + ->assertStatus(403); + Livewire::actingAs($user) ->test(WorkspaceSettings::class) ->call('resetSetting', 'backup_retention_keep_last_default') @@ -88,5 +104,12 @@ ->where('key', 'retention_keep_last_default') ->first(); - expect($setting)->not->toBeNull(); + $aiSetting = WorkspaceSetting::query() + ->where('workspace_id', (int) $workspace->getKey()) + ->where('domain', 'ai') + ->where('key', 'policy_mode') + ->first(); + + expect($setting)->not->toBeNull() + ->and($aiSetting)->not->toBeNull(); }); diff --git 
a/apps/platform/tests/Feature/System/OpsControls/AiExecutionOperationalControlTest.php b/apps/platform/tests/Feature/System/OpsControls/AiExecutionOperationalControlTest.php new file mode 100644 index 00000000..735292c8 --- /dev/null +++ b/apps/platform/tests/Feature/System/OpsControls/AiExecutionOperationalControlTest.php @@ -0,0 +1,109 @@ +create([ + 'capabilities' => [ + PlatformCapabilities::ACCESS_SYSTEM_PANEL, + PlatformCapabilities::OPS_CONTROLS_MANAGE, + ], + 'is_active' => true, + ]); +} + +it('pauses and resumes ai execution through the global-only controls card', function (): void { + $workspaceA = Workspace::factory()->create(['name' => 'Acme']); + $workspaceB = Workspace::factory()->create(['name' => 'Bravo']); + + Tenant::factory()->count(2)->create(['workspace_id' => (int) $workspaceA->getKey()]); + Tenant::factory()->count(1)->create(['workspace_id' => (int) $workspaceB->getKey()]); + + $user = makeAiControlsManager(); + $this->actingAs($user, 'platform'); + + $this->get(Controls::getUrl(panel: 'system')) + ->assertSuccessful() + ->assertSee("mountAction('pause_ai_execution')", escape: false); + + $component = Livewire::test(Controls::class) + ->assertActionExists('pause_ai_execution', fn (Action $action): bool => $action->isConfirmationRequired()) + ->assertActionExists('resume_ai_execution', fn (Action $action): bool => $action->isConfirmationRequired()) + ->assertActionExists('view_history_ai_execution', fn (Action $action): bool => $action->getLabel() === 'View AI execution history'); + + $summary = $component->instance()->controlSummary('ai.execution'); + $preview = $component->instance()->scopeImpactPreview('ai.execution', 'global', null); + + expect($summary['label'])->toBe('AI execution') + ->and($summary['supported_scopes'])->toBe(['global']) + ->and($summary['effective_state'])->toBe('enabled') + ->and($preview['summary'])->toContain('AI execution') + ->and($preview['workspace_count'])->toBe(2) + ->and($preview['tenant_count'])->toBe(3); + 
+ $component + ->callAction('pause_ai_execution', data: [ + 'scope_type' => 'global', + 'reason_text' => 'Paused for AI rollout review.', + 'expires_at' => now()->addDay()->toDateTimeString(), + ]) + ->assertNotified('AI execution paused'); + + $activation = OperationalControlActivation::query() + ->forControl('ai.execution') + ->forGlobalScope() + ->first(); + + expect($activation)->not->toBeNull() + ->and($activation?->reason_text)->toBe('Paused for AI rollout review.'); + + $pausedSummary = $component->instance()->controlSummary('ai.execution'); + + expect($pausedSummary['effective_state'])->toBe('paused') + ->and($pausedSummary['state_label'])->toBe('Paused globally'); + + $component + ->callAction('resume_ai_execution', data: [ + 'scope_type' => 'global', + ]) + ->assertNotified('AI execution resumed'); + + expect(OperationalControlActivation::query() + ->forControl('ai.execution') + ->forGlobalScope() + ->count())->toBe(0); + + $audits = AuditLog::query() + ->whereIn('action', [ + AuditActionId::OperationalControlPaused->value, + AuditActionId::OperationalControlResumed->value, + ]) + ->where('metadata->control_key', 'ai.execution') + ->orderBy('id') + ->get(); + + expect($audits)->toHaveCount(2) + ->and($audits[0]->workspace_id)->toBeNull() + ->and($audits[1]->workspace_id)->toBeNull(); +}); diff --git a/apps/platform/tests/Unit/Support/Ai/AiApprovedSourceInputsTest.php b/apps/platform/tests/Unit/Support/Ai/AiApprovedSourceInputsTest.php new file mode 100644 index 00000000..e6874180 --- /dev/null +++ b/apps/platform/tests/Unit/Support/Ai/AiApprovedSourceInputsTest.php @@ -0,0 +1,54 @@ +aiProductKnowledgeAnswerDraftSource(); + + expect($source)->toMatchArray([ + 'use_case_key' => 'product_knowledge.answer_draft', + 'source_family' => 'product_knowledge', + 'data_classifications' => [ + AiDataClassification::ProductKnowledge->value, + AiDataClassification::OperationalMetadata->value, + ], + ]) + ->and($source['topics'])->not->toBeEmpty() + 
->and($source['operational_metadata'])->toHaveKeys(['version', 'topic_count']) + ->and($source)->not->toHaveKeys(['tenant', 'tenant_id', 'workspace', 'workspace_id']); +}); + +it('exposes only the approved redacted support summary input for ai diagnostic drafts', function (): void { + $workspace = Workspace::factory()->create(); + $tenant = Tenant::factory()->create([ + 'workspace_id' => (int) $workspace->getKey(), + ]); + + $source = app(SupportDiagnosticBundleBuilder::class)->aiSupportDiagnosticsSummaryDraftSource($tenant); + + expect($source)->toMatchArray([ + 'use_case_key' => 'support_diagnostics.summary_draft', + 'source_family' => 'support_diagnostics', + 'data_classifications' => [ + AiDataClassification::RedactedSupportSummary->value, + ], + ]) + ->and($source['summary'])->toHaveKeys([ + 'headline', + 'dominant_issue', + 'freshness_state', + 'redaction_note', + 'generated_from', + ]) + ->and(data_get($source, 'redaction.mode'))->toBe('default_redacted') + ->and($source)->not->toHaveKeys(['sections', 'context', 'tenant', 'workspace', 'operation_run']); +}); diff --git a/apps/platform/tests/Unit/Support/Ai/AiDecisionAuditMetadataTest.php b/apps/platform/tests/Unit/Support/Ai/AiDecisionAuditMetadataTest.php new file mode 100644 index 00000000..8a4401a8 --- /dev/null +++ b/apps/platform/tests/Unit/Support/Ai/AiDecisionAuditMetadataTest.php @@ -0,0 +1,67 @@ +create(); + $tenant = Tenant::factory()->create(['workspace_id' => (int) $workspace->getKey()]); + + $request = new AiExecutionRequest( + workspace: $workspace, + tenant: $tenant, + actor: null, + useCaseKey: 'support_diagnostics.summary_draft', + requestedProviderClass: AiProviderClass::LocalPrivate->value, + dataClassifications: [AiDataClassification::RedactedSupportSummary->value], + sourceFamily: 'support_diagnostics', + callerSurface: 'support_diagnostics', + contextFingerprint: 'support_diagnostics:summary:v1', + ); + + $decision = new AiExecutionDecision( + outcome: 'blocked', + reasonCode: 
AiDecisionReasonCode::DataClassificationBlocked, + workspaceAiPolicyMode: 'private_only', + matchedOperationalControlScope: null, + useCaseKey: 'support_diagnostics.summary_draft', + requestedProviderClass: AiProviderClass::LocalPrivate->value, + dataClassifications: [AiDataClassification::RedactedSupportSummary->value], + sourceFamily: 'support_diagnostics', + auditAction: AuditActionId::AiExecutionDecisionEvaluated, + auditMetadata: [], + ); + + $metadata = app(AiDecisionAuditMetadataFactory::class)->make($request, $decision); + + expect($metadata)->toMatchArray([ + 'use_case_key' => 'support_diagnostics.summary_draft', + 'decision_outcome' => 'blocked', + 'decision_reason' => AiDecisionReasonCode::DataClassificationBlocked->value, + 'workspace_ai_policy_mode' => 'private_only', + 'requested_provider_class' => 'local_private', + 'data_classifications' => ['redacted_support_summary'], + 'source_family' => 'support_diagnostics', + 'workspace_id' => (int) $workspace->getKey(), + 'tenant_id' => (int) $tenant->getKey(), + 'context_fingerprint' => 'support_diagnostics:summary:v1', + ]) + ->and($metadata)->not->toHaveKeys([ + 'prompt_text', + 'source_payload', + 'provider_payload', + 'output_text', + ]); +}); diff --git a/apps/platform/tests/Unit/Support/Ai/AiUseCaseCatalogTest.php b/apps/platform/tests/Unit/Support/Ai/AiUseCaseCatalogTest.php new file mode 100644 index 00000000..b1635182 --- /dev/null +++ b/apps/platform/tests/Unit/Support/Ai/AiUseCaseCatalogTest.php @@ -0,0 +1,48 @@ +all(); + + expect($definitions)->toHaveCount(2) + ->and($definitions[0])->toMatchArray([ + 'key' => 'product_knowledge.answer_draft', + 'label' => 'Product knowledge answer draft', + 'future_consumer' => 'ContextualHelpResolver', + 'source_family' => 'product_knowledge', + 'tenant_context_permitted' => false, + ]) + ->and($definitions[0]['allowed_provider_classes'])->toBe(['local_private']) + ->and($definitions[0]['allowed_data_classifications'])->toBe([ + 'product_knowledge', + 
'operational_metadata', + ]) + ->and($definitions[1])->toMatchArray([ + 'key' => 'support_diagnostics.summary_draft', + 'label' => 'Support diagnostics summary draft', + 'future_consumer' => 'SupportDiagnosticBundleBuilder', + 'source_family' => 'support_diagnostics', + 'tenant_context_permitted' => true, + ]) + ->and($definitions[1]['allowed_provider_classes'])->toBe(['local_private']) + ->and($definitions[1]['allowed_data_classifications'])->toBe([ + 'redacted_support_summary', + ]); +}); + +it('derives provider and blocked-data summaries from the catalog for the workspace policy surface', function (): void { + $catalog = app(AiUseCaseCatalog::class); + + expect($catalog->allowedProviderClassLabelsForMode(AiPolicyMode::Disabled))->toBe([]) + ->and($catalog->allowedProviderClassLabelsForMode(AiPolicyMode::PrivateOnly))->toBe(['Local private']) + ->and($catalog->blockedDataClassificationLabels())->toBe([ + AiDataClassification::PersonalData->label(), + AiDataClassification::CustomerConfidential->label(), + AiDataClassification::RawProviderPayload->label(), + ]); +}); diff --git a/apps/platform/tests/Unit/Support/Ai/GovernedAiExecutionBoundaryTest.php b/apps/platform/tests/Unit/Support/Ai/GovernedAiExecutionBoundaryTest.php new file mode 100644 index 00000000..e8340c6b --- /dev/null +++ b/apps/platform/tests/Unit/Support/Ai/GovernedAiExecutionBoundaryTest.php @@ -0,0 +1,172 @@ +create(); + $user = User::factory()->create(); + + WorkspaceMembership::factory()->create([ + 'workspace_id' => (int) $workspace->getKey(), + 'user_id' => (int) $user->getKey(), + 'role' => 'manager', + ]); + + WorkspaceSetting::query()->create([ + 'workspace_id' => (int) $workspace->getKey(), + 'domain' => 'ai', + 'key' => 'policy_mode', + 'value' => $policyMode, + 'updated_by_user_id' => (int) $user->getKey(), + ]); + + return [$workspace, $user]; +} + +it('allows approved local-private support-diagnostics requests and writes bounded audit metadata', function (): void { + [$workspace, 
$user] = aiPolicyWorkspace(); + $tenant = Tenant::factory()->create(['workspace_id' => (int) $workspace->getKey()]); + + $decision = assertNoOutboundHttp(fn () => app(GovernedAiExecutionBoundary::class)->evaluate(new AiExecutionRequest( + workspace: $workspace, + tenant: $tenant, + actor: $user, + useCaseKey: 'support_diagnostics.summary_draft', + requestedProviderClass: AiProviderClass::LocalPrivate->value, + dataClassifications: [AiDataClassification::RedactedSupportSummary->value], + sourceFamily: 'support_diagnostics', + callerSurface: 'support_diagnostics', + contextFingerprint: 'support_diagnostics:summary:v1', + ))); + + expect($decision->isAllowed())->toBeTrue() + ->and($decision->reasonCode)->toBe(AiDecisionReasonCode::Allowed) + ->and($decision->workspaceAiPolicyMode)->toBe('private_only') + ->and($decision->matchedOperationalControlScope)->toBeNull(); + + $audit = AuditLog::query()->latest('id')->first(); + + expect($audit)->not->toBeNull() + ->and($audit?->action)->toBe(AuditActionId::AiExecutionDecisionEvaluated->value) + ->and($audit?->workspace_id)->toBe((int) $workspace->getKey()) + ->and($audit?->tenant_id)->toBe((int) $tenant->getKey()) + ->and(data_get($audit?->metadata, 'decision_outcome'))->toBe('allowed') + ->and(data_get($audit?->metadata, 'decision_reason'))->toBe(AiDecisionReasonCode::Allowed->value) + ->and(data_get($audit?->metadata, 'use_case_key'))->toBe('support_diagnostics.summary_draft') + ->and(data_get($audit?->metadata, 'requested_provider_class'))->toBe('local_private') + ->and(data_get($audit?->metadata, 'data_classifications'))->toBe(['redacted_support_summary']) + ->and(data_get($audit?->metadata, 'context_fingerprint'))->toBe('support_diagnostics:summary:v1') + ->and(data_get($audit?->metadata, 'prompt_text'))->toBeNull() + ->and(data_get($audit?->metadata, 'output_text'))->toBeNull(); +}); + +it('blocks external-public provider classes before any provider resolution', function (): void { + [$workspace, $user] = 
aiPolicyWorkspace(); + + $decision = assertNoOutboundHttp(fn () => app(GovernedAiExecutionBoundary::class)->evaluate(new AiExecutionRequest( + workspace: $workspace, + tenant: null, + actor: $user, + useCaseKey: 'product_knowledge.answer_draft', + requestedProviderClass: AiProviderClass::ExternalPublic->value, + dataClassifications: [ + AiDataClassification::ProductKnowledge->value, + AiDataClassification::OperationalMetadata->value, + ], + sourceFamily: 'product_knowledge', + callerSurface: 'product_knowledge', + contextFingerprint: 'product_knowledge:answer:v1', + ))); + + expect($decision->isBlocked())->toBeTrue() + ->and($decision->reasonCode)->toBe(AiDecisionReasonCode::ProviderClassBlocked) + ->and($decision->matchedOperationalControlScope)->toBeNull(); +}); + +it('blocks disallowed data classifications before any provider resolution', function (): void { + [$workspace, $user] = aiPolicyWorkspace(); + $tenant = Tenant::factory()->create(['workspace_id' => (int) $workspace->getKey()]); + + $decision = assertNoOutboundHttp(fn () => app(GovernedAiExecutionBoundary::class)->evaluate(new AiExecutionRequest( + workspace: $workspace, + tenant: $tenant, + actor: $user, + useCaseKey: 'support_diagnostics.summary_draft', + requestedProviderClass: AiProviderClass::LocalPrivate->value, + dataClassifications: [AiDataClassification::RawProviderPayload->value], + sourceFamily: 'support_diagnostics', + callerSurface: 'support_diagnostics', + contextFingerprint: 'support_diagnostics:raw:v1', + ))); + + expect($decision->isBlocked())->toBeTrue() + ->and($decision->reasonCode)->toBe(AiDecisionReasonCode::DataClassificationBlocked); +}); + +it('blocks unregistered use cases', function (): void { + [$workspace, $user] = aiPolicyWorkspace(); + + $decision = assertNoOutboundHttp(fn () => app(GovernedAiExecutionBoundary::class)->evaluate(new AiExecutionRequest( + workspace: $workspace, + tenant: null, + actor: $user, + useCaseKey: 'customer_email.reply', + requestedProviderClass: 
AiProviderClass::LocalPrivate->value, + dataClassifications: [AiDataClassification::ProductKnowledge->value], + sourceFamily: 'product_knowledge', + callerSurface: 'product_knowledge', + contextFingerprint: 'customer_email:reply:v1', + ))); + + expect($decision->isBlocked())->toBeTrue() + ->and($decision->reasonCode)->toBe(AiDecisionReasonCode::UnregisteredUseCase); +}); + +it('lets the ai execution operational control override an otherwise valid request', function (): void { + [$workspace, $user] = aiPolicyWorkspace(); + + OperationalControlActivation::factory()->forGlobalScope()->create([ + 'control_key' => 'ai.execution', + 'reason_text' => 'Paused for AI rollout review.', + ]); + + $decision = assertNoOutboundHttp(fn () => app(GovernedAiExecutionBoundary::class)->evaluate(new AiExecutionRequest( + workspace: $workspace, + tenant: null, + actor: $user, + useCaseKey: 'product_knowledge.answer_draft', + requestedProviderClass: AiProviderClass::LocalPrivate->value, + dataClassifications: [ + AiDataClassification::ProductKnowledge->value, + AiDataClassification::OperationalMetadata->value, + ], + sourceFamily: 'product_knowledge', + callerSurface: 'product_knowledge', + contextFingerprint: 'product_knowledge:answer:v1', + ))); + + expect($decision->isBlocked())->toBeTrue() + ->and($decision->reasonCode)->toBe(AiDecisionReasonCode::OperationalControlPaused) + ->and($decision->matchedOperationalControlScope)->toBe('global'); +}); diff --git a/apps/platform/tests/Unit/Support/OperationalControls/OperationalControlCatalogTest.php b/apps/platform/tests/Unit/Support/OperationalControls/OperationalControlCatalogTest.php index 2074291c..3cf3fa81 100644 --- a/apps/platform/tests/Unit/Support/OperationalControls/OperationalControlCatalogTest.php +++ b/apps/platform/tests/Unit/Support/OperationalControls/OperationalControlCatalogTest.php @@ -7,12 +7,18 @@ it('exposes only active runtime controls in the bounded control catalog', function (): void { $catalog = 
app(OperationalControlCatalog::class); - expect($catalog->keys())->toBe(['restore.execute']) + expect($catalog->keys())->toBe(['restore.execute', 'ai.execution']) ->and($catalog->definition('restore.execute'))->toMatchArray([ 'key' => 'restore.execute', 'label' => 'Restore execution', 'supported_scopes' => ['global', 'workspace'], 'operation_types' => ['restore.execute'], + ]) + ->and($catalog->definition('ai.execution'))->toMatchArray([ + 'key' => 'ai.execution', + 'label' => 'AI execution', + 'supported_scopes' => ['global'], + 'operation_types' => ['ai.execution'], ]); }); diff --git a/docs/product/implementation-ledger.md b/docs/product/implementation-ledger.md new file mode 100644 index 00000000..5be9ca39 --- /dev/null +++ b/docs/product/implementation-ledger.md @@ -0,0 +1,273 @@ +# TenantPilot Implementation Ledger + +## Purpose + +Dieses Dokument beschreibt den aktuellen repo-basierten Implementierungsstand von TenantPilot. Es ergaenzt `roadmap.md` und `spec-candidates.md`, ersetzt sie aber nicht. + +Bewertungsregeln fuer dieses Ledger: + +- Repo-basiert only: Aussagen zaehlen nur, wenn Code, Datenmodell, Workflow, UI-Adoption oder Test-Artefakte im Repo belastbar darauf hinweisen. +- Keine Roadmap- oder Spec-Absicht ohne Repo-Evidence. +- `sellable` wird nur dort verwendet, wo UI, Workflow, Datenmodell, RBAC/Audit und passende Test-Artefakte plausibel zusammenpassen. +- Backend-only bleibt `foundation-only`. +- UI-only gilt nicht als fertig. +- Wenn Tests unten als vorhanden markiert sind, bedeutet das: passende Test-Dateien existieren im Repo. Sie wurden fuer dieses Ledger nicht ausgefuehrt. + +## Current Product Position + +TenantPilot ist aktuell ein starkes internes Governance- und Operations-Produkt mit belastbaren Foundations fuer Execution Truth, Baselines/Drift, Findings, Evidence, Reviews, Review Packs, Supportability, Telemetry und Safety Controls. Die Repo-Wahrheit liegt damit ueber einer simplen Lesart von "R1 done / R2 partial". 
Gleichzeitig ist das Produkt noch nicht voll als kundenseitig konsumierbare Review- und Portfolio-Plattform ausgereift: Customer-safe Review Consumption, Cross-Tenant-Workflows und kommerzielle Lifecycle-Reife sind noch unvollstaendig. + +## Status Model + +- `planned`: nur in Roadmap oder Kandidatenliste, ohne belastbare Repo-Evidence +- `specified`: als Spec oder Draft angelegt, aber nicht repo-verifiziert umgesetzt +- `implemented_partial`: Teilumsetzung vorhanden, aber noch nicht als fertig bewertbar +- `implemented_backend`: belastbare Backend- oder Modelllogik vorhanden, aber keine ausreichende UI-Adoption +- `implemented_ui`: sichtbare UI vorhanden, aber Workflow- oder Backend-Proof ist noch zu schwach +- `implemented_verified`: Code, Modell, Workflow und Test-Artefakte sind plausibel vorhanden +- `adopted`: implementiert und bereits in zentrale Produktoberflaechen oder Kernablaeufe uebernommen +- `deferred`: bewusst verschoben +- `obsolete`: durch neuere Repo-Realitaet oder andere Implementierung ueberholt + +Evidence-Level im Dokument: + +- `none`: keine belastbare Repo-Evidence +- `weak`: duenne Code- oder Doc-Spur, aber kein belastbarer Gesamtworkflow +- `medium`: mehrere Repo-Signale, aber noch nicht durchgaengig +- `strong`: Datenmodell, Workflow, UI- oder Test-Spur greifen konsistent ineinander + +## Roadmap Coverage Summary + +| Roadmap Area | Status | Evidence Level | UI Ready | Tested | Sellable | Notes | +|---|---|---:|---|---|---|---| +| R1 Golden Master Governance | adopted | strong | yes | repo tests, not run | yes | Baselines, Drift, Findings und OperationRun-Truth sind breit im Produkt verankert. | +| R2 Tenant Reviews, Evidence & Control Foundation | adopted | strong | yes | repo tests, not run | almost | Review-, Evidence- und Control-Foundations sind stark; Customer Review Workspace fehlt noch. 
| +| Alert escalation + notification routing | implemented_verified | strong | partial | repo tests, not run | yes | Alert-Regeln, Dispatch, Cooldown und Quiet Hours sind real. | +| Governance & Architecture Hardening | implemented_partial | strong | partial | repo tests, not run | foundation-only | Viele Hardening-Slices sind bereits im Code, die Lane bleibt aber aktiv. | +| UI & Product Maturity Polish | implemented_partial | medium | partial | partial repo tests, not run | no | Einzelne Polishing-Slices sind da, aber kein geschlossenes "fertig"-Signal auf Theme-Ebene. | +| Secret & Security Hardening | implemented_verified | strong | yes | repo tests, not run | almost | Provider-Verifikation, Permission-Diagnostics und Redaction sind belastbar. | +| Baseline Drift Engine (Cutover) | adopted | strong | yes | repo tests, not run | yes | Compare- und Drift-Workflow wirken als produktive Kernfunktion. | +| R1.9 Platform Localization v1 | planned | none | no | no | no | Keine belastbare Locale-Foundation im Repo gefunden. | +| Product Scalability & Self-Service Foundation | implemented_partial | strong | yes | repo tests, not run | almost | Onboarding, Support, Help und Entitlements sind weit; Billing, Trial und Demo-Reife fehlen. | +| R2.0 Canonical Control Catalog Foundation | implemented_verified | strong | partial | repo tests, not run | foundation-only | Bereits implementiert und in Evidence/Reviews referenziert, aber kein eigenstaendiger Kundennutzen-Surface. | +| R2 Completion: customer review, support, help | implemented_partial | strong | yes | repo tests, not run | almost | Support und Help sind real; kundensichere Review-Consumption ist noch offen. | +| Findings Workflow v2 / Execution Layer | implemented_partial | strong | yes | repo tests, not run | almost | Triage, Ownership, Alerts und Hygiene sind vorhanden; der naechste Operator-Layer fehlt. 
| +| Policy Lifecycle / Ghost Policies | specified | weak | no | no | no | Als Richtung sichtbar, aber nicht als repo-verifizierter Workflow. | +| Platform Operations Maturity | implemented_partial | strong | yes | repo tests, not run | almost | System Panel, Control Tower und Ops Controls sind real; CSV/Raw Drilldowns bleiben offen. | +| Product Usage, Customer Health & Operational Controls | adopted | strong | yes | repo tests, not run | almost | Diese Mid-term-Lane ist im Repo bereits substanziell vorhanden. | +| Private AI Execution & Usage Governance Foundation | planned | none | no | no | no | Keine belastbare AI-Governance-Foundation im Repo. | +| MSP Portfolio & Operations | implemented_partial | medium | partial | repo tests, not run | foundation-only | Portfolio-Triage ist da; Compare/Promotion und Decision Workboard fehlen. | +| Human-in-the-Loop Autonomous Governance | planned | none | no | no | no | Kein repo-verifizierter Decision-Pack- oder Approval-Workflow. | +| Drift & Change Governance | specified | weak | no | no | no | Einzelne Foundations existieren, die thematische Produkt-Lane aber nicht. | +| Standardization & Policy Quality | planned | none | no | no | no | Keine starke Repo-Evidence fuer eine Intune-Linting- oder Policy-Quality-Oberflaeche. | +| PSA / Ticketing Handoff | planned | none | no | no | no | Support Requests existieren, externe Handoff-Integration aber nicht. 
| + +## Implemented Capabilities + +| Capability | Status | Backend | UI | Tests | RBAC/Audit | Sellable | Evidence | +|---|---|---|---|---|---|---|---| +| OperationRun truth layer | implemented_verified | yes | partial | repo tests, not run | yes | foundation-only | `app/Models/OperationRun.php`; `tests/Feature/System/*`; `tests/Feature/ReviewPack/*` | +| Baseline profiles, snapshots and compare | implemented_verified | yes | yes | repo tests, not run | yes | yes | `app/Models/BaselineProfile.php`; `app/Models/BaselineSnapshot.php`; `app/Services/Baselines/BaselineCompareService.php` | +| Drift findings and governance pressure | adopted | yes | yes | repo tests, not run | yes | yes | `app/Models/Finding.php`; `app/Filament/Widgets/Dashboard/RecentDriftFindings.php`; `tests/Feature/Findings/*` | +| Restore workflow with safety gates | implemented_verified | yes | yes | repo tests, not run | yes | yes | `app/Models/OperationRun.php`; restore gates and tests in `tests/Feature/Restore/*` | +| Evidence snapshots | implemented_verified | yes | yes | repo tests, not run | yes | foundation-only | `app/Models/EvidenceSnapshot.php`; `app/Services/Evidence/EvidenceSnapshotService.php`; `tests/Feature/Evidence/*` | +| Tenant reviews | implemented_verified | yes | yes | repo tests, not run | yes | almost | `app/Models/TenantReview.php`; `app/Services/TenantReviews/TenantReviewService.php`; `tests/Feature/TenantReview/*` | +| Review pack generation and export | implemented_verified | yes | yes | repo tests, not run | yes | yes | `app/Models/ReviewPack.php`; `app/Services/ReviewPackService.php`; `tests/Feature/ReviewPack/*` | +| Alerts and notification routing | implemented_verified | yes | partial | repo tests, not run | yes | yes | `app/Services/Alerts/AlertDispatchService.php`; `tests/Feature/*Alert*` | +| Provider health, onboarding readiness and required permissions | adopted | yes | yes | repo tests, not run | yes | almost | `app/Jobs/ProviderConnectionHealthCheckJob.php`; 
`app/Services/Onboarding/OnboardingLifecycleService.php`; `app/Filament/Pages/TenantRequiredPermissions.php` | +| Permission posture reporting | implemented_verified | yes | yes | repo tests, not run | yes | yes | `app/Services/PermissionPosture/PermissionPostureFindingGenerator.php`; `tests/Feature/PermissionPosture/*` | +| Entra admin roles reporting | implemented_verified | yes | yes | repo tests, not run | yes | yes | `app/Services/EntraAdminRoles/EntraAdminRolesReportService.php`; `tests/Feature/EntraAdminRoles/*` | +| Stored reports substrate | implemented_verified | yes | partial | repo tests, not run | partial | foundation-only | `app/Models/StoredReport.php`; `tests/Feature/PermissionPosture/StoredReportModelTest.php`; `tests/Feature/EntraAdminRoles/StoredReportFingerprintTest.php` | +| Support diagnostics | adopted | yes | yes | repo tests, not run | yes | almost | `app/Support/SupportDiagnostics/SupportDiagnosticBundleBuilder.php`; `app/Filament/Pages/TenantDashboard.php`; `tests/Feature/SupportDiagnostics/*` | +| In-app support requests | implemented_verified | yes | yes | repo tests, not run | yes | almost | `app/Models/SupportRequest.php`; `app/Support/SupportRequests/*`; `tests/Feature/SupportRequests/*` | +| Product knowledge and contextual help | implemented_partial | yes | yes | repo tests, not run | partial | almost | `app/Support/ProductKnowledge/ContextualHelpCatalog.php`; `tests/Feature/Onboarding/ProductKnowledgeOnboardingHelpTest.php` | +| Product telemetry | implemented_verified | yes | yes | repo tests, not run | yes | almost | `app/Models/ProductUsageEvent.php`; `app/Filament/System/Widgets/ProductTelemetryKpis.php`; `tests/Feature/System/ProductTelemetry/*` | +| Customer health scoring | implemented_verified | yes | yes | repo tests, not run | partial | almost | `app/Filament/System/Widgets/CustomerHealthKpis.php`; `app/Filament/System/Widgets/CustomerHealthTopWorkspaces.php`; `tests/Feature/System/CustomerHealth/*` | +| Operational 
controls | implemented_verified | yes | yes | repo tests, not run | yes | almost | `app/Models/OperationalControlActivation.php`; `app/Support/OperationalControls/*`; `tests/Feature/System/OpsControls/*` | +| Workspace entitlements | implemented_verified | yes | yes | repo tests, not run | yes | foundation-only | `app/Services/Entitlements/WorkspaceEntitlementResolver.php`; `tests/Feature/Filament/Settings/WorkspaceEntitlementsSettingsPageTest.php` | +| Capability-first RBAC | adopted | yes | yes | repo tests, not run | yes | foundation-only | `app/Services/Auth/CapabilityResolver.php`; `app/Services/Auth/RoleCapabilityMap.php`; many `tests/Feature/Rbac/*` | +| Audit log foundation | adopted | yes | yes | repo tests, not run | yes | foundation-only | `app/Models/AuditLog.php`; `app/Services/Audit/WorkspaceAuditLogger.php`; many audit-focused feature tests | +| Canonical control catalog | implemented_verified | yes | partial | repo tests, not run | partial | foundation-only | `app/Support/Governance/Controls/CanonicalControlCatalog.php`; `config/canonical_controls.php`; `tests/Unit/Governance/*` | +| Portfolio triage continuity | implemented_verified | yes | yes | repo tests, not run | yes | foundation-only | `app/Services/PortfolioTriage/TenantTriageReviewService.php`; `app/Support/PortfolioTriage/*`; `tests/Feature/Filament/TenantRegistryTriageReviewStateTest.php` | + +## Foundation-Only Capabilities + +- OperationRun truth and canonical operation typing: starke Execution-Foundation, aber kein eigenstaendiger Kundennutzen-Surface. +- Audit log foundation: breit genutzt und wichtig fuer Governance, aber allein nicht verkaufbar. +- Capability-first RBAC: belastbar und testnah, bleibt aber Enablement-Layer. +- Workspace entitlements: reale Gate- und Override-Logik, aber noch keine volle Commercial Lifecycle Story. +- Canonical control catalog: starke semantische Foundation fuer Evidence, Findings und Reviews. 
+- Stored reports substrate: wichtig fuer Reports, Evidence und Diagnostics, aber kein eigenstaendiges Produktversprechen. +- Evidence snapshot substrate: tragende technische Basis fuer Reviews und Exports. +- Operational control registry and evaluator: starke Safety-Control-Foundation, primaer operatorseitig. +- Customer health scoring: reale interne SaaS-Operations-Layer, aber noch keine eigenstaendige Kundenoberflaeche. +- Portfolio triage continuity: sinnvoller Multi-Tenant-Unterbau, aber noch kein vollstaendiges Portfolio-Produkt. + +## Partial Capabilities + +- Customer-facing review consumption: Tenant Reviews, Evidence Snapshots und Review Packs sind stark, aber ein repo-verifizierter Customer Review Workspace fehlt. +- Findings Workflow v2: Triage, Assignment, Hygiene und Notifications sind vorhanden, aber kein konsolidierter Decision-/Inbox-Layer. +- Product scalability and self-service: Onboarding, Support, Help und Entitlements sind weit, Billing-, Trial- und Demo-Reife aber nicht. +- MSP portfolio operations: Portfolio-Triage ist vorhanden, Cross-Tenant Compare und Promotion fehlen. +- Platform operations maturity: Control Tower und Ops Controls sind stark, aber einige geplante operatorseitige Drilldowns/Exports fehlen noch. +- Product knowledge rollout: Help-Katalog und Resolver sind real, aber noch nicht breit genug adoptiert fuer "fertig". + +## Planned But Not Implemented + +- Platform Localization v1 +- Private AI Execution & Usage Governance Foundation +- Human-in-the-Loop Autonomous Governance +- Standardization & Policy Quality / Intune Linting +- PSA / Ticketing Handoff +- Customer Review Workspace v1 +- Cross-Tenant Compare and Promotion v1 +- Later compliance overlays beyond the current control/evidence foundation + +## Release Readiness + +| Release / Theme | Readiness | Notes | +|---|---|---| +| R1 Golden Master Governance | implemented | Die zentrale Governance- und Execution-Layer ist repo-verifiziert und breit adoptiert. 
| +| R2 Tenant Reviews & Evidence Packs | partially implemented | Reviews, Evidence Snapshots und Review Packs sind stark; kundensichere Consumption fehlt noch. | +| R3 MSP Portfolio OS | foundation only | Portfolio-Triage ist da, aber Compare/Promotion und Decision Workflows fehlen. | +| Later Compliance Light | foundation only | Canonical Controls, Evidence und Exceptions existieren als Grundlage; ein Compliance-Produkt ist nicht repo-proven. | + +## Commercial Readiness + +### Demo-ready + +- Baseline compare and drift walkthroughs +- Review pack generation and export +- Provider health, onboarding readiness and required permissions +- Support diagnostics +- Permission posture and Entra admin roles reporting + +### Almost sellable + +- Review-driven governance workflow around tenant reviews and review packs +- Baseline drift and restore governance +- Alerting and run visibility for governance operations +- Support requests with contextual diagnostics +- Provider readiness and permission posture reporting + +### Foundation-only + +- OperationRun truth layer +- Audit foundation +- Capability-first RBAC +- Workspace entitlements +- Canonical control catalog +- Stored reports substrate +- Evidence snapshot substrate +- Product telemetry +- Customer health scoring +- Operational controls +- Portfolio triage continuity + +### Not sellable yet + +- Customer Review Workspace v1 +- Cross-Tenant Compare and Promotion v1 +- Localization v1 +- Private AI Execution Governance Foundation +- External Support Desk / PSA Handoff +- Compliance Light product layer + +## Open Gaps & Blockers + +| Gap | Type | Impact | Roadmap Area | Recommended Spec | +|---|---|---|---|---| +| Customer-safe review workspace is missing | Release blocker | Existing review and evidence assets cannot yet be consumed as a clear customer-facing surface | R2 completion / Tenant Reviews | P0 Customer Review Workspace v1 | +| No consolidated operator decision inbox | UX blocker | Operators still move 
between findings, runs, alerts and portfolio surfaces to act | Findings Workflow / MSP Portfolio | P0 Decision-Based Governance Inbox v1 | +| Cross-tenant compare and promotion is not repo-proven | Release blocker | MSP portfolio story remains partial | MSP Portfolio & Operations | P1 Cross-Tenant Compare and Promotion v1 | +| Localization foundation is absent | UX blocker | Product polish and DACH-readiness remain limited | R1.9 Platform Localization v1 | P1 Localization v1 | +| Entitlements stop short of full commercial lifecycle | Commercialization blocker | Plan gating exists, but trial, grace and suspension semantics remain incomplete | Product Scalability & Self-Service Foundation | P2 Commercial Entitlements and Billing-State Maturity | +| Support requests do not hand off to an external desk | Commercialization blocker | Support operations still depend on manual follow-through outside the product | R2 completion / Support | P2 External Support Desk / PSA Handoff | +| AI governance foundation is absent | Architecture blocker | Future AI features would risk trust and policy drift if added directly | Private AI Execution & Usage Governance | P3 Private AI Execution Governance Foundation | +| Roadmap understates current repo truth | Architecture blocker | Prioritization can drift because strategy docs lag implementation | Product planning / roadmap maintenance | none - docs alignment | +| Test files were not executed for this ledger update | Testing blocker | This document relies on code plus test presence, not live runtime validation | all areas | none - run targeted suites | + +## Recommended Next Specs + +- `P0 Customer Review Workspace v1`: turns existing reviews, evidence and review-pack outputs into a customer-safe read-only product surface. +- `P0 Decision-Based Governance Inbox v1`: consolidates existing findings, runs, alerts and triage signals into one operator work surface. 
+- `P1 Cross-Tenant Compare and Promotion v1`: needed to move from portfolio visibility to portfolio action. +- `P1 Localization v1`: still absent in repo and becomes more expensive the later it lands. +- `P2 Commercial Entitlements and Billing-State Maturity`: extends the already real entitlement substrate into a usable commercial lifecycle. +- `P2 External Support Desk / PSA Handoff`: extends support requests beyond internal persistence. +- `P3 Private AI Execution Governance Foundation`: should exist before feature-level AI adoption, not after it. + +## Roadmap Drift Notes + +- `roadmap.md` understates the current R2 control foundation. Canonical controls, stored reports, permission posture and Entra admin roles are already repo-real, not just near-term ideas. +- `roadmap.md` understates product supportability. Support diagnostics, in-app support requests and contextual help already exist in the repo. +- `roadmap.md` understates operational maturity. Product telemetry, customer health and operational controls are already implemented and wired into the system panel. +- `roadmap.md` understates commercial foundations. A workspace entitlement resolver, plan profiles and enforcement points already exist, even though full billing-state maturity does not. +- The roadmap is stronger at describing missing customer-facing consumption than missing backend foundations. Customer Review Workspace v1, Cross-Tenant Compare and Promotion, Localization and AI Governance still look genuinely unimplemented. +- The main drift pattern is underestimation, not overestimation. The only place where optimism should still be resisted is customer-facing review maturity: internal review and evidence foundations are strong, but the repo does not yet prove a finished customer review workspace. 
+ +## Evidence Sources + +Wichtigste Strategie- und Scope-Quellen: + +- `docs/product/roadmap.md` +- `docs/product/spec-candidates.md` + +Wichtige Plattform- und UI-Anker: + +- `apps/platform/bootstrap/providers.php` +- `apps/platform/app/Providers/Filament/AdminPanelProvider.php` +- `apps/platform/app/Providers/Filament/SystemPanelProvider.php` +- `apps/platform/app/Filament/Pages/TenantDashboard.php` +- `apps/platform/app/Filament/System/Pages/Dashboard.php` +- `apps/platform/app/Filament/Pages/TenantRequiredPermissions.php` + +Wichtige Models: + +- `apps/platform/app/Models/OperationRun.php` +- `apps/platform/app/Models/Finding.php` +- `apps/platform/app/Models/FindingException.php` +- `apps/platform/app/Models/BaselineProfile.php` +- `apps/platform/app/Models/BaselineSnapshot.php` +- `apps/platform/app/Models/EvidenceSnapshot.php` +- `apps/platform/app/Models/TenantReview.php` +- `apps/platform/app/Models/ReviewPack.php` +- `apps/platform/app/Models/StoredReport.php` +- `apps/platform/app/Models/SupportRequest.php` +- `apps/platform/app/Models/ProductUsageEvent.php` +- `apps/platform/app/Models/OperationalControlActivation.php` +- `apps/platform/app/Models/AuditLog.php` + +Wichtige Services und Jobs: + +- `apps/platform/app/Services/ReviewPackService.php` +- `apps/platform/app/Services/TenantReviews/TenantReviewService.php` +- `apps/platform/app/Services/Evidence/EvidenceSnapshotService.php` +- `apps/platform/app/Services/Baselines/BaselineCompareService.php` +- `apps/platform/app/Services/Alerts/AlertDispatchService.php` +- `apps/platform/app/Jobs/ProviderConnectionHealthCheckJob.php` +- `apps/platform/app/Services/Onboarding/OnboardingLifecycleService.php` +- `apps/platform/app/Services/Entitlements/WorkspaceEntitlementResolver.php` +- `apps/platform/app/Services/PortfolioTriage/TenantTriageReviewService.php` +- `apps/platform/app/Support/Governance/Controls/CanonicalControlCatalog.php` +- `apps/platform/app/Services/Audit/WorkspaceAuditLogger.php` +- 
`apps/platform/app/Services/Auth/CapabilityResolver.php` + +Wichtige Test-Anker im Repo: + +- `apps/platform/tests/Feature/ReviewPack/*` +- `apps/platform/tests/Feature/Evidence/*` +- `apps/platform/tests/Feature/PermissionPosture/*` +- `apps/platform/tests/Feature/EntraAdminRoles/*` +- `apps/platform/tests/Feature/SupportDiagnostics/*` +- `apps/platform/tests/Feature/SupportRequests/*` +- `apps/platform/tests/Feature/System/CustomerHealth/*` +- `apps/platform/tests/Feature/System/ProductTelemetry/*` +- `apps/platform/tests/Feature/System/OpsControls/*` +- `apps/platform/tests/Feature/Filament/TenantRegistryTriageReviewStateTest.php` +- `apps/platform/tests/Unit/Governance/*` +- `apps/platform/tests/Unit/Entitlements/*` + +## Last Updated + +2026-04-27 on branch `248-private-ai-policy-foundation` diff --git a/docs/product/principles.md b/docs/product/principles.md index e7127f33..89baaba1 100644 --- a/docs/product/principles.md +++ b/docs/product/principles.md @@ -104,6 +104,52 @@ ### Data minimization & safe logging --- +## Governance & Decision Model + +### Decision-first surfaces (non-negotiable) +Every operator-facing surface must default to: +- Decision +- Reason +- Impact +- One primary next action + +Diagnostics and evidence must be progressively disclosed. + +### Surface layering (mandatory) +All operator surfaces must follow a strict layering model: +1. Decision layer (default-visible) +2. Diagnostic layer (expandable) +3. Evidence layer (deep, raw, or audit-level) + +No surface may start at diagnostic or raw data level. + +### Multiple truth layers (explicit separation) +The platform separates: +- **Execution truth** (OperationRun) +- **Artifact truth** (Reports, Evidence) +- **Backup truth** (Snapshots) +- **Governance truth** (Findings, Exceptions) + +These layers must never be conflated or implicitly derived from each other. 
+ +### Governance-first model +The system models governance explicitly as: +- **Expected state** (Baselines) +- **Observed state** (Inventory / Evidence) +- **Deviations** (Findings) +- **Decisions** (Exceptions / Risk acceptance) + +All governance workflows must align with this model. + +### Baselines as reference truth +Baselines define the expected state. +All comparisons, drift detection, and governance decisions must reference an explicit baseline. +Implicit or “last state vs current state” comparisons are forbidden. + +### No false calmness (strict) +Missing, stale, or partial data must be explicitly visible. +The system must never present a "healthy" or "complete" state without sufficient evidence. + ## UI & Information Architecture ### UI/UX constitution governs operator surfaces diff --git a/docs/product/spec-candidates.md b/docs/product/spec-candidates.md index ad988bd9..4a2e0b33 100644 --- a/docs/product/spec-candidates.md +++ b/docs/product/spec-candidates.md @@ -1,3773 +1,268 @@ # Spec Candidates -> Concrete future specs waiting for prioritization. -> Each entry has enough structure to become a real spec when the time comes. -> -> **Flow**: Inbox → Qualified → Planned → Spec created → moved to `Promoted to Spec` +> Repo-based next-spec queue for TenantPilot. +> This file is not a wishlist. It tracks only open gaps that are still worth turning into new or refreshed specs. -> **Last reviewed**: 2026-04-27 (added Audited Support Sessions / Assisted Tenant Access and Audience-Aware Decision Surface Adoption Closure; retained Product Scalability & Self-Service Foundation, Codebase Quality & Engineering Maturity cluster, Microsoft-first provider-extensible Decision-Based Operating candidates, and Private AI Execution & Usage Governance Foundation candidates) +> **Last reviewed**: 2026-04-27 +> **Basis**: `implementation-ledger.md`, `roadmap.md`, current `specs/` truth --- -## Inbox +## Candidate Rules -> Ungefiltert. Kurze Notiz reicht. Wöchentlich sichten. 
+- Work repo-based, not roadmap-aspirational. +- Do not keep implemented features as active candidates. +- Do not keep already-specced foundations as active candidates unless a narrower follow-up gap remains. +- P0 is reserved for blockers to the next sellable release. +- P1 is for enterprise and product maturity gaps. +- P2 is for commercial and scale readiness. +- P3 is for later platform ambitions after current release blockers close. +- Existing candidate history is preserved through `Promoted to Spec`, `Deferred`, and `Superseded / Removed` notes rather than silent deletion. -- Dashboard trend visualizations (sparklines, compliance gauge, drift-over-time chart) -- Dashboard "Needs Attention" should be visually louder (alert color, icon, severity weighting) -- Dashboard enterprise polish: severity-weighted drift table, actionable alert buttons, progressive disclosure (demoted from Qualified — needs bounded scope before re-qualifying) -- Operations table should show duration + affected policy count -- Density control / comfortable view toggle for admin tables -- Inventory landing page may be redundant — consider pure navigation section -- Settings change history → explainable change tracking -- Workspace chooser v2: search, sort, favorites, pins, environment badges, last activity -- Workspace-level PII override for review packs (deferred from Spec 109 — controls whether PII is included/redacted in tenant review pack exports at workspace scope) -- CSV export for filtered run metadata (deferred from Spec 114 — allow operators to export filtered operation run lists from the system console as CSV) -- Raw error/context drilldowns for system console (deferred from Spec 114 — in-product drilldown into raw error payloads and execution context for failed/stuck runs in the system console) -- Lead Capture & CRM Pipeline (company-ops track; not a product spec unless TenantPilot later needs in-product customer lifecycle surfaces) -- Billing & Contract Readiness (company-ops 
track; product spec only for plan/entitlement/billing-status foundation) -- AVV / DPA / TOM / Legal Pack (company-ops track; source artifacts should align with product data model but are not a product feature by default) -- Support Desk + AI Triage (company-ops track; product spec only where TenantPilot creates support context bundles or in-app support requests) -- Monitoring & Incident Runbooks (company-ops track; product spec only where platform telemetry or customer-facing status integrations are required) -- Release & Customer Communication Automation (company-ops track; product spec only where release metadata/changelog becomes in-product) -- Vendor Questionnaire Answer Bank (company-ops track; generally not a product spec unless answers become customer-facing trust-center content or product-backed compliance evidence) -- Support Severity Matrix & Runbooks (company-ops track; product spec only where severity, SLA, escalation, or incident state becomes modeled in TenantPilot) -- Business Continuity / Founder Backup Plan (company-ops track; not a product spec unless product-side operational controls or customer-facing continuity surfaces are required) +## Active Candidate Queue ---- +### P0 — Release Blockers + +### Customer Review Workspace v1 +- **Priority**: P0 +- **Why this stays active**: The repo already has strong internal review foundations: tenant reviews, evidence snapshots, review packs, redaction paths, entitlements, audit, and RBAC-aware surfaces. What is still missing is the customer-safe read-only consumption layer that turns those internal assets into a clearly sellable review product. +- **Roadmap relationship**: R2 completion / customer-facing review consumption. 
+- **Dependencies**: + - `TenantReview` + - `EvidenceSnapshot` + - `ReviewPack` + - existing redaction behavior + - workspace entitlements + - tenant/workspace RBAC and audit foundations +- **Scope**: + - customer-safe read-only workspace or view for latest review state + - latest findings and accepted risks in customer-safe form + - review-pack download surface with existing redaction rules + - explicit absence of admin or remediation actions + - clear authorization boundaries for customer and read-only viewers +- **Non-scope**: + - admin settings + - remediation actions + - raw operator diagnostics + - a broader customer portal rewrite + - billing or contract workflows +- **Acceptance criteria**: + - an authorized customer or read-only actor can open the review workspace + - latest review status, accepted risks, and key findings are visible without exposing admin controls + - review-pack downloads respect existing redaction and entitlement rules + - tenant and workspace isolation are enforced and tested + - audit-sensitive or operator-only data is not exposed through this surface +- **Notes**: This is the clearest repo-derived blocker between current internal review strength and a cleaner sellable release. + +### P1 — Enterprise Maturity + +### Decision-Based Governance Inbox v1 +- **Priority**: P1 +- **Why this stays active**: Findings, alerts, operation runs, review-pack generation, and portfolio triage already exist, but operators still work across several surfaces. The next maturity step is a single decision-oriented work surface, not more raw detail pages. +- **Roadmap relationship**: Findings workflow maturity; later MSP Portfolio OS prerequisite. 
+- **Dependencies**: + - findings workflow semantics and inbox foundations from Specs 219, 221, 222, 224, 225, 230, 231 + - alert routing foundation + - `OperationRun` truth + - portfolio triage continuity + - contextual help and reason-code surfaces where helpful +- **Scope**: + - one operator-facing inbox for high-signal governance work + - grouping or prioritization across findings, alerts, stale runs, and related attention signals + - direct action links into compare, finding review, review-pack generation, or triage paths + - auditable state changes such as snooze, assign, or acknowledge where already supported +- **Non-scope**: + - autonomous remediation + - AI-generated recommendations + - customer-facing inboxes + - full cross-tenant workboard redesign +- **Acceptance criteria**: + - one surface shows prioritized governance work from more than one underlying signal family + - actions route to existing product truth rather than duplicating state + - visibility is capability-aware and workspace-safe + - auditable state changes are recorded where the inbox mutates work state + - tests prove signal grouping and authorization boundaries +- **Notes**: Important, but not a P0 release blocker while Customer Review Workspace is still missing. + +### Cross-Tenant Compare and Promotion v1 +- **Priority**: P1 +- **Why this stays active**: Portfolio triage exists, but portfolio action does not. The repo already contains an older draft spec for this direction, yet the capability is not repo-proven as a finished product workflow. +- **Roadmap relationship**: MSP Portfolio & Operations. +- **Existing spec**: Spec 043 exists and should be refreshed against current repo truth rather than replaced by a new broad direction. 
+- **Dependencies**: + - inventory foundations + - baseline compare truth + - restore and execution guardrails + - audit log foundation + - tenant and workspace isolation plus RBAC +- **Scope**: + - choose source and target tenants within allowed scope + - show a structured compare preview + - support a dry-run or promotion preflight before any write path + - preserve auditability and scope boundaries +- **Non-scope**: + - blind one-click promotion + - autonomous rollout + - multi-cloud or multi-provider compare + - full MSP control-plane redesign +- **Acceptance criteria**: + - operator can produce a compare preview between two allowed tenants + - promotion path includes explicit preflight or dry-run semantics + - authorization and tenant isolation are enforced and tested + - audit trail exists for compare and promotion entry points + - the slice refreshes or narrows Spec 043 instead of reopening it as a vague ambition + +### Localization v1 +- **Priority**: P1 +- **Why this stays active**: The repo and roadmap both indicate this is still absent. It is not a backend foundation gap; it is a product maturity gap that will get more expensive as the governance surface grows. +- **Roadmap relationship**: R1.9 Platform Localization v1. 
+- **Dependencies**: + - existing status and terminology catalogs + - contextual help boundaries + - notification and UI copy inventory on critical surfaces + - locale resolution rules for workspace, user, and system context +- **Scope**: + - `de` and `en` on core governance surfaces + - locale resolution order and fallback behavior + - locale-aware formatting for dates, times, and numbers + - stable machine and export formats that remain non-localized +- **Non-scope**: + - public website localization + - broad documentation translation + - retrospective translation of every legacy free-text record + - marketing copy systems +- **Acceptance criteria**: + - core navigation, dashboard, findings, baseline compare, alerts, and operations surfaces support `de` and `en` + - no raw translation keys appear on critical UI paths + - fallback to English is controlled and predictable + - locale-aware formatting does not affect audit or export truth + - targeted regression coverage exists for fallback and key critical flows + +### P2 — Commercial / Scale + +### Commercial Entitlements and Billing-State Maturity +- **Priority**: P2 +- **Why this stays active**: The repo already has a real entitlement foundation and an existing spec for plans and billing readiness. The remaining gap is narrower: commercial lifecycle maturity, not inventing entitlements from scratch. +- **Roadmap relationship**: Product Scalability & Self-Service Foundation. +- **Existing spec context**: Spec 247 exists for `Plans, Entitlements & Billing Readiness`. This candidate is the follow-up gap after the current entitlement substrate, not a duplicate foundation spec. 
+- **Dependencies**: + - existing `WorkspaceEntitlementResolver` + - workspace settings surfaces + - review-pack entitlement gates + - audit foundation + - customer-facing read-only and suspension semantics where applicable +- **Scope**: + - commercial lifecycle states such as trial, grace, suspended/read-only, and active paid usage + - clearer enforcement at key product gates + - explicit disabled and read-only messaging distinct from authorization failures + - audited state changes and overrides +- **Non-scope**: + - payment provider integration + - invoicing + - tax or accounting workflows + - public pricing pages +- **Acceptance criteria**: + - central commercial state can be resolved for a workspace + - at least two real behaviors are gated by the centrally resolved lifecycle state rather than by scattered conditionals + - read-only or suspended behavior preserves safe access to needed history or evidence while blocking disallowed actions + - changes and overrides are audited + - tests cover blocked and allowed paths + +### External Support Desk / PSA Handoff +- **Priority**: P2 +- **Why this stays active**: In-app support requests are already repo-real. The remaining gap is external handoff and visible ticket linkage, not support-request creation itself. +- **Roadmap relationship**: R2 support follow-through; later commercial scale. 
+- **Dependencies**: + - support request context flow from Spec 246 + - support diagnostic pack + - audit logging + - tenant and workspace authorization boundaries +- **Scope**: + - outbound adapter seam for one support desk or PSA target + - store and display external ticket reference + - auditable create or link actions + - visible product linkage back from support requests to external references +- **Non-scope**: + - full bidirectional sync + - SLA engine + - generic helpdesk product + - AI support automation +- **Acceptance criteria**: + - a support request can create or link an external ticket through one bounded adapter + - resulting ticket reference is stored and visible in the right context + - failures are explicit and auditable + - tenant and workspace scope are enforced and tested + - the slice extends the existing support-request model instead of replacing it + +### P3 — Later Platform Ambitions + +- No active P3 candidate from the current focus set. +- `Private AI Execution & Policy Foundation` is already promoted as Spec 248 and should no longer remain in the open candidate queue. +- Broader AI-assisted customer operations can return later as a follow-up only after Spec 248 and the current customer-facing release gaps are materially closed. + +## Deferred / Existing Drafts Outside the Current Queue + +These items are still useful, but they are not the next best open specs from the current repo state. + +- `Policy Lifecycle / Ghost Policies`: still a valid gap, but not ahead of Customer Review Workspace or Cross-Tenant Compare. +- `Workspace-level PII override for review packs`: bounded deferred follow-up from Spec 109. +- `CSV export for filtered run metadata`: valid system-console follow-up, but not near the top of the queue. +- `Raw error/context drilldowns for system console`: useful operator enhancement, but not ahead of current P0-P2 gaps. 
+- UI polish snippets such as dashboard sparklines, density toggles, louder attention cards, or chooser refinements: keep out of the active spec queue until they become bounded release work. ## Promoted to Spec -> Historical ledger for candidates that are no longer open. Keep them here so prioritization stays clean without losing decision history. +Historical ledger for candidates that are no longer open. Keep them here so prioritization stays clean without losing decision history. -- Queued Execution Reauthorization and Scope Continuity → Spec 149 (`queued-execution-reauthorization`) -- Livewire Context Locking and Trusted-State Reduction → Spec 152 (`livewire-context-locking`) -- Evidence Domain Foundation → Spec 153 (`evidence-domain-foundation`) -- Exception / Risk-Acceptance Workflow for Findings → Spec 154 (`finding-risk-acceptance`) -- Operator Outcome Taxonomy and Cross-Domain State Separation → Spec 156 (`operator-outcome-taxonomy`) -- Operator Reason Code Translation and Humanization Contract → Spec 157 (`reason-code-translation`) -- Governance Artifact Truthful Outcomes & Fidelity Semantics → Spec 158 (`artifact-truth-semantics`) -- Operator Explanation Layer for Degraded / Partial / Suppressed Results → Spec 161 (`operator-explanation-layer`) -- Request-Scoped Derived State and Resolver Memoization → Spec 167 (`derived-state-memoization`) -- Tenant Governance Aggregate Contract → Spec 168 (`tenant-governance-aggregate-contract`) -- Governance Operator Outcome Compression → Spec 214 (`governance-outcome-compression`) -- Finding Ownership Semantics Clarification → Spec 219 (`finding-ownership-semantics`) -- Humanized Diagnostic Summaries for Governance Operations → Spec 220 (`governance-run-summaries`) -- Findings Operator Inbox v1 → Spec 221 (`findings-operator-inbox`) +- Canonical Operation Type Source of Truth -> Spec 239 (`canonical-operation-type-source-of-truth`) +- Self-Service Tenant Onboarding & Connection Readiness -> Spec 240 
(`tenant-onboarding-readiness`) +- Support Diagnostic Pack -> Spec 241 (`support-diagnostic-pack`) +- Operational Controls & Feature Flags -> Spec 242 (`operational-controls`) +- Product Usage & Adoption Telemetry -> Spec 243 (`product-usage-adoption-telemetry`) +- Product Knowledge & Contextual Help -> Spec 244 (`product-knowledge-contextual-help`) +- Customer Health Score -> Spec 245 (`customer-health-score`) +- In-App Support Request with Context -> Spec 246 (`support-request-context`) +- Plans, Entitlements & Billing Readiness -> Spec 247 (`plans-entitlements-billing-readiness`) +- Private AI Execution & Policy Foundation -> Spec 248 (`private-ai-policy-foundation`) +- Queued Execution Reauthorization and Scope Continuity -> Spec 149 (`queued-execution-reauthorization`) +- Livewire Context Locking and Trusted-State Reduction -> Spec 152 (`livewire-context-locking`) +- Evidence Domain Foundation -> Spec 153 (`evidence-domain-foundation`) +- Exception / Risk-Acceptance Workflow for Findings -> Spec 154 (`finding-risk-acceptance`) +- Operator Outcome Taxonomy and Cross-Domain State Separation -> Spec 156 (`operator-outcome-taxonomy`) +- Operator Reason Code Translation and Humanization Contract -> Spec 157 (`reason-code-translation`) +- Governance Artifact Truthful Outcomes & Fidelity Semantics -> Spec 158 (`artifact-truth-semantics`) +- Operator Explanation Layer for Degraded / Partial / Suppressed Results -> Spec 161 (`operator-explanation-layer`) +- Request-Scoped Derived State and Resolver Memoization -> Spec 167 (`derived-state-memoization`) +- Tenant Governance Aggregate Contract -> Spec 168 (`tenant-governance-aggregate-contract`) +- Record Page Header Discipline & Contextual Navigation -> Spec 192 (`record-header-discipline`) +- Monitoring Surface Action Hierarchy & Workbench Semantics -> Spec 193 (`monitoring-action-hierarchy`) +- Governance Friction & Operator Vocabulary Hardening -> Spec 194 (`governance-friction-hardening`) +- Governance Operator 
Outcome Compression -> Spec 214 (`governance-outcome-compression`) +- Provider-Backed Action Preflight and Dispatch Gate Unification -> Spec 216 (`provider-dispatch-gate`) +- Finding Ownership Semantics Clarification -> Spec 219 (`finding-ownership-semantics`) +- Humanized Diagnostic Summaries for Governance Operations -> Spec 220 (`governance-run-summaries`) +- Findings Operator Inbox v1 -> Spec 221 (`findings-operator-inbox`) - Findings Intake & Team Queue v1 -> Spec 222 (`findings-intake-team-queue`) -- Findings Notifications & Escalation v1 → Spec 224 (`findings-notifications-escalation`) -- Assignment Hygiene & Stale Work Detection → Spec 225 (`assignment-hygiene`) -- Findings Notification Presentation Convergence → Spec 230 (`findings-notification-convergence`) -- Finding Outcome Taxonomy & Verification Semantics → Spec 231 (`finding-outcome-taxonomy`) -- Operation Run Link Contract Enforcement → Spec 232 (`operation-run-link-contract`) -- Operation Run Active-State Visibility & Stale Escalation → Spec 233 (`stale-run-visibility`) -- Provider Boundary Hardening → Spec 237 (`provider-boundary-hardening`) -- Support Diagnostic Pack → Spec 241 (`support-diagnostic-pack`) -- Provider-Backed Action Preflight and Dispatch Gate Unification → Spec 216 (`provider-dispatch-gate`) -- Record Page Header Discipline & Contextual Navigation → Spec 192 (`record-header-discipline`) -- Monitoring Surface Action Hierarchy & Workbench Semantics → Spec 193 (`monitoring-action-hierarchy`) -- Governance Friction & Operator Vocabulary Hardening → Spec 194 (`governance-friction-hardening`) - ---- - -## Qualified - -> Problem + Nutzen klar. Scope noch offen. Braucht noch Priorisierung. - - -> **Current strategic priority — Governance Platform Foundation** -> -> The next promoted specs should stabilize TenantPilot as a Governance-of-Record platform before expanding into additional Microsoft domains, compliance overlays, or multi-cloud execution. 
-> -> Recommended next sequence: -> -> 1. **Self-Service Tenant Onboarding & Connection Readiness** (already promoted as Spec 240 on its feature branch) -> 2. **Product Usage & Adoption Telemetry** -> 3. **Operational Controls & Feature Flags** -> 4. **Private AI Execution & Policy Foundation** -> 5. **AI Usage Budgeting, Context & Result Governance** -> 6. **Decision-Based Governance Inbox v1** -> 7. **Decision Pack Contract & Approval Workflow** -> 8. **Provider Identity & Target Scope Neutrality** -> 9. **Canonical Operation Type Source of Truth** -> 10. **Platform Vocabulary Boundary Enforcement for Governed Subject Keys** -> 11. **Customer Review Workspace v1** -> -> Rationale: the repo already has strong baseline, findings, evidence, review, operation-run, and operator foundations. With Canonical Control Catalog Foundation and Provider Boundary Hardening now specced, the immediate remaining product risk is not only semantic drift in provider identity, operation-type dual semantics, and governed-subject key leakage; it is also founder-dependent onboarding/support, lack of product-side observability/control, ungoverned AI introduction risk, and customer-facing search-and-troubleshoot workflows. With self-service onboarding already promoted as Spec 240 and Support Diagnostic Pack now promoted as Spec 241, adoption telemetry, operational controls, private AI execution governance, and a decision-based governance inbox become the next open priorities so TenantPilot becomes repeatably operable, measurable, AI-ready, and safe to run with low headcount while customers receive decision-ready work instead of raw troubleshooting surfaces. - - -> Product Scalability & Self-Service Foundation cluster: these candidates come from the roadmap update on 2026-04-25. The goal is to keep TenantPilot operable as a low-headcount, AI-assisted SaaS by productizing recurring onboarding, support, diagnostics, entitlement, help, demo, and customer-operations work. 
This cluster should not become a generic backoffice automation program. Only product-impacting or repeatable engineering work belongs here; pure company-ops work stays in the roadmap / operating system track. - -### Self-Service Tenant Onboarding & Connection Readiness -- **Type**: product scalability / onboarding foundation -- **Source**: roadmap update 2026-04-25 — Product Scalability & Self-Service Foundation -- **Problem**: Tenant onboarding, Microsoft consent readiness, provider connection validation, permission diagnostics, and setup guidance can become founder-led manual work if they are not productized. A customer or MSP should not need a live walkthrough for every tenant connection just to understand what is missing, what is healthy, and what the next action is. -- **Why it matters**: TenantPilot cannot scale as a solo-founder or low-headcount SaaS if every pilot, trial, or customer tenant requires manual onboarding support. The product already has ProviderConnection, health, onboarding, operation-run, and permission-related foundations; these need to converge into an operator-facing readiness workflow. 
-- **Proposed direction**: - - provide guided tenant setup with clear setup steps and completion state - - expose consent readiness and permission diagnostics in product language - - show provider connection health and actionable next steps before deeper governance workflows are used - - distinguish missing consent, missing permissions, unreachable provider, expired credentials, blocked health checks, and not-yet-run checks - - persist or derive an onboarding/readiness status that can be reused by dashboards, support diagnostics, trial flows, and customer review surfaces - - keep provider-specific Microsoft details contextual while preserving the provider-boundary language from the platform hardening lane -- **Scope boundaries**: - - **In scope**: guided onboarding status, readiness checklist, provider connection health summary, permission diagnostics, setup progress, next-action guidance, and tests for readiness semantics - - **Out of scope**: full CRM/trial pipeline, billing activation, broad provider marketplace, custom customer-specific onboarding flows, or autonomous tenant remediation -- **Acceptance points**: - - a new workspace/tenant operator can see which onboarding steps are complete and which are blocking - - missing or insufficient Microsoft permissions produce explicit operator guidance rather than generic failure copy - - provider connection health is visible without requiring raw run/context inspection - - readiness state can be consumed by support diagnostic packs and trial/demo flows - - server-side policies still enforce who can view or manage onboarding state -- **Risks / open questions**: - - Avoid creating a second onboarding model if existing onboarding/session/provider entities can be composed - - Readiness must not become a false-green signal; failed or stale health checks need explicit freshness semantics - - Provider-specific consent details should not leak into generic platform vocabulary as permanent truth -- **Dependencies**: 
ProviderConnection, managed tenant onboarding workflow, provider health checks, permission/consent diagnostics, OperationRun links, Provider Boundary Hardening -- **Related specs / candidates**: Provider Identity & Target Scope Neutrality, Provider Surface Vocabulary & Descriptor Cleanup, Support Diagnostic Pack, Product Knowledge & Contextual Help -- **Strategic sequencing**: First item in this product-scalability cluster because it directly reduces manual onboarding and supports trials, demos, support, and customer transparency. -- **Priority**: high - -### In-App Support Request with Context -- **Type**: product scalability / support workflow -- **Source**: roadmap update 2026-04-25 — Product Scalability & Self-Service Foundation -- **Problem**: A generic support email or external ticket link loses the most important product context: workspace, tenant, operation, finding, report, evidence, severity, and current diagnostic state. This creates avoidable back-and-forth and makes support impossible to automate cleanly. -- **Why it matters**: If TenantPilot is meant to scale with minimal staff, support requests must be structured at the moment they are created. The product should attach the right context automatically instead of relying on customers to describe technical state manually. 
-- **Proposed direction**: - - add context-aware support request entry points on selected high-value surfaces - - attach workspace, tenant, OperationRun, Finding, ProviderConnection, StoredReport, or review-pack references automatically - - attach or reference a Support Diagnostic Pack when available - - capture severity, customer-facing message, optional reproduction notes, and contact metadata - - create an internal support reference or external ticket reference when configured - - emit an audit event for support request creation where appropriate -- **Scope boundaries**: - - **In scope**: in-product support request model or outbound adapter seam, context attachment, diagnostic-pack reference, ticket reference field, audit event, capability checks, and first adoption on one or two critical surfaces - - **Out of scope**: full helpdesk product, two-way ticket sync, SLA engine, AI support bot, CRM pipeline, or broad customer success automation -- **Acceptance points**: - - support request created from a run/finding/tenant surface carries the relevant context without manual copy-paste - - request creation respects workspace/tenant authorization - - diagnostic pack attachment/reference is capability- and redaction-aware - - support request status or ticket reference can be shown back in the product where useful - - tests prove unrelated tenant context cannot be attached accidentally -- **Risks / open questions**: - - Decide whether the first version stores support requests in TenantPilot, sends them outbound only, or supports both via an adapter seam - - Avoid coupling the product to one helpdesk provider too early - - Ensure support request creation does not expose internal-only diagnostic content to customer members -- **Dependencies**: Support Diagnostic Pack, audit log foundation, notification/ticket-ref patterns, Customer Review Workspace v1 if customer users can create requests -- **Related specs / candidates**: Support Diagnostic Pack, PSA/Ticketing v1, 
Customer Review Workspace v1, AI-Assisted Customer Operations -- **Strategic sequencing**: Third item in this cluster; should follow or minimally depend on the diagnostic-pack contract. -- **Priority**: high - -### Product Knowledge & Contextual Help -- **Type**: product scalability / operator guidance / support reduction -- **Source**: roadmap update 2026-04-25 — Product Scalability & Self-Service Foundation -- **Problem**: Statuses, findings, drift states, permission requirements, risk acceptance, evidence gaps, and operation outcomes can require founder explanation if the product does not provide contextual help. Existing glossary and reason-code work creates the vocabulary foundation, but not a structured product-help layer. -- **Why it matters**: Every unclear state becomes a support ticket, onboarding call, or sales objection. A product knowledge layer also becomes the maintained source for public docs, support macros, AI support summaries, and customer-facing explanations. -- **Proposed direction**: - - introduce a contextual help registry keyed by feature, surface, status, reason code, and action where appropriate - - reuse canonical glossary and reason-code translation semantics instead of inventing local help copy - - provide operator-facing explanations for common states such as drift, limited confidence, risk accepted, evidence gap, blocked run, stale run, missing permission, and connection unhealthy - - support docs links, troubleshooting hints, and safe next actions - - keep machine/audit/export semantics invariant and avoid localizing core identifiers - - make the registry usable by later AI-assisted customer operations as a trusted knowledge source -- **Scope boundaries**: - - **In scope**: help registry, first high-value surface integrations, glossary/reason-code linkage, docs-link structure, troubleshooting snippets, and tests for missing/invalid keys where useful - - **Out of scope**: full public documentation site, AI chatbot, complete 
localization overhaul, legal/compliance claims, or rewriting every help text in the product -- **Acceptance points**: - - at least two critical surfaces consume contextual help from the registry instead of local hardcoded explanations - - help copy references canonical terminology for findings, baseline, drift, risk acceptance, evidence, and operation outcomes - - missing help keys fail predictably or degrade gracefully - - the registry can expose a machine-readable source set for future AI support without including secrets or customer data - - help content is reviewable and versionable as product knowledge, not scattered UI prose -- **Risks / open questions**: - - Too much help text can make enterprise UI noisy; progressive disclosure is required - - Help registry should not become a second source of truth for status semantics - - Localization and terminology governance need a clear boundary with Platform Localization v1 -- **Dependencies**: Platform Vocabulary Glossary, Operator Reason Code Translation, Governance Friction & Operator Vocabulary Hardening, Platform Localization v1 direction -- **Related specs / candidates**: AI-Assisted Customer Operations, Self-Service Tenant Onboarding & Connection Readiness, Support Diagnostic Pack, Baseline Compare Scope Guardrails & Ambiguity Guidance -- **Strategic sequencing**: Can run in parallel with support diagnostics, but should land before AI-generated customer explanations. -- **Priority**: high - -### Plans, Entitlements & Billing Readiness -- **Type**: product architecture / commercial scalability foundation -- **Source**: roadmap update 2026-04-25 — Product Scalability & Self-Service Foundation -- **Problem**: TenantPilot needs a product-level way to express plan limits, feature gates, trial/grace status, workspace/tenant/user/report/export/retention limits, and billing state before real customer growth. 
Without an entitlement foundation, pricing and packaging decisions later require invasive retrofits across RBAC, exports, retention, reports, tenant counts, and customer views. -- **Why it matters**: A SaaS cannot scale cleanly if commercial packaging is implemented as scattered conditionals or manual founder decisions. Entitlements are not just billing; they are product behavior, support behavior, trial behavior, and customer expectation management. -- **Proposed direction**: - - introduce plan and entitlement primitives at workspace/account scope - - model feature gates and quantitative limits separately - - support trial, active, grace, suspended/read-only, and canceled billing states where appropriate - - define enforcement points for tenants, users, exports, retention, reports, evidence packs, and advanced governance features - - audit plan changes and entitlement overrides - - keep external billing-provider integration behind an adapter seam and out of the initial foundation if needed -- **Scope boundaries**: - - **In scope**: plan model, entitlement model, feature-gate checks, limit checks, trial/grace/billing status, audit events, first enforcement points, and tests - - **Out of scope**: full Stripe integration, payment collection UI, invoice rendering, accounting integration, tax automation, custom enterprise contract engine, or public pricing page -- **Acceptance points**: - - workspace/account has a resolved plan and entitlement set - - feature gates and numeric limits can be checked through a central service instead of scattered conditionals - - trial and grace states influence product access in a predictable and tested way - - plan changes and overrides are audited - - at least one real product limit is enforced through the entitlement service -- **Risks / open questions**: - - Premature pricing complexity could slow product discovery; start with simple plans and explicit overrides - - Enterprise contracts may require manual overrides, but those 
overrides must remain auditable - - Read-only/suspended behavior must be carefully designed so customers do not lose access to evidence or audit history unexpectedly -- **Dependencies**: workspace/account model, RBAC/capabilities, audit log foundation, retention/export/report features, Customer Review Workspace direction -- **Related specs / candidates**: Customer Review Workspace v1, Review Pack export, Evidence domain, Security Trust Pack Light -- **Strategic sequencing**: High priority before broader customer onboarding and paid trials, but can be implemented as a foundation slice without full billing integration. -- **Priority**: high - -### Demo & Trial Readiness -- **Type**: product scalability / sales enablement foundation -- **Source**: roadmap update 2026-04-25 — Product Scalability & Self-Service Foundation and Solo-Founder SaaS Automation track -- **Problem**: Demos and trials become manual work if the product cannot provide repeatable demo data, resettable demo workspaces, realistic sample baselines/findings/reports, and a clear trial provisioning path. Without this, sales conversations depend on live manual setup or fragile local data. -- **Why it matters**: A solo-founder SaaS needs demos and trials to be repeatable. TenantPilot's value is easier to understand when buyers can see baselines, drift, findings, risk acceptance, evidence packs, and reviews without waiting for a real tenant to produce all states naturally. 
-- **Proposed direction**: - - create a demo workspace/sample data mode with seeded tenants, baselines, findings, review packs, evidence, and operation history - - provide a reset flow or safe reseed process for demo environments - - define demo stories for MSP buyers and enterprise IT buyers - - create a trial provisioning checklist that ties into onboarding/readiness and plan/entitlement state - - keep demo data clearly marked so it never mixes with production customer truth -- **Scope boundaries**: - - **In scope**: demo seed data, demo reset support, sample governance artifacts, trial readiness checklist, demo-mode indicators, and tests for data separation - - **Out of scope**: CRM pipeline, public signup flow, payment collection, marketing website, fully automated self-serve provisioning, or fake provider execution pretending to be real tenant truth -- **Acceptance points**: - - demo environment can be prepared repeatably without manual database editing - - sample data covers at least baseline, drift/finding, risk acceptance, evidence/report, and operation-run stories - - demo/sample data is visibly marked and isolated from real customer data - - trial readiness can reuse onboarding/readiness and entitlement foundations - - reset/reseed process is safe and documented -- **Risks / open questions**: - - Fake data must not undermine trust by looking like real Microsoft tenant evidence - - Demo mode should not introduce shortcuts into production code paths without explicit safeguards - - Trial provisioning may later become its own larger spec once real acquisition flow is known -- **Dependencies**: StoredReports / EvidenceItems, Findings workflow, Baseline governance, Self-Service Tenant Onboarding, Plans / Entitlements -- **Related specs / candidates**: Customer Review Workspace v1, Tenant Review Run, Product Knowledge & Contextual Help -- **Strategic sequencing**: Medium-high. It becomes more urgent once first external demos and pilots become frequent. 
-- **Priority**: medium-high - -### Security Trust Pack Light -- **Type**: company-ops / product trust enablement -- **Source**: roadmap update 2026-04-25 — Solo-Founder SaaS Automation & Operating Readiness -- **Problem**: Enterprise buyers will repeatedly ask how TenantPilot handles hosting, data categories, Microsoft permissions, least privilege, RBAC, retention, backups, audit logs, subprocessors, and what is not stored. If these answers remain ad-hoc, sales and onboarding become founder-dependent and inconsistent. -- **Why it matters**: TenantPilot deals with tenant governance artifacts and Microsoft configuration data. Trust documentation is not just legal paperwork; it is a sales and support scalability asset. It also forces the product to stay honest about what it stores, processes, and exposes. -- **Proposed direction**: - - create a lightweight security trust pack aligned to the actual product architecture and data model - - document hosting, data categories, permission model, least-privilege stance, RBAC, audit logging, backup/retention, subprocessors, and non-stored data - - map claims to product features and architecture, avoiding unsupported compliance or certification claims - - keep the pack versioned and updateable as product capabilities change - - identify any product gaps that block truthful trust claims and feed those back into roadmap/spec candidates -- **Scope boundaries**: - - **In scope**: structured trust-pack content, product-data mapping, permission explanation, security overview, gap list, and maintenance ownership - - **Out of scope**: legal finalization, ISO/SOC2 certification, public trust center portal, penetration test execution, or broad security program implementation -- **Acceptance points**: - - trust pack answers the standard first-pass customer security questions consistently - - Microsoft permission explanations match actual provider scopes and product behavior - - data categories and retention claims map to real 
tables/artifacts or documented operating processes - - unsupported claims are explicitly avoided - - product gaps discovered during trust-pack creation are recorded as roadmap/spec candidates when engineering work is required -- **Risks / open questions**: - - This is partly non-code work; only engineering gaps should become implementation specs - - Legal review may change wording, but not the underlying product truth - - Over-claiming compliance posture would damage trust -- **Dependencies**: Provider permission model, RBAC model, audit logs, retention behavior, backup behavior, deployment/hosting decisions, AVV/DPA/TOM work -- **Related specs / candidates**: Plans / Entitlements & Billing Readiness, System Panel Least-Privilege Capability Model, Provider Boundary Hardening, Evidence domain -- **Strategic sequencing**: Should run before serious enterprise sales conversations and before broad customer onboarding. -- **Priority**: medium-high - -### AI-Assisted Customer Operations -- **Type**: AI-assisted operations / human-in-the-loop product support -- **Source**: roadmap update 2026-04-25 — Mid-term AI-Assisted Customer Operations -- **Problem**: Customer reviews, support triage, finding explanations, diagnostic summaries, release communication, and report summaries can consume large amounts of founder time. However, unbounded AI automation would be risky in a governance product, especially for tenant-changing actions, customer commitments, legal statements, or risk decisions. -- **Why it matters**: TenantPilot can use AI to stay lean, but the product must preserve auditability, human approval, and clear responsibility. The right early AI layer prepares and summarizes work; it does not autonomously change customer tenants or make commitments. 
-- **Proposed direction**: - - use structured product truth from diagnostic packs, findings, stored reports, evidence, operation runs, and the product knowledge registry as AI input - - generate draft support summaries, finding explanations, tenant review summaries, diagnostic summaries, release-note drafts, and response drafts - - require human approval before customer-facing messages, legal statements, risk acceptance, or tenant-changing actions - - log AI-generated drafts and human approval where product-relevant - - define safety boundaries for what AI can read, suggest, and never execute -- **Scope boundaries**: - - **In scope**: AI draft/summarization workflows for support, findings, reviews, diagnostics, release notes, and customer explanations; approval gates; audit references; source attribution to product records - - **Out of scope**: autonomous tenant remediation, automatic risk acceptance, automatic legal commitments, auto-sending customer communications without review, general-purpose chatbot, or broad AI platform redesign -- **Acceptance points**: - - generated summaries cite or reference underlying product records rather than inventing unsupported conclusions - - customer-facing drafts require human approval before sending or publishing - - tenant-changing actions are not executed by AI in this spec - - AI access is scoped and redacted through existing permission/diagnostic-pack boundaries - - operators can distinguish draft AI text from approved product/customer communication -- **Risks / open questions**: - - AI hallucination risk must be mitigated through structured inputs and source references - - Privacy and data-processing boundaries need explicit review before customer data is sent to any model provider - - The first version should probably be internal-only until diagnostics, knowledge, and support-request foundations are stable -- **Dependencies**: Support Diagnostic Pack, Product Knowledge & Contextual Help, In-App Support Request with 
Context, StoredReports / EvidenceItems, Findings workflow, release communication process, security/privacy review -- **Related specs / candidates**: Human-in-the-Loop Autonomous Governance, Operator Explanation Layer, Humanized Diagnostic Summaries for Governance Operations, Security Trust Pack Light -- **Strategic sequencing**: Mid-term. Do not promote before diagnostic, knowledge, and support-context foundations exist. -- **Priority**: medium - -> Recommended sequence for this cluster: -> 1. **Self-Service Tenant Onboarding & Connection Readiness** -> 2. **Support Diagnostic Pack** -> 3. **Product Knowledge & Contextual Help** -> 4. **In-App Support Request with Context** -> 5. **Plans, Entitlements & Billing Readiness** -> 6. **Demo & Trial Readiness** -> 7. **Security Trust Pack Light** -> 8. **AI-Assisted Customer Operations** -> - - -> Additional Solo-Founder Scale Guardrails cluster: these candidates come from the roadmap update on 2026-04-25. The goal is to make the highest-impact solo-founder operating risks measurable, controllable, and product-backed without turning TenantPilot into a CRM, helpdesk, analytics suite, or generic backoffice platform. Pure company-ops artifacts stay in the roadmap; the candidates below are only the product-impacting slices. - -### Product Usage & Adoption Telemetry -- **Type**: product observability / adoption analytics foundation -- **Source**: roadmap update 2026-04-25 — Additional Solo-Founder Scale Guardrails and Product Usage, Customer Health & Operational Controls -- **Problem**: TenantPilot currently risks relying on founder intuition, support tickets, or manual database/log inspection to understand onboarding drop-off, feature adoption, trial health, failed flows, report/export usage, and support-triggering surfaces. Without privacy-aware product telemetry, it is hard to know where customers get stuck or which product areas actually drive value. 
-- **Why it matters**: Low-headcount SaaS requires the product to reveal adoption and friction automatically. Telemetry is also a prerequisite for Customer Health Score, lifecycle communication, trial conversion analysis, and prioritizing product work based on behavior rather than anecdotes. -- **Proposed direction**: - - define a minimal product telemetry event contract for product usage and adoption signals - - capture events such as onboarding step completed/blocked, provider connection checked, baseline capture/compare started, report exported, review pack generated, support request opened, contextual help opened, and trial activation milestones - - keep events workspace-/tenant-aware but privacy-aware and avoid raw provider payloads or customer-sensitive data in telemetry - - model event name, actor, workspace, tenant, feature area, subject reference, timestamp, and safe metadata - - provide aggregate read models for adoption dashboards and customer health scoring - - document telemetry boundaries and opt-out / data-processing considerations where appropriate -- **Scope boundaries**: - - **In scope**: internal product telemetry event model, minimal event capture points, privacy/redaction rules, aggregate usage read model, basic operator visibility, and tests for isolation/redaction - - **Out of scope**: full analytics platform, third-party product analytics integration, marketing attribution, session recording, user tracking beyond product-operation needs, or broad BI dashboards -- **Acceptance points**: - - key onboarding, governance, report/export, and support-intake events can be captured through a central contract - - telemetry metadata never stores raw provider payloads or secrets - - workspace/tenant isolation is enforced for telemetry reads - - aggregate adoption indicators can be queried without scanning arbitrary application logs - - telemetry capture can be disabled or bounded by configuration where needed -- **Risks / open questions**: - - Telemetry 
must not become invasive or create unnecessary privacy exposure - - Too many events too early can create noise; start with high-signal product milestones - - Decide whether telemetry is stored in the primary database initially or written through an adapter seam for future external analytics -- **Dependencies**: Self-Service Tenant Onboarding & Connection Readiness, OperationRun truth, ProviderConnection health, StoredReports / EvidenceItems, Support Diagnostic Pack, audit/data-processing review -- **Related specs / candidates**: Customer Health Score, Customer Lifecycle Communication, Plans / Entitlements & Billing Readiness, Security Trust Pack Light -- **Strategic sequencing**: First item in this guardrails cluster because health score and lifecycle communication need reliable usage signals. -- **Priority**: high - -### Customer Health Score -- **Type**: product observability / customer success signal -- **Source**: roadmap update 2026-04-25 — Additional Solo-Founder Scale Guardrails and Product Usage, Customer Health & Operational Controls -- **Problem**: Churn, inactive customers, unhealthy provider connections, stale baseline compares, unresolved high-risk findings, overdue SLAs, failed runs, expiring risk acceptances, and missing review packs may be noticed too late if the founder has to manually inspect each workspace. -- **Why it matters**: A solo-founder or low-headcount SaaS needs a simple, trustworthy signal for which customers or workspaces need attention. This is especially important for MSP-oriented governance, where portfolio risk can grow silently across many tenants. 
-- **Proposed direction**: - - derive workspace/customer health indicators from product truth instead of manual notes - - combine signals such as onboarding status, last login/activity, provider health, last successful sync, baseline compare freshness, open high findings, overdue findings, expiring risk acceptances, failed/stale OperationRuns, support-request volume, review-pack readiness, and trial/billing status where available - - separate health dimensions rather than hiding everything in one opaque score - - provide a simple health summary for founder/operator views and later portfolio surfaces - - keep customer-health calculations explainable and link back to underlying records -- **Scope boundaries**: - - **In scope**: health signal registry, derived health dimensions, explainable health summary, workspace/customer-level health read model, first operator view or dashboard card, and tests - - **Out of scope**: full customer-success CRM, automated churn prediction, external CRM sync, billing collection, sales pipeline scoring, or AI-generated account management actions -- **Acceptance points**: - - a workspace/customer health summary can be generated from product data - - each health warning links to underlying evidence such as provider health, findings, operations, review packs, or trial state - - stale/unknown data is represented explicitly and does not appear healthy by default - - customer health is scoped by workspace and respects authorization boundaries - - at least one dashboard or operator surface can list unhealthy or attention-needed workspaces -- **Risks / open questions**: - - A single numeric score can hide important nuance; dimensions should remain visible - - Missing data must not be treated as good data - - The first version should avoid predictive claims and stay evidence-based -- **Dependencies**: Product Usage & Adoption Telemetry, ProviderConnection health, OperationRun truth, Findings workflow, Risk Acceptance/Exceptions, StoredReports / 
EvidenceItems, Plans / Entitlements & Billing Readiness -- **Related specs / candidates**: MSP Portfolio Dashboard, Product Usage & Adoption Telemetry, Customer Lifecycle Communication, Support Diagnostic Pack -- **Strategic sequencing**: Second item after telemetry. It should follow reliable signal capture and feed portfolio/customer-success views later. -- **Priority**: high - -### Operational Controls & Feature Flags -- **Type**: operational safety / platform control plane -- **Source**: roadmap update 2026-04-25 — Additional Solo-Founder Scale Guardrails and Product Usage, Customer Health & Operational Controls -- **Problem**: Incidents or risky product areas may otherwise require code changes, deployments, manual database edits, or ad-hoc communication to pause a feature, block provider-backed actions, disable exports, pause AI functions, stop trials, or place a workspace into a temporary safe state. -- **Why it matters**: Solo-founder operations need safe operator controls. TenantPilot contains high-trust workflows such as restore, provider-backed actions, exports, AI-assisted summaries, and evidence/report generation. These need controlled kill switches and scoped feature flags before scale increases incident pressure. 
-- **Proposed direction**: - - introduce a minimal operational controls registry with global, workspace, and possibly tenant scope - - support kill switches / flags for risky features such as restore execution, provider-backed writes, exports, AI functions, trial provisioning, report generation, and maintenance/read-only modes - - expose operator-safe controls in the system/platform plane with strong capabilities and audit logging - - define enforcement points through services/gates rather than UI-only hiding - - allow time-bound controls with reason and owner where useful - - provide clear customer/operator messaging when a feature is disabled or paused -- **Scope boundaries**: - - **In scope**: feature flag / operational control model, scoped evaluation service, audited changes, first enforcement points, platform/system UI for controls, and tests - - **Out of scope**: full experimentation platform, A/B testing, remote-config product, external feature flag vendor integration, broad entitlement replacement, or customer-managed feature flags -- **Acceptance points**: - - at least one risky feature can be disabled globally and per workspace through a central control - - enforcement happens server-side at the action/service boundary - - changes are audited with actor, scope, reason, and timestamp - - disabled-state messaging is explicit and not confused with authorization failure - - tests prove UI hiding is not the only enforcement mechanism -- **Risks / open questions**: - - Operational controls must not bypass entitlement/RBAC semantics or become an untracked superpower - - Too many flags can create configuration drift; start with high-risk controls only - - Read-only modes need careful definition so evidence/audit access remains available -- **Dependencies**: System Panel Least-Privilege Capability Model, Provider-Backed Action Preflight and Dispatch Gate Unification, restore/provider action services, export/report services, audit log foundation, Plans / 
Entitlements & Billing Readiness -- **Related specs / candidates**: Provider-Backed Action Preflight and Dispatch Gate Unification, Plans / Entitlements & Billing Readiness, System Panel Least-Privilege Capability Model, Business Continuity / Founder Backup Plan -- **Strategic sequencing**: High priority once external customers or pilots depend on production. Can be promoted before telemetry if incident-control risk becomes immediate. -- **Priority**: high - -### Customer Lifecycle Communication -- **Type**: customer operations / notification automation -- **Source**: roadmap update 2026-04-25 — Additional Solo-Founder Scale Guardrails -- **Problem**: Welcome messages, onboarding reminders, trial expiry, provider health warnings, review-pack readiness, risk-expiry reminders, release updates, incidents, renewals, payment issues, and churn-feedback requests can become manual founder communication if they are not structured. -- **Why it matters**: Repeatable SaaS delivery depends on consistent customer communication. Some messages are product-triggered and should be model-backed; others belong to company operations. TenantPilot needs a clear product boundary so important lifecycle events can trigger communication without creating a generic marketing automation system. 
-- **Proposed direction**: - - define product-triggerable lifecycle communication events for high-value operational moments - - start with onboarding incomplete, provider unhealthy, review pack ready, risk acceptance expiring, trial expiring, incident/update notice, and release note availability where product-backed - - support templates, recipient resolution, locale, delivery channel abstraction, and audit/reference links where appropriate - - distinguish internal operator reminders from customer-facing communication - - keep marketing campaigns and CRM nurture sequences outside the first product slice -- **Scope boundaries**: - - **In scope**: product lifecycle event contract, template registry, recipient resolution, first delivery adapter or outbound hook, audit/reference behavior, and tests for tenant/workspace isolation - - **Out of scope**: full marketing automation, newsletter system, CRM pipeline, payment collection, two-way communication inbox, or generic campaign builder -- **Acceptance points**: - - at least two product-backed lifecycle events can generate structured communication tasks or outbound messages - - recipient selection respects workspace/tenant/customer membership and locale where applicable - - customer-facing messages reference the relevant product object such as tenant, run, finding, review pack, or risk acceptance - - communications are auditable or at least traceable to a product event - - customer-facing communication can be disabled or held for manual approval where needed -- **Risks / open questions**: - - Over-automated customer communication can become noisy or risky during incidents - - Billing/payment messages may depend on external billing systems and should not be over-modeled too early - - Legal/customer-facing statements may need approval rules before automatic sending -- **Dependencies**: Notification Targets / Alerts v1, Product Knowledge & Contextual Help, Plans / Entitlements & Billing Readiness, Customer Health Score, 
Risk Acceptance/Exceptions, review-pack generation -- **Related specs / candidates**: Alerts v1, AI-Assisted Customer Operations, Product Knowledge & Contextual Help, Release & Customer Communication Automation -- **Strategic sequencing**: Medium-high. Should follow the first telemetry/health foundations and reuse existing alert/notification infrastructure where possible. -- **Priority**: medium-high - -### Product Intake & No-Customization Governance -- **Type**: product operations / roadmap governance -- **Source**: roadmap update 2026-04-25 — Additional Solo-Founder Scale Guardrails -- **Problem**: Customer-specific requests can silently turn TenantPilot into consulting work if they are implemented as one-off behavior, hidden configuration, or customer-specific branches. Without a product intake and no-customization governance path, each sales/support conversation can create long-term maintenance obligations. -- **Why it matters**: A low-headcount SaaS must protect the product boundary. Feature requests should become product input, not direct custom work by default. This is especially important for MSP and enterprise customers, where individual requests can sound urgent but may not fit the platform direction. 
-- **Proposed direction**: - - define a lightweight feature/request intake model or documented operating process - - classify requests as no, later, candidate, planned, customer-specific exception, or already covered - - capture customer/segment, problem, workaround, business value, roadmap fit, and maintenance risk - - link accepted requests to spec candidates or promoted specs where appropriate - - require explicit approval and audit/record for any customer-specific exception - - document the no-custom-work policy in product principles or company operating guidance -- **Scope boundaries**: - - **In scope**: product request classification, link to roadmap/spec candidates, exception semantics, optional internal admin surface, and no-customization policy wording - - **Out of scope**: full product management suite, voting portal, public roadmap, customer community, consulting project management, or CRM replacement -- **Acceptance points**: - - customer requests can be classified consistently without becoming immediate implementation tasks - - customer-specific exceptions are explicit, rare, and reviewable - - accepted product requests can link to spec candidates or roadmap themes - - no-custom-work policy is visible in product/company guidance - - the process can be operated manually at first but is structured enough to delegate later -- **Risks / open questions**: - - This may be mostly process at first; only build product surfaces if manual tracking becomes a bottleneck - - Too much process too early could slow learning from pilots - - Exceptions need a business owner and expiry/review path so they do not become permanent hidden product variants -- **Dependencies**: roadmap/spec-candidate process, principles/constitution, customer support/intake process, Plans / Entitlements if exceptions affect limits or features -- **Related specs / candidates**: Plans / Entitlements & Billing Readiness, Customer Lifecycle Communication, Security Trust Pack Light -- **Strategic 
sequencing**: Medium. Add as a principle/process early; promote to product spec only if in-product request/exception tracking becomes necessary. -- **Priority**: medium - -### Data Retention, Export & Deletion Self-Service -- **Type**: data lifecycle / customer trust / operational scalability -- **Source**: roadmap update 2026-04-25 — Additional Solo-Founder Scale Guardrails -- **Problem**: Customer data export, archive, deletion request handling, trial data expiry, workspace deactivation, and evidence/report retention visibility can become manual support/legal work if the product does not provide clear lifecycle controls and customer-safe visibility. -- **Why it matters**: TenantPilot stores governance artifacts, evidence, reports, findings, and operation history. Customers will ask what is retained, what can be exported, what is deleted, and what remains for audit purposes. Self-service or operator-guided lifecycle flows reduce manual work and improve trust. -- **Proposed direction**: - - define a customer/workspace data lifecycle contract covering active, suspended, archived, trial-expired, deletion-requested, and deleted/retained states where appropriate - - expose retention visibility for reports, evidence, operation runs, findings, exceptions, and backups where already modeled - - provide customer/operator export request flows and deletion/archive request flows with audit events - - make trial data expiry explicit and configurable where tied to plan/entitlement state - - distinguish audit-retained records from deleted customer content and communicate that boundary clearly -- **Scope boundaries**: - - **In scope**: lifecycle state model or request model where needed, export/deletion request flow, retention visibility, audit events, trial expiry handling, and tests for authorization/isolation - - **Out of scope**: full GDPR portal, legal policy drafting, automated physical deletion of every historical artifact without retention analysis, external DSR tooling, 
or broad storage-engine redesign -- **Acceptance points**: - - customers/operators can see or request export/deletion/archive actions through a defined flow - - retention behavior for key artifact families is visible or documented in-product where appropriate - - trial-expired data handling is explicit and not ad-hoc - - deletion/archive requests are audited and authorized - - audit-retained metadata is clearly separated from customer content deletion semantics -- **Risks / open questions**: - - Legal retention, auditability, and deletion rights must be balanced carefully - - Evidence/report retention may intentionally outlive operation runs; this must be visible and not surprising - - Automation should start conservative until legal review confirms deletion/retention expectations -- **Dependencies**: StoredReports retention, EvidenceItems retention, OperationRun retention, backup retention, Plans / Entitlements & Billing Readiness, Security Trust Pack Light, audit log foundation -- **Related specs / candidates**: StoredReports Model, EvidenceItem Model, Export v1, Security Trust Pack Light, Customer Review Workspace v1 -- **Strategic sequencing**: Medium-high. Should be shaped before broad paid trials and enterprise security reviews, but can land after entitlement and trust-pack foundations. -- **Priority**: medium-high - -> Recommended sequence for this cluster: -> 1. **Product Usage & Adoption Telemetry** -> 2. **Customer Health Score** -> 3. **Operational Controls & Feature Flags** -> 4. **Data Retention, Export & Deletion Self-Service** -> 5. **Customer Lifecycle Communication** -> 6. **Product Intake & No-Customization Governance** -> -> Why this order: first capture reliable signals, then derive health and risk, then add operator control for incidents and risky features, then close customer trust/lifecycle gaps, then automate customer communication, and finally formalize request intake/no-customization once pilot feedback volume increases. 
- - - - - - -> Private AI Execution & Usage Governance Foundation cluster: these candidates come from the roadmap update on 2026-04-25. The goal is to make AI a governed platform capability, not a set of direct feature-level public API calls. TenantPilot should be local/private-first for tenant/customer data, provider-adapter-based, budgeted, cacheable, auditable, and human-approved where risk matters. External public AI providers must be disabled by default and only usable through explicit workspace policy, data classification, redaction, budget limits, and approval gates. - -### Private AI Execution & Policy Foundation -- **Type**: AI platform foundation / privacy boundary / provider abstraction -- **Source**: roadmap update 2026-04-25 — Private AI Execution & Usage Governance Foundation -- **Problem**: Future AI-assisted summaries, diagnostics, review packs, decision packs, support responses, and customer communications will be risky if individual features call model providers directly. Direct calls would make it hard to support local/private models, enforce data boundaries, audit usage, control costs, or answer German enterprise customers' privacy and compliance questions. -- **Why it matters**: TenantPilot sells governance, compliance readiness, evidence, and tenant trust. AI must therefore be governed like a platform capability: use-case registered, data-classified, policy-gated, budget-gated, provider-adapted, audited, and human-approved where needed. The architecture must support local/private/customer-hosted/EU-private models without later rewrites. 
-- **Proposed direction**: - - introduce an AI Use Case Registry for approved AI use cases such as finding summaries, operation summaries, support diagnostic summaries, review-pack executive summaries, decision-pack recommendations, and release/customer communication drafts - - introduce an AI Provider Registry with provider classes such as disabled, local/private, customer-hosted OpenAI-compatible, TenantPilot-private, EU-private, and external public provider adapters - - introduce Workspace AI Policy modes such as disabled, local-only, private-only, EU-only, external-allowed-with-redaction, and explicit external-allowed - - introduce AI Data Classification for product knowledge, operational metadata, tenant config summaries, redacted provider payloads, raw provider payloads, personal data, customer-confidential context, and legal/compliance statements - - ensure AI execution is only possible through a central policy gate and provider adapter, never direct feature-level model calls - - default external public AI providers to disabled for customer/tenant data - - define capability/RBAC boundaries for managing AI settings and viewing AI execution metadata -- **Scope boundaries**: - - **In scope**: AI use-case registry, AI provider registry, workspace AI policy, AI data classification, policy evaluation service, provider adapter interface, initial disabled/local/private-compatible provider seam, RBAC/capability checks, and audit metadata shape - - **Out of scope**: building a full AI chatbot, implementing every provider, model benchmarking, autonomous remediation, legal final approval of AI disclosures, customer-facing AI UI for all use cases, or sending real tenant data to external providers -- **Acceptance points**: - - feature code cannot invoke AI without going through the central AI execution boundary - - every AI request declares use case, workspace, data class, model/provider class, purpose, and output visibility - - external public providers are disabled by 
default for tenant/customer data - - workspace AI policy can block or allow AI execution modes predictably - - raw provider payload and personal/customer-confidential data classes are rejected for external public providers by default - - AI policy decisions are auditable with actor/system actor, workspace, use case, provider class, data class, and decision outcome - - tests prove a disallowed provider/data-class combination cannot execute -- **Risks / open questions**: - - The first version must avoid overbuilding a provider marketplace before real AI use cases exist - - Local/private model support may initially be an adapter seam rather than a fully operated inference stack - - Workspace AI policy must be simple enough for operators but precise enough for enterprise trust conversations - - Data classification must align with Security Trust Pack Light and actual stored product data -- **Dependencies**: Security Trust Pack Light, Product Knowledge & Contextual Help, Support Diagnostic Pack, Decision Pack Contract & Approval Workflow, audit log foundation, workspace/RBAC isolation, Operational Controls & Feature Flags -- **Related specs / candidates**: AI Usage Budgeting, Context & Result Governance, AI-Assisted Customer Operations, Decision Pack Contract & Approval Workflow, Support Diagnostic Pack, Security Trust Pack Light -- **Strategic sequencing**: Should land before broad AI-assisted customer operations or decision recommendations. This is the safety and provider boundary for all later AI features. -- **Priority**: high - -### AI Usage Budgeting, Context & Result Governance -- **Type**: AI cost governance / context governance / result lifecycle foundation -- **Source**: roadmap update 2026-04-25 — Private AI Execution & Usage Governance Foundation -- **Problem**: Even with local/private models, AI usage consumes compute, queue capacity, latency budget, and potentially paid provider credits. 
Without usage budgeting, context builders, redaction, fingerprinting, result caching, and output governance, AI-assisted support, reviews, decision packs, and summaries can become expensive, slow, inconsistent, or unsafe. -- **Why it matters**: AI-native SaaS margins depend on treating AI calls as metered, prioritized, cacheable product operations. TenantPilot also needs to avoid sending raw provider payloads or excessive customer context to models. Structured context builders and result governance make AI outputs safer, cheaper, more stable, and easier to audit. -- **Proposed direction**: - - introduce an AI Usage Ledger for use case, workspace, tenant/reference, provider class, model class, data class, token/compute estimate, credit/cost estimate, queue priority, cache hit/miss, status, and purpose - - introduce AI credits or budget counters at workspace/plan level, with monthly caps, soft/hard limits, and operator override semantics - - introduce model-tier routing so low-risk summarization can use cheaper/local models while high-value decision recommendations can require stronger reasoning models and approval - - introduce purpose-specific AI Context Builders for finding, drift, operation run, support diagnostic, review pack, and decision pack use cases - - introduce redaction and minimization rules so context is sanitized, referenced, or summarized instead of passing raw tenant/provider data - - introduce AI Result Store & Cache keyed by fingerprints such as finding fingerprint, drift fingerprint, operation-run context hash, report fingerprint, evidence bundle fingerprint, and decision-pack fingerprint - - introduce approval gates and lifecycle states for customer-facing, legal/compliance, risk-accepting, or tenant-changing AI outputs - - expose basic operator visibility into AI usage, budget status, cache reuse, and blocked/failed AI jobs -- **Scope boundaries**: - - **In scope**: usage ledger, budget/credit service, context-builder contracts for first use 
cases, redaction hooks, result cache/store, fingerprinting, basic model-tier routing, queue priority metadata, approval state for sensitive outputs, and tests - - **Out of scope**: full billing integration, public customer AI usage dashboard, complex cost accounting, prompt marketplace, model fine-tuning, autonomous execution, or broad AI observability suite -- **Acceptance points**: - - AI jobs are recorded in a ledger with use case, workspace, provider/model class, data class, status, cache hit/miss, and cost/credit estimate - - workspace/plan AI budgets can block or degrade non-critical AI jobs when limits are exceeded - - at least one AI use case uses a context builder instead of raw model input from feature code - - result cache/fingerprint reuse prevents repeated generation for unchanged inputs - - customer-facing or risk-relevant AI outputs can remain draft/pending approval before use - - tests prove budget enforcement, cache reuse, redaction boundary, and approval-required output behavior -- **Risks / open questions**: - - Cost estimation may be approximate for local/private models; the system should support both token-cost and compute-credit abstractions - - Over-caching could reuse stale summaries if invalidation rules are weak - - Under-caching could make AI features too expensive and inconsistent - - Approval gates should not create UX friction for low-risk internal summaries -- **Dependencies**: Private AI Execution & Policy Foundation, Plans / Entitlements & Billing Readiness, Product Usage & Adoption Telemetry, Support Diagnostic Pack, StoredReports / EvidenceItems, Decision Pack Contract & Approval Workflow, OperationRun truth -- **Related specs / candidates**: AI-Assisted Customer Operations, Customer Lifecycle Communication, Product Knowledge & Contextual Help, Operational Controls & Feature Flags, Decision-Based Governance Inbox v1 -- **Strategic sequencing**: Should follow or pair with Private AI Execution & Policy Foundation. 
It should land before AI is used at scale for reviews, support, decision packs, or customer communication. -- **Priority**: high - -> Recommended sequence for this cluster: -> 1. **Private AI Execution & Policy Foundation** -> 2. **AI Usage Budgeting, Context & Result Governance** -> 3. **AI-Assisted Customer Operations** -> -> Why this order: first establish the trust boundary and provider/data policy, then add cost/context/result controls, and only then scale AI-assisted customer operations on top of governed inputs, budgets, caches, audits, and approvals. - -### Decision-Based Governance Inbox v1 -- **Type**: product strategy / workflow automation / operator UX -- **Source**: roadmap update 2026-04-25 — Human-in-the-Loop Autonomous Governance (Microsoft-first, Provider-extensible Decision-Based Operating) -- **Problem**: TenantPilot has many rich governance surfaces, but customers and operators can still be forced into search-and-troubleshoot behavior: opening tenants, runs, findings, reports, evidence, provider health, and logs to discover what actually needs a decision. That does not scale for MSPs, customer read-only users, or a low-headcount operating model. -- **Why it matters**: TenantPilot should become the decision control plane for accountable Microsoft tenant governance first, not just a browser for tenant state and execution history. The default workflow should be guided decisions; raw detail pages remain available as evidence and diagnostics. 
-- **Proposed direction**: - - introduce a Governance Inbox / Action Center that surfaces decision-ready work items across tenants and workspaces - - derive inbox items from findings, drift, exceptions, risk acceptances, provider health, failed/stale OperationRuns, review-pack readiness, evidence gaps, and actionable alerts - - group, deduplicate, and prioritize related signals so operators do not work the same issue multiple times - - show clear decision actions such as review, approve, reject, snooze, assign, accept risk, create ticket, run compare, generate review pack, or request evidence - - link every inbox item to underlying evidence and diagnostic surfaces without making drilldown the primary workflow - - keep the first implementation Microsoft-first while using provider-neutral descriptors where existing platform abstractions support it -- **Scope boundaries**: - - **In scope**: decision inbox item model/read model, source adapters for a small set of high-value signals, grouping/dedup rules, severity/priority handling, action affordances, links to evidence/diagnostics, RBAC/workspace scoping, and first operator UI - - **Out of scope**: autonomous remediation, broad AI agent, full workflow engine, complete MSP portfolio dashboard replacement, customer-facing remediation actions without approval, or support/CRM replacement -- **Acceptance points**: - - operators can see a prioritized list of decision-ready governance items without manually visiting each tenant/run/finding/report first - - each item includes why it matters, affected tenant/workspace, source records, severity/priority, freshness, and available actions - - duplicate/related signals can be grouped or fingerprinted to avoid inbox noise - - actions are server-side authorized and routed through existing OperationRun/workflow/audit patterns where applicable - - detail pages are reachable as evidence, but the main workflow remains decision-first - - tests prove workspace/tenant isolation and prevent 
unrelated users from seeing inbox items -- **Risks / open questions**: - - Inbox noise is a major risk; grouping and confidence/freshness semantics matter from v1 - - The inbox must not become another dashboard that merely links to raw tables - - The first slice needs carefully selected sources, likely findings, provider health, stale/failed runs, expiring risk acceptances, and review-pack readiness - - Customer-facing visibility may need a later slice with redaction and read-only action limits -- **Dependencies**: Findings workflow, Risk Acceptance/Exceptions, OperationRun truth, ProviderConnection health, StoredReports / EvidenceItems, Alerts v1, Product Usage & Adoption Telemetry, Customer Health Score, Operational Controls & Feature Flags -- **Related specs / candidates**: Decision Pack Contract & Approval Workflow, Findings Operator Inbox v1, Findings Intake & Team Queue v1, Customer Review Workspace v1, MSP Portfolio Dashboard, AI-Assisted Customer Operations -- **Strategic sequencing**: High priority after onboarding/support/telemetry/control foundations because it converts those signals into the primary customer/operator workflow. -- **Priority**: high - -### Decision Pack Contract & Approval Workflow -- **Type**: workflow automation / human-in-the-loop governance contract -- **Source**: roadmap update 2026-04-25 — Human-in-the-Loop Autonomous Governance (Microsoft-first, Provider-extensible Decision-Based Operating) -- **Problem**: A decision inbox is only useful if each item contains enough context to make a safe decision. Without a structured decision pack, operators still have to manually correlate drift, findings, evidence, operations, provider state, risk acceptance, and recommended action before approving or rejecting work. 
-- **Why it matters**: Human-in-the-loop governance depends on trustworthy, reviewable decision packages: what happened, why it matters, what evidence supports it, what options exist, what the system recommends, what confidence/freshness applies, and what will happen if the operator approves. This is the bridge between detection and controlled execution. -- **Proposed direction**: - - define a Decision Pack contract with summary, impact, affected tenants/policies, source signals, evidence links, confidence/freshness, recommended action, available actions, and expected execution path - - include before/after evidence requirements where an approved action triggers follow-up execution - - require human approval for tenant-changing, customer-facing, or risk-accepting actions - - route approved follow-up through OperationRuns or controlled workflows rather than direct UI-side execution - - audit detection, recommendation, approval/rejection, execution, verification, and evidence attachment - - keep Microsoft-specific details contextual while preserving provider-neutral subject/action vocabulary where possible -- **Scope boundaries**: - - **In scope**: Decision Pack data contract, approval state machine, action registry for first safe actions, audit events, OperationRun handoff, evidence requirements, and tests - - **Out of scope**: autonomous remediation, broad policy engine, multi-approver enterprise workflow, advanced AI recommendation engine, external ticketing deep sync, or automatic legal/customer commitments -- **Acceptance points**: - - a decision pack can be generated for at least one high-value decision source such as critical drift, expiring risk acceptance, failed compare, or review-pack readiness - - the pack shows summary, impact, evidence, source records, recommendation, confidence/freshness, and available actions - - approval/rejection/snooze/assign actions are audited - - tenant-changing or customer-facing actions require explicit approval before 
execution - - approved execution creates or references an OperationRun or controlled workflow record - - verification and before/after evidence can be attached or requested where applicable -- **Risks / open questions**: - - Too much context can overwhelm operators; the pack must be concise with progressive disclosure - - Recommendations must not overstate certainty; confidence/freshness must be visible - - AI-generated recommendations should remain optional and clearly marked until AI governance boundaries are mature -- **Dependencies**: Decision-Based Governance Inbox v1, Support Diagnostic Pack, Product Knowledge & Contextual Help, OperationRun link contract, Findings workflow, StoredReports / EvidenceItems, Operational Controls & Feature Flags, audit log foundation -- **Related specs / candidates**: AI-Assisted Customer Operations, Operator Explanation Layer, Humanized Diagnostic Summaries for Governance Operations, Provider-Backed Action Preflight and Dispatch Gate Unification, Customer Lifecycle Communication -- **Strategic sequencing**: Should follow or pair with Governance Inbox v1. The inbox defines the work queue; decision packs make each item decision-ready. -- **Priority**: high - -### Governance Automation Policy Guardrails v1 -- **Type**: automation policy / safety guardrails / future autonomous governance foundation -- **Source**: roadmap update 2026-04-25 — Human-in-the-Loop Autonomous Governance (Microsoft-first, Provider-extensible Decision-Based Operating) -- **Problem**: As TenantPilot moves from detection to guided action, there will be pressure to automate more of the workflow. Without explicit automation policy guardrails, the product risks drifting into unsafe autopilot behavior or, conversely, never automating safe low-risk follow-up. -- **Why it matters**: The product promise is not blind automation. It is accountable governance with human approval where risk matters. 
Automation policies should define what can be auto-created, auto-assigned, auto-snoozed, auto-notified, or auto-executed, and where approval is mandatory. -- **Proposed direction**: - - define automation policy guardrails for decision item creation, grouping, assignment, notifications, snoozing, ticket creation, review-pack generation, compare runs, and future remediation execution - - classify actions by risk: informational, workflow-only, customer-facing, tenant-changing, risk-accepting, or destructive - - require approval for tenant-changing, customer-facing, risk-accepting, or destructive actions - - support workspace-level policy defaults and optional stricter tenant-level overrides later - - audit policy changes and automation outcomes - - integrate with Operational Controls & Feature Flags so automation can be paused safely -- **Scope boundaries**: - - **In scope**: first automation policy model, action risk taxonomy, approval-required rules, audited policy changes, and enforcement for a small set of workflow-safe actions - - **Out of scope**: full rules engine, customer-authored automation scripting, autonomous remediation, complex multi-step playbooks, or cross-provider policy marketplace -- **Acceptance points**: - - automation policies can distinguish safe workflow automation from approval-required actions - - at least one safe action can run automatically and at least one risky action is blocked until approval - - policy changes are audited with actor, reason, scope, and timestamp - - disabled automation states are clear to operators - - tests prove tenant-changing and risk-accepting actions cannot bypass approval through automation -- **Risks / open questions**: - - Premature policy complexity could slow delivery; start with a small risk taxonomy and a few actions - - Workspace vs tenant policy inheritance must be handled carefully to avoid surprising behavior - - Automation policy should align with future MSP baseline inheritance and customer override 
semantics -- **Dependencies**: Decision-Based Governance Inbox v1, Decision Pack Contract & Approval Workflow, Operational Controls & Feature Flags, RBAC/capabilities, audit log foundation, Customer Lifecycle Communication -- **Related specs / candidates**: Human-in-the-Loop Autonomous Governance, MSP Portfolio Dashboard, Rollouts v1, Customer Review Workspace v1, AI-Assisted Customer Operations -- **Strategic sequencing**: Medium-high. It should not precede decision inbox and decision pack foundations, but it should land before any autonomous or semi-autonomous remediation features. -- **Priority**: medium-high - -> Recommended sequence for this cluster: -> 1. **Decision-Based Governance Inbox v1** -> 2. **Decision Pack Contract & Approval Workflow** -> 3. **Governance Automation Policy Guardrails v1** -> -> Why this order: first create the decision queue, then make each item decision-ready with evidence and approval semantics, then introduce explicit automation policy guardrails before expanding toward semi-autonomous execution. - -### System Panel Least-Privilege Capability Model -- **Type**: security hardening / platform-plane RBAC -- **Source**: full codebase quality audit 2026-04-25 — tenant/workspace-plane isolation is strong, but System Panel directory visibility is intentionally global and currently gated by coarse platform capabilities -- **Problem**: The System Panel currently exposes global workspace and tenant directory views through broad platform capabilities. This is acceptable for trusted platform superadmins and break-glass operators, but too coarse for enterprise-grade least-privilege support roles, audit expectations, and future support delegation. -- **Why it matters**: TenantPilot has strong tenant/workspace isolation elsewhere. If the platform plane remains coarse, the product has an uneven security story: customer-facing tenant access is tight, while internal/operator metadata visibility can still be broader than necessary. 
Enterprise customers, MSP operators, and auditors will expect support roles to see only the minimum system metadata needed for their task. -- **Proposed direction**: - - split broad System Panel directory visibility into more granular platform capabilities - - distinguish System Panel access, workspace directory visibility, tenant directory visibility, operations visibility, support diagnostics, and break-glass access - - keep platform superadmin and emergency break-glass behavior intact - - enforce the new boundaries server-side on System Panel pages, not only through navigation hiding - - add explicit tests for restricted platform users so unrelated workspace/tenant metadata cannot be enumerated accidentally -- **Candidate capabilities**: - - `platform.system.access` - - `platform.workspaces.view` - - `platform.tenants.view` - - `platform.operations.view` - - `platform.support_diagnostics.view` - - `platform.break_glass.use` -- **Scope boundaries**: - - **In scope**: System Panel page access, platform capability split, server-side authorization checks, navigation visibility alignment, audit-friendly role behavior, and regression tests for non-superadmin platform users - - **Out of scope**: redesigning tenant/workspace membership RBAC, changing admin-panel tenant isolation semantics, removing break-glass, adding impersonation, or building a full support-role management UI unless explicitly needed for test fixtures -- **Acceptance points**: - - existing platform superadmin behavior remains intact - - a platform user with only workspace-directory visibility cannot view tenant-directory pages - - a platform user with only tenant-directory visibility cannot view workspace-directory pages unless explicitly granted - - operations visibility is separately controllable from directory visibility - - System Panel pages return forbidden or not-found consistently when capability is missing - - tests prove navigation hiding is not the only protection -- **Risks / open 
questions**: - - Over-fragmenting capabilities could make platform-user administration noisy before there is a polished role UI - - The product needs an explicit decision on whether support diagnostics can reveal tenant metadata without full tenant-directory access - - Break-glass behavior must remain simple, auditable, and unmistakably separate from normal support access -- **Dependencies**: `PlatformCapabilities`, System Panel providers/pages, platform-user model/policies, existing System Directory tests, existing tenant/workspace isolation tests -- **Related specs / candidates**: enterprise auth structure, platform superadmin / break-glass rules, RBAC hardening, System Directory residual surface tests -- **Strategic sequencing**: First item in this cluster because it is the only finding with direct enterprise security / least-privilege implications. -- **Priority**: high - -### Audited Support Sessions / Assisted Tenant Access -- **Type**: security hardening / platform-plane-to-tenant-plane access boundary -- **Source**: product candidate 2026-04-27 — explicit separation between Platform Control Plane (`/system`) and Customer/Tenant Admin Plane (`/admin`) -- **Problem**: Platform operators sometimes need tenant-context visibility for support, but the current control-plane/admin-plane split makes a plain `Open in /admin` affordance misleading for platform-only users. Granting permanent tenant memberships or hidden cross-tenant superuser access would solve the support friction in the wrong way by collapsing least privilege, auditability, and customer trust. -- **Why it matters**: TenantPilot needs an enterprise-safe answer to "how can support look at a tenant?" The answer cannot be silent impersonation or blanket `/admin` access. It must be explicit, tenant-scoped, reason-bound, time-limited, visible in the UI, and fully auditable. 
-- **Proposed direction**: - - introduce a `support_sessions` model bound to one workspace/tenant, one platform user, one mode, one reason, and one expiry - - allow support sessions to be started only from `/system`, never implicitly from `/admin` - - make read-only the default and smallest promotable slice; keep elevated support as a separately capability-gated follow-up or tightly bounded extension - - show a persistent non-dismissible `/admin` banner with workspace/tenant, platform user, mode, reason, expiry, and `End session` - - thread `support_session_id` and support-context metadata into audit events and any allowed elevated mutations - - replace ambiguous `Open in /admin` affordances with decision-based copy: real tenant membership -> `Open in tenant admin`; support-capable platform user -> `Start support session`; otherwise -> `Admin access requires tenant membership` - - enforce expiry and scope server-side so a session cannot cross tenants or remain valid after `expires_at` -- **Candidate capabilities**: - - `platform.support_sessions.view` - - `platform.support_sessions.start` - - `platform.support_sessions.end` - - `platform.support_sessions.end_any` - - `platform.support_sessions.elevate` - - `platform.support_sessions.audit_view` -- **Scope boundaries**: - - **In scope**: support-session model/status lifecycle, required reason, duration defaults/limits, start/end/expiry workflow, visible `/admin` support banner, read-only enforcement for support sessions, audit events, link/CTA semantics, and tests for isolation and expiry - - **Out of scope**: silent impersonation as a tenant user, permanent platform-user tenant memberships, Entra-side membership automation, full customer approval workflow, recording/screenshots, or broad privileged write access by default -- **Acceptance points**: - - a platform user still has no automatic `/admin` access without real tenant membership or an active support session - - a support-capable platform user can start a 
time-limited session for one target workspace/tenant with a mandatory reason - - read-only support sessions can open tenant/admin pages and inspect relevant records but cannot trigger mutations such as restore, settings change, exception creation, membership change, or other tenant-changing actions - - the `/admin` banner remains visible for the full session and shows enough information to make the access state unmistakable - - ended or expired sessions immediately lose access server-side and emit explicit audit events - - tests prove a support session cannot be reused for another tenant or hidden behind UI-only checks -- **Risks / open questions**: - - v1 should likely stay read-only to avoid collapsing this candidate into a broader privileged-write workflow - - Elevated support may be necessary later, but it needs its own capability, shorter expiry, stronger audit semantics, and likely separate prioritization - - The product must decide whether some diagnostics/support surfaces belong in `/system` instead of requiring tenant-plane access at all - - Support-session audit records must remain visibly distinct from real customer-user actions -- **Dependencies**: System Panel Least-Privilege Capability Model, tenant/admin access boundary, platform-user capability model, audit log foundation, workspace/tenant scoping helpers, support-diagnostics surfaces -- **Related specs / candidates**: Support Diagnostic Pack, In-App Support Request with Context, Operational Controls & Feature Flags, Security Trust Pack Light, platform superadmin / break-glass rules -- **Strategic sequencing**: Immediately after System Panel least-privilege hardening. First shrink coarse platform visibility, then introduce an explicit audited bridge for rare tenant-context support access instead of relying on hidden superuser semantics. 
-- **Priority**: high - -### Static Analysis Baseline for Platform Code -- **Type**: quality gate / developer experience hardening -- **Source**: full codebase quality audit 2026-04-25 — the repo has strong Pest and lane-based tests but no visible PHPStan/Larastan/Psalm/Rector gate -- **Problem**: Runtime tests and feature tests are strong, but the codebase lacks a visible static-analysis baseline. In a growing Laravel / Filament / Livewire codebase with large services and resources, relying only on runtime tests leaves type drift, unsafe API usage, dead paths, and refactoring regressions too easy to introduce. -- **Why it matters**: TenantPilot is increasingly agent-assisted and spec-driven. Agents can move quickly, but without static analysis they can also reinforce invalid assumptions across dynamic Laravel boundaries. A pragmatic static-analysis gate gives both humans and agents a fast feedback loop before full suites run. -- **Proposed direction**: - - add Larastan/PHPStan configuration for `apps/platform` - - start at a realistic level rather than attempting perfect strictness on day one - - generate an explicit baseline if existing findings are too broad for immediate cleanup - - make CI fail on new non-baselined findings - - document the local and CI workflow for developers and repo agents - - track baseline reduction as a future maintenance path rather than bundling all fixes into this spec -- **Scope boundaries**: - - **In scope**: PHPStan/Larastan setup, baseline generation if needed, CI integration, developer documentation, and a small number of configuration fixes required to make analysis meaningful for Laravel/Filament patterns - - **Out of scope**: fixing all existing static-analysis findings, broad refactoring, Rector-driven code rewrites, changing app architecture, or blocking unrelated feature delivery on full strictness immediately -- **Acceptance points**: - - static analysis runs locally for `apps/platform` - - static analysis runs in CI or 
the active repository pipeline - - existing accepted findings are captured in a reviewed baseline - - new non-baselined findings fail the quality gate - - README, handover, or developer docs explain how to run and update the baseline - - configuration accounts for Laravel, Filament, Eloquent factories, and dynamic container usage where appropriate -- **Risks / open questions**: - - Starting too strict could create a large noisy cleanup spec instead of a useful guardrail - - Starting too loose could give false confidence without catching meaningful drift - - The repo must decide whether PHPStan/Larastan is enough initially or whether Rector belongs in a later separate modernization lane -- **Dependencies**: current Composer tooling, Pest lanes, Gitea workflows, `apps/platform/phpunit.xml`, developer documentation -- **Related specs / candidates**: Architecture Boundary Guard Tests, codebase quality hardening, CI/DX hardening -- **Strategic sequencing**: Second item in this cluster. It should land before broad hotspot refactors so those refactors have stronger safety rails. -- **Priority**: high - -### Architecture Boundary Guard Tests -- **Type**: architecture hardening / regression guardrail -- **Source**: full codebase quality audit 2026-04-25 — product tests are strong, but architecture-level enforcement is still thin compared with the size and complexity of the codebase -- **Problem**: The repo has strong feature, RBAC, browser, and operation-flow tests, but only limited architecture-boundary enforcement. As the platform grows, Filament UI, services, jobs, provider code, models, support registries, and operation-run semantics can drift silently unless dependency and responsibility rules are executable. 
-- **Why it matters**: TenantPilot already has clear architectural intent: UI should not become provider-write logic, jobs should delegate business logic, platform and tenant capabilities should remain separate, and operation-run semantics should stay service-owned. Without guard tests, these principles remain review conventions and can be weakened by future agent-led changes. -- **Proposed direction**: - - introduce architecture tests that encode the most important dependency and responsibility boundaries - - start with high-signal rules rather than broad brittle pattern matching - - baseline or explicitly document accepted legacy violations - - connect new tests to the active quality-gate lane - - use the tests as a safety rail before decomposing large Filament/service hotspots -- **Candidate guardrails**: - - Filament Resources must not directly perform provider writes - - Filament Resources must not own large workflow orchestration - - Jobs should delegate business logic to services or handlers - - provider-specific code must not leak into neutral platform domains - - Models must not depend on Filament - - Services must not depend on Filament Resources - - Support registries must not depend on UI classes - - platform capabilities and tenant/workspace capabilities must remain separated - - OperationRun lifecycle and outcome semantics stay service-owned -- **Scope boundaries**: - - **In scope**: executable architecture tests, pragmatic baselines/exceptions, dependency-direction checks, responsibility-boundary checks, and CI integration - - **Out of scope**: perfect clean-architecture purity, mass refactoring to satisfy idealized rules, changing Laravel/Filament conventions where the framework reasonably expects dynamic coupling, or enforcing line-count thresholds as the only quality metric -- **Acceptance points**: - - architecture tests run locally and in CI - - new violations for selected boundaries fail tests - - accepted existing violations are explicitly 
documented with exit paths or reasons - - tests protect at least UI/provider, model/UI, service/UI, platform-capability, and OperationRun ownership boundaries - - the rules are specific enough to guide future agent work without blocking legitimate Laravel/Filament usage -- **Risks / open questions**: - - Over-broad static rules may produce noise and encourage blanket exceptions - - Some legacy hotspots may need temporary exceptions until decomposition specs land - - The tests should complement, not duplicate, PHPStan/Larastan -- **Dependencies**: Static Analysis Baseline for Platform Code, current architecture test setup, existing Action Surface guard tests, platform capability registry, provider contracts -- **Related specs / candidates**: Static Analysis Baseline for Platform Code, Filament Hotspot Decomposition Foundation, RestoreService Responsibility Split, Provider Boundary Hardening -- **Strategic sequencing**: Third item in this cluster. It can begin alongside static analysis but should be in place before large decomposition work accelerates. -- **Priority**: high - -### Filament Hotspot Decomposition Foundation -- **Type**: maintainability hardening / UI architecture -- **Source**: full codebase quality audit 2026-04-25 — several Filament Resources/Pages are large multi-responsibility hotspots despite an otherwise structured architecture -- **Problem**: Several Filament surfaces have grown into large classes that combine table/query construction, form or infolist schema, action definitions, presentation rules, state labels, authorization glue, notifications, and workflow orchestration. This does not make the codebase bad, but it increases review cost, bus-factor risk, regression risk, and future feature cost. -- **Known hotspots**: - - `ManagedTenantOnboardingWizard.php` - - `TenantResource.php` - - `FindingResource.php` - - `RestoreRunResource.php` -- **Why it matters**: Filament is the primary operator UI. 
If every major surface keeps accumulating local query, action, presenter, and workflow code, the admin experience becomes hard to evolve safely. This is especially risky in an agent-led workflow where large files encourage local patching rather than clean extraction. -- **Proposed direction**: - - define a repeatable decomposition pattern for large Filament Resources and Pages - - extract complex query builders into dedicated query/read-model objects where useful - - extract action construction into action builder classes or surface-specific action objects - - extract badge, label, state, and helper-text rules into presenters - - extract complex form/infolist/table section schemas into reusable schema builders - - keep routes, resource names, permissions, and user-facing behavior unchanged during the foundation slice - - adopt the pattern on one representative Resource first before migrating all hotspots -- **First adoption target**: Prefer `FindingResource.php` or `TenantResource.php` as the first representative target because both expose dense operator-facing surfaces and repeated action/presentation/query patterns. 
-- **Scope boundaries**: - - **In scope**: decomposition pattern, first representative Resource/Page adoption, tests proving behavior is unchanged, and one or more architecture guardrails that prevent immediate regression - - **Out of scope**: broad UI redesign, changing product behavior, permission-semantic changes, schema changes, visual redesign, or mass migration of every large Filament surface in one spec -- **Acceptance points**: - - selected Resource/Page loses meaningful line count without changing behavior - - extracted classes have clear responsibility names and are easier to test or review - - existing UI/feature tests pass unchanged or are updated only for intentional structure-aware guardrails - - new or updated architecture tests prevent action/query/presenter logic from growing back into the Resource in the same form - - the resulting pattern is documented so future specs and agents can reuse it -- **Risks / open questions**: - - Extracting too aggressively could create more indirection than clarity - - Extracting too little would reduce line count without actually improving responsibility boundaries - - Choosing the first adoption surface matters; a volatile feature surface may make behavior-preserving decomposition harder -- **Dependencies**: Static Analysis Baseline for Platform Code, Architecture Boundary Guard Tests, existing Filament resource tests, action-surface guard tests -- **Related specs / candidates**: Record Page Header Discipline & Contextual Navigation (Spec 192), Monitoring Surface Action Hierarchy & Workbench Semantics (Spec 193), Governance Friction & Operator Vocabulary Hardening (Spec 194), RestoreService Responsibility Split -- **Strategic sequencing**: Fourth item in this cluster. It should follow static analysis and initial architecture guardrails so the extraction work is safer and easier to review. 
-- **Priority**: high - -### RestoreService Responsibility Split -- **Type**: maintainability hardening / safety-critical workflow architecture -- **Source**: full codebase quality audit 2026-04-25 — restore logic is safety-critical but currently concentrated in a large service hotspot -- **Problem**: `RestoreService.php` has grown into a large multi-responsibility class. Restore is one of TenantPilot's highest-risk workflows because it can affect customer tenant state. Concentrating preview, validation, payload mapping, provider writes, operation tracking, result normalization, and failure classification in one service increases regression risk and makes review harder. -- **Why it matters**: Restore is not just another CRUD operation. Operators need predictable preview/apply semantics, accurate failure handling, and auditable operation results. A large service can still work, but it becomes increasingly difficult to change safely, especially as provider-backed actions and restore semantics mature. 
-- **Proposed direction**: - - keep `RestoreService` as a thin application-facing facade if preserving its public API is useful - - extract restore preview calculation into a focused collaborator - - extract restore payload mapping into provider-aware mappers - - extract restore validation / precondition checks into a dedicated validator or gate - - extract provider write execution into explicit execution handlers - - extract restore result normalization and failure classification into focused components - - preserve existing OperationRun and audit semantics -- **Target responsibility slices**: - - restore preview calculation - - restore payload mapping - - restore validation and preconditions - - provider write execution - - restore operation/run tracking - - restore result normalization - - restore failure classification -- **Scope boundaries**: - - **In scope**: internal responsibility split, behavior-preserving tests, collaborator extraction, thin facade preservation where appropriate, and restore-specific architecture guardrails - - **Out of scope**: changing restore UI, changing provider behavior, changing restore operation semantics, adding new restore features, broad provider abstraction redesign, or rewriting the restore engine from scratch -- **Acceptance points**: - - `RestoreService.php` becomes materially smaller - - each extracted class has one clear responsibility - - existing restore tests pass - - new tests cover at least preview, validation/preconditions, provider write execution, and failure/result handling boundaries - - OperationRun lifecycle and audit behavior remain unchanged - - the public restore workflow remains behavior-compatible unless an explicit spec requirement says otherwise -- **Risks / open questions**: - - Restore has real execution risk; decomposition must be behavior-preserving and heavily tested - - Poor extraction could hide execution order or transactional semantics across too many classes - - Provider-boundary cleanup and 
restore decomposition must be coordinated so neither creates competing abstractions -- **Dependencies**: Static Analysis Baseline for Platform Code, Architecture Boundary Guard Tests, restore tests, OperationRun semantics, Provider Boundary Hardening -- **Related specs / candidates**: Restore Lifecycle Semantic Clarity, Provider-Backed Action Preflight and Dispatch Gate Unification (Spec 216), Provider Boundary Hardening (Spec 237), Filament Hotspot Decomposition Foundation -- **Strategic sequencing**: Fifth item in this cluster. It should follow or run shortly after the generic quality gates, but it can be promoted earlier if restore changes become frequent. -- **Priority**: high - -> Recommended sequence for this cluster: -> 1. **System Panel Least-Privilege Capability Model** -> 2. **Audited Support Sessions / Assisted Tenant Access** -> 3. **Static Analysis Baseline for Platform Code** -> 4. **Architecture Boundary Guard Tests** -> 5. **Filament Hotspot Decomposition Foundation** -> 6. **RestoreService Responsibility Split** -> -> Why this order: first close the coarse platform-capability gap, then add an explicit audited bridge for rare tenant-context support access, then add quality gates, then protect architecture boundaries, and only then start behavior-preserving decomposition of the largest UI/service hotspots. This avoids a broad rewrite while directly addressing the audit's highest-leverage security and maintainability risks. - - -> Platform Hardening — OperationRun UX Consistency cluster: these candidates prevent OperationRun-starting features from drifting into surface-local UX behavior. The goal is not to rebuild the Operations Hub, progress system, or notification architecture in one step. The immediate priority is to make OperationRun start UX contract-driven so new features cannot hand-roll local toasts, operation links, browser events, and queued-notification decisions independently. 
- -### OperationRun Start UX Contract -- **Type**: hardening / architecture guardrail -- **Source**: OperationRun UX consistency analysis 2026-04-24 — `Refresh evidence` creates `tenant.evidence.snapshot.generate` runs correctly but does not consistently expose the same start UX as other OperationRun-backed flows -- **Problem**: OperationRun lifecycle state and terminal notifications are partially centralized, but the start UX is still assembled per Filament surface. Different flows independently decide local toast copy, `Open operation` links, run-enqueued browser events, dedup/no-op messaging, artifact links, and queued DB-notification behavior. This creates UX drift whenever a new feature starts, deduplicates, blocks, or links to an OperationRun. -- **Why it matters**: TenantPilot uses OperationRun as canonical execution truth. If every feature surface composes its own start UX, operators receive inconsistent signals for the same execution concept, and agent-led development will repeatedly miss one of the required pieces. Governance flows such as evidence refresh, review-pack generation, baseline capture/compare, inventory sync, backup, and restore need one reusable start contract instead of surface-local conventions. 
-- **Proposed direction**: - - introduce a central OperationRun start result contract that can represent new run queued, already queued, already running, already available/deduped, blocked, and failed-to-start states - - introduce a central presenter that turns the start result into consistent Filament notifications, `Open operation` links, artifact links, and browser-event decisions - - keep OperationRun detail links routed through the existing canonical link resolver rather than manually composing URLs - - keep queued DB-notifications explicit opt-in; do not globally enable queued database notifications as part of this slice - - preserve existing terminal notification behavior through the central OperationRun lifecycle - - migrate `Refresh evidence` as the first adoption and `Review pack generate` or `Create snapshot` as the second adoption - - add a guard test that prevents new hand-rolled OperationRun start-success UX in selected Filament/Livewire surfaces unless a spec explicitly allows an exception -- **Scope boundaries**: - - **In scope**: OperationRun start result contract, generic start-result presenter, `Refresh evidence` adoption, one second adoption, `Open operation` action consistency, run-enqueued browser event dispatch, queued DB-notification opt-in decision, dedup/no-op copy, guard-test coverage, and minimal spec/template guidance where appropriate - - **Out of scope**: generic progress-system redesign, `BulkOperationProgress` rename or rewrite, global queued notification policy, migration of every existing OperationRun start surface, new OperationRun types, and broad Operations Hub redesign -- **Acceptance points**: - - `Refresh evidence` distinguishes new-run from dedup/already-available outcomes in its returned start result and UI copy - - a new `tenant.evidence.snapshot.generate` run started from the UI exposes an `Open operation` action and dispatches the central run-enqueued browser event - - unchanged fingerprint / already available snapshot 
does not claim that a new run was queued, links the existing snapshot, and does not dispatch a run-enqueued event - - queued DB-notification remains opt-in and is not accidentally enabled globally - - terminal completed/failed/blocked notification behavior does not regress - - at least one second flow, preferably `Review pack generate`, proves the contract is not evidence-specific - - a guard test blocks new local OperationRun start-success toasts that bypass the central presenter, with explicitly documented legacy exceptions -- **Risks / open questions**: - - Over-broad static guard rules could flag legitimate non-OperationRun notifications; the first guard should be pragmatic and exception-based rather than attempting perfect static analysis - - Provider-backed flows already have a stronger gate/presenter pattern; the new generic contract should reuse or align with that pattern rather than create a competing presenter stack - - Existing manual flows should not all be migrated in this spec, otherwise the slice becomes too large -- **Dependencies**: `OperationRunService`, `OperationRunLinks`, `OperationRunUrl`, `OperationUxPresenter`, `ProviderOperationStartResultPresenter`, `OpsUxBrowserEvents`, `EvidenceSnapshotService`, `ReviewPackService`, existing OperationRun notification tests, existing link contract guard tests -- **Related specs / candidates**: Operation Run Link Contract Enforcement (Spec 232), Operation Run Active-State Visibility & Stale Escalation (Spec 233), Provider-Backed Action Preflight and Dispatch Gate Unification (Spec 216), OperationRun Notification Lifecycle, Generic Active Run Surface, OperationRun Startsurface Migration -- **Strategic sequencing**: First item in the OperationRun UX Consistency cluster. It should land before additional large evidence, review, baseline, or governance operations are added, so new OperationRun-backed features inherit the same start contract from the beginning. 
-- **Priority**: high - -### Generic Active Run Surface -- **Type**: hardening / monitoring UX -- **Source**: OperationRun UX consistency analysis 2026-04-24 — current active-run surface is historically named `BulkOperationProgress` and does not clearly separate true progress metrics from active-run visibility -- **Problem**: The global run/progress surface is historically shaped around bulk operations, while the product now has many OperationRun types. Some runs have meaningful progress metrics, while others only have queued/running/completed state. The current surface can be refreshed by run-enqueued browser events, but not every run-starting flow dispatches those events, and the widget does not consistently model whether a run is progress-capable or active-status-only. -- **Why it matters**: Operators need immediate confidence that a started operation is visible and still active, but the UI must not imply fake percent progress for jobs that do not expose real progress. As OperationRun becomes the execution truth for evidence, reviews, baselines, inventory, backup, restore, and provider checks, the active-run surface must communicate the right level of detail without noise. 
-- **Proposed direction**: - - classify OperationRun types by display capability: progress-capable, active-status-only, or terminal-only - - decide whether `BulkOperationProgress` remains bulk-specific, is wrapped by a generic active-run surface, or is gradually renamed/refactored - - display progress bars only for runs with real progress counters - - display simple queued/running indicators for short or active-status-only runs - - ensure run-enqueued events from the OperationRun Start UX Contract refresh the active-run surface reliably - - keep tenant/workspace filtering explicit so the surface never leaks cross-context runs -- **Scope boundaries**: - - **In scope**: active-run surface semantics, progress-capability classification, polling rules, browser-event refresh behavior, active-status-only rendering, and tenant/workspace filtering - - **Out of scope**: OperationRun start contract itself, DB-notification lifecycle policy, external alerting, and migration of all start surfaces -- **Acceptance points**: - - every displayed run type is classified as progress-capable or active-status-only - - runs without real progress metrics are not rendered with misleading percent/progress values - - active run polling remains quiet when no relevant active runs exist - - run-enqueued events refresh the surface consistently for relevant active runs - - tenant/workspace scoping is enforced in queries and tests -- **Dependencies**: OperationRun Start UX Contract, `BulkOperationProgress`, `ActiveRuns`, `OpsUxBrowserEvents`, `OperationRun`, `OperationRunType` -- **Related specs / candidates**: Operation Run Active-State Visibility & Stale Escalation (Spec 233), OperationRun Start UX Contract, OperationRun Startsurface Migration -- **Strategic sequencing**: Second item in the OperationRun UX Consistency cluster, after the start contract establishes when run-enqueued events should be emitted. 
-- **Priority**: medium-high - -### OperationRun Notification Lifecycle -- **Type**: hardening / notification policy -- **Source**: OperationRun UX consistency analysis 2026-04-24 — queued DB-notifications exist but are not broadly used, while terminal notifications are handled centrally through the OperationRun lifecycle -- **Problem**: OperationRun notifications currently mix clear terminal lifecycle behavior with unclear queued/running policy. Queued DB-notifications are technically available but intentionally opt-in and rarely used in app code. Terminal completed/failed/blocked messaging is more centralized, but failed and blocked states are still communicated through a shared terminal notification path rather than a clearly documented lifecycle policy. -- **Why it matters**: Persisted DB-notifications can quickly become noisy in an MSP/operator product. At the same time, long-running or critical operations may need durable notifications beyond local toasts. The product needs an explicit policy for when a run deserves a persisted queued/running/terminal notification, who receives it, and how duplicates are prevented. 
-- **Proposed direction**: - - define a lifecycle notification policy for queued, running, completed, failed, blocked, and any relevant stale/canceled states - - keep queued DB-notifications opt-in unless the spec defines precise criteria for enabling them - - clarify whether terminal states should continue through a shared `OperationRunCompleted` channel or split failed/blocked into explicit notification classes - - define recipient rules: initiator-only, workspace members, tenant members, or capability-based recipients - - define duplicate-prevention and one-terminal-notification guarantees - - decide whether Filament database notifications should poll or remain manually refreshed -- **Scope boundaries**: - - **In scope**: DB-notification lifecycle policy, recipient policy, queued opt-in criteria, terminal notification semantics, duplicate prevention, and panel polling decision - - **Out of scope**: Teams/email alert routing, findings escalation rules, external webhook/PSA notifications, generic progress surface, and the OperationRun start-result contract itself -- **Acceptance points**: - - lifecycle notification policy is explicit for queued/running/completed/failed/blocked states - - queued DB-notifications remain controlled and are not globally enabled accidentally - - terminal notifications are emitted exactly once per relevant run lifecycle - - failed and blocked terminal messaging is clear to operators - - recipient selection is documented and tested - - Filament DB-notification polling is intentionally enabled or intentionally disabled with tests -- **Dependencies**: OperationRun Start UX Contract, `OperationRunService`, `OperationRunQueued`, `OperationRunCompleted`, `OperationUxPresenter`, Filament panel providers -- **Related specs / candidates**: Findings Notifications & Escalation v1 (Spec 224), Findings Notification Presentation Convergence (Spec 230), OperationRun Start UX Contract, Generic Active Run Surface -- **Strategic sequencing**: Third item 
in the OperationRun UX Consistency cluster. It should follow the start contract so local start toasts and persisted DB-notifications remain clearly separated. -- **Priority**: medium - -### OperationRun Startsurface Migration -- **Type**: hardening / migration slice -- **Source**: OperationRun UX consistency analysis 2026-04-24 — many existing OperationRun-starting surfaces already implement partial local patterns and should be migrated gradually after the shared contract exists -- **Problem**: Even after a central OperationRun Start UX Contract exists, older start surfaces will continue to contain manual toast, link, dedup, browser-event, and artifact-link behavior. If they remain indefinitely, they become examples for future features and keep the UX inconsistent. -- **Why it matters**: The contract prevents new drift, but existing drift still affects daily operator experience. A controlled migration strand lets the product converge without turning the initial contract spec into an oversized refactor. 
-- **Proposed direction**: - - inventory OperationRun-starting surfaces and classify them as migrated, legacy/manual allowed, out of scope, or not actually OperationRun-starting - - reduce guard-test exceptions over time - - migrate flows in priority order: remaining Evidence/Review Pack surfaces, Baseline Capture, Baseline Compare, Inventory Sync, Backup Schedule, Backup Set bulk operations, Restore Execute, Provider Operations if still needed - - for each migrated flow, route toast, `Open operation`, artifact link, browser event, and queued DB-notification decision through the central contract - - preserve existing operation semantics while removing surface-local UX composition -- **Scope boundaries**: - - **In scope**: migration of existing OperationRun start surfaces to the OperationRun Start UX Contract (PH.1), reduction of guard exceptions, tests per migrated flow, and documentation of any permanent exceptions - - **Out of scope**: new OperationRun architecture, generic active-run surface design, notification lifecycle redesign, broad UI redesign, and changing job execution semantics -- **Acceptance points**: - - an inventory of known OperationRun-starting surfaces exists and is kept near the guard test or migration documentation - - migrated surfaces no longer hand-roll the combination of start toast, operation link, and browser event - - dedup/already-running/blocked states are represented through the central contract where applicable - - guard-test exceptions are reduced or justified with spec references - - each migrated flow has new-run and dedup/already-running tests where applicable -- **Dependencies**: OperationRun Start UX Contract; optionally Generic Active Run Surface and OperationRun Notification Lifecycle for later migration waves -- **Related specs / candidates**: OperationRun Start UX Contract, Generic Active Run Surface, OperationRun Notification Lifecycle, Provider-Backed Action Preflight and Dispatch Gate Unification (Spec 216) -- **Strategic sequencing**: Fourth
item in the OperationRun UX Consistency cluster. It should not block the OperationRun Start UX Contract (PH.1) but should be used to retire manual legacy patterns incrementally. -- **Priority**: medium - -> Recommended sequence for this cluster: -> 1. **OperationRun Start UX Contract** -> 2. **Generic Active Run Surface** -> 3. **OperationRun Notification Lifecycle** -> 4. **OperationRun Startsurface Migration** -> -> Why this order: first establish the mandatory start contract and guardrails, then clarify active-run visibility, then define durable notification policy, and only then migrate remaining legacy/manual surfaces in controlled waves. - -> Provider-boundary / future-provider portability cluster: these candidates are intentionally **not** a multi-cloud execution program. The goal is to keep Microsoft-first hotspots small at shared platform seams so a later second provider remains a bounded follow-up instead of a rewrite. Current product truth stays Microsoft-first; these candidates only harden where provider-specific semantics are at risk of becoming platform-core truth. - -### Provider Identity & Target Scope Neutrality -- **Type**: hardening / identity boundary -- **Source**: provider portability audit 2026-04-23 — `ProviderConnection` is generic by name but still carries Entra-shaped identity and target-scope semantics -- **Problem**: Provider connection persistence, validation, and UI currently mix generic provider language with Microsoft-specific tenant-identity concepts. The result is a boundary that looks generic while still encoding one provider's identity model as shared truth. -- **Why it matters**: Identity and target scope are true foundation hotspots. If they stay Microsoft-shaped in shared persistence and shared UI contracts, later provider expansion becomes schema and workflow surgery instead of a bounded follow-up.
-- **Proposed direction**: - - separate provider-neutral connection / target-scope truth from provider-specific identifiers and verification metadata - - move Microsoft-specific descriptors into provider-owned metadata or explicitly named provider-specific fields only where current-release truth requires them - - harden create/edit/view flows so shared platform language stays generic while Microsoft specifics remain contextual and bounded -- **Scope boundaries**: - - **In scope**: `ProviderConnection` semantics, create/edit/view vocabulary, provider target-scope descriptors, validation shape - - **Out of scope**: full provider marketplace, multi-provider onboarding, or broad credential-model redesign beyond the boundary cleanup -- **Dependencies**: Provider Boundary Hardening, current `ProviderConnection` model/resource/migration shape -- **Priority**: high - -### Governance Subject Taxonomy Decoupling -- **Type**: hardening / platform vocabulary -- **Source**: provider portability audit 2026-04-23 — governance subject taxonomy still derives too directly from Microsoft/Intune policy semantics -- **Problem**: Governed-subject categories, capability language, and cross-cutting governance taxonomies still lean on provider-specific policy vocabulary more than platform-owned subject vocabulary. -- **Why it matters**: This is a platform-core hotspot. If the governance subject taxonomy remains provider-shaped, later provider support forces rework across evidence, compare, review, and navigation surfaces instead of one bounded taxonomy seam. 
-- **Proposed direction**: - - define platform-owned governed-subject terms first and map provider-specific policy families into them - - keep provider-specific policy metadata in adapters or registries instead of turning it into platform-visible truth - - align filters, query keys, summaries, and operator vocabulary to the same governed-subject contract -- **Scope boundaries**: - - **In scope**: taxonomy ownership, platform-visible query/state keys, governed-subject vocabulary at shared surfaces - - **Out of scope**: full provider-agnostic policy engine, broad inventory-model redesign, or a generic ontology framework -- **Dependencies**: Provider Boundary Hardening; should align with any broader platform vocabulary work instead of creating a competing taxonomy effort -- **Priority**: high - -### Compare Strategy Boundary Hardening -- **Type**: hardening / strategy boundary -- **Source**: provider portability audit 2026-04-23 — compare strategy selection still falls back to Intune-first behavior at a shared orchestration seam -- **Problem**: Compare strategy selection and compare orchestration still assume one provider family at a seam that will eventually need to stay explicit about provider-specific logic. -- **Why it matters**: A second provider is not needed now, but shared compare orchestration must not quietly become "Intune compare with generic names" if the platform is meant to keep the door open. 
-- **Proposed direction**: - - keep provider-specific compare logic behind explicit strategy contracts - - harden registry/orchestration behavior so unsupported subject/provider combinations fail explicitly instead of inheriting Intune semantics by fallback - - preserve current-release Microsoft behavior without promoting it to platform-default truth -- **Scope boundaries**: - - **In scope**: registry fallback semantics, unsupported-path handling, compare orchestration contract boundaries - - **Out of scope**: implementing non-Microsoft compare engines or redesigning baseline compare product semantics end to end -- **Dependencies**: Governance Subject Taxonomy Decoupling and existing compare cleanup work -- **Priority**: medium - -### Provider Surface Vocabulary & Descriptor Cleanup -- **Type**: hardening / UX vocabulary -- **Source**: provider portability audit 2026-04-23 — supposedly generic provider surfaces still expose Microsoft-first descriptors and defaults -- **Problem**: Shared resource/page vocabulary still uses labels such as `Entra tenant ID` or single-provider defaults on surfaces that are otherwise framed as generic provider management. -- **Why it matters**: This is lower risk than contract or taxonomy drift, but it trains contributors and operators to treat provider-specific language as the platform default and makes later cleanup broader than necessary. 
-- **Proposed direction**: - - keep shared page/resource vocabulary generic by default - - render provider-specific descriptors contextually when the selected provider is Microsoft - - remove single-provider defaults from supposedly generic filter/field contracts unless the surface is intentionally provider-owned -- **Scope boundaries**: - - **In scope**: provider connection labels, descriptors, filters, helper copy, and contextual provider-specific presentation rules - - **Out of scope**: broad copy-only polishing unrelated to provider boundaries or a full platform terminology rewrite -- **Dependencies**: Provider Identity & Target Scope Neutrality -- **Priority**: medium - -> Recommended sequence for this cluster: -> 1. **Provider Identity & Target Scope Neutrality** -> 2. **Governance Subject Taxonomy Decoupling** -> 3. **Compare Strategy Boundary Hardening** -> 4. **Provider Surface Vocabulary & Descriptor Cleanup** -> -> Why this order: Provider Boundary Hardening is now specced as the first bounded anti-drift pass. The remaining items keep that foundation from being weakened again at identity scope, governance taxonomy, compare fallback, and shared provider-facing vocabulary. - -### Workspace Access Context and Navigation Cost Hardening -- **Type**: hardening -- **Source**: admin/workspace access-path analysis 2026-03-28 — repeated current-workspace, membership, navigation-visibility, and policy-adjacent access resolution across admin requests -- **Problem**: TenantPilot already has request-local caching in some capability resolvers, but the wider workspace/admin access path still pays a repeated request tax. Current workspace resolution, workspace membership lookups, navigation visibility checks, page access checks, and policy-adjacent access helpers can rebuild overlapping context multiple times before the actual screen content has even rendered. The issue is not one slow page; it is a hidden cost shape spread across many admin requests. 
-- **Why it matters**: As admin, monitoring, review, evidence, and future portfolio/workspace surfaces grow, this hidden context tax will compound across almost every workspace-scoped request. Left unbounded, it also increases the risk of access logic drifting into scattered local helpers instead of one explicit request-level contract. -- **Proposed direction**: - - Introduce an explicit request-scoped workspace access context that carries the current workspace ID/model, the membership decision, and any capability-access snapshot needed for repeated checks - - Harden `currentWorkspace()` or equivalent paths so the active workspace model is request-stable instead of repeatedly reloaded - - Make navigation visibility, resource visibility, page access helpers, and similar admin-panel checks consume the shared access context rather than rebuilding workspace/membership state locally - - Reuse the same context in policy-side or policy-adjacent workspace access decisions where repeated lookup is currently common - - Keep cross-panel workspace-aware transitions aligned with the same context contract rather than introducing special-case handoff helpers -- **Scope boundaries**: - - **In scope**: request-scoped workspace access context, current-workspace reuse, membership reuse, navigation/page-access context reuse, and migration of at least a subset of admin-sensitive helpers to the shared path - - **Out of scope**: RBAC redesign, capability-semantic changes, navigation IA restructuring, tenant-panel RBAC rewrite, or product-model changes to workspace-first behavior -- **Acceptance points**: - - The current workspace is not separately loaded multiple times within the same request path - - Repeated workspace-scoped access checks reuse the same membership/access context instead of rebuilding it - - At least two admin-sensitive request paths are migrated to the shared access context - - Navigation visibility uses request-wide reusable workspace context rather than repeated 
local lookups - - Access semantics remain unchanged while the request-path cost is hardened -- **Risks / open questions**: - - A wrong or overly broad shared context could create subtle access bugs - - The boundary between session-persisted workspace choice and request-scoped access context must stay explicit - - Over-centralization could hide legitimate special cases if exceptions are not consciously modeled -- **Suggested order**: third, after the derived-state and tenant-aggregate foundation work has clarified the shared request-scoped patterns. -- **Priority**: medium - -### Tenant Draft Discard Lifecycle and Orphaned Draft Visibility -- **Type**: hardening -- **Source**: domain architecture analysis 2026-03-16 — tenant lifecycle vs onboarding workflow lifecycle review -- **Problem**: TenantPilot correctly separates durable tenant lifecycle (`draft`, `onboarding`, `active`, `archived`) from onboarding workflow lifecycle (`draft` → `completed` / `cancelled`), but there is no end-of-life path for abandoned draft tenants. When all onboarding sessions for a tenant are cancelled, the tenant reverts to `draft` and remains visible indefinitely without a semantically correct cleanup action. Archive/restore do not apply (draft tenants have no operational data worth preserving), and force delete requires archive first (which is semantically wrong for a provisional record). Operators cannot remove orphaned drafts. -- **Why it matters**: Without a discard path, abandoned draft tenants accumulate as orphaned rows in the tenant list. This creates operator confusion (draft vs. archived vs. active ambiguity), data hygiene issues, and forces operators to either ignore stale records or misuse lifecycle actions that don't fit the domain semantics. The gap also makes tenant list UX harder to trust for enterprise operators managing many tenants. 
-- **Proposed direction**: - - Introduce a canonical **draft discardability contract** (central service/policy, not scattered UI visibility logic) that determines whether a draft tenant may be safely removed, considering linked onboarding sessions, downstream artifacts, and operational traces - - Add a **discard draft** destructive action for tenant records in `draft` status with no resumable onboarding sessions, gated by the discardability contract, capability authorization (`tenant.delete` or a dedicated `tenant.discard_draft`), and confirmation modal - - Add an **orphaned draft indicator** to the tenant list/detail views — visual distinction between a resumable draft (has active session) and an abandoned draft (all sessions terminal or none exist) - - Emit a **distinct audit event** (`tenant.draft_discarded`) separate from `tenant.force_deleted`, capturing workspace context, tenant identifiers, linked session state, and acting user - - Preserve and reinforce the existing domain separation: `archive/restore/force_delete` remain reserved for durable tenant lifecycle; `cancel/delete` remain reserved for onboarding workflow lifecycle; `discard` is the new end-of-life action for provisional drafts -- **Key domain rules**: - - `archive` = preserve durable tenant for compliance while removing from active use - - `restore` = reactivate an archived durable tenant - - `force delete` = permanently destroy an already archived durable tenant - - `discard draft` = permanently remove a provisional tenant that never became a durable operational entity - - Draft tenants must NOT become archivable or restorable -- **Safety preconditions for discard**: tenant is in `draft` status, not trashed, no resumable onboarding sessions exist, no accumulated operational data (no policies, backups, operation runs beyond onboarding) -- **Out of scope**: automatic cleanup without operator confirmation, retention policy for cancelled onboarding sessions, changes to the 4-state tenant lifecycle 
enum, changes to the 7-state onboarding session lifecycle enum -- **Dependencies**: Spec 140 (onboarding lifecycle checkpoints — already shipped), Spec 143 (tenant lifecycle operability context semantics) -- **Related specs**: Spec 138 (draft identity), Spec 140 (lifecycle checkpoints), Spec 143 (lifecycle operability semantics) -- **Priority**: medium - -### Baseline Capture Truthful Outcomes and Upstream Guardrails -- **Type**: hardening -- **Source**: product/domain analysis 2026-03-21 — baseline capture no-op success case (Operation Run #112), follow-up to 2026-03-08 discovery "Drift engine hard-fail when no Inventory Sync exists" -- **Problem**: Baseline Capture can currently finish as `Completed / Succeeded` even when no credible baseline was produced. In the observed case, the run accepted a completed-but-blocked upstream Inventory Sync, resolved `subjects_total = 0`, captured `items_captured = 0`, reused an empty snapshot, and still presented a successful baseline capture. The current contract treats all-zero capture as a benign empty result instead of a prerequisite or trust-semantics problem. -- **Why it matters**: This is a governance credibility issue, not just a debugging bug. A green "Baseline capture succeeded" state implies that TenantPilot established or refreshed a trustworthy baseline artifact. When the actual result is "no usable inventory basis" or "no in-scope subjects", operators, auditors, and MSP reviewers are misled. False-green run outcomes weaken operations transparency, baseline trust, and the product's auditability story. -- **Proposed direction**: - - **Precondition guardrails for `baseline.capture`**: require a *usable* upstream inventory basis, not merely the existence of the latest `OperationRun` with `status = completed`. `outcome` and coverage usability must be part of the decision. - - **Stricter upstream source selection**: Baseline Capture should select the latest *credible* Inventory Sync, not simply the latest completed one. 
`blocked` and `failed` inventory runs must not be accepted as baseline inputs. - - **Truthful outcome semantics**: define stable terminal behavior for no-data and bad-upstream cases: - - no inventory exists → `blocked` - - latest inventory run is `blocked` → `blocked` - - latest inventory run is `failed` → `blocked` - - inventory is valid, but zero relevant subjects resolve → `partially_succeeded` - - success is reserved for captures that produce a usable baseline result - - **Stable reason-code contract**: add baseline-capture-specific reason codes for blocked/no-data outcomes so operator messaging, audit logs, and run detail pages are deterministic instead of heuristic. - - **Empty snapshot semantics**: an empty snapshot must not be silently treated as a credible active baseline. If zero-subject capture remains representable for audit/history reasons, it must be visibly marked as a no-data artifact and must not auto-promote to `active_snapshot_id` by default. - - **Operator-facing run UX**: the Operation Run detail page should lead with a primary message such as `No baseline was captured`, `Latest inventory sync was blocked`, `Run tenant sync first`, or `No subjects were in scope`, instead of allowing an all-zero run to read as neutral or successful. Raw JSON/context remains secondary. - - **All-zero count visibility rule**: `0 / 0 / 0 / 0` counts must not read as semantically blank. If a run completes with all-zero counts, the surface must explain *why*. 
-- **Key domain decisions to encode**: - - `baseline.capture` must not accept an upstream inventory run that is unusable from a domain standpoint - - `status = completed` alone is insufficient for capture eligibility - - `subjects_total = 0` must not render as a full success state - - empty or reused empty snapshots are not inherently trustworthy baseline artifacts -- **Scope boundaries**: - - **In scope**: baseline capture runtime preconditions, inventory-run eligibility for capture, capture outcome mapping, baseline-capture reason codes, empty-snapshot promotion rules, operation-run detail messaging for baseline capture, focused start-surface preflight/copy if needed - - **Out of scope**: full redesign of the entire `OperationRun` platform, broad rewrite of inventory coverage semantics, generalized platform-wide no-data heuristics for every operation type, drift-engine compare semantics (except where this candidate explicitly borrows its warning/reason-code pattern), roadmap-wide operations naming or badge redesign -- **Strategic importance**: This is a small-scope but high-trust hardening item. It protects one of TenantPilot's core governance promises: that a baseline artifact is meaningful, reviewable, and safe to reason about. It also strengthens MSP/operator confidence that Monitoring surfaces are operationally truthful, not cosmetically green. -- **Roadmap fit**: Aligns with **Active / Near-term — Governance & Architecture Hardening** (canonical run-view trust semantics) and acts as follow-through on **Baseline Drift Engine (Cutover)** by tightening the capture side of baseline truth, not just the compare side. 
-- **Acceptance points**: - - A capture run without a credible inventory basis cannot finish as `succeeded` - - A capture run with valid inventory but zero in-scope subjects ends as `partially_succeeded` with explicit reason code - - Operation Run detail for baseline capture exposes primary cause + next action before raw context payloads - - Existing tests that encode "empty capture succeeds" are replaced with truthful-outcome coverage - - Empty snapshots are not silently promoted as active baselines unless explicitly allowed and visibly marked by the spec -- **Risks / open questions**: - - Whether zero-item snapshots should still be persisted as audit traces or suppressed entirely needs one explicit product decision; recommendation is "persist only if visibly marked and not auto-promoted" - - There is already a broader inconsistency in `blocked` / `failed` / `skipped` semantics across some operation families (notably Inventory Sync job vs service path). This candidate should mention that risk but remain tightly scoped to Baseline Capture truthfulness. - - If product wants stale successful inventory fallback instead of strict "latest credible only", that needs an explicit rule rather than hidden fallback behavior. -- **Dependencies**: Baseline drift engine stable (Specs 116–119), inventory coverage context, canonical operation-run presentation work (Specs 054, 114, 144), audit log foundation (Spec 134), tenant operability and execution legitimacy direction (Specs 148, 149) -- **Related specs / candidates**: Spec 101 (baseline governance), Specs 116–119 (baseline drift engine), Spec 144 (canonical operation viewer context decoupling), Spec 149 (queued execution reauthorization), `discoveries.md` entry "Drift engine hard-fail when no Inventory Sync exists" - -> **Operator Truth Initiative — Sequencing Note** -> -> The operator-truth work now has two connected lanes: a shared truth-foundation lane and a governance-surface compression lane. 
Together they address the systemic gap between backend truth richness and operator-facing truth quality without forcing operators to parse raw internal semantics. -> -> **Recommended order:** -> 1. **Operator Outcome Taxonomy and Cross-Domain State Separation** — defines the shared vocabulary, state-axis separation rules, and color-severity conventions that all other operator-facing work references. This is the smallest deliverable (a reference document + restructuring guidelines) but the highest-leverage decision. Without it, the other candidates will invent local vocabularies that diverge. -> 2. **Operator Reason Code Translation and Humanization Contract** — defines the translation bridge from internal codes to operator-facing labels using the Outcome Taxonomy's vocabulary. Can begin in parallel with the taxonomy using pragmatic interim labels, but final convergence depends on the taxonomy. -> 3. **Governance Artifact Truthful Outcomes & Fidelity Semantics** — establishes the full internal truth model for governance artifacts, keeping existence, usability, freshness, completeness, publication readiness, and actionability distinct. -> 4. **Operator Explanation Layer for Degraded / Partial / Suppressed Results** — defines the cross-cutting interpretation layer that turns internal truth dimensions into operator-readable explanation: multi-dimensional outcome separation (execution, evaluation, reliability, coverage, action), shared explanation patterns for degraded/partial/suppressed states, count semantics rules, "why no findings" patterns, and reliability visibility. Consumes the taxonomy, translation, and artifact-truth foundations; provides the shared explanation pattern library that compression and humanized summaries adopt on their respective surfaces. Baseline Compare is the golden-path reference implementation. -> 5. 
**Governance Operator Outcome Compression** — now promoted to Spec 214, applying the foundation and explanation layer to governance workflow surfaces so lists and details answer the operator's primary question first while preserving diagnostics as second-layer detail. -> 6. **Humanized Diagnostic Summaries for Governance Operations** — now promoted to Spec 220 (`governance-run-summaries`), the run-detail explainability companion to compression that makes governance run detail self-explanatory using the explanation patterns established in step 4. -> 7. **Provider-Backed Action Preflight and Dispatch Gate Unification** — now promoted to Spec 216, extending the proven Gen 2 gate pattern to all provider-backed operations and establishing a shared result presenter as the adjacent hardening lane. -> -> **Why this order rather than the inverse:** The semantic-clarity audit classified the taxonomy problem as P0 (60% of warning badges are false alarms — actively damages operator trust). Reason code translation creates the shared human-facing language. Spec 158 establishes the correct internal truth engine for governance artifacts. The Operator Explanation Layer then defines the cross-cutting interpretation patterns that all downstream surfaces need — the systemwide rules for how degraded, partial, suppressed, and incomplete results are separated, explained, and acted upon. Compression and humanized summaries are adoption slices that apply those patterns to specific surface families (governance artifact lists and governance run details respectively). Gate unification remains highly valuable but is a neighboring hardening lane. -> -> **Why these are not one spec:** Each candidate has a different implementation surface, different stakeholders, and different shippability boundary. The taxonomy is a cross-cutting decision document. Reason code translation touches reason-code artifacts and notification builders. Spec 158 defines the richer artifact truth engine. 
The Operator Explanation Layer defines the shared interpretation semantics and explanation patterns. Governance operator outcome compression is a UI-information-architecture adoption slice across governance artifact surfaces. Humanized diagnostic summaries are an adoption slice for governance run-detail pages. Gate unification touches provider dispatch and notification plumbing across ~20 services. Merging them would create an unshippable monolith. Keeping them sequenced preserves independent delivery while still converging on one operator language. - -> Architecture contract-enforcement cluster: these candidates come from the targeted repository drift audit on 2026-04-22. Most are intentionally narrower than naming, presentation, or IA cleanup work. The shared contracts already exist; the gap is that they are not yet treated as mandatory on every platform-owned path. The operation-type candidate is the deliberate exception because the audit found two active competing semantic contracts, not just a missing guardrail. - -### Canonical Operation Type Source of Truth -- **Type**: hardening / source-of-truth decision -- **Source**: strict read-only legacy / compatibility audit 2026-04-22; operation-type drift review -- **Problem**: The repo still carries two competing operation-type languages. `OperationCatalog` and several UI or read-model paths prefer canonical dotted names, while persistence, runtime writes, tests, registries, and supporting services still rely on historical underscore values. This is no longer just a thin alias shim; it is parallel semantic truth. -- **Why it matters**: This is now the strongest active compatibility debt in the repo. As long as underscore and dotted forms remain co-equal in specs, code, filters, registries, and tests, every new operation type or execution surface can reinforce drift instead of converging on one contract. 
-- **Goal**: Define one repo-wide normative operation-type language and make explicit which form is persisted, written, resolved at boundaries, and reflected in specs, resources, and tests. -- **In scope**: - - explicit decision between underscore and dotted operation-type language as repo-wide truth - - normative persistence, write, read, and resolution contract for operation types - - cleanup or narrowing of the current alias matrix in `OperationCatalog` - - convergence of `OperationCatalog`, `OperationRunType`, resources, supporting services, specs, and tests - - guardrails that prevent new dual-semantics operation types from being introduced without an explicit exit path -- **Out of scope**: cosmetic label-only renaming, generic repo-wide naming cleanup outside operation types, provider identity redesign, and Baseline Scope V2. -- **Key requirements**: - - exactly one normative operation-type language must exist - - the persisted and written truth must be explicit rather than inferred - - underscore and dotted forms must not remain permanent parallel truths - - any remaining compatibility boundary must be explicit, narrow, and exit-bounded - - specs, code, resources, and tests must converge on the same contract -- **Boundary with Operations Naming Harmonization Across Run Types, Catalog, UI, and Audit**: naming harmonization owns visible operator vocabulary and naming grammar. This candidate owns the underlying semantic and persistence contract for operation types. -- **Boundary with Operator Presentation & Lifecycle Action Hardening**: presentation hardening owns operator-facing labels and badges. This candidate owns the operation-type truth that capability, triage, provider, and related behavior decisions depend on. -- **Dependencies**: `OperationCatalog`, `OperationRunType`, capability/triage/provider decision points, operation resources and link helpers, and any repository guardrail infrastructure introduced by Spec 201. 
-- **Strategic sequencing**: Third step of the repository cleanup strand, after `Dead Transitional Residue Cleanup` and `Onboarding State Fallback Retirement`. -- **Priority**: high - -### Platform Vocabulary Boundary Enforcement for Governed Subject Keys -- **Type**: hardening / platform-boundary clarification -- **Source**: targeted repository architecture/pattern-drift audit 2026-04-22; governed-subject vocabulary drift review -- **Problem**: The repo already treats `policy_type` as compatibility vocabulary rather than active platform language, yet platform-visible query keys, page state, filters, and read-model builders still expose `policy_type` alongside or instead of governed-subject terms. Legacy terminology therefore survives not just in storage or adapters, but in platform-visible boundaries. -- **Why it matters**: This undercuts the repo's own vocabulary migration contract. Contributors and operators continue to read an Intune-shaped key as active platform language even where the platform already has canonical governed-subject terms. -- **Proposed direction**: - - inventory platform-visible uses of `policy_type` and adjacent legacy keys in query/state/read-model boundaries - - distinguish allowed compatibility or storage boundaries from platform-visible vocabulary surfaces - - move platform-visible filter/state/query/read-model contracts to canonical governed-subject terminology - - preserve legacy input compatibility only through explicit normalizers or adapters - - add a guardrail that catches new platform-visible legacy key exposure outside allowed boundary zones -- **Explicit non-goals**: Not a full storage-column rename sweep, not a broad Intune debranding project, not a full governance-taxonomy redesign, and not a generic repo-wide terminology cleanup campaign. -- **Boundary with Spec 202 (Governance Subject Taxonomy)**: Spec 202 defines the taxonomy and canonical governed-subject vocabulary. 
This candidate enforces which keys are allowed to remain platform-visible at runtime boundaries. -- **Boundary with Spec 204 (Platform Core Vocabulary Hardening)**: If Spec 204 remains the active vocabulary-hardening vehicle, this candidate should be absorbed as the governed-subject boundary-enforcement slice rather than promoted as a second parallel vocabulary spec. -- **Dependencies**: Spec 202, `PlatformVocabularyGlossary`, `PlatformSubjectDescriptorNormalizer`, and the baseline/governance builder surfaces that currently expose platform-visible legacy keys. -- **Strategic sequencing**: Third of this cluster. It should follow the operations contract-enforcement work unless Spec 204 is revived sooner and absorbs it directly. -- **Priority**: high - -> Recommended sequence for this cluster: -> 1. **Operation Run Link Contract Enforcement** -> 2. **Canonical Operation Type Source of Truth** -> 3. **Platform Vocabulary Boundary Enforcement for Governed Subject Keys** -> -> If Spec 204 is reactivated as the live vocabulary-hardening vehicle, candidate 3 should fold into that spec instead of creating a competing parallel effort. - -### Baseline Snapshot Fidelity Semantics -- **Type**: hardening -- **Source**: semantic clarity & operator-language audit 2026-03-21 -- **Vehicle**: new standalone candidate -- **Problem**: Baseline snapshots currently conflate renderer maturity, capture depth, metadata-only capture modes, and real API/capture failures into operator-facing fidelity and gap badges. `FidelityState`, `GapSummary`, and related baseline snapshot presentation surfaces make standard-renderer or summary-only situations read like governance warnings. The result is a high-volume false-warning pattern on one of the product's most frequently scanned detail surfaces. -- **Why it matters**: This is the single highest-volume false-warning source identified in the audit. 
If baseline surfaces keep teaching operators that yellow means "the product used a standard renderer" instead of "you should investigate a governance problem," every later baseline warning becomes less credible. -- **Proposed direction**: Create a bounded baseline-specific follow-up that consumes the Operator Outcome Taxonomy and separates snapshot-internal fidelity semantics from diagnostic renderer metadata. Split gap causes into product limitation vs capture-mode choice vs actual capture problem, move product-support facts to diagnostics, and replace vague snapshot labels such as "Captured with gaps" with truthful operator language tied to the correct axis. -- **Dependencies**: Operator Outcome Taxonomy and Cross-Domain State Separation, Structured Snapshot Rendering (Spec 130) -- **Priority**: high - -### Baseline Compare Scope Guardrails & Ambiguity Guidance -- **Type**: hardening -- **Source**: product/operator-trust analysis 2026-03-24 — baseline compare ambiguity and scope communication review -- **Vehicle**: new standalone candidate -- **Problem**: Baseline Compare currently produces confusing results when the baseline snapshot contains generic Microsoft/Intune default objects or subjects with non-unique display names. Not-uniquely-matchable subjects are surfaced as "duplicates" in the UI, implying operator error even when the root cause is provider-side generic naming. Separate truth dimensions — identity confidence (could subjects be matched uniquely?), evidence fidelity (how deep was the compare?), and result trust (how reliable is the overall outcome?) — are collapsed into ambiguous operator signals such as `No Drift Detected` + `Limited confidence` + `Fidelity: Meta` without explaining whether the issue is baseline scope, generic names, limited compare capability, or actual tenant drift. -- **Why it matters**: Operators reading compare results cannot distinguish between "everything is fine" and "we couldn't compare reliably." 
False reassurance (`No Drift Detected` at limited confidence) and false blame ("rename your duplicates" when subjects are provider-managed defaults) erode trust in the product's core governance promise. MSP operators managing baselines for multiple tenants need clear signals about what they can rely on and what requires scope curation — not academic-sounding fidelity labels next to misleading all-clear verdicts. -- **Product decision**: Baseline Compare in V1 is designed for uniquely identifiable, intentionally curated governance policies — not for arbitrary tenant-wide default/enrollment/generic standard objects. When compare subjects cannot be reliably matched due to generic names or weak identity, TenantPilot treats this primarily as a scope/suitability problem of the current baseline content and a transparency/guidance topic in the product — not as an occasion for building a large identity classification engine. -- **Proposed direction**: - - **Compare wording correction**: replace blanket "rename your duplicates" messaging with neutral, scope-aware language explaining that some subjects cannot be matched uniquely by the current compare strategy, that this can happen with generic or provider-managed display names, and that the visible result is therefore only partially reliable - - **Scope guidance on compare surfaces**: make explicit that Baseline Compare is for curated governance-scope policies, not for all tenant content. 
Baseline/capture surfaces must frame Golden Master as a deliberate governance scope, not an unfiltered tenant full-extract - - **Actionable next-step guidance**: when ambiguity is detected, direct operators to review baseline profile scope, remove non-uniquely-identifiable subjects from governance scope, and re-run compare after scope cleanup — not to rename everything wholesale - - **Meta-fidelity and limited-confidence separation**: separate identity-matchability, evidence/compare-depth, and overall result trustworthiness in the communication so operators can tell which dimension is limited and why - - **Conservative framing for problematic V1 domains**: for known generically-named compare domains, allow conservative copy such as "not ideal for baseline compare," "limited compare confidence," "review scope before relying on result" — without introducing deep system-managed default detection - - **Evidence/snapshot surface consistency**: terms like `Missing input`, `Not collected yet`, `Limited confidence` must not read as runtime errors when the actual issue is scope suitability -- **Scope boundaries**: - - **In scope**: compare result warning copy, limited-confidence explanation, next-step guidance, baseline profile/capture scope framing, conservative guardrail copy for problematic domains, evidence/snapshot surface term consistency - - **Out of scope**: comprehensive Microsoft default policy detection, new global identity strategy engine, object-class-based system-managed vs user-managed classification, new deep fidelity matrix for all policy types, automatic exclusion or repair of problematic baseline items, compare engine architecture redesign -- **UX direction**: - - **Bad (current)**: "32 policies share the same display name" / "Please rename the duplicates" / `No Drift Detected` despite `Limited confidence` - - **Good (target)**: neutral, honest, operator-actionable — e.g. 
"Some policies in the current baseline scope cannot be matched uniquely by the current compare strategy. This often happens with generic or provider-managed display names. Review your baseline scope and keep only uniquely identifiable governance policies before relying on this result." -- **Acceptance criteria**: - - AC1: ambiguous-match UI no longer indiscriminately blames operators for duplicates without explaining scope/generic-name context - - AC2: limited-trust compare results are visually and linguistically distinguishable from fully reliable results; operators can tell the result is technically complete but content-wise only partially reliable - - AC3: primary V1 guidance directs operators to baseline-scope review/cleanup and re-compare — not to rename indiscriminately or assume tenant misconfiguration - - AC4: baseline/compare surfaces convey that Golden Master is a curated governance scope - - AC5: `No Drift Detected` at `Limited confidence` is understandable as not-fully-trustworthy, not as definitive all-clear -- **Tests / validation**: - - Warning text for ambiguous matches uses neutral scope/matchability wording - - Next-step guidance points to baseline scope review, not blanket renaming - - `Limited confidence` + `No Drift Detected` is not presented as unambiguous all-clear - - Baseline/compare surfaces include governance-scope hint - - Known compare gaps do not produce misleading "user named everything wrong" messaging - - Existing compare status/outcome logic remains intact - - No new provider-specific special classification logic required for consistent UI -- **Risks**: - - R1: pure copy changes alone might address the symptom too weakly → mitigation: include scope/guidance framing, not just single-sentence edits - - R2: too much guidance without technical guardrails might let operators keep building bad baselines → mitigation: conservative framing and later evolution via real usage data - - R3: team reads this spec as a starting point for large identity 
architecture → mitigation: non-goals are explicitly and strictly scoped -- **Roadmap fit**: Aligns directly with Release 1 — Golden Master Governance (R1.1 BaselineProfile, R1.3 baseline.compare, R1.4 Drift UI: Soll vs Ist). Improves V1 sellability without domain-model expansion: less confusing drift communication, clearer Golden Master story, no false operator blame, better trust basis for compare results. -- **Dependencies**: Baseline drift engine stable (Specs 116–119), Baseline Snapshot Fidelity Semantics candidate (complementary — snapshot-level fidelity clarity), Spec 161 (operator-explanation-layer), Spec 214 (Governance Operator Outcome Compression) as the complementary governance artifact presentation layer -- **Related specs / candidates**: Specs 116–119 (baseline drift engine), Spec 101 (baseline governance), Baseline Capture Truthful Outcomes candidate, Baseline Snapshot Fidelity Semantics candidate, Spec 161 (operator-explanation-layer), Spec 214 (Governance Operator Outcome Compression) -- **Recommendation**: Treat before any large matching/identity extension. Small enough for V1, reduces real operator confusion, protects against scope creep, and sharpens the product message: TenantPilot compares curated governance baselines — not blindly every generic tenant default content. -- **Priority**: high - -### Restore Lifecycle Semantic Clarity -- **Type**: hardening -- **Source**: semantic clarity & operator-language audit 2026-03-21 -- **Vehicle**: new standalone candidate -- **Problem**: Restore flows currently overload run-level status, item-level result status, preview decision state, and manual follow-up guidance into a vocabulary that is too ambiguous for a safety-critical workflow. "Partial" means different things at different levels, preview and apply states are visually too close, and blocked/manual-required states do not consistently tell operators what happened or what to do next. 
-- **Why it matters**: Restore is one of the few product areas where semantic ambiguity can directly lead to the wrong remediation action. Operators need to know whether a tenant is in a consistent state, which items were skipped intentionally versus prevented by prerequisites, and whether the next step is retry, manual completion, or no action. -- **Proposed direction**: Define a restore-specific semantic cleanup that consumes the taxonomy and rationalizes run lifecycle, item results, preview decisions, and next-action language without changing restore execution mechanics. The candidate should reduce ambiguous states, quantify mixed outcomes, and move raw technical detail behind secondary diagnostics. -- **Dependencies**: Operator Outcome Taxonomy and Cross-Domain State Separation, restore-run and restore-preview surfaces already delivered through the restore spec family -- **Priority**: high - -### Inventory, Provider & Operability Semantics -- **Type**: hardening -- **Source**: semantic clarity & operator-language audit 2026-03-21 -- **Vehicle**: new standalone candidate -- **Problem**: Inventory KPI badges, provider connection status labels, sync/prerequisite messages, verification outcomes, and onboarding helper messages all reuse warning language for product/platform readiness facts that are not the same as governance problems. "Metadata only," "Needs attention," "Blocked," degraded provider health, and onboarding prerequisite messages currently collapse prerequisite state, stale verification state, and product support tier into one operator-facing severity bucket. -- **Why it matters**: These surfaces shape daily operator trust and first-run product impression. If inventory and onboarding surfaces treat every prerequisite or product-tier distinction like a warning, operators either ignore the guidance or cannot tell what actually needs intervention. 
-- **Proposed direction**: Create a single operability-semantic candidate that consumes the taxonomy and normalizes how inventory capture mode, provider health/prerequisites, verification results, and onboarding next-step messaging are presented. The candidate should make prerequisite state and recovery guidance explicit while demoting pure product-maturity facts to neutral/diagnostic treatment. -- **Dependencies**: Operator Outcome Taxonomy and Cross-Domain State Separation, provider connection vocabulary/cutover work, onboarding and verification spec family -- **Priority**: medium - -### Tenant Operational Readiness & Status Truth Hierarchy -- **Type**: hardening -- **Source**: product/operator-trust analysis 2026-03-24 — tenant-facing status presentation and source-of-truth hierarchy review -- **Vehicle**: new standalone candidate -- **Problem**: Tenant-facing surfaces expose multiple parallel status domains — lifecycle, legacy app status, provider connection state, provider health, verification report availability, RBAC readiness, and recent run evidence — without a clear hierarchy. Some domains are valid but poorly explained; at least one (`Tenant.app_status`) is legacy/orphaned truth still presented as if authoritative. The combined presentation does not answer the operator's actual question: "Can I trust this tenant right now, and is any action required?" Instead, operators must mentally reconcile seven semi-related status fragments with no clear precedence, creating three distinct risks: legacy truth leakage (dead fields displayed as current truth), state collision without hierarchy (valid domains answering different questions but appearing to compete), and support/trust burden (operators asking why a tenant is "active" yet also "unknown," or provider is "connected" but health is "unknown," even when operational evidence proves usability). -- **Why it matters**: TenantPilot is moving further into governance, evidence, reviews, drift, and portfolio visibility.
As the product becomes more compliance- and operations-centric, source-of-truth quality on core tenant surfaces becomes more important, not less. If left unresolved: support load scales with tenant count, MSP operators learn to distrust or ignore status surfaces, future governance views inherit ambiguous foundations, and headline truth across baselines, evidence, findings, and reviews remains semantically inconsistent. For an enterprise governance platform, this is a product-truth and operator-confidence issue, not just a wording problem. -- **Core insight**: Not every status belongs at the same level. The product currently exposes multiple truths belonging to different semantic layers: - - **Layer 1 — Headline operator truth**: "Can I work with this tenant, and is action required?" - - **Layer 2 — Domain truth**: lifecycle, provider consent/access, verification, RBAC, recent operational evidence - - **Layer 3 — Diagnostic truth**: low-level or specialized states useful for investigation, not competing with headline summary - - **Layer 4 — Legacy/orphaned truth**: stale, weakly maintained, deprecated, or no longer authoritative fields -- **Proposed direction**: - - **Headline readiness model**: define a single tenant-facing readiness summary answering whether the tenant is usable and whether action is needed. Concise operator-facing states such as: Ready, Ready with follow-up, Limited, Action required, Not ready. - - **Source-of-truth hierarchy**: every tenant-facing status shown on primary surfaces classified as authoritative, derived, diagnostic, or legacy. Authoritative sources: lifecycle, canonical provider consent/access state, canonical verification state, RBAC readiness, recent operational evidence as supporting evidence. - - **Domain breakdown beneath headline**: each supporting domain exists in a clearly subordinate role — lifecycle, provider access/consent, verification state, RBAC readiness, recent operational evidence. 
- - **Action semantics clarity**: primary surfaces must distinguish between no action needed, recommended follow-up, required action, and informational only. - - **Verification semantics**: UI must distinguish between never verified, verification unavailable, verification stale, verification failed, and verified but follow-up recommended. These must not collapse into ambiguous "unknown" messaging. - - **Provider truth clarity**: provider access state must clearly differentiate access configured/consented, access verified, access usable but not freshly verified, access blocked or failed. - - **RBAC semantics clarity**: RBAC readiness must clearly state whether write actions are blocked, without implying that all tenant operations are unavailable when read-only operations still function. - - **Operational evidence handling**: recent successful operations may contribute supporting confidence, but must not silently overwrite or replace distinct provider verification truth. - - **Legacy truth removal/demotion**: fields that are legacy, orphaned, or too weak to serve as source of truth must not remain prominent on tenant overview surfaces. Explicit disposition for orphaned fields like `Tenant.app_status`. - - **Reusable semantics model**: the resulting truth hierarchy and readiness model must be reusable across tenant list/detail and future higher-level governance surfaces. 
-- **Functional requirements**: - - FR1 — Single tenant-facing readiness summary answering operability and action-needed - - FR2 — Every primary-surface status classified as authoritative, derived, diagnostic, or legacy - - FR3 — Legacy/orphaned fields not displayed as current operational truth on primary surfaces - - FR4 — No peer-level contradiction on primary surfaces - - FR5 — Verification semantics explicitly distinguishing not yet verified / unavailable / stale / failed / verified with follow-up - - FR6 — Provider access state clearly differentiating configured, verified, usable-but-not-fresh, blocked - - FR7 — RBAC readiness clarifying write-block vs full-block - - FR8 — Operational evidence supportive but not substitutive for verification truth - - FR9 — Actionability clarity on primary surfaces - - FR10 — Reusable semantics for future governance surfaces -- **UX/product rules**: - - Same question, one answer: if several states contribute to the same operator decision, present one synthesized answer first - - Summary before diagnostics: operator summary belongs first, domain detail underneath or behind expansion - - "Unknown" is not enough: must not substitute for not checked, no report stored, stale result, legacy field, or unavailable artifact - - Evidence is supportive, not substitutive: successful operations reinforce confidence but do not replace explicit verification - - Lifecycle is not health: active does not mean provider access is verified or write operations are ready - - Health is not onboarding history: historical onboarding verification is not automatically current operational truth -- **Likely surfaces affected**: - - Primary: tenant detail/overview page, tenant list presentation, tenant widgets/cards related to verification and recent operations, provider-related status presentation within tenant views, helper text/badge semantics on primary tenant surfaces - - Secondary follow-up: provider connection detail pages, onboarding 
completion/follow-up states, future portfolio rollup views -- **Scope boundaries**: - - **In scope**: truth hierarchy definition, headline readiness model, tenant detail/overview presentation rules, provider state presentation rules on tenant surfaces, verification semantics on tenant surfaces, RBAC relationship to readiness, role of recent operational evidence, legacy truth cleanup on primary tenant surfaces - - **Out of scope**: redesigning OperationRun result semantics in general, revisiting every badge/helper in the product, evidence/reporting semantics outside tenant readiness, changing onboarding lifecycle requirements unless directly necessary for truth consistency, provider architecture overhaul, full data-model cleanup beyond what is needed to remove legacy truth from primary surfaces, full badge taxonomy standardization everywhere, color palette / visual design overhaul, findings severity or workflow semantics, broad IA/navigation redesign, portfolio-level rollup semantics beyond stating compatibility goals -- **Acceptance criteria**: - - AC1: Primary tenant surfaces present a single operator-facing readiness truth rather than several equal-weight raw statuses - - AC2: Lifecycle, provider access, verification, RBAC, and operational evidence shown with explicit semantic roles and no ambiguous precedence - - AC3: Legacy/orphaned status fields no longer presented as live operational truth on primary surfaces - - AC4: System clearly differentiates not yet verified / verification unavailable / stale / failed - - AC5: Operator can tell within seconds whether tenant is usable / usable with follow-up / limited / blocked / in need of action - - AC6: Recent successful operations reinforce confidence where appropriate but do not silently overwrite explicit verification truth - - AC7: Primary tenant status communication suitable for MSP/enterprise use without requiring tribal knowledge to interpret contradictions -- **Boundary with Dead Transitional Residue 
Cleanup**: That cleanup strand absorbs the earlier quick removal of the single most obvious legacy truth field (`Tenant.app_status`) plus adjacent dead-symbol residue. This candidate defines the broader truth hierarchy and presentation model that decides how all tenant status domains interrelate. The residue cleanup is a subset action that can proceed independently as a quick win; this candidate provides the architectural direction that prevents future truth leakage. -- **Boundary with Provider Connection Status Vocabulary Cutover**: Provider vocabulary cutover owns the `ProviderConnection` model-level status field cleanup (canonical enum sources replacing legacy varchar fields). This candidate owns how provider truth appears on tenant-level surfaces in relation to other readiness domains. The vocabulary cutover provides cleaner provider truth; this candidate defines where and how that truth is presented alongside lifecycle, verification, RBAC, and evidence. -- **Boundary with Inventory, Provider & Operability Semantics**: That candidate normalizes how inventory capture mode, provider health/prerequisites, and verification results are presented on their own surfaces. This candidate defines the tenant-level integration layer that synthesizes those domain truths into a single headline readiness answer. Complementary: domain-level semantic cleanup feeds into the readiness model. -- **Boundary with Spec 161 (Operator Explanation Layer)**: The explanation layer defines cross-cutting interpretation patterns for degraded/partial/suppressed results. This candidate defines the tenant-specific readiness truth hierarchy. The explanation layer may inform how degraded readiness states are explained, but the readiness model itself is tenant-domain-specific. -- **Boundary with Spec 214 (Governance Operator Outcome Compression)**: Governance compression improves governance artifact scan surfaces. This candidate improves tenant operational readiness surfaces. 
Both reduce operator cognitive load but in different product domains. -- **Dependencies**: Operator Outcome Taxonomy and Cross-Domain State Separation (shared vocabulary foundation), Dead Transitional Residue Cleanup (quick win that removes the most obvious legacy truth plus adjacent dead residue — can proceed independently before or during this spec), Provider Connection Status Vocabulary Cutover (cleaner provider truth sources — soft dependency), existing tenant lifecycle semantics (Spec 143) -- **Related specs / candidates**: Spec 143 (tenant lifecycle operability context semantics), Dead Transitional Residue Cleanup, Provider Connection Status Vocabulary Cutover, Provider Connection UX Clarity, Inventory Provider & Operability Semantics, Operator Outcome Taxonomy (Spec 156), Operation Run Active-State Visibility & Stale Escalation -- **Strategic sequencing**: Best tackled after the Operator Outcome Taxonomy provides shared vocabulary and after Dead Transitional Residue Cleanup removes the most obvious legacy truth and adjacent dead residue as a quick win. Should land before major portfolio, MSP rollup, or compliance readiness surfaces are built, since those surfaces will inherit the tenant readiness model as a foundational input. -- **Priority**: high - -> Findings execution layer cluster: complementary to existing Spec 154 (`finding-risk-acceptance`). Keep these split so prioritization can pull workflow semantics, operator work surfaces, alerts, external handoff, and later portfolio operating slices independently instead of collapsing them into one oversized "Findings v2" spec. - -### Finding Comments & Decision Log v1 -- **Type**: collaboration / audit depth -- **Source**: findings execution layer candidate pack 2026-04-17; operator handoff and context gap analysis -- **Problem**: Audit can show that a transition happened, but not the day-to-day operator reasoning or handover context behind triage, resolve, close, or risk-accept decisions.
-- **Why it matters**: Human workflow needs concise contextual notes that do not belong in status fields or reason-code taxonomies. Without them, operator handover quality stays low. -- **Proposed direction**: Add comments and lightweight decision-log entries on findings; surface them in a timeline alongside immutable audit events; use them to support triage, handover, and rationale capture without turning findings into a chat product. -- **Explicit non-goals**: `@mentions`, attachments, chat, and realtime collaboration. -- **Dependencies**: Finding detail surface, audit/timeline rendering, RBAC. -- **Roadmap fit**: Spec-candidate only for now; not required as a standalone roadmap theme. -- **Priority**: medium - -### Findings External Ticket Handoff v1 -- **Type**: external integration / execution handoff -- **Source**: findings execution layer candidate pack 2026-04-17; MSP/enterprise workflow alignment gap -- **Problem**: Enterprise and MSP operators often need to hand findings into external service-desk or PSA workflows, but the current findings model has no first-class outbound ticket link or handoff state. -- **Why it matters**: Outbound handoff is a sellable bridge between TenantPilot governance and existing customer operating models. Without it, findings remain operationally isolated from the systems where remediation work actually gets tracked. -- **Proposed direction**: Add an external ticket reference and link on findings, support simple outbound handoff or "ticket created/linked" flows, and audit those transitions explicitly; make internal versus external execution state visible without promising full synchronization. -- **Explicit non-goals**: Bidirectional sync, OAuth-native integrations, and full ITSM domain modeling. -- **Dependencies**: Findings UI, workspace settings or handoff target configuration, audit events, stable ownership semantics. -- **Roadmap fit**: Future PSA/ticketing lane. 
-- **Priority**: medium - -### Cross-Tenant Findings Workboard v1 -- **Type**: MSP / portfolio operations -- **Source**: findings execution layer candidate pack 2026-04-17; portfolio-scale findings operations gap -- **Problem**: Once operators manage many tenants, tenant-local inboxes and queues stop scaling. There is no cross-tenant work surface for open findings across a workspace or portfolio. -- **Why it matters**: MSP portfolio work requires cross-tenant prioritization by severity, due date, assignee, and tenant. This is the operational complement to a portfolio dashboard. -- **Proposed direction**: Add a cross-tenant findings workboard or queue for open findings with filters for severity, due date, assignee, tenant, and status; preserve tenant drilldown and RBAC boundaries; position it as the portfolio-operating surface next to the dashboard, not a replacement for per-tenant detail. -- **Explicit non-goals**: Rollout orchestration, full portfolio remediation planning, and team capacity views. -- **Dependencies**: Operator inbox, intake queue, notifications/escalation, workspace-level finding visibility, cross-tenant RBAC semantics. -- **Roadmap fit**: MSP portfolio and operations. -- **Priority**: medium-low - -### Canonical Control Catalog Foundation -- **Type**: foundation -- **Source**: governance-engine gap analysis 2026-04-22, roadmap/principles alignment 2026-04-10, compliance modeling discussion, future framework-oriented readiness planning -- **Vehicle**: new standalone candidate -- **Layer position**: **S1** — normative control core -- **Problem**: TenantPilot already has a real governance engine across baseline profiles, baseline capture and compare, drift findings, findings workflow, exceptions, alerts, stored reports, evidence items, and tenant review packs, but it still lacks the shared canonical object those features should point at. 
Today the product risks modeling control meaning in three competing places: framework-specific overlays such as NIS2, BSI, ISO, or COBIT mappings; Microsoft service- or subject-specific lists such as Entra, Intune, Exchange, or Purview subjects; or feature-local assumptions embedded separately in baseline, drift, findings, evidence, and report logic. Without a framework-neutral canonical control catalog, the same technical control objective will be duplicated, evidence and control truth will blur together, and later readiness or reporting work will inherit inconsistent semantics. -- **Why it matters**: This is the missing structural bridge between the current governance engine and later compliance-readiness overlays. Operators, customers, and auditors need one stable answer to "what control is this about?" before the platform can credibly say which Microsoft subjects support it, which evidence proves it, which findings violate it, or which frameworks map to it. A canonical control layer prevents framework duplication, keeps control, evidence, finding, exception, and report semantics aligned, and lets the product communicate detectability honestly instead of over-claiming technical verification. 
-- **Proposed direction**: - - Introduce a framework-neutral canonical control catalog centered on control themes and objectives rather than framework clauses or raw Microsoft API objects - - Define canonical domains and subdomains plus stable product-wide control keys that outlive individual APIs, workloads, or framework versions - - Classify each control by control class, detectability class, evaluation strategy, evidence archetypes, and artifact suitability for baseline, drift, findings, exceptions, reports, and evidence packs - - Add a Microsoft subject-binding layer that links one canonical control to supported subject families, workloads, and signals without collapsing the control model into service-specific schema mirrors - - Start with a deliberately small seed catalog of high-value tenant-near control families such as strong authentication, conditional access, privileged access exposure, guest or cross-tenant boundaries, endpoint compliance and hardening, sharing boundaries, mail protection, audit retention, data protection readiness, and delegated admin boundaries - - Make baseline profiles, compare and drift, findings, exceptions, stored reports, and evidence items able to reference a `canonical_control_key` or equivalent control-family contract instead of each feature inventing local control meaning - - Keep framework mappings as a later overlay: prepare mapping structure now if useful, but do not make NIS2, BSI, ISO, COBIT, or similar frameworks the primary shape of the foundation -- **Scope boundaries**: - - **In scope**: canonical control vocabulary, domain and subdomain taxonomy, stable canonical keys, detectability and evaluation classifications, evidence archetypes, Microsoft subject binding model, a small seed catalog for priority control families, and integration contracts for baseline, findings, exceptions, evidence, and reports - - **Out of scope**: full framework catalogs, full NIS2, BSI, ISO, COBIT, or similar mappings, exhaustive Microsoft 
service coverage, giant control-library breadth, a full attestation engine, stakeholder-facing readiness or report UI, posture scoring models, or replacing the evidence domain with a second artifact store -- **Explicit non-goals**: - - Not a certification engine or legal interpretation layer - - Not a framework-first registry where the same control is duplicated once per standard - - Not a mirror of raw Microsoft API payload shapes as the product's control model - - Not a CIS-specific baseline library or template pack layer; that belongs above the catalog, not inside it - - Not a requirement to ingest every framework in full before the first useful control family ships - - Not a promise that every control becomes directly technically evaluable; indirect, attested, and external-evidence-only controls remain first-class -- **Acceptance points**: - - The platform can represent canonical domains, subdomains, and controls with stable keys independent of framework source versions - - Every seed control declares control class, detectability class, evaluation strategy, and at least one evidence archetype - - Every seed control can declare whether it is baseline-capable, drift-capable, finding-capable, exception-capable, and report or evidence-pack-capable - - The model can bind one canonical control to multiple Microsoft subject families or signal sources without redefining the control per workload - - Baselines, findings, evidence, exceptions, and later readiness or reporting work have a defined path to consume the canonical control layer instead of hardcoding local control semantics - - The foundation can explicitly represent controls that are direct-technical, indirect-technical, workflow-attested, or external-evidence-only without collapsing them into one false compliant/non-compliant path -- **Boundary with Spec 202 (Governance Subject Taxonomy and Baseline Scope V2)**: Spec 202 defines governed-subject vocabulary and baseline-scope input contracts. 
This candidate defines the higher-order canonical control objects that can bind to those subject families and later unify baseline, findings, evidence, and reporting semantics above raw governed-subject lists. -- **Boundary with Evidence Domain Foundation**: Evidence Domain Foundation owns evidence capture, completeness, freshness, and immutable artifacts. This candidate owns the canonical control definitions those evidence artifacts can support and the detectability and evaluation semantics that explain what the evidence means. -- **Boundary with Spec 154 (Finding Risk Acceptance Lifecycle)**: Risk Acceptance owns the lifecycle for approved deviations once a finding or control gap exists. This candidate owns the stable control object that exceptions and compensating-control semantics should refer to. -- **Boundary with CIS Baseline Library**: The CIS library owns reusable template packs and benchmark libraries built on top of canonical controls. This candidate owns the control ontology itself and must not absorb CIS-specific expected-state packs into the normative control core. -- **Boundary with Compliance Readiness & Executive Review Packs**: Compliance Readiness owns stakeholder-facing framework and readiness views plus report packaging. This candidate owns the framework-neutral control core those later views should map onto instead of inventing per-framework local logic. 
-- **Dependencies**: Spec 202 (governed-subject vocabulary), Spec 153 (evidence-domain-foundation) for evidence-contract alignment, Spec 154 (finding-risk-acceptance), baseline and drift foundations, and downstream stored-report or review-pack consumers -- **Related specs / candidates**: Spec 153 (evidence-domain-foundation), Spec 154 (finding-risk-acceptance), Spec 155 (tenant-review-layer), Spec 202 (governance-subject-taxonomy), CIS Baseline Library, Compliance Readiness & Executive Review Packs, Security Posture Signals Foundation, Entra Role Governance, SharePoint Tenant-Level Sharing Governance -- **Strategic sequencing**: This is best treated as the bridge between the current governance engine and later framework-facing readiness work. It should land before substantial NIS2, BSI, ISO, COBIT, or similar mapping and auditor-pack expansion, and ideally before evidence or review surfaces hardcode control meaning locally. -- **Roadmap fit**: Early-R2 foundation layer between the shipped governance engine and later compliance-readiness overlays. This is no longer treated as a late compliance feature; it is the semantic foundation for tenant reviews, evidence packs, readiness views, and later auditor-facing outputs. -- **Priority**: high - -### CIS Baseline Library -- **Type**: feature / library layer -- **Source**: roadmap layering alignment 2026-04-22, baseline-library planning, future benchmark/template packaging -- **Vehicle**: new standalone candidate -- **Layer position**: **S2** — catalog-based template and library layer -- **Problem**: Once TenantPilot has a framework-neutral canonical control catalog, it still needs a reusable library layer for widely recognized baseline packs such as CIS without turning CIS into the product's primary control ontology. Today that distinction does not exist explicitly in the candidate stack. 
Without a separate library-layer candidate, CIS guidance will tend to leak downward into the canonical catalog or upward into readiness views, blurring three different concerns: what a control is, what a reusable benchmark template recommends, and how a framework-specific readiness statement should be derived. -- **Why it matters**: CIS is valuable to TenantPilot as a reusable template and benchmark library, not as the platform's canonical control object model. MSPs and operators need versioned, explainable baseline packs they can adopt, compare against, and use as a curated starting point. Keeping CIS in a library layer preserves the framework-neutral core, makes benchmark evolution manageable, and avoids letting one external source define the entire product architecture. -- **Proposed direction**: - - Introduce versioned CIS-aligned template packs and baseline libraries that map onto canonical controls rather than redefining them - - Keep library-pack lifecycle explicit: import or activate, preview, diff, archive, and supersede without mutating the underlying control ontology - - Let one library item express expected-state guidance, applicability, severity or importance hints, and subject-level realization on top of the canonical control catalog - - Allow baseline profiles and later compare or reporting features to reference CIS library packs as curated starters or benchmark templates rather than a second control taxonomy - - Preserve room for future non-CIS libraries such as company standards, MSP reference packs, or vertical-specific benchmark packs built on the same catalog -- **Scope boundaries**: - - **In scope**: CIS-aligned library-pack model, versioning and lifecycle, mapping to canonical controls and governed subjects, baseline-template consumption paths, and bounded operator-visible library metadata - - **Out of scope**: replacing the canonical control catalog, full framework readiness mapping, certification semantics, stakeholder-facing readiness 
reporting, or a generic pack marketplace -- **Explicit non-goals**: - - Not a second control ontology beside the canonical catalog - - Not a readiness or evidence-mapping layer for NIS2, BSI, ISO, or COBIT - - Not a requirement that every canonical control must have a CIS template entry - - Not a forced replacement of operator-defined baseline profiles; library packs remain reusable starting points and references -- **Acceptance points**: - - The platform can represent a CIS library version independently from canonical catalog versions and framework-readiness overlays - - A CIS library entry can point to canonical controls and governed-subject realizations without redefining the control itself - - Baseline workflows can consume CIS library packs as reusable templates or benchmark references without collapsing the product into a CIS-first model - - Library-pack evolution can show added, changed, retired, or superseded guidance without changing historical control meaning - - Future company-standard or MSP-specific libraries can reuse the same template-layer mechanics without inventing another control taxonomy -- **Boundary with Canonical Control Catalog Foundation**: The canonical catalog defines what the control is. The CIS library defines one reusable benchmark or template expression built on top of that control. -- **Boundary with Compliance Readiness & Executive Review Packs**: Compliance Readiness owns mapping, evidence assembly, and readiness statements for frameworks or stakeholder views. The CIS library owns reusable benchmark packs and templates, not readiness scoring or framework interpretation. 
-- **Dependencies**: Canonical Control Catalog Foundation, Spec 202 (governed-subject vocabulary), baseline and drift foundations, and evidence alignment where benchmark reporting later consumes library references -- **Related specs / candidates**: Canonical Control Catalog Foundation, Compliance Readiness & Executive Review Packs, Spec 202 (governance-subject-taxonomy), Spec 203 (baseline-compare-strategy), company standards / policy quality work -- **Strategic sequencing**: Conceptually this is the S2 layer between the canonical control core and later framework-readiness overlays. It can ship after the control foundation once the catalog and governed-subject bindings are stable enough to host reusable benchmark templates. -- **Roadmap fit**: S2 library layer for reusable benchmark and baseline packs. -- **Priority**: medium-high - -### Compliance Readiness & Executive Review Packs -- **Type**: feature -- **Source**: roadmap-to-spec coverage audit 2026-03-18, R2 theme completion, product positioning for German midmarket / MSP governance -- **Vehicle note**: Tenant review and publication-readiness semantics should extend existing Spec 155 (`tenant-review-layer`), not become a separate candidate. This candidate remains about broader management/stakeholder-facing readiness outputs beyond the current review-layer spec. -- **Layer position**: **S3** — mapping, evidence, and readiness layer -- **Problem**: TenantPilot is building a strong evidence/data foundation (Spec 153, StoredReports, review pack export via Spec 109, findings, baselines), but there is no product-level capability that assembles this data into management-ready, customer-facing, or auditor-oriented readiness views. Enterprise customers, MSP account managers, and CISOs need structured governance outputs for recurring tenant reviews, audit preparation, and compliance conversations — not raw artifact collections or manual export assembly. 
The gap is not data availability; it is the absence of a dedicated readiness presentation and packaging layer that turns existing governance evidence into actionable, consumable deliverables. -- **Why it matters**: This is a core product differentiator and revenue-relevant capability for the MSP and German midmarket audience. Without it, TenantPilot remains an operator tool — powerful but invisible to the stakeholders who sign off on governance, approve budgets, and evaluate vendor value. Structured readiness outputs for NIS2, BSI, executive summaries, customer review packs, and later governance-assurance overlays make TenantPilot sellable as a governance review platform, not just a backup and configuration tool. This directly strengthens the MSP sales story for quarterly reviews, security health checks, and audit preparation. -- **Proposed direction**: - - A dedicated readiness/review presentation layer that consumes evidence domain artifacts, findings summaries, baseline/drift posture, permission posture signals, and operational health data - - Management-ready output surfaces: executive summary views, customer-facing review dashboards, and structured readiness pages oriented toward frameworks such as BSI Grundschutz and NIS2 — in a lightweight, non-certification sense (governance evidence, not formal compliance claims) - - Exportable review packs that combine multiple evidence dimensions into a single coherent deliverable (PDF or structured export) for external stakeholders - - Tenant-scoped and workspace-scoped views — individual tenant reviews as well as portfolio-level readiness summaries - - Clear separation from the Evidence Domain Foundation: evidence foundation owns curation, completeness tracking, and artifact storage; compliance readiness owns presentation, assembly, and stakeholder-facing output - - Keep ISO and COBIT in governance-, assurance-, ISMS-, and readiness-oriented overlays rather than introducing them as a separate technical control library 
- - Readiness views should be composable: an operator selects which dimensions to include in a review pack (e.g. baseline posture + findings summary + permission evidence + operational health) rather than a monolithic fixed report -- **Explicit non-goals**: Not a formal certification engine — TenantPilot does not certify compliance or issue attestations. Not a legal or compliance advice system. Not a replacement for the Evidence Domain Foundation (which owns the data layer). Not a generic BI dashboard or data warehouse initiative. Not a PDF-only export task — the primary value is the structured readiness view, with export as a secondary delivery mechanism. Not a reimplementation of review pack export (Spec 109 handles CSV/ZIP). Not a customer-facing analytics suite. -- **Boundary with Evidence Domain Foundation**: Evidence Domain Foundation = curation, completeness tracking, artifact storage, immutable snapshots. Compliance Readiness = presentation, assembly, framework-oriented views, stakeholder-facing outputs. Evidence Foundation is a prerequisite; Compliance Readiness is a consumer. -- **Boundary with Canonical Control Catalog Foundation**: Canonical Control Catalog Foundation = framework-neutral control core, detectability semantics, and control-to-subject or evidence alignment. Compliance Readiness = framework-aware presentation, rollup, and stakeholder-facing output built on top of that shared control layer. -- **Boundary with CIS Baseline Library**: The CIS library owns reusable template packs and benchmark baselines. Compliance Readiness owns NIS2, BSI, and later governance-assurance overlays that map evidence and control coverage into readiness statements. 
-- **Dependencies**: Canonical Control Catalog Foundation, Evidence Domain Foundation (data layer), review pack export (Spec 109), findings workflow (Spec 111), baseline/drift engine (Specs 116–119), permission posture (Specs 104/105), audit log foundation (Spec 134) -- **Strategic sequencing**: This is the S3 layer. It should consume the canonical control core and evidence model, and it should remain separate from the CIS template-library layer so benchmark packs and readiness mappings do not collapse into the same object family. -- **Priority**: medium (high strategic value, but depends on evidence foundation maturity) - -### Enterprise App / Service Principal Governance -- **Type**: feature -- **Source**: platform domain coverage planning, governance gap analysis -- **Problem**: TenantPilot covers tenant configuration and governance workflows, but lacks a first-class governance surface for enterprise applications and service principals. Operators cannot easily answer which app identities exist, which ones hold privileged permissions, which credentials are nearing expiry, and where renewal/review workflows are needed. -- **Why it matters**: Enterprise apps and service principals are a major governance and security pain point in Microsoft cloud environments. Expiring secrets/certificates, over-privileged app permissions, and unclear ownership create real audit, operational, and risk-management gaps. This is highly relevant for MSP reviews, customer reporting, and exception workflows. -- **Proposed direction**: Add a governance-oriented domain surface for enterprise applications and service principals, starting with inventory, privileged-permission visibility, expiring credential visibility, ownership/review metadata, alerting hooks, and exception/renewal workflow support. Keep the scope centered on governance and reviewability rather than trying to model all enterprise app administration. 
-- **Dependencies**: Evidence/reporting direction, alerting foundations, RBAC/capability model, domain coverage strategy -- **Priority**: high - -### SharePoint Tenant-Level Sharing Governance -- **Type**: feature -- **Source**: platform domain coverage planning, audit/compliance positioning -- **Problem**: TenantPilot currently focuses on device and identity governance domains, but does not yet cover one of the most audit-relevant Microsoft 365 data-governance control surfaces: tenant-level SharePoint and OneDrive external sharing settings. Operators lack a governance view for high-risk sharing posture at tenant scope. -- **Why it matters**: Tenant-level sharing controls are central to data exposure, external collaboration, and audit readiness. For many customers, especially compliance-oriented SMB and midmarket environments, these settings are part of the core governance story and should not remain outside the platform's planned coverage. -- **Proposed direction**: Introduce a bounded governance surface for tenant-level SharePoint and OneDrive sharing/access settings, focused on inventory, reviewability, explainability, and later alignment with evidence/reporting workflows. Start at tenant-level controls rather than attempting full site-level administration or a broad SharePoint management surface. -- **Dependencies**: Domain coverage strategy, Microsoft 365 policy-domain expansion, reporting/evidence direction -- **Priority**: medium - -### Entra Role Governance -- **Type**: feature -- **Source**: platform domain coverage planning, identity governance expansion -- **Problem**: TenantPilot does not yet provide a first-class governance surface for Microsoft Entra roles. Built-in roles, custom role definitions, and role assignments are highly relevant for identity governance, but today they are not planned as a dedicated product capability. 
-- **Why it matters**: Role governance is a central part of tenant security posture, privileged access control, and audit readiness. Customers need visibility into how administrative authority is defined and assigned, especially as Entra role usage grows beyond default out-of-the-box roles. -- **Proposed direction**: Add a first-class Entra role governance capability focused on role definitions and assignments as governable objects. Start with inventory, visibility, and review-oriented explainability. Preserve the possibility of future attestation/review workflows without making them mandatory in V1. -- **Dependencies**: Identity governance expansion, RBAC/capability model, reporting/evidence direction -- **Priority**: medium - -### Security Posture Signals Foundation -- **Type**: feature -- **Source**: platform domain coverage planning, compliance/readiness reporting direction -- **Problem**: TenantPilot's evidence and reporting direction is strong, but high-value security posture signals such as Defender Vulnerability Management exposure data and backup assurance signals are not yet represented as a bounded product capability. This leaves a gap between governance findings and the operational evidence customers want in recurring reviews. -- **Why it matters**: Customers and MSP operators increasingly want proof that security operations are functioning, not just that configurations exist. Exposure trends, vulnerability posture, and backup success/failure signals are highly valuable inputs for executive reviews, customer reporting, and audit preparation. -- **Proposed direction**: Establish a bounded evidence/signal foundation for ingesting, historizing, correlating, and reporting on selected posture signals, starting with Defender Vulnerability Management and backup success/failure/protection-state signals. Keep this clearly in the evidence domain, not the policy domain. 
-- **Dependencies**: StoredReports/Evidence direction, signal ingestion foundations, reporting/export maturity -- **Priority**: medium - -### Security Suite Layer — Posture Score, Blast Radius, High-Risk Opt-In Controls -- **Type**: feature -- **Source**: roadmap-to-spec coverage audit 2026-03-18, 0800-future-features brainstorming, roadmap "Security Suite Layer" long-term theme -- **Problem**: TenantPilot's security-related capabilities are growing — findings, baselines, drift detection, permission posture — but they remain siloed as individual data outputs. The Security Posture Signals Foundation candidate addresses lower-level signal ingestion and evidence collection, but there is no product layer that aggregates, interprets, and prioritizes these signals into actionable security posture surfaces for operators. An MSP operator managing twenty tenants cannot currently answer: which tenants have the weakest security posture? Which single misconfiguration has the widest blast radius across users and devices? Which high-risk settings are intentionally enabled vs. accidentally exposed? The gap is not signal availability — it is the absence of a higher-level interpretation and prioritization layer that turns raw posture data into operator-facing security value. -- **Why it matters**: Raw signals and individual findings are necessary but insufficient. Operators, CISOs, and MSP account managers need aggregated, prioritized, and contextualized security views that surface the most consequential risks first. Without this, security-relevant data is scattered across findings tables, drift reports, permission posture views, and evidence exports — forcing operators to mentally assemble a posture picture themselves. A productized security posture layer is the difference between "we collect security data" and "we help you act on the most important risks." 
This is a strategic differentiator for MSP positioning and enterprise customer conversations where security posture is a recurring review topic. -- **Proposed direction**: - - **Posture scoring or posture rollups**: Tenant-level and optionally workspace-level security posture summaries that aggregate signals from findings, baselines, drift state, permission posture, and (when available) external posture signals into a structured posture indicator. Not a single arbitrary number — a structured rollup showing posture dimensions (configuration compliance, identity hygiene, protection coverage, exposure risk) with clear drill-down paths. The goal is "where should I focus?" not "what is my score?" - - **Blast-radius and impact-oriented interpretation**: For high-severity findings, misconfigurations, or risky conditions, show the scope of impact — how many users, devices, or groups are affected? Which policies target broad populations with permissive or risky settings? Impact context helps operators prioritize consequential risks over technically-severe-but-narrow ones. This is interpretation layered on top of existing assignment and scope data, not a separate data collection effort. - - **High-risk opt-in and guarded enablement surfaces**: Where tenants have intentionally enabled high-risk settings (e.g. broad sharing, disabled MFA for service accounts, permissive conditional access), make these visible as explicit, acknowledged decisions rather than hidden configuration details. Support opt-in acknowledgement patterns where operators confirm that a high-risk condition is intentional versus accidental. This is about operator awareness and explicit decision capture, not about enforcing or blocking configurations. - - **Security prioritization surfaces**: Operator-facing views that rank and filter posture conditions by severity, blast radius, and recency — helping operators focus on the few conditions that matter most rather than reviewing flat lists. 
Supports "top 5 risks across my portfolio" and "highest-impact unresolved findings" patterns. - - **Tenant-scoped and portfolio-aware**: Security posture is evaluated per tenant; portfolio-level aggregation surfaces which tenants are strongest and weakest for MSP operators managing fleets. Supports fleet-level security posture comparisons and trend tracking over time. -- **Explicit non-goals**: Not a SIEM, SOC, or XDR platform — this is posture interpretation for governance operators, not a security operations center tool. Not a vulnerability management system — TenantPilot does not own vulnerability remediation workflows or patch management. Not a generic security analytics platform or BI dashboard. Not a replacement for Security Posture Signals Foundation — this candidate consumes signals; the foundation candidate collects them. Not a compliance certification engine (Compliance Readiness handles audit-ready reporting). Not a threat detection or incident response system. Not a catch-all security backlog bucket — scope is bounded to posture aggregation, interpretation, prioritization, and guarded-visibility patterns. Not a broad platform hardening initiative — infrastructure, delivery, and application-level hardening are separate candidates. -- **Boundary with Security Posture Signals Foundation**: Security Posture Signals Foundation = signal ingestion, historization, correlation, and evidence-layer representation of security-relevant data (Defender exposure, backup health, etc.). Security Suite Layer = aggregation, interpretation, prioritization, and operator-facing posture value built on top of those signals (and other existing governance data like findings, baselines, and drift). Foundation is the substrate; Suite Layer is the product interpretation. Foundation answers "what signals exist?" Suite Layer answers "what do they mean, and what should I act on first?" 
-- **Boundary with Compliance Readiness & Executive Review Packs**: Compliance Readiness = framework-oriented, stakeholder-facing reporting and evidence assembly for audit conversations. Security Suite Layer = operator-facing, action-oriented posture interpretation for day-to-day security prioritization. Compliance readiness produces reports; security suite layer guides operational focus. Posture data may feed into compliance readiness outputs, but the two serve different audiences and decision patterns. -- **Boundary with Script & Secrets Governance**: Script & Secrets Governance = lifecycle controls, diff, review, and scanning for high-risk content (scripts, secrets). Security Suite Layer may consume secrets governance findings as posture inputs, but does not own the scanning, diffing, or lifecycle management of scripts and secrets. -- **Boundary with Findings and Baselines**: Findings (Spec 111+) and baselines (Specs 116–119) produce governance data points. Security Suite Layer aggregates and reinterprets those data points through a security-prioritization lens. Findings workflow owns the individual finding lifecycle; security suite layer owns the cross-finding posture picture. 
-- **Dependencies**: Security Posture Signals Foundation (primary signal source), findings workflow (Spec 111+), baseline/drift engine (Specs 116–119), permission posture (Specs 104/105), RBAC/capability system (066+), audit log foundation (Spec 134) -- **Priority**: medium (high strategic value for MSP positioning and enterprise security conversations, but realistically sequenced after signal foundations and current governance hardening work are stable) - -### Recovery Confidence — Automated Restore Testing & Readiness Reporting -- **Type**: feature -- **Source**: roadmap-to-spec coverage audit 2026-03-18, 0800-future-features brainstorming ("Killer Feature"), product positioning for enterprise trust and MSP differentiation -- **Problem**: TenantPilot has a mature backup and restore pipeline — including restore preview, dry-run, risk checking, and audit logging — but there is no product-level capability that answers the question "how confident are we that restores will actually succeed when needed?" Backup existence proves data is captured; restore execution proves the mechanism works when manually triggered. Neither proves ongoing recoverability. Operators cannot answer: when was recoverability last validated for a given tenant or policy family? Which restore paths have never been exercised? Which tenants have backup coverage but zero restore confidence? What is the overall recovery posture across the portfolio? The gap is not restore capability — it is the absence of a confidence and readiness layer that continuously proves, measures, and reports on recoverability. -- **Why it matters**: Backup without proven recoverability is a false safety net. Enterprise customers, auditors, and MSP account managers increasingly ask not "do you have backups?" but "can you prove you can recover?" Recovery confidence is the difference between a backup tool and a trusted governance platform. 
It directly strengthens audit conversations (proving restore paths work), MSP differentiation (recovery readiness as a reportable SLA dimension), and operator trust (visibility into which restore paths are validated vs. assumed). This was identified as a "killer feature" in product brainstorming because it shifts TenantPilot from reactive restore capability to proactive recovery assurance — a category few competitors occupy. -- **Proposed direction**: - - **Automated restore confidence checks**: scheduled or operator-triggered restore validation runs that exercise restore paths without modifying the production tenant — leveraging existing dry-run/preview infrastructure, targeted at proving that backed-up configurations can be successfully restored. Confidence checks produce structured results (pass/fail/partial, coverage, blockers) rather than just logs. - - **Recoverability tracking model**: per-tenant, per-policy-family tracking of when each restore path was last validated, what the result was, and which paths remain unexercised. This is the persistent readiness state, not a one-time report. Tracks coverage (which policy families have been validated), freshness (how recently), and result quality (clean pass vs. partial vs. failed). - - **Restore readiness summaries and reporting**: tenant-level and workspace-level views that show recovery posture — coverage gaps, stale validations, unexercised restore paths, confidence scores or readiness indicators. Exportable for audit evidence, customer reviews, and management reporting. Integrates with the evidence/reporting direction as a high-value signal source. - - **Preflight scoring**: before a real restore is needed, operators can see a structured readiness assessment — which policy families are covered by recent successful validation, which have known blockers, which have never been tested. This turns restore from a "hope it works" moment into a predictable, pre-validated operation. 
- - **Validation evidence trail**: each confidence check produces an immutable evidence artifact — what was tested, when, by whom, what the result was, what blockers were found. This evidence feeds into review packs, audit conversations, and compliance readiness outputs. The validation run itself is an auditable governance event. - - **Tenant-scoped and portfolio-aware**: recovery confidence is evaluated per tenant; portfolio-level aggregation surfaces which tenants have strong vs. weak recovery posture for MSP operators managing fleets. -- **Explicit non-goals**: Not a rewrite or replacement of the restore engine (Spec 011 and related specs handle restore execution; this handles confidence measurement and readiness reporting on top of that foundation). Not a full disaster-recovery orchestration platform or automated failover system. Not a synthetic test lab that provisions isolated test tenants and deploys configurations into them — confidence checks leverage existing dry-run/preview/validation infrastructure, not a separate execution environment. Not a generic backup-health dashboard — backup health (coverage, freshness, size) is a prerequisite signal, not the same problem as restore confidence (proven recoverability). Not a vague "resilience" umbrella — this is specifically about proving and reporting on restore path readiness. Not a replacement for the Evidence Domain Foundation (which owns artifact curation) or Compliance Readiness (which owns presentation assembly) — recovery confidence produces evidence artifacts that those layers consume. -- **Boundary with restore execution (Spec 011, restore pipeline)**: Restore execution = the mechanism to restore configurations from backup to a tenant. Recovery confidence = the layer that exercises, measures, tracks, and reports on whether those mechanisms are ready and reliable. Execution is the tool; confidence is the proof. 
-- **Boundary with Security Posture Signals Foundation**: Security Posture Signals = ingestion and historization of external posture data (Defender, backup success/failure signals) as evidence inputs. Recovery Confidence = active validation of restore paths and structured readiness reporting. Posture signals may include backup-health inputs; recovery confidence actively exercises restore paths and produces readiness-specific evidence. Backup health signals are passive; restore confidence checks are active. They are complementary: posture signals feed portfolio health views; recovery confidence proves operational readiness. -- **Boundary with Evidence Domain Foundation**: Evidence Foundation = curation, completeness tracking, immutable artifact storage. Recovery Confidence = produces validation evidence artifacts (confidence check results) that Evidence Foundation may curate and bundle. Recovery confidence is a producer; evidence foundation is a curator/consumer. -- **Boundary with MSP Portfolio Dashboard**: Portfolio Dashboard = fleet-level health aggregation and SLA reporting. Recovery confidence signals (per-tenant readiness posture) are a high-value input to the portfolio dashboard, not a replacement for it. The dashboard consumes; this candidate produces the recovery-specific signal. 
-- **Dependencies**: Restore pipeline stable (Spec 011 and follow-ups), backup infrastructure mature, dry-run/preview infrastructure (restore preview), audit log foundation (Spec 134), RBAC/capability system (066+), evidence/reporting direction for downstream consumption -- **Priority**: medium (high strategic value and strong product differentiation potential, but depends on restore pipeline maturity and is realistically sequenced after current hardening work) - -### Decision-First Operating Constitution Hardening -- **Type**: foundation -- **Source**: product strategy discussion 2026-04-12, constitution delta analysis against the decision-first operating model -- **Problem**: TenantPilot already has strong constitution rules around truth, action surfaces, and progressive disclosure, but the product identity and review gates needed for a decision-first governance model are still distributed across separate rules. Upcoming MSP portfolio and governance-inbox work could otherwise reproduce entity-first, detail-heavy surfaces under a new label. -- **Why it matters**: This is the rules-before-features step. Without it, later governance automation lands on the wrong UX contract and the product keeps growing by adding more pages instead of reducing operator attention load. -- **Proposed direction**: - - Make TenantPilot's decision-first governance identity explicit in the constitution - - Add workflow-first navigation and "one case, one decision context" as binding principles - - Add an automation guardrail: automation must reduce attention load, not create more UI - - Extend spec/PR review gates with surface-role classification, human-in-the-loop justification, immediate-vs-on-demand information checks, and explicit search/click-load questions - - Treat this as a targeted evolution of the existing constitution, not as a second parallel manifesto -- **Explicit non-goals**: Not a visual redesign. Not a rewrite of every current screen. Not a second constitution document. 
-- **Dependencies**: Existing constitution baseline, action surface contract work, operator-truth vocabulary, current navigation/context hardening specs -- **Priority**: high - -### Surface Taxonomy & Workflow-First IA Classification -- **Type**: foundation -- **Source**: product strategy discussion 2026-04-12, decision-first operating follow-up -- **Problem**: TenantPilot has grown strong per-domain depth across operations, evidence, baselines, reviews, alerts, and tenant detail surfaces, but there is no canonical classification of which screens are primary decision surfaces, which are secondary context surfaces, and which are tertiary evidence/diagnostic surfaces. Existing navigation and prominence can still mirror entities and implementation structure more than operator workflows. -- **Why it matters**: Without a classification-first audit, retrofits will stay local and future governance automation will just add another layer on top of the current detail-page landscape. This candidate produces the target IA truth before new inbox or portfolio-operating surfaces land. -- **Proposed direction**: - - Catalogue major existing surfaces and classify them as primary decision, secondary context, or tertiary evidence/diagnostic - - Evaluate which information must be immediate versus on-demand per surface - - Identify where multiple pages should collapse into one decision context, and where detail should move behind tabs, drawers, or expansion - - Produce a workflow-first target IA and a bounded retrofit map rather than one umbrella redesign - - Reuse existing implemented/draft specs where they already solve local parts of the problem instead of inventing a second parallel IA program -- **Explicit non-goals**: Not immediate implementation of all retrofits. Not a mass rewrite of every navigation group in one release. Not a justification for deleting audit or diagnostic depth. 
-- **Dependencies**: Decision-First Operating Constitution Hardening, existing navigation/context/action-surface specs, product surface inventory -- **Priority**: high - -### Audience-Aware Decision Surface Adoption Closure -- **Priority**: P0 -- **Type**: UX architecture adoption / platform hardening -- **Roadmap fit**: Cross-cutting platform quality, customer-readiness, MSP operator UX, customer read-only foundation -- **Depends on**: Existing `EnterpriseDetail`, `OperatorExplanation`, `GovernanceArtifactTruth`, `VerificationReportViewer`, `SupportDiagnosticBundle`, RBAC/capability model -- **Do not build**: A parallel UI framework -- **Problem**: TenantPilot already has strong shared UI foundations for decision-grade detail pages, governance artifact truth, operator explanations, verification reports, and support diagnostics, but adoption is inconsistent across the platform. Several operational and governance pages still expose too much internal diagnostic, lifecycle, context, reason, JSON, and support/debug information in the default reading path. The recurring issue is not missing data quality. It is information hierarchy: decision content, diagnostics, evidence, and raw support/debug payloads are often rendered as equal-priority blocks. -- **Goal**: Harden the existing decision-first UI system by introducing audience-aware disclosure rules and applying them to the highest-risk surfaces first. Default pages should become customer-readable while retaining full operator and support depth through progressive disclosure and role/capability gates. -- **Non-goals**: - - Do not introduce a new UI framework. - - Do not replace `EnterpriseDetail`. - - Do not remove diagnostics or raw evidence. - - Do not weaken auditability. - - Do not hide evidence from authorized operators/support users. - - Do not redesign the whole platform in one pass. - - Do not migrate every page in this spec. 
-- **Existing foundations to reuse**: - - `EnterpriseDetail` - - `OperatorExplanation` - - `GovernanceArtifactTruth` - - `VerificationReportViewer` - - `SupportDiagnosticBundle` - - existing Filament-native Sections/Infolists/Actions/Tabs/Accordions -- **Reuse rule**: Any new helper must be a small visibility/disclosure layer, not a competing rendering system. -- **First slice**: - 1. `OperationRun` viewer - 2. Managed Tenant Onboarding verify step -- **Requirements**: - - **Decision surface**: each target page MUST show a clear default decision surface with status, reason, impact, primary next action, one dominant CTA, optional one secondary CTA, and a short artifact/result summary. - - **Diagnostics**: lifecycle, timing, related operation, verification-report detail, drift/report detail, supporting evidence, and provider diagnostic summaries MUST be secondary, collapsed, tabbed, or visually lower priority. - - **Support / raw evidence**: raw JSON, context payloads, fingerprints, reason owner, platform reason family, viewer context, tenant selector context, monitoring detail, and copy/show-raw actions MUST NOT appear in the default customer-readable path. They must be collapsed and capability-gated where applicable. - - **Audience modes**: the target surfaces MUST distinguish customer/read-only default, operator/MSP diagnostics, and platform/support raw evidence. Customer/read-only users MUST NOT see internal debug semantics by default. Operators MAY expand diagnostics. Support/platform users MAY access raw evidence when authorized. - - **One primary action**: each target surface MUST expose only one dominant next action. Secondary links such as `Open operation`, `View tenant`, `Technical details`, or `Show JSON` must not visually compete with the primary remediation action. - - **No duplicate truth**: the same blocker, reason, or next action MUST NOT be repeated across multiple visible cards. 
If the dominant blocker is `Admin consent required`, the page may show it once in the decision surface and then provide supporting evidence in diagnostics, but it must not repeat that message in readiness, permission diagnostics, contextual help, verification summary, and issue lists at equal priority. -- **OperationRun viewer target state**: - - **Default visible**: operation status/outcome, human-readable reason, customer-readable impact, primary next action, artifact/result summary, limited actions - - **Secondary / collapsed**: lifecycle, timings, related context, support diagnostics, verification/drift/report internals - - **Support/raw gated**: raw context, JSON, fingerprints, reason ownership, platform reason family, monitoring detail -- **Managed Tenant Onboarding verify-step target state**: - - **Default visible**: onboarding readiness, current step, status, dominant blocker, primary next action, supporting evidence links - - **Secondary**: verification summary, required permissions, operation link, technical details - - **Hidden / fallback**: permission diagnostics should be visible only as fallback when no stored verification report is available. Once a verification report exists, permission details move into supporting evidence or technical details. -- **Acceptance criteria**: - - `OperationRun` viewer default path is readable in under 5 seconds. - - `OperationRun` viewer shows one dominant next action. - - `OperationRun` viewer default path does not show raw JSON, raw context, fingerprints, reason owner, platform reason family, or monitoring detail. - - `OperationRun` diagnostics remain accessible to authorized operators. - - `OperationRun` raw/support details are collapsed and capability-gated where applicable. - - Managed Tenant Onboarding verify step shows exactly one primary next action. - - Managed Tenant Onboarding verify step does not duplicate permission/consent blockers across readiness, diagnostics, contextual help, and report sections. 
- - Permission diagnostics are hidden when a stored verification report exists and visible only as fallback when no report exists. - - `Current checkpoint` or other internal lifecycle wording is replaced with customer/operator-friendly wording such as `Step`. - - Duplicate visible headings such as `Verification report / Verification report` are removed. - - Existing support diagnostics and verification report data remain available. -- **Required tests**: - - focused Pest coverage proving the default view shows status, reason, impact, and primary next action - - internal debug semantics are not default-visible - - raw JSON/context/fingerprints are collapsed or gated - - customer/read-only role does not see support/raw details by default - - operator role can access diagnostics - - support/platform role can access raw evidence where authorized - - onboarding verify step renders one primary action - - onboarding permission diagnostics are fallback-only when a verification report exists - - no duplicate visible decision headings exist -- **Implementation notes**: - - Prefer small, composable changes. - - Add a visibility/disclosure helper only if existing policies are insufficient. - - Extend existing EnterpriseDetail/Verification/Support surfaces instead of replacing them. - - Reduce one-off Blade/Tailwind cards where shared patterns can express the same concept. - - Preserve auditability and evidence depth. - - Preserve existing RBAC/capability enforcement. - - Preserve Livewire v4 and Filament v5 conventions. -- **Out of scope**: - - full platform-wide migration - - customer read-only portal implementation - - PDF/export redesign - - redesign of all Findings/Baseline/Evidence pages - - new AI explanation features - - new support ticketing workflow - -> Later / dependent candidates: after the adoption-closure slice above is specified and implemented, keep the next migrations explicitly dependent instead of bundling them into the same P0 effort. 
- -#### Later / dependent candidates - -##### Findings & Risk Acceptance Decision Surface Migration -- **Priority**: P1 -- **Type**: pattern adoption / workflow UX consolidation -- **Depends on**: Audience-Aware Decision Surface Adoption Closure -- **Roadmap fit**: R1.5 Findings Workflow V2, R1.6 Exceptions / Risk Acceptance, R2 Customer Read-only View -- **Problem**: Finding and Finding Exception detail pages currently expose workflow status, ownership, severity, timestamps, exception state, and remediation context through local Filament sections. These pages are functional, but they do not consistently use the shared decision-first detail patterns. As Findings and Risk Acceptance become customer-facing governance workflows, the default view must clearly separate risk decision, owner/action, diagnostic detail, and evidence/support context. -- **Goal**: Migrate Finding and Finding Exception detail pages toward the shared decision surface model. -- **First slice**: - - Finding detail page - - Finding Exception detail page -- **Requirements**: - - default view shows risk status, severity, reason, impact, owner, due date, and next action - - workflow actions are clearly prioritized - - risk acceptance state is shown as a decision state, not just metadata - - evidence and occurrence history are secondary - - raw/internal context is hidden or collapsed - - customer/read-only users see risk posture and accepted-risk status without internal debug semantics -- **Acceptance criteria**: - - Finding detail page has one clear primary action based on status - - Finding Exception page clearly shows accepted risk, owner, expiry, scope, and renewal/expiry state - - evidence/history is secondary - - no raw/debug payload appears in the default customer-readable view - -##### Baseline & Drift Decision Surface Migration -- **Priority**: P1 -- **Type**: pattern adoption / governance UX consolidation -- **Depends on**: Audience-Aware Decision Surface Adoption Closure -- **Roadmap fit**: 
R1 Golden Master Governance, R1.4 Drift UI, R2 Reports/Evidence -- **Problem**: Baseline and Drift pages contain decision-grade governance data but can become dense because comparison state, drift findings, policy metadata, diagnostics, and technical evidence compete in the same reading path. -- **Goal**: Apply the decision-first hierarchy to Baseline Profile, Baseline Snapshot, and Baseline Compare surfaces. -- **First slice**: - - Baseline Profile view - - Baseline Compare landing / matrix summary -- **Requirements**: - - default view shows baseline status, last compare result, drift summary, severity, and primary next action - - detailed matrix data is secondary - - technical comparison diagnostics are collapsed or moved behind filters/tabs - - customer-readable summaries are separated from operator investigation tools - - raw comparison context and fingerprints are not default-visible -- **Acceptance criteria**: - - Baseline Profile view shows one primary governance action - - Baseline Compare default view summarizes drift before showing the dense matrix - - drift details remain accessible to operators - - raw/fingerprint-level data is hidden or support-only - -##### Customer Read-only Decision Views -- **Priority**: P2 -- **Type**: customer-facing UX / access model -- **Depends on**: Audience-Aware Decision Surface Adoption Closure, Findings & Risk Acceptance Migration, Reports/Evidence foundation -- **Roadmap fit**: R2.6 Customer Read-only View v1 -- **Problem**: TenantPilot has strong operator and support detail surfaces, but customer-facing users need a calmer read-only experience focused on governance status, accepted risks, findings, evidence summaries, and review outcomes. -- **Goal**: Introduce customer read-only views that reuse the same decision-surface contracts but hide operator diagnostics and support/raw internals by default. 
-- **Requirements**: - - customers see baseline status, findings, accepted risks, reports, and evidence summaries - - customers do not see raw JSON, internal reason ownership, fingerprints, platform reason families, or debug context - - customers can open review-ready explanations and evidence summaries - - admin/operator actions are hidden - - support/operator users keep access to deeper diagnostics -- **Acceptance criteria**: - - customer members can understand tenant posture without technical debug details - - customer views remain audit-friendly but not overwhelming - - all customer-facing detail pages follow decision-first, diagnostics-second, evidence-third - -### Personal Work IA / My Work -- **Type**: IA / workflow foundation -- **Source**: admin workspace IA discussion 2026-04-21; personal work architecture candidate pack -- **Problem**: TenantPilot now has a real assignee-facing work surface in Spec 221 (`findings-operator-inbox`), but future personal work would otherwise fragment across findings, approvals, reviews, alerts, and exception-renewal surfaces without one stable "what is my work today?" entry point. -- **Why it matters**: This is not just a navigation tweak. As TenantPilot becomes more workflow- and decision-oriented, personally addressed actionable work needs its own IA layer. Without that layer, discoverability, counts, and operator mental models drift by domain. 
-- **Proposed direction**: - - Add a top-level `My Work` group in the admin panel as the personal lens on domain work, not as a second monitoring tree or favorites bucket - - Allow only surfaces that are explicitly assigned to the current user or awaiting that user's concrete decision - - Keep global domain navigation canonical for browsing, reporting, and non-personal work - - Treat the dashboard as a signal and entry surface, not the durable home of personal queues - - Start with the IA contract and admission rules; do not require every future child surface to ship together -- **Admission rules**: - - Personal addressability: explicit assignee, approver, or decision owner; generic responsibility is insufficient - - Concrete next action: triage, approve, renew, close, escalate, or equivalent; reports and diagnostics alone are out - - Workspace-safe scope: rows, counts, and badges stay limited to visible, authorized workspace and tenant scope - - Personal value-add: the surface does more than deep-link to a global list by adding personal filtering, prioritization, or decision support - - No replacement of domain navigation: domain collections remain canonical outside the personal lens -- **Vehicle note**: `My Work — Assigned Findings` is already materially represented by Spec 221 (`findings-operator-inbox`) and should be treated as the first concrete child surface rather than a second open candidate. -- **Activation rule**: Introduce `My Work` as actual top-level navigation only once at least two real personal work surfaces exist or are committed near-term. Before that, the IA contract may exist without forcing a single-link top-level group. -- **Explicit non-goals**: Not a generic "My Area", not profile/settings relocation, not favorites/bookmarks, not a universal task engine, not a dashboard replacement, and not a notification center. -- **Boundary with Spec 221 (Findings Operator Inbox)**: Spec 221 defines the first concrete personal findings queue. 
This candidate defines the durable admin-IA rule that decides when that queue graduates into a broader personal-work group and how future personal surfaces should join it. -- **Boundary with Human-in-the-Loop Autonomous Governance / Governance Inbox**: Governance Inbox is the long-horizon cross-workflow decision cockpit with structured recommendations and controlled execution. `My Work` is the nearer-term IA layer for personally addressed queues in the existing admin workspace. It should not absorb the full governance-inbox ambition. -- **Dependencies**: Surface Taxonomy & Workflow-First IA Classification, Spec 221 (`findings-operator-inbox`), workspace/tenant scope enforcement, future assignment and approval routing semantics -- **Priority**: high - -> `My Work` candidate family: keep child surfaces and cross-cutting semantics split so prioritization can land the IA contract, the next concrete personal queues, and the routing/count foundations independently instead of turning personal work into one oversized umbrella spec. - -### My Work — Pending Approvals -- **Type**: workflow execution / approvals -- **Source**: personal work architecture candidate pack 2026-04-21; future approval-bearing workflows -- **Problem**: Approval work would otherwise be scattered across risk acceptance, drift governance, restore, or rollout surfaces without one trustworthy personal decision queue. -- **Why it matters**: Approval is the cleanest form of personally addressed work. If it remains buried in domain pages, operators lose the "awaiting my decision" contract. -- **Proposed direction**: Add a personal approvals queue for decisions that explicitly await the current user's approval or rejection; show decision summary, urgency, scope, and safe drilldown; keep FYI notifications and passive review signals out. 
-- **Explicit non-goals**: Notification center, knowledge-only acknowledgements, general automation orchestration, or inventing a full approval engine before approval-producing domains exist. -- **Dependencies**: Risk acceptance lifecycle (Spec 154), drift/change approval direction, restore or rollout approval producers, routing semantics -- **Strategic sequencing**: Strong candidate for the second real `My Work` child surface because it naturally satisfies the admission rules. -- **Priority**: high - -### My Work — Assigned Reviews -- **Type**: workflow execution / review work -- **Source**: personal work architecture candidate pack 2026-04-21; governance/review responsibility gap -- **Problem**: Review work can easily remain hidden in tenant review, evidence, or governance surfaces even when a specific reviewer is responsible. -- **Why it matters**: Reviews are person-bound work, but not all reviews are findings or approvals. A dedicated personal review queue keeps governance responsibility visible without flattening everything into one findings model. -- **Proposed direction**: Add a review queue for review packs, evidence bundles, or governance review steps explicitly assigned to the current user; emphasize due state, review scope, and next action. -- **Explicit non-goals**: Generic reporting hub, passive read receipts, or turning `My Work` into a full collaboration suite. -- **Dependencies**: Review-layer maturity, evidence surfaces, assignment semantics, due-date conventions -- **Priority**: medium - -### My Work — Risk Acceptance Renewals -- **Type**: workflow execution / time-bound governance -- **Source**: personal work architecture candidate pack 2026-04-21; exception-renewal follow-up -- **Problem**: Expiring risk acceptances or exceptions create person-addressed renewal work, but that work is neither standard findings triage nor generic monitoring. 
-- **Why it matters**: Renewal work is deadline-driven and materially important, so it needs a calm but trustworthy personal queue instead of disappearing inside exception detail pages. -- **Proposed direction**: Add a renewal queue for expiring or expired risk acceptances where the current user is owner or required approver; support renew, close, or escalate next steps. -- **Explicit non-goals**: Full exception lifecycle redesign or generic reminder infrastructure for every dated object in the product. -- **Dependencies**: Spec 154 (`finding-risk-acceptance`), due/expiry semantics, routing semantics -- **Priority**: medium - -### My Work — Actionable Alerts -- **Type**: alerts / workflow execution -- **Source**: personal work architecture candidate pack 2026-04-21; action-vs-notification boundary review -- **Problem**: Some alerts represent concrete assigned follow-up, while others are only awareness signals. Without a boundary, `My Work` either becomes noisy or misses genuine action-bearing alerts. -- **Why it matters**: `My Work` must stay quiet and trustworthy. Admitting every notification would destroy the queue's meaning; admitting none would keep action-bearing alerts disconnected from work. -- **Proposed direction**: Route only alerts with explicit ownership and one clear next action into `My Work`; keep generic notifications, telemetry, and passive monitoring signals outside the group. -- **Explicit non-goals**: General notification center, chat/activity feed, or bulk alert triage system. 
-- **Dependencies**: Alert infrastructure, ownership semantics, escalation rules, personal count semantics -- **Priority**: medium - -### My Work — Approval & Escalation Routing -- **Type**: foundation / routing semantics -- **Source**: personal work architecture candidate pack 2026-04-21; ownership and fallback analysis -- **Problem**: Personal queues become inconsistent when owner, assignee, approver, escalation target, and fallback role mean different things in each domain. -- **Why it matters**: `My Work` cannot be trustworthy without a shared answer to "why did this item land on me?" and "who gets it if no person is assigned?". -- **Proposed direction**: Define shared routing semantics for assignee versus owner versus approver, fallback-to-role behavior, no-assignee escalation, and future delegation boundaries; keep this as a governance contract, not a UI-only heuristic. -- **Explicit non-goals**: Full org-chart modeling, absence management, or automatic load balancing. -- **Dependencies**: Ownership semantics (Spec 219), findings workflow, approval-producing domains, RBAC/capability model, alerting -- **Strategic sequencing**: Foundational before `My Work` expands beyond findings into approvals, reviews, or renewals. -- **Priority**: high - -### My Work — Personal Counts & Priority Semantics -- **Type**: foundation / queue semantics -- **Source**: personal work architecture candidate pack 2026-04-21; count-trust and priority-shaping analysis -- **Problem**: Once more than one personal queue exists, badges and ordering can drift, double-count, or leak hidden scope unless the inclusion and weighting rules are explicit. -- **Why it matters**: Personal counts are operator trust surfaces. If badges are noisy, inconsistent, or scope-leaky, the IA layer becomes less usable than the domain pages it was meant to simplify. 
-- **Proposed direction**: Define group-badge inclusion, visible-scope count rules, urgency weighting for overdue versus pending approval versus reopened work, and the relationship between workspace-wide truth and active-tenant context. -- **Explicit non-goals**: Complex cross-domain scoring engine, productivity gamification, or predictive prioritization. -- **Dependencies**: `My Work` IA, routing semantics, alerting/approval/review producers, RBAC scope enforcement -- **Strategic sequencing**: Must exist before a multi-surface `My Work` badge ships. -- **Priority**: high - -### My Work — Dashboard Signals & Personal Entry Points -- **Type**: IA / entry-point semantics -- **Source**: personal work architecture candidate pack 2026-04-21; dashboard-versus-nav continuity analysis -- **Problem**: Dashboard summary cards, CTA strips, and future personal queues can easily duplicate or contradict each other unless their roles are defined together. -- **Why it matters**: The workspace dashboard should signal personal work, not become a second queue. Operators need consistent drill-in and return behavior between the dashboard and `My Work`. -- **Proposed direction**: Define which personal signals belong on `/admin`, when a CTA is enough versus when a nav point is required, and how context/filter carry-over works between dashboard signals and personal queues. -- **Explicit non-goals**: Full dashboard redesign or a second summary layer that mirrors every `My Work` list. 
-- **Dependencies**: Spec 221 workspace signal, `My Work` IA, dashboard surface conventions, personal count semantics -- **Priority**: medium - -### MSP Multi-Tenant Portfolio Dashboard & SLA Reporting -- **Type**: feature -- **Source**: roadmap-to-spec coverage audit 2026-03-18, 0800-future-features brainstorming (pillar #1 — MSP Portfolio & Operations), product positioning for MSP portfolio owners -- **Problem**: TenantPilot provides strong per-tenant governance, monitoring, and operational surfaces, but MSP operators and portfolio owners managing 10–100+ tenants across workspaces have no fleet-level view that answers "how is my portfolio doing?" There is no cross-tenant health summary, no SLA/compliance risk overview, no portfolio-level operational monitoring, and no structured reporting surface that supports recurring customer portfolio reviews. Operators must navigate tenant by tenant to assemble a portfolio picture, which does not scale and prevents proactive governance. -- **Why it matters**: MSP portfolio visibility is the #1 brainstorming priority and a core product differentiator. Without it, TenantPilot serves individual tenant management well but cannot position itself as the operational cockpit for MSP businesses. Portfolio-level health, SLA tracking, compliance risk summaries, and cross-tenant operational monitoring are the capabilities that justify platform-level pricing and recurring MSP engagement. This is the difference between a per-tenant tool and an MSP operations platform. 
-- **Proposed direction**: - - Workspace-level portfolio dashboard: aggregated health, governance, and operational status across all managed tenants in a workspace - - Key portfolio signals: backup health (last successful backup age, coverage), sync health (last successful sync, staleness), drift/findings posture (open findings count, severity distribution, trend), operational health (recent failures, stuck runs, throttling indicators), provider connection status (consent/verification posture across fleet) - - SLA/compliance risk summary views: which tenants are below operational health thresholds, which tenants have governance gaps, which tenants need attention — sortable, filterable, visually prioritized - - Cross-tenant operational monitoring: portfolio-level view of recent operation runs, failure clustering, and common error patterns across tenants - - Structured portfolio reporting: exportable portfolio health summaries for MSP-internal use, customer-facing SLA reports, and recurring review preparation - - Workspace-scoped, RBAC-gated: portfolio views respect workspace membership and capability authorization -- **Explicit non-goals**: Not a replacement for per-tenant dashboards or detail views (those remain the primary tenant-level surfaces). Not a generic BI/data warehouse initiative or a drag-and-drop report builder. Not a customer-facing analytics suite — this is an operator/MSP-internal tool. Not a cross-tenant compare/diff/promotion surface (that is the Cross-Tenant Compare & Promotion candidate). Not a system-console-level platform triage view (that is the System Console Multi-Workspace Operator UX candidate). Not a replacement for alerting (Specs 099/100 handle event-driven notifications; this is a review/monitoring surface). -- **Boundary with Cross-Tenant Compare & Promotion**: Portfolio Dashboard = fleet-level monitoring, health aggregation, SLA reporting, operational overview. 
Cross-Tenant Compare = policy-level diff, staging-to-production promotion, configuration comparison. They share the multi-tenant dimension but solve fundamentally different problems. -- **Boundary with System Console Multi-Workspace Operator UX**: Portfolio Dashboard = workspace-scoped MSP operator view, health/SLA/governance focus. System Console = platform-level triage, cross-workspace operator tooling, infrastructure focus. Different audiences, different panels. -- **Dependencies**: Decision-First Operating Constitution Hardening, Surface Taxonomy & Workflow-First IA Classification, per-tenant operational health signals (backup, sync, drift, findings, provider connection status), workspace model, tenant inventory, alerting foundations (Specs 099/100), RBAC/capability system (066+) -- **Priority**: medium (high strategic value, significant data aggregation effort; depends on per-tenant signal maturity) - -### Human-in-the-Loop Autonomous Governance / Governance Inbox -- **Type**: feature -- **Source**: product strategy discussion 2026-04-12, MSP Portfolio OS direction -- **Problem**: Operators still have to pull together drift, evidence, operation runs, policy versions, alerts, and exception history themselves to decide what to do next. Alerting can notify, but the product does not yet provide a workspace-level decision inbox, structured action payloads, recommended next steps, or controlled execution contracts that let the system continue after approval. -- **Why it matters**: This is the long-term decision-based operating moat. TenantPilot stops being just a place to inspect tenant state and becomes the system that detects, triages, drafts the decision, collects approval, executes within guardrails, and preserves the full evidence chain. That is much harder for generic AI tooling to replace than simple configuration explainability. 
-- **Proposed direction**: - - Governance inbox for pending decisions across tenants and workflows - - Actionable alerts/events with structured payloads that open directly into a decision context rather than a generic detail page - - Continuous detection and auto-triage against baselines, findings, accepted deviations, run history, and risk signals - - Decision packs: what changed, why it matters, recommended action, blast radius, confidence, approvals required, rollback path - - Controlled execution after approval: approve, reject, defer, or accept deviation, with automation policies, maintenance windows, dual approval, and scope guardrails where needed - - Closed-loop evidence: before snapshot, approval record, execution run, after snapshot, audit trail, review-pack linkage -- **Explicit non-goals**: Not blind autopilot remediation. Not a chat-first admin experience. Not a replacement for drift/change governance, findings, or exception workflows; it orchestrates across them. -- **Boundary with Drift Change Governance**: Drift Change Governance owns drift-specific approval, freeze-window, and tamper rules. This candidate owns the broader operating model: inbox, decision routing, recommended actions, controlled execution, and evidence closure across drift, evidence, review, and operational workflows. -- **Dependencies**: Decision-First Operating Constitution Hardening, Surface Taxonomy & Workflow-First IA Classification, MSP Multi-Tenant Portfolio Dashboard & SLA Reporting, drift/findings/exception workflow maturity, actionable alert/event payloads, canonical operation/evidence truth -- **Priority**: medium (high strategic value, intentionally sequenced after the decision-first foundations) - -### Policy Lifecycle / Ghost Policies (Spec 900 refresh) -- **Type**: feature -- **Source**: Spec 900 draft (2025-12-22), HANDOVER risk #9 -- **Problem**: Policies deleted in Intune remain in TenantAtlas indefinitely. No deletion indicators. 
Backup items reference "ghost" policies. -- **Why it matters**: Data integrity, user confusion, backup reliability -- **Proposed direction**: Soft delete detection during sync, auto-restore on reappear, "Deleted" badge, restore from backup. Draft in Spec 900. -- **Dependencies**: Inventory sync stable -- **Priority**: medium - -### Standardization & Policy Quality — Linting, Company Standards, Hygiene -- **Type**: feature -- **Source**: roadmap-to-spec coverage audit 2026-03-18, 0800-future-features brainstorming (pillar #3 — Standardization & Policy Quality / "Intune Linting") -- **Problem**: TenantPilot captures, versions, and governs Intune policy configurations, but provides no capability to evaluate whether those configurations meet quality, consistency, or organizational standards. Operators cannot answer questions like: "Do all policies follow our naming convention?", "Are there duplicate or near-duplicate policies?", "Which policies have no assignments?", "Are scope tags applied consistently?", "Does this tenant meet our company's minimum configuration standard?" Today, quality and hygiene assessment is manual, tenant-by-tenant, and invisible to governance workflows. -- **Why it matters**: Configuration quality is a distinct governance dimension from baseline drift and compliance findings. Drift detection answers "has something changed?"; standardization answers "is it correct and well-structured?" Enterprise customers and MSPs need both. Policy linting, hygiene checks, and company standards create a repeatable quality layer that reduces configuration debt, catches structural problems early, and supports standardization across managed tenants. This is the #3 brainstorming priority and a natural complement to the existing governance stack. 
-- **Proposed direction**: - - **Policy linting / quality checks**: rule-based evaluation of policy configurations against defined quality criteria — naming conventions, scope tag requirements, assignment presence, setting completeness, structural validity. Rules should be composable and extensible per workspace or tenant. - - **Company standards as reusable reference packs**: operators or MSPs define their own configuration standards ("Company Standard 2026") as reference expectations that policies can be evaluated against. Distinct from Microsoft baselines — these are organization-defined, not vendor-defined. A standard pack is a set of expected configuration postures, not a deployable template. - - **Hygiene checks**: automated detection of structural problems — duplicate or near-duplicate policies, unassigned policies, orphaned scope tags or filters, policies with no settings or empty payloads, stale policies not updated in extended periods, inconsistent naming patterns across policy families. - - **Quality findings integration**: hygiene and linting results should produce structured findings or quality signals that integrate with the existing findings workflow, not a separate parallel reporting system. - - **Tenant-scoped and portfolio-aware**: quality evaluation runs per tenant; portfolio views can aggregate quality posture across tenants for MSP operators. -- **Explicit non-goals**: Not a full compliance framework or certification engine (compliance readiness is a separate candidate). Not a generic recommendation engine or AI assistant. Not a replacement for baseline/drift detection (which answers "has it changed from a known-good state?" — standardization answers "is it well-structured and consistent?"). Not a policy deployment or remediation engine — this is evaluation and visibility, not automated correction. Not a replacement for the existing findings workflow — quality signals should flow into findings, not bypass them. 
-- **Boundary with baseline/drift engine**: Baselines compare current state against a snapshot of known-good state. Standardization evaluates current state against quality rules and organizational expectations. A policy can be drift-free (unchanged from baseline) but still fail quality checks (bad naming, missing assignments, no scope tags). These are complementary, not overlapping. -- **Boundary with Policy Setting Explorer**: Policy Setting Explorer = reverse lookup ("where is this setting defined?"). Standardization = quality evaluation ("is this policy well-structured and consistent?"). Different questions, different surfaces. -- **Dependencies**: Inventory sync stable, policy versioning, tenant context model, findings workflow (Spec 111) for quality findings integration, RBAC/capability system (066+) -- **Priority**: medium (high strategic value, incremental delivery possible starting with high-value hygiene checks) - -### Schema-driven Secret Classification -- **Type**: hardening -- **Source**: Spec 120 deferred follow-up -- **Problem**: Secret redaction currently uses pattern-based detection. A schema-driven approach via `GraphContractRegistry` metadata would be more reliable. 
-- **Why it matters**: Reduces false negatives in secret redaction -- **Proposed direction**: Central classifier in `GraphContractRegistry`, regression corpus -- **Dependencies**: Secret redaction (120) stable, registry completeness (095) -- **Priority**: medium - -### Script & Secrets Governance — Diff, Review, Scanning, and Lifecycle Controls for High-Risk Content -- **Type**: feature -- **Source**: roadmap-to-spec coverage audit 2026-03-18, 0800-future-features brainstorming (Script & Secrets Governance pillar), platform hardening direction -- **Problem**: TenantPilot governs a wide range of Intune policy configurations, but a subset of these configurations carries disproportionate operational risk: PowerShell remediation scripts, detection scripts, custom compliance scripts, proactive remediations, and policy artifacts that embed or reference secrets (pre-shared keys, certificate data, credentials, API tokens). These artifacts are fundamentally different from declarative policy settings — they contain executable logic or sensitive material where a single change can have outsized blast radius, and where silent or unreviewed modification creates real security and operational exposure. Today, TenantPilot treats script-bearing and secret-sensitive artifacts with the same governance depth as any other policy: they are versioned and backed up, but there is no dedicated diff/review surface for script content, no approval or guarded workflow for high-risk script changes, no scanning or policy checks for obviously unsafe secret-handling patterns, and no structured visibility into which configurations carry elevated risk because they contain executable or secret-sensitive content. -- **Why it matters**: Script and secret governance is a distinct risk dimension that cuts across policy families. A naming convention violation in a device configuration policy is a hygiene problem; an unreviewed script change in a remediation policy is a potential security incident. 
Enterprise customers and MSP operators need to trust that high-risk content changes are visible, reviewable, and governable — not just captured as another version snapshot. This capability strengthens audit conversations (proving that script changes are reviewed), operator safety (preventing silent high-risk modifications from going unnoticed), and platform credibility (demonstrating that TenantPilot understands which parts of Intune configuration carry elevated risk). Without it, backup and versioning give a false sense of governance completeness — the most dangerous artifacts receive the same governance treatment as the least dangerous ones. -- **Proposed direction**: - - **Script-aware diff and review surfaces**: dedicated diff views for script-bearing policy artifacts that render script content changes readably — not just JSON diff of the enclosing policy payload, but structured presentation of the script text itself (before/after, syntax-highlighted where practical, change summary). These surfaces make script changes reviewable by operators rather than buried in raw payload diffs. - - **Risk classification for script/secret-bearing artifacts**: extend the inventory or governance metadata so that policy artifacts containing scripts or secret-sensitive fields are identifiable as elevated-risk items. This classification enables filtering, alerting, and governance workflow differentiation — operators can see "which of my policies are script-bearing?" or "which versions changed script content?" without manually inspecting payloads. - - **Guarded change workflows for high-risk content**: optional governance gates for script-bearing or secret-sensitive changes — such as requiring explicit acknowledgment, capability-gated approval, or elevated audit logging when a versioned change involves script content or secret-sensitive fields. 
These are governance-layer controls, not Intune-side mutation blocks (TenantPilot observes configuration, it does not control the Intune mutation path). The gates apply to how TenantPilot classifies and routes detected changes. - - **Scanning and policy checks for secret-handling patterns**: lightweight rule-based checks that flag obviously unsafe patterns in script or configuration content — hardcoded credentials, plaintext secrets, overly broad credential scopes, known-bad patterns. Not a full SAST engine — focused, high-signal checks that catch the most common and most dangerous mistakes. Results integrate with the findings workflow as governance signals, not a parallel detection system. - - **Rollback and auditability expectations**: script and secret-sensitive changes should have clear rollback visibility (which version introduced the script change, who triggered restore, what was the before/after state). Audit trail expectations should be elevated for this content class — change, review, approval, and rollback events should be distinctly traceable in audit logs. - - **Operator visibility into script/secret risk posture**: tenant-level and portfolio-level views that surface which tenants have unreviewed script changes, which script-bearing policies have never been reviewed, and where secret-handling patterns have been flagged. This is the governance visibility layer, not a generic dashboard initiative. -- **Explicit non-goals**: Not a replacement for external secret vault or key management systems (Azure Key Vault, HashiCorp Vault, etc.) — TenantPilot does not store or manage secrets as a vault; it governs configurations that may contain or reference sensitive material. Not a full code-signing or binary-signing platform — governance focus is on reviewability and risk visibility, not cryptographic attestation. 
Not a SIEM, DLP, or broad security-monitoring system — this is governance of specific high-risk content classes within the existing policy governance architecture, not a generic security operations capability. Not a catch-all bucket for every security topic — this is bounded to script-bearing and secret-sensitive configuration artifacts. Not a replacement for the baseline/drift engine (which detects *any* configuration change) — this adds risk-aware governance specifically for the content classes where changes carry elevated operational risk. Not a policy deployment or remediation engine — this is detection, review, and governance, not automated correction. Not a full static analysis (SAST) engine for arbitrary scripts. -- **Boundary with Schema-driven Secret Classification**: Schema-driven Secret Classification = improving the *redaction mechanism's* reliability by using schema metadata to classify which fields contain secrets (a backend classification improvement for the existing redaction pipeline). Script & Secrets Governance = lifecycle governance around script-bearing and secret-sensitive *artifacts* — diff, review, scanning, approval workflows, risk visibility. Classification makes redaction more accurate; governance adds reviewability and lifecycle controls. Schema-driven classification may inform governance risk tagging (which fields are secret-sensitive), but the problems and deliverables are distinct. -- **Boundary with Standardization & Policy Quality**: Standardization = evaluating whether policies are well-structured, consistently named, properly assigned, and hygienically maintained. Script & Secrets Governance = evaluating whether high-risk content (scripts, secrets) is reviewed, safe, and governable. A policy can pass all quality checks (good naming, proper assignments, scope tags) but still have an unreviewed script change or a hardcoded credential. These are complementary governance dimensions, not overlapping. 
-- **Boundary with Security Posture Signals Foundation**: Security Posture Signals = ingesting and historizing external posture data (Defender, backup health) as evidence inputs for reporting. Script & Secrets Governance = internal governance of the product's own high-risk configuration content. Different data sources, different governance problems. Posture signals are external evidence; script governance is internal safety. -- **Boundary with baseline/drift engine (Specs 116–119)**: Drift detection = detecting that *something changed*. Script & Secrets Governance = applying differentiated governance treatment *because of what changed* (script content, secret-sensitive fields). Drift is content-agnostic detection; script governance is risk-aware response. They compose: drift detection finds the change, script governance classifies and routes it based on risk. -- **Dependencies**: Inventory sync stable, policy versioning and snapshot infrastructure, secret redaction (Spec 120) stable, findings workflow (Spec 111) for governance signal integration, audit log foundation (Spec 134), RBAC/capability system (066+), GraphContractRegistry maturity for field-level metadata -- **Priority**: medium (high security-governance value and clear product differentiation, but realistically sequenced after current hardening work and dependent on inventory/versioning/findings maturity) - -### Cross-Tenant Compare & Promotion -- **Type**: feature -- **Source**: Spec 043 draft, 0800-future-features -- **Problem**: No way to compare policies between tenants or promote configurations from staging to production. -- **Why it matters**: Core MSP/enterprise workflow. Identified as top revenue lever in brainstorming. 
-- **Proposed direction**: Compare/diff UI, group/scope-tag mapping, promotion plan (preview → dry-run → cutover → verify) -- **Dependencies**: Inventory sync, backup/restore mature -- **Spec 043 relationship**: Spec 043 (`specs/043-cross-tenant-compare-and-promotion/spec.md`) is a lightweight draft (scenarios + FRs, created 2026-01-07, status: Draft) that covers the core compare/promotion contract. This candidate captures the expanded strategic direction and scope refinements accumulated since the draft was written. When this candidate is promoted, it should refresh and supersede the existing Spec 043 draft rather than creating a parallel spec. -- **Priority**: medium (high value, high effort) - -### System Console Scope Hardening -- **Type**: hardening -- **Source**: Spec 113/114 follow-up -- **Problem**: The system console (`/system`) needs a clear cross-workspace entitlement model. Current platform capabilities (Spec 114) define per-surface access, but cross-workspace query authorization and scope isolation for platform operators are not yet hardened as a standalone contract. -- **Why it matters**: Platform operators acting across workspaces need tight scope boundaries to prevent accidental cross-workspace data exposure in troubleshooting and monitoring flows. -- **Proposed direction**: Formalize cross-workspace query authorization model, scope isolation rules for platform operator sessions, and regression coverage for wrong-workspace access in system console surfaces. -- **Dependencies**: System console (114) stable, canonical tenant context (Specs 135/136) -- **Priority**: low - -### System Console Multi-Workspace Operator UX -- **Type**: feature -- **Source**: Spec 113 deferred -- **Problem**: System console (`/system`) currently can't select/filter across workspaces for platform operators. Triage and monitoring require workspace-by-workspace navigation. 
-- **Why it matters**: Platform ops need cross-workspace visibility for troubleshooting and monitoring at scale. -- **Proposed direction**: Workspace selector/filter in system console views, cross-workspace run aggregation, unified triage entry point. -- **Dependencies**: System console (114) stable, System Console Scope Hardening -- **Priority**: low - -### Operations Naming Harmonization Across Run Types, Catalog, UI, and Audit -- **Type**: hardening -- **Source**: coding discovery, operations UX consistency review -- **Why it matters**: Strategically important for enterprise UX, auditability, and long-term platform consistency. `OperationRun` is becoming a cross-domain execution and monitoring backbone, and the current naming drift will get more expensive as new run types and provider domains are added. This should reduce future naming drift, but it is not a blocker-critical refactor and should not be pulled in as a side quest during small UI changes. -- **Problem**: Naming around operations appears historically grown and not consistent enough across `OperationRunType` values, visible run labels, `OperationCatalog` mappings, notifications, audit events, filters, badges, and related UI copy. Internal type names and operator-facing language are not cleanly separated, domain/object/verb ordering is uneven, and small UX fixes risk reinforcing an already inconsistent scheme. If left as-is, new run types for baseline, review, alerts, and additional provider domains will extend the inconsistency instead of converging it. -- **Desired outcome**: A later spec should define a clear naming standard for `OperationRunType`, establish an explicit distinction between internal type identifiers and operator-facing labels, and align terminology across runs, notifications, audit text, monitoring views, and operations UI. New run types should have documented naming rules so they can be added without re-opening the vocabulary debate. 
-- **In scope**: Inventory of current operation-related naming surfaces; naming taxonomy for internal identifiers versus visible operator language; conventions for verb/object/domain ordering; alignment rules for `OperationCatalog`, run labels, notifications, audit events, filters, badges, and monitoring UI; forward-looking rules for adding new run types and provider/domain families; a pragmatic migration plan that minimizes churn and preserves audit clarity. -- **Out of scope**: Opportunistic mass-refactors during unrelated feature work; immediate renaming of all historical values without a compatibility plan; using a small UI wording issue such as "Sync from Intune" versus "Sync policies" as justification for broad churn; a full operations-domain rearchitecture unless later analysis proves it necessary. -- **Trigger / Best time to do this**: Best tackled when multiple new run types are about to land, when `OperationCatalog` / monitoring / operations hub work is already active, when new domains such as Entra or Teams are being integrated, or when a broader UI naming constitution is ready to be enforced technically. This is a good candidate for a planned cleanup window, not an ad hoc refactor. -- **Risks if ignored**: Continued terminology drift across UI and audit layers, higher cognitive load for operators, weaker enterprise polish, more brittle label mapping, and more expensive cleanup once additional domains and execution types are established. Audit/event language may diverge further from monitoring language, making cross-surface reasoning harder. -- **Suggested direction**: Define stable internal run-type identifiers separately from visible operator labels. Standardize a single naming grammar for operation concepts, including when to lead with verb, object, or domain, and when provider-specific wording is allowed. Apply changes incrementally with compatibility-minded mapping rather than a brute-force rename of every historical string.
Prefer a staged migration that first defines rules and mapping layers, then updates high-value operator surfaces, and only later addresses legacy internals where justified. -- **Readiness level**: Qualified and strategically important, but intentionally deferred. This should be specified before substantially more run types and provider domains are introduced, yet it should not become an immediate side-track or be bundled into minor UI wording fixes. -- **Candidate quality**: - - Clearly identified cross-cutting problem with architectural and UX impact - - Strong future-facing trigger conditions instead of vague "sometime later" - - Explicit boundaries to prevent opportunistic churn - - Concrete desired outcome without overdesigning the solution - - Easy to promote into a full spec once operations-domain work is prioritized - -### OperationRun Humanization & Diagnostics Boundary -- **Type**: hardening -- **Source**: operator surface standards adoption v1, operations UX consistency review, cross-link monitoring/alerts/dashboard review -- **Problem**: `OperationRun` is one of TenantPilot's highest-leverage operator surfaces, but its current UX risks exposing implementation semantics instead of operator-facing clarity. The default surface can blur execution outcome, blocked reason, technical diagnostics, object and scope identity, and next-action guidance into one undifferentiated detail stack. Because `OperationRun` is the destination for monitoring links, alert deep links, dashboard drill-ins, and troubleshooting flows, weak clarity here propagates system-wide. The current risk pattern includes raw reason codes, internal IDs and hashes, raw payload context, ambiguous state labels, and insufficient separation between operator summary and diagnostic truth. -- **Why it matters**: This is the best pilot surface for the operator UX standard because it is highly visible, cross-cutting, and already carries the product's richest execution truth. 
If the default run detail page does not quickly answer "What happened?", "Did it succeed, fail, or get blocked?", "What scope did it affect?", "Is action required?", and "What should I do next?", then monitoring, alerting, dashboards, and support flows all inherit the same ambiguity. A low-clarity run surface reduces trust far beyond the operations page itself. -- **Desired outcome**: The default `OperationRun` list and detail experience should be operator-first: identity, status, scope, interpreted summary, and next action lead; diagnostics remain available but clearly secondary. Senior engineers must still be able to reach raw reason codes, IDs, payload context, and technical details without those details polluting the primary operator surface. -- **Proposed direction**: - - **Canonical detail-page hierarchy**: make run identity, status, top-level summary, explicit scope, and primary actions the first content block on the detail page. Raw JSON, hashes, internal IDs, and technical payload fragments move below a clearly labeled diagnostics boundary. - - **Operator-first state interpretation**: translate blocked and failed states into human-readable labels and summaries. Distinguish execution outcome from completeness/downstream effect where relevant so operators can tell the difference between "execution succeeded" and "the intended effect is incomplete or limited." - - **Diagnostics boundary pattern**: establish a reusable convention where diagnostic truth is available through progressive disclosure, not mixed into the primary reading path. The boundary should explicitly contain raw reason code, raw identifiers, payload context, low-level exception detail, and similar engineering-facing artifacts. - - **Next-step guidance contract**: whenever a run is blocked or failed, the primary surface must show what the operator should do next. Next-step content should be visible without requiring expansion into raw diagnostics. 
- - **Explicit scope communication**: run details and list rows should make the affected object, domain, and workspace/tenant context obvious so an operator can immediately understand impact and blast radius. - - **Action hierarchy on run details**: establish a safe, obvious action order for follow-up paths such as retry, inspect source object, view related monitoring context, or open diagnostics. Diagnostic actions must not visually compete with the primary operator path. - - **Reference implementation role**: use `OperationRun` as the reference implementation for future surfaces that need the same pattern: operator-first summary plus secondary diagnostics. -- **In scope**: - - `OperationRun` list and detail surfaces - - operator-facing labels and summaries - - blocked / failed state translation on these surfaces - - diagnostics boundary and progressive disclosure pattern - - top-level summary cards / interpreted content - - explicit scope communication and workspace / tenant context - - action hierarchy on run details - - mapping technical reasons into operator-facing blocked reason and next-step guidance - - monitoring / alert / dashboard deep-link landing clarity insofar as those links land on `OperationRun` -- **Out of scope**: - - broad redesign of the entire admin UI - - full monitoring information-architecture rewrite - - full alerts redesign - - product-wide adoption of the standard in one pass - - backend execution model changes that are not needed for `OperationRun` operator-surface clarity - - broad operation naming refactors beyond what this surface strictly needs to be understandable -- **Acceptance characteristics**: - - default-visible labels do not expose raw internal field names - - blocked reasons are human-readable - - next steps are visible when the run is blocked or failed - - diagnostics are present but clearly secondary - - page hierarchy starts with identity, status, summary, and actions - - raw JSON, internal hashes, and internal IDs are 
not primary content - - monitoring and alert deep links land on a page that is understandable to operators without diagnostic context -- **Why now vs later**: - - **Why now**: high leverage; immediate trust improvement for monitoring and alert flows; first concrete adoption of the operator UX standard; creates a reusable pattern for future detail surfaces - - **Why not later**: unresolved ambiguity on run surfaces keeps leaking into dashboards, alerts, and troubleshooting entry points; later surfaces will otherwise copy the current weak pattern -- **Cross-cutting impact**: monitoring, alerts, dashboards, diagnostics conventions, status-taxonomy adoption, future detail-page standards, and operator-language normalization -- **Likely follow-on candidates**: `PolicyResource` operator language and metadata isolation, baseline compare / snapshot completeness clarity, restore-run language and safe-execution standard alignment, canonical degraded / prerequisite state pattern across surfaces -- **Dependencies**: Operator Outcome Taxonomy and Cross-Domain State Separation (shared vocabulary), Operator Reason Code Translation and Humanization Contract (blocked/failure labels and next steps), canonical `OperationRun` context work already explored in Specs 054, 114, and 144 -- **Boundary with Operations Naming Harmonization**: Naming harmonization owns the cross-product vocabulary for operation types, labels, and catalog mappings. This candidate owns the operator readability and diagnostics separation of the `OperationRun` surface itself. Naming alignment may be consumed here, but this candidate should not expand into a repo-wide naming refactor. -- **Boundary with Operator Presentation & Lifecycle Action Hardening**: Presentation hardening owns shared rendering and action-visibility conventions across many surfaces. 
This candidate is narrower and deeper: it defines the canonical operator-versus-diagnostics hierarchy on the highest-value run detail surface and uses it as the reference implementation for future adoption. -- **Boundary with Operator Reason Code Translation**: Reason code translation defines how internal codes become human-readable labels and next-step envelopes. This candidate consumes that translation on the `OperationRun` surface and defines where translated content stops and raw diagnostics begin. -- **Strategic sequencing**: Best tackled after the outcome taxonomy and in parallel with or immediately after reason-code translation. It is a strong pilot implementation candidate before broader operator-surface rollout because it validates the standards on the most cross-linked execution surface first. -- **Priority**: high - -### Operator Presentation & Lifecycle Action Hardening -- **Type**: hardening -- **Source**: Evidence Snapshot / Ops-UX review 2026-03-19 -- **Problem**: TenantPilot has strong shared presentation abstractions — `OperationCatalog` for operation labels, `BadgeRenderer` / `BadgeCatalog` for status/outcome badges, and some lifecycle-aware action gating patterns in selected resources — but these conventions are not consistently enforced across all operator-facing surfaces. Individual surfaces can bypass the shared sources of truth without triggering any architectural or CI feedback. This produces a repeatable class of operator-UX degradation: - - **Operation label bypass**: surfaces that render operation names directly from internal type keys instead of going through the shared operation catalog, leaking technical identifiers like `inventory_sync` or `compliance.snapshot` into operator-visible UI. - - **Status/outcome presentation bypass**: surfaces that render raw enum values (e.g. 
`queued`, `running`, `pending`, `succeeded`) directly from model attributes instead of using `BadgeRenderer`, producing unstyled debug-quality output where operators expect consistent badge rendering. - - **Missing lifecycle-aware action gating**: mutation and destructive actions (e.g. "Expire snapshot", "Refresh snapshot") that remain visible and invocable on records in terminal lifecycle states (Expired, Failed), because no shared convention requires actions to derive visibility from valid lifecycle transitions. Backend idempotency guards prevent data corruption but do not prevent operator confusion. - - **Unscoped global widget polling**: global widgets (e.g. `BulkOperationProgress`) that poll on every page including non-operational pages where no active runs are expected, creating unnecessary network noise and giving operators the impression that background activity is occurring when none is relevant. -- **Why it matters**: In enterprise SaaS, operator trust depends on consistent, predictable UI behavior across every surface. A single widget rendering raw `queued` instead of a styled badge, or a single page showing an "Expire" action on an already-expired record, undermines confidence in the product's governance capabilities. These are not cosmetic issues — they are operator-trust issues that compound as the product adds more lifecycle-driven surfaces (Findings, Review Packs, Baselines, Exceptions, Alerts, Drift governance). Without shared enforceable conventions, every new surface risks re-introducing the same failure modes. -- **Proposed direction**: - - **Operation label convention**: codify the rule that all operator-visible operation names must resolve through `OperationCatalog::label()` (or the equivalent shared source of truth). Add a lightweight enforcement mechanism (CI check, architectural test, or documented anti-pattern) that catches direct usage of raw operation type strings in Blade templates and widget renders. 
- - **Status/outcome badge convention**: codify the rule that all operator-visible status and outcome rendering must go through `BadgeRenderer` (or equivalent shared badge helpers). Enumerate the known surfaces that currently comply and identify any that bypass the convention. Add a regression mechanism to prevent new surfaces from bypassing. - - **Lifecycle-aware action visibility convention**: define a shared contract or trait that mutation/destructive actions must consult to determine visibility based on the record's current lifecycle state. Terminal-state records must not expose invalid lifecycle transitions as available actions. Suggest introducing `isTerminal(): bool` (or equivalent) on lifecycle enums (`EvidenceSnapshotStatus`, `ReviewPackStatus`, `OperationRunStatus`, etc.) so action visibility can be derived from lifecycle semantics rather than ad hoc per-resource `->hidden()` conditions. - - **Polling ownership convention**: codify the rule that global widgets must declare their polling scope — which pages or contexts justify active polling vs. idle/suppressed behavior. Ensure idle discovery polling intervals are intentional and documented, and that non-operational pages are not subjected to unnecessary polling overhead. 
-- **Scope boundaries**: - - **In scope**: shared convention definitions, enforcement mechanisms, anti-pattern catalog, lifecycle enum enrichment (`isTerminal()` or equivalent), regression coverage for badge/label/action consistency - - **Out of scope**: local Evidence Snapshot fixes (those belong in the active Evidence-related spec), operations naming vocabulary redesign (tracked separately as "Operations Naming Harmonization"), visual language canon / design-system codification (tracked separately as "Admin Visual Language Canon"), BulkOperationProgress architectural redesign, new badge domains or new operation types -- **Examples of failure modes this should prevent**: - - A widget rendering `{{ $run->status }}` directly instead of using `BadgeRenderer::render(BadgeDomain::OperationRunStatus, $run->status)` - - A card showing raw `outcome: pending` text instead of a styled outcome badge - - An "Expire snapshot" action visible on a record with status `Expired` - - A "Refresh snapshot" action visible on a record with status `Failed` - - A global progress widget polling every 30 seconds on the Evidence detail page where no active operation runs are relevant - - A new governance surface (e.g. Baseline review, Alert detail) shipping without badge rendering because no convention required it -- **Why this is a follow-up candidate, not part of current local fixes**: The active Evidence-related spec should fix the specific Evidence Snapshot bugs (raw status in `RecentOperationsSummary`, missing `->hidden()` on expire/refresh actions). This candidate addresses the **shared convention layer** that prevents the same class of bugs from recurring on every future lifecycle-driven surface. The local fixes prove the bugs exist; this candidate prevents their recurrence. 
-- **Dependencies**: BadgeRenderer / BadgeCatalog system (already stable), OperationCatalog (already stable), lifecycle enums (already defined, need `isTerminal()` enrichment), RBAC/capability system (066+) for action gating patterns -- **Related candidates**: Operations Naming Harmonization (naming vocabulary — complementary but distinct), Admin Visual Language Canon (visual conventions — broader scope), Action Surface Contract v1.1 (interaction-level action rules — complementary) -- **Priority**: medium - -### Operations Presence & Non-Blocking Status UX -- **Type**: hardening -- **Source**: UI/UX audit — operations presence and background activity patterns -- **Problem**: `BulkOperationProgress` is a fixed bottom-right overlay (`z-[999999]`) with adaptive polling, mounted globally via render hooks in both panels. This creates several operator-UX problems: (1) completed and stale operations remain visible without a dismiss or minimize affordance, occupying screen real estate on every page; (2) the widget polls on all pages including those with no operational relevance, creating unnecessary network activity and visual noise; (3) there is no transition from "active progress" to "completed notification" — the overlay persists until polling naturally decays, with no intermediate state that acknowledges completion; (4) there is no semantic distinction between "operations are actively running" (progress feedback) and "operations recently finished" (notification/history feedback) — both are served by the same fixed overlay; (5) operators have no way to acknowledge, dismiss, or minimize operation progress once seen, meaning batch operations can dominate the viewport throughout execution. -- **Why it matters**: Background operations (sync, backup, restore, compliance snapshot, inventory) are a core product mechanic. 
How they surface to operators — during and after execution — directly affects perceived responsiveness, trust in operation completion, and cognitive load during multi-step governance workflows. An overlay that cannot be dismissed, polls everywhere, and makes no distinction between "in progress" and "recently completed" scales poorly as operation types multiply and concurrent operations become normal. Enterprise operators performing audit reviews, findings triage, or policy inspection should not be distracted by persistent progress overlays for unrelated background operations. -- **Proposed direction**: - - **Dismiss and minimize semantics**: operators can dismiss completed operations from the progress overlay, and optionally minimize the overlay to a compact indicator during active operations. Dismissed state is session-scoped (not persisted across page loads). - - **Page-scoped vs global polling context**: distinguish pages where operations are contextually relevant (operations list, tenant detail, monitoring hub) from pages where they are not (findings detail, policy inspector, RBAC settings). Non-relevant pages receive either suppressed polling or a minimal "N operations running" indicator instead of the full overlay. - - **Completion transition pattern**: define a clear transition from "progress" to "notification" when operations complete — e.g. transient toast notification on completion + overlay removal, compact "completed" badge that auto-dismisses after a timeout, or handoff to the notification panel. Indefinite overlay persistence after completion is the anti-pattern to resolve. - - **Stale operation visibility rules**: define when a completed or failed operation is too old to merit overlay presence. Completed operations older than a configurable threshold should not appear in the progress overlay — they belong in the operations list/history, not in a persistent viewport overlay. 
- - **Non-blocking pattern formalization**: establish the product-level convention for how background operations communicate status without blocking or dominating the operator's current context. This convention applies to `BulkOperationProgress` and should be extensible to future background activities (scheduled checks, async exports, evidence generation). -- **In scope**: `BulkOperationProgress` widget behavior redesign, dismiss/minimize UX, polling scope refinement, completion transition pattern, stale visibility rules, non-blocking convention documentation -- **Out of scope**: Operation label vocabulary (tracked as "Operations Naming Harmonization"), badge/status rendering conventions (tracked as "Operator Presentation & Lifecycle Action Hardening"), operations domain architecture changes, new operation types, notification panel infrastructure, monitoring hub redesign, BulkOperationProgress internal data model or domain-layer changes beyond the UX behavior layer -- **Boundary with Operator Presentation & Lifecycle Action Hardening**: That candidate owns shared conventions for how operation labels, status badges, and lifecycle-aware actions render consistently across surfaces. This candidate owns the widget-level UX behavior of how background operations surface, persist, dismiss, and transition in the operator's viewport. Presentation conventions define *how it looks*; operations presence defines *when and where it appears*. -- **Boundary with Operations Naming Harmonization**: Naming harmonization owns the vocabulary (internal type identifiers vs operator-facing labels). This candidate owns the UX behavior layer that consumes those labels. 
-- **Dependencies**: BulkOperationProgress current implementation, render hook registration, Filament v5 panel infrastructure, operations domain model -- **Related candidates**: Operator Presentation & Lifecycle Action Hardening (complementary — rendering conventions), Operations Naming Harmonization (complementary — terminology), Admin Visual Language Canon (visual weight rules may inform overlay styling) -- **Priority**: medium - -### Provider Connection Resolution Normalization -- **Type**: hardening -- **Source**: architecture audit – provider connection resolution analysis -- **Problem**: The codebase has a dual-resolution model for provider connections. Gen 2 jobs (`ProviderInventorySyncJob`, `ProviderConnectionHealthCheckJob`, `ProviderComplianceSnapshotJob`) receive an explicit `providerConnectionId` and pass it through the `ProviderOperationStartGate`. Gen 1 jobs (`ExecuteRestoreRunJob`, `EntraGroupSyncJob`, `SyncRoleDefinitionsJob`, policy sync jobs, etc.) do NOT — their called services resolve the default connection at runtime via `MicrosoftGraphOptionsResolver::resolveForTenant()` or internal `resolveProviderConnection()` methods. This creates non-deterministic execution: a job dispatched against one connection may silently execute against a different one if the default changes between dispatch and execution. ~20 services use the Gen 1 implicit resolution pattern. -- **Why it matters**: Non-deterministic credential binding is a correctness and audit gap. Enterprise customers need to know exactly which connection identity was used for every Graph API call. The implicit pattern also prevents connection-scoped rate limiting, error attribution, and consent-scope validation. This is the foundational refactor that unblocks all other provider connection improvements. 
-- **Proposed direction**: - - Refactor all Gen 1 services to accept an explicit `ProviderConnection` (or `providerConnectionId`) parameter instead of resolving default internally - - Update all Gen 1 jobs to accept `providerConnectionId` at dispatch time (resolved at the UI/controller layer via `ProviderOperationStartGate` or equivalent) - - Deprecate `MicrosoftGraphOptionsResolver` — callers should use `ProviderGateway::graphOptions($connection)` directly - - Ensure `provider_connection_id` is recorded in every `OperationRun` context and audit event - - Standardize error handling: all resolution failures produce `ProviderConnectionResolution::blocked()` with structured `ProviderReasonCodes`, not mixed exceptions (`ProviderConfigurationRequiredException`, `RuntimeException`, `InvalidArgumentException`) -- **Known affected services** (Gen 1 / implicit resolution): `RestoreService` (line 2913 internal `resolveProviderConnection()`), `PolicySyncService` (lines 58, 450), `PolicySnapshotService` (line 752), `RbacHealthService` (line 192), `InventorySyncService` (line 730 internal `resolveProviderConnection()`), `EntraGroupSyncService`, `RoleDefinitionsSyncService`, `EntraAdminRolesReportService`, `AssignmentBackupService`, `AssignmentRestoreService`, `ScopeTagResolver`, `TenantPermissionService`, `VersionService`, `ConfigurationPolicyTemplateResolver`, `FoundationSnapshotService`, `FoundationMappingService`, `RestoreRiskChecker`, `PolicyCaptureOrchestrator`, `AssignmentFilterResolver`, `RbacOnboardingService`, `TenantConfigService` -- **Known affected jobs** (Gen 1 / no explicit connectionId): `ExecuteRestoreRunJob`, `EntraGroupSyncJob`, `SyncRoleDefinitionsJob`, `SyncEntraAdminRolesJob`, plus any job that calls a Gen 1 service -- **Gen 2 reference implementations** (correct pattern): `ProviderInventorySyncJob`, `ProviderConnectionHealthCheckJob`, `ProviderComplianceSnapshotJob` — all receive `providerConnectionId`, pass through `ProviderOperationStartGate`, lock row, 
create `OperationRun` with connection in context -- **Key architecture components**: - - `ProviderConnectionResolver` — correct, keep as-is. `resolveDefault()` returns `ProviderConnectionResolution` value object - - `ProviderOperationStartGate` — canonical dispatch-time gate, correct Gen 2 pattern. Handles 3 operation types: `provider.connection.check`, `inventory_sync`, `compliance.snapshot` - - `MicrosoftGraphOptionsResolver` — legacy bridge (32 lines), target for deprecation. Calls `resolveDefault()` internally, hides connection identity - - `ProviderGateway` — lower-level primitive, builds graph options from explicit connection. Correct, keep as-is - - `ProviderIdentityResolver` — resolves identity (platform vs dedicated) from connection. Correct, keep as-is - - Partial unique index on `provider_connections`: `(tenant_id, provider) WHERE is_default = true` -- **Out of scope**: UX label changes, UI banners, legacy credential field removal (those are separate candidates below) -- **Dependencies**: None — this is the foundational refactor -- **Related specs**: Spec 081 (Tenant credential migration CI guardrails), Spec 088 (provider connection model), Spec 089 (provider gateway), Spec 137 (data-layer provider prep) -- **Priority**: high - -### Provider Connection UX Clarity -- **Type**: polish -- **Source**: architecture audit – provider connection resolution analysis -- **Problem**: The operator-facing language and information architecture around provider connections creates confusion about why a "default" connection is required, what happens when it's missing, and when actions are tenant-wide vs connection-scoped. 
Specific issues: (1) "Set as Default" is misleading — it implies preference, but the connection is actually the canonical operational identity; (2) missing-default errors surface as blocked `OperationRun` records or exceptions, but there is no proactive banner/hint on the tenant or connection pages; (3) action labels don't distinguish tenant-wide operations (verify, sync) from connection-scoped operations (health check, test); (4) the singleton auto-promotion (first connection becomes default automatically) is invisible — operators don't understand why their first connection was special. -- **Why it matters**: Reduces support friction and operator confusion. Enterprise operators managing multiple tenants need clear, predictable language about connection lifecycle. The current UX makes the correct architecture feel like a bug ("why do I need a default?"). -- **Proposed direction**: - - Rename "Set as Default" → "Promote to Primary" (or "Set as Primary Connection") across all surfaces - - Add a missing-primary-connection banner on tenant detail / connection list when no default exists — with a direct "Promote" action - - Distinguish action labels: tenant-wide actions ("Sync Tenant", "Verify Tenant") vs connection-scoped actions ("Check Connection Health", "Test Connection") - - Improve blocked-notification copy: instead of generic "provider connection required", show "No primary connection configured for [Provider]. Promote a connection to continue." 
- - Show a transient success notification when auto-promotion happens on first connection creation ("This connection was automatically set as primary because it's the first for this provider") - - Consider an info tooltip or help text explaining the primary connection concept on the connection resource pages -- **Key surfaces to update**: `ProviderConnectionResource` (row actions, header actions, table empty state), `TenantResource` (verify action, connection tab), onboarding wizard consent step, `ProviderNextStepsRegistry` remediation links, notification templates for blocked operations -- **Auto-default creation locations** (4 places, need UX feedback): `CreateProviderConnection` action, `TenantOnboardingController`, `AdminConsentCallbackController`, `ManagedTenantOnboardingWizard` -- **Out of scope**: Backend resolution refactoring (that's the normalization candidate above), legacy field removal -- **Dependencies**: Soft dependency on "Provider Connection Resolution Normalization" — UX improvements are more coherent when the backend consistently uses explicit connections, but many label/banner changes can proceed independently -- **Related specs**: Spec 061 (provider connection UX), Spec 088 (provider connection model) -- **Priority**: medium - -### Provider Connection Verification Truth Contract -- **Status**: Candidate -- **Suggested release**: R1 / Platform Hardening before customer-facing onboarding -- **Area**: Provider Connections, Managed Tenant Onboarding, Verification, Admin Consent, Required Permissions Assist -- **Priority**: High -- **Risk**: Medium -- **Type**: Architecture + UX consistency + state model hardening - -#### Problem - -Managed Tenant Onboarding currently composes several different truth sources into one visible verification surface: - -- provider connection consent status -- provider connection verification status -- operation run status/outcome -- embedded verification report -- required permissions diagnostics -- stored tenant 
permission inventory -- previous verification report fingerprint comparison -- onboarding session / draft state - -This separation is domain-correct, but the UI currently exposes the composition in a way that can become decisionally inconsistent for an operator. - -Example symptom cluster: - -- Admin consent is granted. -- Verification state is reset to unknown / needs verification. -- Wizard still shows a blocked verification state after rerun. -- Required Permissions Assist shows missing application permissions. -- Stored permission data says it needs refresh. -- Change indicator says no changes since previous verification. -- Old/stored diagnostics can appear next to current onboarding actions without a clear freshness contract. - -The core issue is not that consent, verification, and operational readiness are separate. That separation is correct. The issue is that the onboarding UI does not consistently project these separate truths into one canonical decision state. - -#### Goal - -Introduce a clear truth contract for Provider Connection verification surfaces so that the user always sees one primary, current decision state, while historical/stored diagnostics remain available but are clearly marked as such. - -The system must clearly distinguish: - -- Consent truth -- Verification execution truth -- Operational readiness truth -- Stored diagnostics truth -- Onboarding draft truth -- Historical comparison truth - -#### Non-Goal - -This spec must not redesign provider connections globally beyond what is required for onboarding and verification truth consistency. - -This spec must not change Microsoft Graph permission requirements. - -This spec must not remove historical verification reports or stored diagnostics. - -This spec must not introduce live external calls from read-only report views. - -This spec must not implement multi-cloud provider abstraction work. 
- -#### Domain Principles - -##### Principle 1: Consent is not readiness - -`consent_status = granted` only means that admin consent was granted. - -It does not mean: - -- the connection is healthy -- the app has all required permissions -- the tenant is operationally ready -- inventory/backup/restore operations can safely run - -##### Principle 2: Verification is a current decision state - -The onboarding wizard must present exactly one primary verification decision state at a time. - -Allowed canonical onboarding verification states: - -- `not_started` -- `needs_verification` -- `verifying` -- `ready` -- `needs_attention` -- `blocked` -- `stale_after_connection_change` -- `stale_after_consent_change` -- `verification_unavailable` - -##### Principle 3: Stored diagnostics are not current truth unless fresh - -Stored required-permission diagnostics may be displayed, but they must be visibly classified as one of: - -- fresh diagnostics -- stale diagnostics -- historical diagnostics -- unavailable diagnostics - -Stale stored diagnostics must not look like current hard blockers. - -##### Principle 4: Historical comparison is not current readiness - -A message like “No changes since previous verification” is only a historical report comparison. - -It must not be presented as a current readiness signal when: - -- consent changed after the compared report -- provider connection changed after the compared report -- permission inventory is stale -- the current verification run could not refresh permissions -- the current onboarding draft was invalidated - -##### Principle 5: One primary decision truth per screen - -The onboarding wizard must show one primary state and one primary next action. - -Secondary diagnostics may be shown below the primary decision but must not compete with it. - -#### Current Risk - -The current behavior can make the product feel unreliable during one of the most important trust moments: tenant onboarding. 
- -Bad operator interpretation examples: - -- “Consent succeeded, so why is verification unknown?” -- “I reran verification, so why does it still show stored diagnostics?” -- “Are these missing permissions real or old?” -- “Does ‘No changes’ mean everything is fine?” -- “Is the tenant blocked because of current Graph state or because the app is using old data?” - -For an enterprise SaaS governance platform, this is a trust issue. - -#### Desired UX Model - -##### After successful admin consent callback - -Primary state: - -```text -Consent granted -Verification required -``` - -Do not show raw: - -```text -Verification state: Unknown -``` - -Preferred label: - -```text -Verification state: Needs verification -``` - -CTA: - -```text -Run verification -``` - -Secondary explanation: - -```text -Admin consent was granted. Run verification to confirm operational readiness. -``` - -##### Onboarding wizard after consent changed but before rerun - -Primary state: - -```text -Verification required -``` - -or - -```text -Stale after consent change -``` - -Primary explanation: - -```text -Admin consent changed after the last verification. Run verification again to confirm current readiness. -``` - -Old report handling: - -- old report may be visible under `Previous verification` -- old blockers must not appear as the current blocking reason -- old required permissions must be marked historical/stale - -##### Onboarding wizard during verification - -Primary state: - -```text -Verifying -``` - -Primary explanation: - -```text -A verification operation is running. -``` - -Old diagnostics: - -- can be hidden by default -- or shown under `Previous diagnostics` with reduced visual priority - -##### Onboarding wizard after fresh verification passes - -Primary state: - -```text -Ready -``` - -Primary explanation: - -```text -Connection verified. Tenant is ready for onboarding continuation. 
-``` - -Required permissions assist: - -- hidden unless warnings exist - -##### Onboarding wizard after fresh verification finds missing permissions - -Primary state: - -```text -Blocked -``` - -Primary explanation: - -```text -Required application permissions are missing. -``` - -Required Permissions Assist: - -- shown as fresh/current -- missing permissions can block progression -- copy/handoff actions available - -##### Onboarding wizard after verification cannot refresh permission inventory - -Primary state: - -```text -Needs attention -``` - -or - -```text -Verification incomplete -``` - -Primary explanation: - -```text -Verification ran, but permission inventory could not be refreshed. Stored diagnostics may be outdated. -``` - -Required Permissions Assist: - -- stale stored diagnostics may be shown -- must not be styled as fresh current blockers -- primary action remains rerun/fix connection access - -#### Functional Requirements - -##### FR-1: Canonical onboarding verification decision state - -Introduce a canonical decision projection for onboarding verification. - -The projection must consume: - -- selected provider connection -- provider connection consent status -- provider connection verification status -- onboarding session state -- latest relevant verification run -- verification report overall -- report freshness metadata -- permission diagnostics freshness -- connection/consent changed markers - -The projection must return: - -- canonical state -- primary label -- severity/color -- primary explanation -- primary action -- whether user may proceed -- whether required permissions assist is current, stale, historical, or hidden -- whether previous verification report may be shown -- whether change indicator is safe to show prominently - -##### FR-2: No raw Unknown in customer/operator-facing callback UI - -The admin consent callback page must not show raw `Unknown` as a user-facing verification state after successful consent. 
- -It must display: - -```text -Needs verification -``` - -or equivalent. - -The stored enum may remain `Unknown` if that is the domain model, but the UI label must reflect user intent. - -##### FR-3: Consent change invalidates current verification decision - -After successful admin consent: - -- active onboarding draft must be marked as changed after consent -- existing current verification run pointer must be invalidated -- old report must no longer be presented as current verification truth -- previous diagnostics may remain accessible only as historical/stale context - -Required flag or equivalent: - -```text -consent_recently_updated = true -``` - -or reuse/extend: - -```text -connection_recently_updated = true -``` - -The important requirement is semantic clarity: consent update and general connection edit may need distinct reason codes. - -Suggested reason codes: - -- `consent_changed_after_verification` -- `connection_changed_after_verification` -- `verification_required_after_consent` -- `verification_required_after_connection_update` - -##### FR-4: Required Permissions Assist freshness classification - -Required Permissions Assist must expose a freshness classification: - -```text -fresh -stale -historical -unknown -unavailable -``` - -The assist must not only say “Stored permission diagnostics show blockers.” - -It must say whether those blockers are: - -- from the current verification -- from stored diagnostics before the latest consent change -- from stored diagnostics that could not be refreshed -- from a previous verification run - -##### FR-5: Stale diagnostics must not be hard current blockers - -If missing permissions are known only from stale stored diagnostics, the UI must not present them as current confirmed blockers. - -Allowed state: - -```text -Needs attention -``` - -Preferred copy: - -```text -Stored permission diagnostics show previous blockers, but they need refresh before they can be treated as current. 
-``` - -Not allowed as primary current truth: - -```text -Blocked -Missing permissions -``` - -unless the current verification report confirms the missing permissions with fresh diagnostics. - -##### FR-6: Current blocking state requires current evidence - -The wizard may show `Blocked` only if one of these is true: - -1. Current verification run report has `overall = blocked` and the blocking evidence is current/fresh. -2. Current operation was blocked by execution/legitimacy/preflight checks. -3. Current provider connection state explicitly projects to non-operable due to consent/identity/credential failure. -4. Current verification report confirms required permissions are missing using fresh permission inventory. - -If the system only has stale permission evidence, the state must be `needs_attention`, not `blocked`. - -##### FR-7: Change indicator must be context-aware - -The report change indicator must not show “No changes since previous verification” as a prominent current signal when: - -- consent changed after the previous verification -- provider connection changed after the previous verification -- current permission inventory is stale -- current verification is required but not yet run -- latest run did not refresh permission inventory - -Alternative labels: - -```text -Previous result unchanged -``` - -```text -Historical comparison only -``` - -```text -Comparison unavailable after consent change -``` - -##### FR-8: Verification start paths must be consistent - -The onboarding wizard verification start path and the general StartVerification service must have consistent state transitions. - -At minimum, starting verification from onboarding must set/reflect: - -- operation run queued/running -- connection verification pending/verifying -- onboarding decision state verifying -- stale flags cleared only when appropriate - -No surface should start a verification operation without the canonical status projection knowing that verification is in progress. 
- -##### FR-9: Read-only report views must remain read-only - -Existing report views may continue to state: - -```text -Read-only: this view uses stored data and makes no external calls. -``` - -But the UI must make clear whether the stored data is: - -- current result -- previous result -- stale result -- diagnostic-only result - -##### FR-10: Proceed gating must use canonical decision state - -The onboarding wizard’s Next/Complete eligibility must be based on the canonical decision state. - -Allowed to proceed: - -- `ready` -- optionally `needs_attention` if explicitly defined as non-blocking - -Not allowed to proceed: - -- `not_started` -- `needs_verification` -- `verifying` -- `blocked` -- `stale_after_connection_change` -- `stale_after_consent_change` -- `verification_unavailable` - -#### UX Requirements - -##### UX-1: Primary state block - -The verification step should have a single primary state block. - -Example: - -```text -Verification required -Admin consent was granted after the last verification. Run verification again to confirm operational readiness. -[Start verification] -``` - -##### UX-2: Historical diagnostics section - -Old reports and old permission diagnostics must be moved under a visually secondary section: - -```text -Previous verification diagnostics -``` - -or - -```text -Stored diagnostics -``` - -With explicit freshness label: - -```text -Historical -Stale -Current -``` - -##### UX-3: Required permissions copy - -Current fresh blocker: - -```text -Required permissions missing -Verification confirmed that required application permissions are missing. -``` - -Stale blocker: - -```text -Stored permission diagnostics need refresh -Previous diagnostics show missing permissions, but they were not refreshed after the latest consent or connection change. -``` - -Unable to refresh: - -```text -Permission inventory could not be refreshed -The system could not confirm the current permission state. 
Fix the connection issue and run verification again. -``` - -##### UX-4: Callback page wording - -Replace confusing callback wording: - -```text -Verification state: Unknown -``` - -With: - -```text -Verification state: Needs verification -``` - -Optional explanation: - -```text -Consent was granted, but the connection has not been verified since this consent change. -``` - -##### UX-5: No false calmness - -Do not show “No changes since previous verification” in a way that suggests current readiness. - -If shown at all after consent/connection changes, label it: - -```text -Previous report comparison: no changes -``` - -and place it below the primary current decision. - -#### Technical Requirements - -##### TR-1: Introduce or consolidate a ViewModel - -Create or consolidate a dedicated onboarding verification decision ViewModel. - -Suggested name: - -```php -OnboardingVerificationDecision -``` - -or - -```php -ProviderConnectionVerificationDecision -``` - -Suggested fields: - -```php -state -label -description -severity -canProceed -primaryAction -reportMode -assistMode -diagnosticsFreshness -isCurrent -isHistorical -staleReason -blockingReason -sourceRunId -sourceReportId -sourceConnectionId -``` - -##### TR-2: Explicit freshness metadata - -Freshness should not be inferred only from UI copy. - -Suggested value object: - -```php -VerificationFreshness -``` - -Possible values: - -```text -current -stale_after_consent_change -stale_after_connection_change -stale_permission_inventory -historical -unknown -unavailable -``` - -##### TR-3: Required Permissions Assist mode - -The assist builder should return a mode: - -```text -hidden -current_blocker -current_attention -stale_diagnostics -historical_diagnostics -unavailable -``` - -##### TR-4: Report comparison guard - -The change indicator must receive enough context to decide whether it is safe to present the comparison prominently. 
- -Inputs may include: - -- consent_changed_after_previous_report -- connection_changed_after_previous_report -- permission_inventory_fresh -- current_run_id -- previous_report_id - -##### TR-5: State transition normalization - -Normalize verification start state across: - -- onboarding wizard action -- general StartVerification service -- queued operation run service -- provider connection health job - -There should not be one path that sets ProviderConnection to Pending and another path that only stores a run id in the onboarding session. - -##### TR-6: No destructive cleanup of history - -Do not delete historical OperationRuns, historical VerificationReports, or stored TenantPermissions simply because consent changed. - -Instead, classify their relevance. - -#### Acceptance Criteria - -##### AC-1: Consent callback wording - -Given admin consent succeeds -When the callback page is shown -Then the page shows: - -```text -Verification state: Needs verification -``` - -And does not show: - -```text -Verification state: Unknown -``` - -##### AC-2: Draft invalidated after consent - -Given an onboarding draft has a completed verification run -When admin consent succeeds again for the same connection -Then the onboarding draft no longer treats the previous verification run as current -And the wizard primary state is `needs_verification` or `stale_after_consent_change`. - -##### AC-3: Historical report remains accessible - -Given a previous verification report exists -When consent changes -Then the previous report may still be visible -But it is labelled as previous/historical/stale -And it is not used as current readiness truth. - -##### AC-4: Fresh missing permissions block - -Given a current verification run refreshes permission inventory successfully -And required application permissions are missing -When the wizard renders -Then the primary state is `blocked` -And Required Permissions Assist is labelled current/fresh. 
- -##### AC-5: Stale missing permissions do not hard block as current - -Given missing permissions exist only in stale stored diagnostics -And current verification did not refresh permission inventory -When the wizard renders -Then the primary state is `needs_attention` or `verification_incomplete` -And not `blocked`, unless another current blocker exists. - -##### AC-6: Change indicator is demoted after consent change - -Given a previous verification report has the same fingerprint -And consent changed after that report -When the wizard renders -Then “No changes since previous verification” is not shown as a primary current signal. - -##### AC-7: Start verification projects pending state consistently - -Given verification is started from onboarding -When the operation is queued/running -Then the wizard shows `verifying` -And the provider connection state is consistent with the general verification start path. - -##### AC-8: Proceed gating follows canonical state - -Given canonical verification state is blocked/stale/needs verification -Then the wizard cannot proceed to completion. - -Given canonical verification state is ready -Then the wizard can proceed. - -#### Required Tests - -##### Callback UX tests - -- Successful admin consent renders `Needs verification`, not raw `Unknown`. -- Failed admin consent still renders meaningful failure state. -- Consent callback marks affected onboarding draft as needing verification. -- Consent callback does not delete historical operation runs. - -##### Connection state transition tests - -- Granted consent resets verification to unverified/needs verification. -- Starting verification sets pending/verifying consistently across entry points. -- Successful health check projects healthy/ready. -- Missing consent projects required/revoked/blocked correctly. -- Missing credentials or invalid identity project blocked/error correctly. 
- -##### Onboarding stale invalidation tests - -- Existing completed verification becomes stale after consent change. -- Existing completed verification becomes stale after connection edit. -- Wizard shows one primary current state after invalidation. -- Previous reports are shown only as historical diagnostics after invalidation. - -##### Verification rerun after consent change tests - -- Rerun after consent clears stale-after-consent state. -- Rerun with fresh complete permissions shows ready. -- Rerun with fresh missing permissions shows blocked. -- Rerun unable to refresh permissions shows needs_attention/incomplete, not fresh blocked. - -##### Required Permissions Assist freshness tests - -- Fresh missing app permissions produce `current_blocker` assist. -- Stale stored missing app permissions produce `stale_diagnostics` assist. -- Previous diagnostics after consent change are labelled historical/stale. -- Stored diagnostics cannot override a current ready verification state. -- Assist does not imply current missing permissions when inventory freshness is false. - -##### Change indicator tests - -- No changes indicator appears only when comparison is valid and current. -- After consent change, comparison is hidden or labelled historical. -- After connection change, comparison is hidden or labelled historical. -- If permission inventory is stale, comparison is not presented as readiness evidence. - -#### Suggested Implementation Slices - -##### Slice 1: Callback wording and tests - -Smallest safe improvement. - -- Ensure callback page never renders raw `Unknown`. -- Keep stored enum unchanged. -- Add/adjust tests. - -##### Slice 2: Canonical decision ViewModel - -Introduce the central projection object for onboarding verification. - -- Move wizard status derivation into one explicit class/service. -- Encode decision precedence. -- Add unit tests for all canonical states. 
- -##### Slice 3: Required Permissions Assist freshness contract - -Extend assist builder with explicit mode/freshness. - -- Fresh current blocker -- Stale diagnostics -- Historical diagnostics -- Unavailable diagnostics - -##### Slice 4: Change indicator demotion - -Make report comparison context-aware. - -- Consent changed -- Connection changed -- Permission inventory stale -- Current run missing/incomplete - -##### Slice 5: Verification start path normalization - -Align onboarding verification start with general StartVerification semantics. - -- Pending/verifying projection consistent -- No hidden divergent writer paths - -#### Open Questions - -1. Should `Unknown` remain the persisted enum value, or should a more explicit enum be introduced? - -Possible new enum values: - -```text -needs_verification -unverified_after_consent_change -unverified_after_connection_change -``` - -2. Should consent changes and connection edits use separate stale flags? - -Possible flags: - -```text -consent_recently_updated -connection_recently_updated -``` - -or a single structured stale reason: - -```text -verification_stale_reason = consent_changed | connection_changed | permissions_stale -``` - -3. Should stale stored missing permissions ever block onboarding? - -Recommended answer: no, unless there is a separate current blocker. - -4. Should Required Permissions Assist be driven only from current verification report during onboarding? - -Recommended answer: mostly yes. Stored tenant permission diagnostics can supplement, but must not override the current decision state. - -5. Should OperationRun outcome and VerificationReport overall remain separate? - -Recommended answer: yes. - -Reason: - -- OperationRun outcome = execution truth -- VerificationReport overall = readiness truth - -But the UI must explain this separation cleanly. - -#### Definition of Done - -- Admin consent callback no longer exposes raw `Unknown` to operators. 
-- Onboarding verification step shows exactly one primary decision state. -- Old reports and stored diagnostics are clearly labelled as previous/stale/historical. -- Required Permissions Assist distinguishes fresh blockers from stale diagnostics. -- Change indicator does not provide false calmness after consent/connection changes. -- Verification start state is consistent between onboarding and general verification entry points. -- Proceed gating is based on canonical onboarding verification decision. -- Regression tests cover callback UX, stale invalidation, rerun behavior, assist freshness, and change indicator context. -- No historical reports or permission records are destructively deleted as part of this cleanup. -- Read-only report views remain read-only and do not trigger external calls. - -#### Recommended Decision - -This should not be treated as a tiny wording bug. - -The proper fix is a flow and state-model fix with a small architecture cleanup: - -```text -Consent truth ≠ Verification truth ≠ Operational readiness truth -``` - -But: - -```text -Onboarding UI must project them into one clear current decision truth. -``` - -Recommended priority: high before customer-facing onboarding, because this flow is a trust-critical first-run experience. 
- -### Provider Connection Legacy Cleanup -- **Type**: hardening -- **Source**: architecture audit – provider connection resolution analysis -- **Problem**: After normalization is complete, several legacy artifacts remain: (1) `MicrosoftGraphOptionsResolver` — a 32-line convenience bridge that exists only because ~20 services haven't been updated to use explicit connections; (2) service-internal `resolveProviderConnection()` methods in `RestoreService` (line 2913), `InventorySyncService` (line 730), and similar — these are local resolution logic that should not exist once services receive explicit connections; (3) `Tenant` model legacy credential accessors (`app_client_id`, `app_client_secret` fields) — `graphOptions()` already throws `BadMethodCallException`, but the fields and accessors remain; (4) `migration_review_required` flag on `ProviderConnection` — used during the credential migration from tenant-level to connection-level, should be retired once all tenants are migrated. -- **Why it matters**: Dead code increases cognitive load and creates false affordances. New developers may use `MicrosoftGraphOptionsResolver` or internal resolution methods thinking they're the correct pattern. Legacy credential fields on `Tenant` suggest credentials still live there. Cleaning up after normalization makes the correct architecture self-documenting. 
-- **Proposed direction**: - - Remove `MicrosoftGraphOptionsResolver` class entirely (after normalization ensures zero callers) - - Remove all service-internal `resolveProviderConnection()` / `resolveDefault()` methods - - Remove legacy credential fields from `Tenant` model (migration to drop columns, update factory, update tests) - - Evaluate `migration_review_required` — if all tenants have migrated, remove the flag and related UI (banner, filter) - - Update CI guardrails: `NoLegacyTenantGraphOptionsTest` and `NoTenantCredentialRuntimeReadsSpec081Test` can be simplified or removed once the code they guard against is gone - - Verify no seeders, factories, or test helpers reference legacy patterns -- **Out of scope**: Any new features — this is pure cleanup -- **Dependencies**: Hard dependency on "Provider Connection Resolution Normalization" — cleanup cannot proceed until all callers are migrated -- **Related specs**: Spec 081 (credential migration guardrails), Spec 088 (provider connection model), Spec 137 (data-layer provider prep) -- **Priority**: medium (deferred until normalization is complete) - -> Repository cleanup strand from the strict read-only legacy audit 2026-04-22: -> 1. **Dead Transitional Residue Cleanup** -> 2. **Onboarding State Fallback Retirement** -> 3. **Canonical Operation Type Source of Truth** -> -> The first two candidates remove dead or weakly justified compatibility residue. The third resolves the remaining core semantic conflict that still spans persistence, registries, resources, specs, and tests. 
- -### Dead Transitional Residue Cleanup -- **Type**: hardening / cleanup -- **Source**: strict read-only legacy / compatibility audit 2026-04-22; orphaned-truth residue review -- **Absorbs / broadens**: the earlier `Tenant App Status False-Truth Removal` slice plus adjacent dead-symbol cleanup -- **Problem**: The repo still contains smaller transitional residues that no longer carry active product semantics but still survive in code, badges, factories, fixtures, and tests. Confirmed examples include unused deprecated `BaselineProfile::STATUS_*` constants and orphaned tenant app-status residue that now mainly persists as badge, factory, fixture, and test residue. -- **Why it matters**: Each residue is small, but together they blur the real domain language, preserve dead semantics in tests, and make later cleanup harder because it is no longer obvious which symbols are still authoritative. -- **Goal**: Remove dead transitional residue that no longer drives runtime, UI, filter, cast, or API behavior, and clean up associated tests, fixtures, and factories in the same change. -- **In scope**: - - remove unused deprecated `BaselineProfile::STATUS_*` constants - - remove orphaned tenant app-status badge, factory, fixture, and test residue - - verify that no hidden runtime, UI, filter, cast, or API dependency still exists before removal - - document the remaining active domain language after cleanup -- **Out of scope**: operation-type dual semantics, onboarding state fallbacks, provider identity or migration review, Baseline Scope V2, and spec-backed legacy redirect paths.
-- **Key requirements**: - - dead deprecated constants must be removed when no productive reference remains - - orphaned badge, status, factory, and fixture residue must not survive as silent compatibility lore - - cleanup must include tests and fixtures in the same change - - removal must prove there is no hidden runtime, UI, filter, cast, or API dependency - - the remaining canonical domain language must be clearer after cleanup -- **Acceptance characteristics**: - - deprecated `BaselineProfile::STATUS_*` constants are gone - - tenant app-status residue is removed or reduced to explicitly justified boundary-only remnants - - no productive references to removed symbols remain - - tests no longer conserve dead semantics -- **Boundary with Provider Connection Legacy Cleanup**: provider connection cleanup owns still-legitimate or spec-bound provider transitional paths. This candidate only removes dead residue with no active product role. -- **Strategic sequencing**: first step of the repository cleanup strand. -- **Priority**: high - -### Onboarding State Fallback Retirement -- **Type**: hardening / cleanup -- **Source**: strict read-only legacy / compatibility audit 2026-04-22; onboarding state-key audit -- **Problem**: Onboarding still carries mixed old and new state keys and service-level fallback reads between older fields and newer canonical fields. Some keys still have distinct roles, such as mutable selector state versus trusted persisted state, but others now appear to survive only as historical fallback. -- **Why it matters**: In a pre-production repo, silent fallback between state classes keeps semantic boundaries fuzzy and makes future trusted-state hardening harder. New work can accidentally bind to retired keys because the service layer still tolerates them. -- **Goal**: Retire pure onboarding fallback keys and make the remaining split between selector state and trusted persisted state explicit. 
-- **In scope**: - - audit and retire pure fallback keys such as `verification_run_id` and `bootstrap_run_ids` if no current contract still needs them - - remove corresponding fallback reads in onboarding services - - align contracts and tests to the remaining active key language - - document which onboarding keys remain active and why -- **Out of scope**: removing `selected_provider_connection_id` while it still has an active contract role, provider identity or migration review, and generic session or trusted-state architecture redesign. -- **Key requirements**: - - onboarding keys with no active contractual role must be removed when they survive only as fallback - - selector state and trusted state must be semantically separated - - silent fallback between semantically different state classes must not persist without an explicit current contract - - specs, contracts, and service read behavior must converge on the same remaining keys - - tests must stop conserving retired fallback fields -- **Risks / open questions**: - - `selected_provider_connection_id` still appears in current contracts and should not be treated as dead residue by default - - some onboarding keys may require contract cleanup before code cleanup can be completed cleanly -- **Strategic sequencing**: second step of the repository cleanup strand, after `Dead Transitional Residue Cleanup` and before `Canonical Operation Type Source of Truth`. -- **Priority**: high - -### Provider Connection Status Vocabulary Cutover -- **Type**: hardening -- **Source**: legacy / orphaned truth audit 2026-03-16 -- **Classification**: bounded cutover -- **Problem**: `ProviderConnection` currently exposes overlapping status vocabularies across `status`, `health_status`, `consent_status`, and `verification_status`. Resources, badges, and filters can read both projected legacy state and canonical enum state, creating drift and operator ambiguity. 
-- **Why it matters**: This is duplicate status truth on an operator-facing surface. It also leaves the system vulnerable to projector drift if legacy projected fields stop matching the enum source of truth. -- **Target model**: `ProviderConnection` -- **Canonical source of truth**: `ProviderConnection.consent_status` and `ProviderConnection.verification_status` -- **Must stop being read**: `ProviderConnection.status` and `ProviderConnection.health_status` in resources, filters, badges, and any operator-facing status summaries. -- **Can be removed immediately**: - - new operator-facing reads of legacy varchar status fields - - new badge/filter logic that depends on normalized legacy values -- **Remove only after cutover**: - - `status` and `health_status` columns - - projector persistence of those fields, if still retained for compatibility - - legacy badge normalization paths -- **Migration / backfill**: No data backfill if enum columns are already complete. Requires a later schema cleanup migration to drop legacy varchar columns after all reads are migrated. -- **UI / resource / policy / test impact**: - - UI/resources: `ProviderConnectionResource` and related badges/filters move to one coherent operator vocabulary - - Policy: none directly - - Tests: add exhaustive projection and badge mapping coverage during the transition; update resource/filter assertions to enum-driven behavior -- **Scope boundaries**: - - In scope: provider connection status fields, display semantics, badge/filter vocabulary, deprecation path for projected columns - - Out of scope: tenant credential migration, provider onboarding flow redesign, unrelated badge cleanup elsewhere -- **Dependencies**: Confirm all hidden read paths outside the main resource and define the operator-facing enum presentation. -- **Risks**: Medium rollout risk. Filters, badges, and operator language change together, and hidden reads may exist outside the primary resource. 
-- **Why it should be its own spec**: This is a self-contained source-of-truth cutover on one model. It is too important and too operationally visible to bury inside a generic provider cleanup spec. -- **Priority**: high - -### Tenant Legacy Credential Source Decommission -- **Type**: hardening -- **Source**: legacy / orphaned truth audit 2026-03-16 -- **Classification**: staged migration -- **Problem**: Tenant-level credential fields remain in the data model after ProviderCredential became the canonical identity store. They are still used for migration classification and are kept artificially alive by factory defaults, which obscures the real architecture and prolongs the cutover. -- **Why it matters**: This is an incomplete architectural cutover around sensitive identity data. The system needs an explicit end-state where runtime credential resolution no longer depends on tenant legacy fields. -- **Target model**: `Tenant`, with `ProviderCredential` as the destination canonical model -- **Canonical source of truth**: `ProviderCredential.client_id` and `ProviderCredential.client_secret` -- **Must stop being read**: tenant legacy credential fields in normal runtime credential resolution. Transitional reads remain allowed only inside migration-classification paths until exit criteria are met. -- **Can be removed immediately**: - - factory defaults that populate legacy tenant credentials by default - - any non-classification runtime reads if discovered during spec work - - UI affordances that imply tenant-stored credentials are active -- **Remove only after cutover**: - - `Tenant.app_client_id`, `Tenant.app_client_secret`, `Tenant.app_certificate_thumbprint` - - migration-classification reads and related transitional guardrails -- **Migration / backfill**: Requires explicit completion criteria for the tenant-to-provider credential migration. No blind backfill; removal should follow confirmed migration review state for all affected tenants. 
-- **UI / resource / policy / test impact**: - - UI/resources: remove any residual legacy credential messaging once the cutover is complete - - Policy: none directly - - Tests: `TenantFactory` must stop creating legacy credentials by default; transition-only tests should use explicit legacy states -- **Scope boundaries**: - - In scope: tenant legacy credential fields, classification-only transition reads, factory/test cleanup tied to the cutover - - Out of scope: provider connection status vocabulary, unrelated tenant stale fields, onboarding UX redesign -- **Dependencies**: Hard dependency on the provider credential migration/review lifecycle being complete enough to identify all remaining transitional tenants safely. -- **Risks**: Higher rollout risk than simple cleanup because this touches credential-path architecture and transitional data needed for migration review. -- **Why it should be its own spec**: This has distinct exit criteria, migration gating, and rollback concerns. It is not the same problem as stale operator-facing badges or provider status vocabulary cleanup. -- **Priority**: high - -### Entra Group Authorization Capability Alignment -- **Type**: hardening -- **Source**: legacy / orphaned truth audit 2026-03-16 -- **Classification**: bounded cutover -- **Problem**: `EntraGroupPolicy` currently grants read access based on tenant access alone and bypasses the capability layer used by the rest of the repo's authorization model. -- **Why it matters**: This is a security- and RBAC-relevant inconsistency. Even if currently read-only, it weakens the capability-first architecture and increases the chance of future authorization drift. -- **Target model**: `EntraGroupPolicy` and the Entra group read-access surface -- **Canonical source of truth**: capability-based authorization decisions layered on top of tenant-access checks -- **Must stop being read**: implicit "tenant access alone is sufficient" as the effective rule for Entra group read access. 
-- **Can be removed immediately**: - - the direct bypass if the correct capability already exists and seeded roles already carry it -- **Remove only after cutover**: - - any compatibility allowances needed while role-capability mappings are updated and verified -- **Migration / backfill**: Usually no schema migration. May require role-capability seeding updates or RBAC backfill so intended operators retain access. -- **UI / resource / policy / test impact**: - - UI/resources: some users may lose access if role mapping is incomplete; tenant-facing Entra group screens need regression verification - - Policy: this spec is the policy change - - Tests: add authorization matrix coverage proving tenant access alone no longer grants read access -- **Scope boundaries**: - - In scope: read authorization semantics for Entra group surfaces and the required capability mapping - - Out of scope: new CRUD semantics, role mapping product UI, unrelated policy tidy-up -- **Dependencies**: Choose the correct capability and verify seeded/default roles include it where intended. -- **Risks**: Medium rollout risk because authorization mistakes become access regressions for legitimate operators. -- **Why it should be its own spec**: This is a targeted RBAC hardening change with its own stakeholders, rollout checks, and regression matrix. It should not be hidden inside data or UI cleanup work. -- **Priority**: high - -### Support Intake with Context (MVP) -- **Type**: feature -- **Source**: Product design, operator feedback -- **Problem**: Nutzer haben keinen strukturierten Weg, Probleme direkt aus dem Produkt zu melden. Bei technischen Fehlern fehlen Run-/Tenant-/Provider-Details; bei Access-/UX-Problemen fehlen Route-/RBAC-Kontext. Folge: ineffiziente Support-Schleifen und Rückfragen. Ein vollwertiges Ticketsystem ist falsch priorisiert. -- **Why it matters**: Reduziert Support-Reibung, erhöht Erfassungsqualität, steigert wahrgenommene Produktreife. 
Schafft typed intake layer für spätere Webhook-/PSA-/Ticketing-Erweiterungen, ohne jetzt ein Helpdesk einzuführen. -- **Proposed direction**: Neues `SupportRequest`-Modell (kein Ticket/Case) mit `source_type` (operation_run, provider_connection, access_denied, generic) und `issue_kind` (technical_problem, access_problem, ux_feedback, other). Drei Entry Paths: (1) Context-bound aus failed OperationRun, (2) Access-Denied/403-Kontext, (3) generischer Feedback-Einstieg (User-Menü). Automatischer Context-Snapshot per `SupportRequestContextBuilder` je source_type. Persistierung vor Delivery. E-Mail-Delivery an konfigurierte Support-Adresse. Fingerprint-basierter Spam-Guard. Audit-Events. RBAC via `support.request.create` Capability. Scope-Isolation. Secret-Redaction in context_jsonb. -- **Dependencies**: OperationRun-Domain stabil, RBAC/Capability-System (066+), Workspace-/Tenant-Scoping -- **Priority**: medium - -### Policy Setting Explorer — Reverse Lookup for Tenant Configuration -- **Type**: feature -- **Source**: recurring enterprise pain point, governance/troubleshooting gap -- **Problem**: In medium-to-large Intune tenants with dozens of policy types and hundreds of policies, admins routinely face the question: "Where is this setting actually defined?" Examples: "Which policy configures BitLocker?", "Where is `EnableTPM` set to `true`?", "Why does this tenant enforce a specific firewall rule, and which policy is the source?" Today, answering this requires manually opening policies one by one across device configuration, compliance, endpoint security, admin templates, settings catalog, and more. TenantPilot inventories and versions these policies but provides no reverse-lookup surface that maps a setting name, key, or value back to the policies that explicitly define it. -- **Why it matters**: This is a governance, troubleshooting, and explainability gap — not a search convenience. 
Enterprise admins, auditors, and reviewers need authoritative answers to "where is X defined?" for incident triage, change review, compliance evidence, and duplicate detection. Without it, TenantPilot has deep policy data but cannot surface it from the operator's natural entry point (the setting, not the policy). This capability directly increases the product's value proposition for security reviews, audit preparation, and day-to-day configuration governance. -- **V1 scope**: - - Tenant-scoped only. User queries settings within the active tenant's indexed policies. No cross-tenant or portfolio-wide search in V1. - - Dedicated working surface: a tenant-level "Policy Explorer" or "Setting Search" page with query input, filters, and structured result inspection. Not a global header search widget. - - Query modes: search by setting name/label, by raw key/path, or by value-oriented query (e.g. `EnableTPM = true`). - - Results display: policy name, policy type/family, setting label/path/key, configured value, version/snapshot context, deep link to the policy detail or version inspector. - - Supported policy families: start with a curated subset of high-value indexed families (settings catalog, device configuration, compliance, endpoint security baselines, admin templates). Not every Microsoft policy type from day one. - - Search projection model: a lightweight extracted-setting-facts table per supported policy family. Preserves policy-family-local structure, retains raw path/key, stores search-friendly displayable rows. PostgreSQL-first (GIN indexes on JSONB or dedicated columns as appropriate). Not a universal canonical key normalization engine — a pragmatic, product-oriented search projection. - - Trust boundary: results reflect settings explicitly present in supported indexed policies. UI must clearly communicate this scope. 
No-result does NOT imply the setting is absent from effective tenant configuration — only that it was not found in currently supported indexed policy families. This distinction must be visible in the UX (scope indicator, help text, empty-state copy). - - "Defined in" only: V1 answers "where is this setting explicitly defined?" — it does NOT answer "is this setting effectively applied to devices/users?" The difference between explicit definition and effective state must be preserved and communicated. -- **Explicit non-goals (V1)**: - - No universal cross-provider canonical setting ontology. Avoid a large fragile semantic mapping project. Setting identity stays policy-family-local in V1. - - No effective-state guarantees. V1 does not resolve assignment targeting, conflict resolution, or platform-side precedence. - - No portfolio / cross-tenant / workspace-wide scope. - - No dependency on external search infrastructure (Elasticsearch, Meilisearch, etc.) if PostgreSQL-first is sufficient. - - No naive raw JSON full-text search as the product surface. The projection model must provide structured, rankable, explainable results — not grep output. - - No requirement to support every Microsoft policy family from day one. -- **Architectural direction**: - - Search projection layer: when policies are synced/versioned, extract setting facts into a dedicated search-friendly projection (e.g. `policy_setting_facts` table or JSONB-indexed structure). Each row captures: `tenant_id`, `policy_id`, `policy_version_id` (nullable), `policy_type`/`family`, `setting_key`/`path`, `setting_label` (display name where available), `configured_value`, `raw_payload_path`. Extraction logic is per-family, not a universal parser. - - PostgreSQL-first: use GIN indexes on JSONB or trigram indexes on text columns for efficient search. Evaluate `pg_trgm` for fuzzy matching. - - Extraction is append/rebuild on sync — not real-time transformation. 
Can be a post-sync projection step integrated into the existing inventory sync pipeline. - - Provider boundary stays explicit: the projection is populated by each policy family's extraction logic. No abstraction that pretends all policy families share the same schema. - - RBAC: tenant-scoped, gated by a capability (e.g. `policy.settings.search`). Results respect existing policy-level visibility rules. - - Audit: queries are loggable but do not require per-query audit entries in V1. The feature is read-only. -- **UX direction**: - - Primary surface: dedicated page under the tenant context (e.g. tenant → Policy Explorer or tenant → Setting Search). Full working surface with query input, optional filters (policy type, policy family, value match mode), and a results table. - - Result rows: policy name (linked), policy type badge, setting path/key, configured value, version indicator. Expandable detail or click-through to policy inspector. - - Empty state: clearly explains scope limitations ("No matching settings found in supported indexed policies. This does not mean the setting is absent from your tenant's effective configuration."). - - Scope indicator: persistent badge or label showing the search scope (e.g. "Searching N supported policy families in [tenant name]"). - - Future quick-access entry point (e.g. command palette, header search shortcut) is a natural extension but not V1 scope. 
-- **Future expansion space** (not V1): - - Semantic aliases / display-name normalization across families - - Duplicate / conflict detection hints ("this setting is defined in 3 policies") - - Assignment-aware enrichment ("this policy targets group X") - - Setting history / change timeline ("this value changed from false to true in version 4") - - Baseline / drift linkage ("this setting deviates from the CIS baseline") - - Workspace-wide / portfolio search across tenants - - Quick-access command palette entry point -- **Risks / notes**: - - Extraction logic per policy family is the main incremental effort. Each new family supported requires a family-specific extractor. Start with the highest-value families and expand. - - Settings catalog policies have structured setting definitions that are relatively easy to extract. OMA-URI / admin template policies are less structured. The V1 family selection should favor extractability. - - The "no-result ≠ not configured" trust boundary is critical for enterprise credibility. Overcommitting search completeness erodes trust. - - Projection freshness depends on sync frequency. Stale projections must be visually flagged if the tenant hasn't been synced recently. -- **Dependencies**: Inventory sync stable, policy versioning (snapshots), tenant context model, RBAC capability system (066+) -- **Priority**: high - -### Help Center / Documentation Surface -- **Type**: feature -- **Source**: product planning, operator support friction analysis -- **Problem**: TenantPilot lacks a central, searchable, in-product knowledge surface for operators. Product documentation, glossary content, workflow walkthroughs, and conceptual explanations are fragmented across specs, internal docs, and implicit operator expectations. Operators who need to understand a concept, look up a term, or read a walkthrough must leave the admin experience entirely. 
-- **Why it matters**: A canonical in-product documentation surface reduces support friction, enables self-service resolution, and makes advanced governance features more understandable and adoptable. This is the product's central knowledge layer — distinct from contextual inline help (separate candidate), page-level instructional panels (separate candidate), and onboarding next-step guidance (separate candidate). It is also distinct from audit/evidence/reporting artifacts. This is a product maturity and support-efficiency capability, not a content management system. -- **Proposed direction**: - - Markdown-based documentation stored in-repo, rendered inside the Filament admin panel as a dedicated help center surface - - Global documentation search across all help articles - - Structured article hierarchy: conceptual docs, walkthroughs, glossary, role/capability explanations, domain-specific governance guidance - - Clear separation between product help/knowledge and audit/report/evidence exports - - Workspace/tenant context awareness only where helpful for navigation, not to turn docs into tenant data - - Native first-party rendering approach — no external CMS dependency, no third-party documentation platform required -- **Explicit non-goals**: Not a customer support ticket system. Not an audit pack feature. Not a generic CMS. Not a replacement for external knowledge bases if those exist separately. Not the delivery mechanism for contextual inline help or page-level guidance panels — those are separate capabilities that may link into this surface but are independently spec-able. Not a video-first help strategy. Not a forced guided tour infrastructure. v1 does not include a `/system` help-management UI. Content authoring, editing, and lifecycle remain code/repo-driven. Any future `/system` surface is limited to governance/observability concerns, not CMS behavior. 
-- **Dependencies**: Filament panel infrastructure, existing navigation/information architecture -- **Related candidates**: Contextual Help and Inline Guidance Framework, Page-Level Guidance Patterns, Onboarding Guidance and Next-Step Surfaces, Documentation Generation Pipeline and Editorial Workflow -- **Priority**: medium - -### Contextual Help and Inline Guidance Framework -- **Type**: feature -- **Source**: product planning, operator support friction analysis, governance UX complexity -- **Problem**: As TenantPilot's governance surface area grows — findings workflows, RBAC capabilities, provider connection lifecycle, restore risk assessment, compliance baselines — operators encounter complex concepts and multi-step workflows where the purpose, consequences, or next steps are not self-evident from the UI alone. There is no standard mechanism for surfacing short, targeted, inline explanations at the point of need. Operators must either already know the domain or leave the product to find help. -- **Why it matters**: Contextual help reduces cognitive load on high-complexity governance surfaces, decreases support escalations, and makes advanced features accessible to operators who are not domain experts. This is distinct from the central documentation surface (which is a browsable knowledge base) and from page-level instructional panels (which provide section-level orientation). Contextual help is the layer that answers "what does this mean?" and "what happens if I do this?" at the point of interaction. -- **Proposed direction**: - - Standardized inline help entry points (e.g. 
`?` icon actions, help affordances on action buttons, info popovers on complex form fields) integrated with Filament's action and component patterns - - Short-form help content rendered in slideover or modal surfaces — not tooltips, not a tooltip explosion - - Help content is text-first: concise explanation of what the feature does, why it matters, and what happens next - - Content sourced from markdown or structured help definitions stored in-repo, maintainable alongside code - - Optional deep-link from contextual help into the central Help Center for extended reading - - Clean integration with Filament v5 actions, render hooks, and CSS hook classes — no internal view publishing - - First-party, native approach — no third-party guided-tour or walkthrough library dependency -- **Explicit non-goals**: Not a tooltip explosion across every field. Not a video-first help strategy. Not a forced guided tour or product walkthrough system. Not a replacement for the central Help Center documentation surface. Not intercom-style chat or conversational help. Not an onboarding checklist (separate candidate). Not a generic CMS feature. v1 does not include a `/system` help-management UI. Content authoring, editing, and lifecycle remain code/repo-driven. Any future `/system` surface is limited to governance/observability concerns, not CMS behavior. 
-- **Dependencies**: Filament panel infrastructure, action system (Filament v5 actions), Help Center / Documentation Surface (for deep-link target, but functionally independent) -- **Related candidates**: Help Center / Documentation Surface, Page-Level Guidance Patterns, Admin Visual Language Canon -- **Priority**: medium - -### Page-Level Guidance Patterns -- **Type**: feature -- **Source**: product planning, governance UX complexity analysis -- **Problem**: Several TenantPilot admin pages serve governance-heavy, interpretation-heavy, or consequence-heavy functions — findings review, RBAC capability management, provider connection lifecycle, restore dry-run results, compliance baseline comparison, drift analysis. These pages present data and actions that require domain context to interpret correctly, but operators arrive without orientation. There is no standard pattern for providing page-level introductory guidance, "learn more" affordances, or instructional panels that help operators understand what a page shows, why it matters, and how to use it effectively. -- **Why it matters**: Page-level guidance reduces operator confusion on the product's most complex and highest-stakes surfaces. It bridges the gap between contextual inline help (which explains individual concepts) and the central documentation surface (which is a browsable reference). It provides section-level orientation without requiring operators to leave the page or consult external resources. For governance and compliance surfaces, clear page-level framing increases operator confidence and reduces misinterpretation of presented data. 
-- **Proposed direction**: - - Standardized page-level instructional panel pattern: optional intro/help section at the top of qualifying pages, rendered via Filament render hooks or section components - - "Learn more" affordances linking from the page context into the central Help Center for extended documentation - - Pattern supports dismissibility and operator preference (e.g. collapsible, "don't show again" per-page or per-user) - - Content stored as markdown or structured definitions in-repo, not hardcoded in Blade templates - - Visual pattern consistent with the Admin Visual Language Canon direction — does not introduce a new visual system - - Qualifying page criteria: governance-heavy pages, consequence-heavy action pages, interpretation-heavy data review pages. Not every list page or simple CRUD form. -- **Explicit non-goals**: Not a banner/notification system. Not contextual inline help for individual fields or actions (separate candidate). Not onboarding step-by-step flow (separate candidate). Not a forced walkthrough or modal tutorial. Not applied universally to every admin page — only where governance complexity warrants it. v1 does not include a `/system` help-management UI. Content definitions remain code/repo-driven. Any future `/system` surface is limited to governance/observability concerns, not CMS behavior. -- **Dependencies**: Filament panel infrastructure, render hooks, Admin Visual Language Canon (for visual consistency), Help Center / Documentation Surface (for "learn more" link targets) -- **Related candidates**: Help Center / Documentation Surface, Contextual Help and Inline Guidance Framework, Admin Visual Language Canon -- **Priority**: low - -### Onboarding Guidance and Next-Step Surfaces -- **Type**: feature -- **Source**: product planning, operator time-to-value analysis -- **Problem**: New operators and new tenants arrive in TenantPilot without structured product-level guidance about what to do next. 
The managed-tenant onboarding wizard handles the technical setup flow (consent, connection, initial sync), but after onboarding completes — or for operators exploring governance features for the first time — there is no product-level guidance surface that helps operators understand what is available, what to configure next, or how to reach productive use of governance workflows. Empty states, action labels, and page descriptions do not consistently communicate next steps or paths to value. -- **Why it matters**: Reduces time-to-value for new operators without introducing a forced product tour. Improves discoverability of governance features (findings, baselines, RBAC, drift, reporting) by surfacing actionable next steps rather than relying on operators to discover capabilities through sidebar exploration. Improves the quality of empty states and first-use experiences across the product. This is a product adoption and operator efficiency capability, not a marketing or onboarding conversion feature. -- **Proposed direction**: - - Smart empty states with actionable next-step guidance on high-value surfaces (e.g. findings list with no findings yet, backup history with no backups, dashboard with no synced tenants) - - Optional post-onboarding next-step surface or checklist-style orientation (not a forced wizard — an opt-in guidance panel or dedicated page) - - Descriptive action labels and page descriptions that reduce confusion about what features do, especially on first encounter - - Progress-aware guidance: surfaces that adapt based on what the operator has already configured (e.g. "sync complete — next: review your compliance baseline" vs. "no sync yet — start here") - - Lightweight, maintainable: guidance content stored as structured definitions in-repo, not hardcoded across scattered Blade templates - - Native Filament integration via empty states, info sections, and render hooks -- **Explicit non-goals**: Not a forced guided tour or product walkthrough. 
Not a video-first onboarding strategy. Not a step-by-step wizard for every product feature. Not a tooltip explosion. Not a replacement for the managed-tenant onboarding wizard (which handles technical setup). Not a marketing/conversion funnel. Not a third-party onboarding library integration (e.g. no Appcues, no Intercom tours). Not the same as contextual inline help (separate candidate) or page-level instructional panels (separate candidate). v1 does not include a `/system` help-management UI. Guidance definitions remain code/repo-driven. Any future `/system` surface is limited to governance/observability concerns, not CMS behavior. -- **Dependencies**: Filament panel infrastructure, empty-state patterns, Admin Visual Language Canon (for visual consistency), managed-tenant onboarding wizard (Spec 001 and follow-ups — this candidate covers life after technical onboarding) -- **Related candidates**: Help Center / Documentation Surface, Contextual Help and Inline Guidance Framework, Page-Level Guidance Patterns -- **Priority**: medium - -### Documentation Generation Pipeline and Editorial Workflow -- **Type**: feature -- **Source**: product planning, documentation sustainability analysis -- **Problem**: Even with a markdown-based knowledge layer, documentation quality and coverage will degrade without a lightweight authoring pipeline. The product needs a structured way to generate document skeletons/templates, support repeatable documentation workflows, and optionally use AI-assisted drafting without treating generated text as authoritative by default. -- **Why it matters**: Without a documentation pipeline, docs become inconsistent, coverage drifts as features grow, teams fall back to ad hoc writing, the help layer becomes expensive to maintain, and future AI-assisted documentation lacks guardrails. -- **Proposed direction**: - - Document skeleton or template generation (e.g. 
command/tooling such as `docs:generate`) - - Structured frontmatter / metadata expectations where useful - - Editorial states such as draft / needs review / published - - Explicit "AI draft needs review" semantics to distinguish generated drafts from canonical reviewed documentation - - Repo-native markdown workflow as the source of truth -- **Explicit non-goals**: Not a replacement for careful documentation authorship. Not a public marketing content engine. Not a promise of autonomous documentation generation. This is an internal/product documentation pipeline and editorial guardrail layer. -- **Dependencies**: Help Center / Documentation Surface (this candidate builds on the rendering/delivery surface) -- **Priority**: low - -### Drift Notifications Settings Surface -- **Type**: feature -- **Source**: product planning, governance alerting direction -- **Problem**: TenantPilot has governance/alerting direction, but operators still lack a clear product surface to configure drift-related notification behavior in a predictable way. Without a dedicated settings experience, alert routing feels infrastructural rather than operator-manageable. -- **Why it matters**: Operators need tenant/workspace-level control over how governance signals reach them — email, Microsoft Teams, severity-aware routing, notification fatigue reduction, and confidence that important drift events will not be silently missed. Especially relevant for MSP-style operations and ongoing tenant reviews. 
-- **Proposed direction**: - - Dedicated settings-level drift notification management surface - - Delivery targets such as email and Teams - - Routing preferences by severity / event type where appropriately bounded - - Sensible defaults with cooldown / dedup / quiet-hours framing if those concepts already exist in the broader alerting direction - - Clear alignment with broader Alerts v1 direction, focused on the operator settings UX and configuration model -- **Explicit non-goals**: Not a reinvention of the whole alerts engine. Not a generic notification center for every product event. This is the operator-facing configuration surface for drift/governance notifications. -- **Dependencies**: Alerting v1 direction, drift detection foundation (Spec 044), tenant/workspace context model -- **Priority**: medium - -### Drift Change Governance — Approval Workflows, Freeze Windows, Tamper Detection -- **Type**: feature -- **Source**: roadmap-to-spec coverage audit 2026-03-18, 0800-future-features brainstorming (pillar #2 — Drift & Change Governance / "Zahlhebel #1") -- **Problem**: TenantPilot's drift/baseline engine (Specs 116–119) detects configuration changes and surfaces them as findings. But detection alone does not govern _when_ and _how_ changes are allowed. There is no approval workflow for high-risk configuration changes, no protected time windows during which changes are blocked or escalated, and no tamper detection that distinguishes authorized changes from suspicious or unauthorized modifications. The drift engine answers "what changed?"; this capability answers "was the change allowed, and should it have happened now?" -- **Why it matters**: Change governance is the #2 revenue lever identified in product brainstorming. Enterprise customers and MSPs managing production tenants need controlled change processes — especially for high-risk policy families (endpoint security, compliance, conditional access). 
Without approval workflows and freeze windows, every detected drift is reactive: something already happened, the only question is whether to accept or revert it. Adding a governance layer turns drift from a detection feature into a change-control platform — the core value proposition for regulated environments, MSP SLA enforcement, and enterprise change management. -- **Proposed direction**: - - **Change approval workflows**: define which policy families, change types, or severity levels require explicit approval before being accepted in the governance record. Approval can be gated by capability (e.g. `drift.change.approve`), with structured approval/rejection, justification, actor, and timestamp. Approval workflows are governance-layer constructs — they do not block changes in Intune (TenantPilot does not control the mutation path in the source tenant), but they govern how TenantPilot treats detected changes: approved (accepted into baseline), rejected (escalated as governance violation), pending (awaiting review). - - **Protected / frozen windows**: workspace- or tenant-level configuration of time periods during which detected changes are automatically escalated or flagged (e.g. "no high-risk changes accepted during weekend maintenance windows" or "freeze all baseline-covered policy families during audit preparation"). Freeze windows do not prevent changes in Intune — they elevate the governance response when changes are detected during protected periods. - - **Tamper / suspicious change detection**: heuristic or rule-based identification of changes that look unauthorized or anomalous — changes outside business hours, changes by unexpected actors, bulk changes across multiple policy families, changes to policies that haven't been modified in extended periods. These produce elevated findings or alerts, not automated blocks. - - **Integration with existing drift/findings pipeline**: change governance operates on top of the drift detection pipeline. 
It consumes drift findings and applies governance rules (approval requirements, freeze window evaluation, tamper heuristics) to classify and route them. It does not replace the detection engine. -- **Explicit non-goals**: Not a rewrite or replacement of the drift/baseline engine (Specs 116–119 handle detection; this handles governance response). Not a DevOps CI/CD pipeline or deployment system — TenantPilot does not deploy to Intune tenants. Not a mutation-path control mechanism — TenantPilot cannot prevent changes in the source tenant; it can only govern how detected changes are classified and acted upon. Not a generic security hardening bucket. Not a real-time blocking proxy between operators and Intune. -- **Boundary with drift/baseline engine (Specs 116–119)**: Drift engine = detect changes, capture content, produce findings. Change governance = classify detected changes against approval/freeze/tamper rules, route them through governance workflows. The engine feeds this layer; this layer does not modify detection behavior. -- **Boundary with Drift Notifications Settings Surface**: Drift notifications = operator-facing configuration of alert delivery for drift events. Change governance = approval workflows, freeze windows, tamper classification. Notifications deliver signals; governance adds workflow, classification, and control semantics. -- **Boundary with Spec 154 (Finding Risk Acceptance Lifecycle)**: Risk acceptance = post-hoc acknowledgment that a known finding is intentionally accepted. Change governance = pre/peri-change classification of whether a detected change was authorized and occurred under acceptable conditions. Different lifecycle positions, complementary capabilities. 
-- **Dependencies**: Drift/baseline engine (Specs 116–119) fully shipped, findings workflow (Spec 111), alerting foundations (Specs 099/100), audit log foundation (Spec 134), RBAC/capability system (066+) -- **Priority**: medium (high strategic and revenue value, but depends on drift engine and findings workflow maturity) - -### User Invitations and Directory-based User Selection -- **Type**: feature -- **Source**: product planning, access-management UX analysis -- **Problem**: Workspace and tenant membership flows currently lack a polished enterprise-grade invitation and directory-assisted user selection experience. Operators should not need brittle manual steps to add the right person to the right workspace/tenant context. -- **Why it matters**: Improves onboarding speed, operator/admin efficiency, correctness of membership assignment, enterprise credibility of the access-management UX, and future scalability of workspace/tenant administration. -- **Proposed direction**: - - Directory-based user lookup / selection where supported - - Invitation flows initiated directly from membership management surfaces - - Invitation link / invitation lifecycle support - - Clear distinction between selecting an existing directory identity vs inviting a not-yet-active participant - - Alignment with existing RBAC / membership / workspace-first context model -- **Explicit non-goals**: Not a full identity-provider redesign. Not a replacement for the Entra auth architecture. Not a generic address-book feature. This is a bounded access-administration workflow improvement. -- **Dependencies**: RBAC/capability system (066+), workspace membership model, Entra identity integration -- **Priority**: medium - - - -> **Action Surface follow-up direction** — The action-surface contract foundation (Specs 082, 090) and the follow-up taxonomy/viewer specs (143–146) are all fully implemented. 
The remaining gaps are not architectural redesign — they are incomplete adoption, missing decision criteria, and scope boundaries that haven't expanded to cover all product surfaces. The correct shape is: one foundation amendment to codify the missing rules and extend contract scope (v1.1), two compliance rollout specs to enroll currently-exempted surface families, and one targeted correction to fix the clearest remaining anti-pattern on a high-signal surface. This avoids reinventing the architecture, avoids umbrella "consistency" specs, and produces bounded, independently shippable work. TenantResource lifecycle-conditional actions and PolicyResource More-menu ordering are addressed by the updated foundation rules, not by standalone specs. Widgets, choosers, and pickers remain deferred/exempt. - -### Action Surface Contract v1.1 — Decision Criteria, Ordering Rules, and System Scope Extension -- **Type**: foundation/spec amendment -- **Source**: row interaction / action surface architecture analysis 2026-03-16 -- **Problem**: The action-surface contract (Spec 082) establishes profiles, slots, affordances, validator tests, and guard tests — but does not codify three things: (1) formal decision criteria for when a surface should use ClickableRow vs ViewAction vs PrimaryLinkColumn as its inspect affordance; (2) ordering rules for actions inside the More menu (destructive-last, lifecycle position, stable grouping); (3) system-panel table surfaces are explicitly excluded from contract scope, meaning ~6 operational surfaces have no declaration and no CI coverage. The architecture is correct; it just cannot prevent inconsistent choices on new surfaces or catch drift on existing ones. -- **Why this is its own spec**: This is a foundation amendment — it changes the rules that all other surfaces must follow. Rollout specs (system panel enrollment, relation manager enrollment) depend on this spec's updated rules existing first. 
Merging rollout work into a foundation amendment blurs the boundary between "what the rules are" and "who must comply." -- **In scope**: - - Codify inspect-affordance decision tree (ClickableRow default, ViewAction exception criteria, PrimaryLinkColumn criteria) in `docs/ui/action-surface-contract.md` - - Define the "lone ViewAction" anti-pattern formally and add it to validator detection - - Codify More-menu action ordering rules (lifecycle actions, severity ordering, destructive-last) - - Extend contract scope so system-panel table surfaces are enrollable (not exempt by default) - - Add guidance that cross-panel surface taxonomy should converge where semantically equivalent - - Update `ActionSurfaceValidator` to enforce new criteria - - Update guard/contract tests to cover new rules -- **Non-goals**: - - Retrofitting all existing system-panel pages (separate rollout spec) - - Retrofitting all relation managers (separate rollout spec) - - One-off resource-level fixes (those are tasks within rollout or correction specs) - - TenantResource or PolicyResource redesign (addressed by applying the updated rules, not by dedicated specs) - - Chooser/picker/widget contracts (remain deferred/exempt) -- **Depends on**: Spec 082, Spec 090 (both fully complete — this extends their foundation) -- **Suggested order**: First. All other candidates in this cluster depend on the updated rules. -- **Risk**: Low. This adds rules and extends scope — it does not change existing compliant declarations. -- **Why this boundary is right**: Foundation rules must be codified before rollout enforcement. Mixing rule definition with compliance rollout makes it impossible to review the rules independently and creates circular dependencies. 
-- **Priority**: high - -### System Panel Action Surface Contract Enrollment -- **Type**: compliance rollout -- **Source**: row interaction / action surface architecture analysis 2026-03-16 -- **Problem**: System-panel table surfaces (Ops/Runs, Ops/Failures, Ops/Stuck, Directory/Tenants, Directory/Workspaces, Security/AccessLogs) use `recordUrl()` consistently but have no `ActionSurfaceDeclaration`, no CI coverage, and are exempt from the contract by default. They are the largest family of undeclared table surfaces in the product. -- **Why this is its own spec**: System-panel surfaces belong to a different panel with different operator audiences and potentially different profile requirements. Enrolling them is a distinct compliance effort from tenant-panel relation managers or targeted resource corrections. The scope is bounded and independently shippable. -- **In scope**: - - Declare `ActionSurfaceDeclaration` for each system-panel table surface (~6 pages) - - Map to existing profiles where semantically correct (e.g., `ListOnlyReadOnly` for access logs, `RunLog` for ops run tables) - - Introduce new system-specific profiles only if existing profiles truly do not fit - - Remove enrolled system-panel pages from `ActionSurfaceExemptions` baseline - - Add guard test coverage for enrolled system surfaces -- **Non-goals**: - - Tenant-panel resource declarations (already covered by Spec 090) - - Relation manager enrollment (separate candidate) - - Non-table system pages (dashboards, diagnostics, choosers) - - System-panel RBAC redesign - - Cross-workspace query authorization (tracked as "System Console Scope Hardening" candidate) -- **Depends on**: Action Surface Contract v1.1 (must extend scope to system panel first) -- **Suggested order**: Second, in parallel with "Run Log Inspect Affordance Alignment" after v1.1 is complete. -- **Risk**: Low. These surfaces already behave consistently; this work adds formal declarations and CI coverage. 
-- **Why this boundary is right**: System-panel enrollment is self-contained — it doesn't touch tenant-panel resources or relation managers. Completing it independently gives CI coverage over a currently-invisible surface family. -- **Priority**: medium - -### Relation Manager Action Surface Contract Enrollment -- **Type**: compliance rollout -- **Source**: row interaction / action surface architecture analysis 2026-03-16 -- **Problem**: Three relation managers (`BackupItemsRelationManager`, `TenantMembershipsRelationManager`, `WorkspaceMembershipsRelationManager`) are in the `ActionSurfaceExemptions` baseline with no declaration. They were exempted during initial rollout (Spec 090) because relation-manager-specific profile semantics were not yet settled. Three other relation managers already have declarations. The exemption should be reduced, not permanent. -- **Why this is its own spec**: Relation managers have different interaction expectations than standalone list resources (context is always nested under a parent record, pagination/empty-state semantics differ, attach/detach may replace create/delete in some cases). Enrollment requires relation-manager-specific review of profile fit, not just copying resource-level declarations. 
-- **In scope**: - - Declare `ActionSurfaceDeclaration` for each currently-exempted relation manager (3 components) - - Validate profile fit (`RelationManager` profile vs a more specific variant) - - Reduce `ActionSurfaceExemptions` baseline by removing enrolled relation managers - - Add guard test coverage -- **Non-goals**: - - Redesigning backup item management UX - - Redesigning membership management UX - - Parent resource changes (TenantResource, WorkspaceResource) - - Full restore/backup domain redesign - - Introducing new relation managers -- **Depends on**: Action Surface Contract v1.1 (for any updated profile guidance or relation-manager-specific ordering rules) -- **Suggested order**: Third, after both v1.1 and System Panel Enrollment are complete. Lowest urgency because these surfaces are low-traffic and already functionally correct. -- **Risk**: Low. These relation managers already work correctly. This adds formal compliance, not behavioral change. -- **Why this boundary is right**: Relation manager enrollment is a distinct surface family with its own profile semantics. Mixing it with system-panel enrollment or targeted resource corrections would create an unfocused rollout spec. -- **Priority**: low - -### Run Log Inspect Affordance Alignment -- **Type**: targeted surface correction -- **Source**: row interaction / action surface architecture analysis 2026-03-16 -- **Problem**: `OperationRunResource` declares the `RunLog` profile with `ViewAction` as its inspect affordance. In practice, it renders a lone `ViewAction` in the actions column — the "lone ViewAction" anti-pattern identified in `docs/ui/action-surface-contract.md`. The row-click-first direction means this surface should use `ClickableRow` drill-down to the canonical tenantless viewer (`OperationRunLinks::tenantlessView()`), not a standalone View button. 
This surface is also inherited by the `Monitoring/Operations` page (which delegates to `OperationRunResource::table()`), so the fix propagates to both surfaces. -- **Why this is its own spec**: This is the single highest-signal concrete violation of the action-surface contract direction. It is bounded to one resource declaration + one inherited page. It does not require rewriting the canonical viewer, redesigning the operations domain, or touching other monitoring surfaces. Keeping it separate from foundation amendments ensures it can ship quickly after v1.1 codifies the anti-pattern rule. -- **In scope**: - - Change `OperationRunResource` inspect affordance from `ViewAction` to `ClickableRow` - - Verify `recordUrl()` points to the canonical tenantless viewer - - Remove the lone `ViewAction` from the actions column - - Confirm the change propagates correctly to `Monitoring/Operations` (which delegates to `OperationRunResource::table()`) - - Update/add guard test assertion for the corrected declaration -- **Non-goals**: - - Rewriting the canonical operation viewer (Spec 144 already complete) - - Broad operations UX redesign - - All monitoring pages (Alerts, Stuck, Failures are separate surfaces with distinct interaction models) - - RestoreRunResource alignment (currently exempted — separate concern) - - Action hierarchy / More-menu changes on this surface (belong to a general rollout, not this correction) -- **Depends on**: Action Surface Contract v1.1 (for codified anti-pattern rule and ClickableRow-default guidance) -- **Suggested order**: Second, in parallel with "System Panel Enrollment" after v1.1 is complete. Quickest win and highest-signal correction. -- **Risk**: Low. Single resource, no behavioral regression, no data model change. -- **Why this boundary is right**: One resource, one anti-pattern, one fix.
Expanding scope to "all run-log surfaces" or "all operation views" would turn a quick correction into a rollout spec and delay the most visible improvement. -- **Priority**: medium - -### Selected-Record Monitoring Host Alignment -- **Type**: workflow compression -- **Source**: enterprise UX review 2026-04-19 — Finding Exceptions Queue and Audit Log selected-record monitoring surfaces -- **Problem**: Specs 193 and 198 correctly established the semantics for `queue_workbench` and `selected_record_monitoring`, but they intentionally stopped at action hierarchy and page-state transport. The remaining gap is the active review host shape. `FindingExceptionsQueue` and `AuditLog` both preserve selection via query parameter and `inspect_action`, yet the current host experience still sits awkwardly between a list page, an inline expanded detail block, and a modal-style inspect affordance. That is technically valid, but it does not read as an enterprise-grade workbench. Operators get shareable URLs and refresh-safe state, but not a clearly expressed review mode with one deliberate place for context, next step, and close/return behavior. -- **Why it matters**: Enterprise operators working through queues or history need one of two unmistakable behaviors: either remain in a stable workbench where list context and active record review coexist intentionally, or leave the list for a canonical detail route with explicit return continuity. The current halfway pattern preserves state better than a slide-over, but it still weakens scanability, makes the active review lane feel bolted on, and leaves too much room for future local variations across monitoring surfaces. 
-- **Proposed direction**: - - Define two allowed enterprise host models for `selected_record_monitoring` surfaces: - - **Split-pane workbench**: the list, filters, and queue context remain continuously visible while the selected record occupies a dedicated persistent review pane - - **Canonical detail route**: the list remains list-first, and inspect opens a standalone detail page with explicit back/return continuity and optional preserved filter state - - Allow **quick-peek overlays** only as optional preview affordances, never as the sole canonical inspect or deep-link contract - - Add host-selection criteria so surfaces choose deliberately between split-pane and canonical detail route instead of drifting into full-page inline "focused lane above the table" patterns - - Pilot the rule on `FindingExceptionsQueue` and `AuditLog`, keeping current query-param addressability while upgrading the actual review host ergonomics - - Codify close/back/new-tab/reload semantics and invalid-selection fallback per host model so URL durability and review ergonomics are aligned rather than accidental -- **Smallest enterprise-capable version**: Limit the first slice to the two already-real `selected_record_monitoring` surfaces in Monitoring: `FindingExceptionsQueue` and `AuditLog`. The spec should choose and implement one clear host model per surface, document the decision rule, and stop there. No generic pane framework, no broad monitoring IA rewrite, and no rollout to unrelated list/detail pages. -- **Explicit non-goals**: Not a full Monitoring redesign, not a new modal framework, not a replacement for Spec 198 page-state semantics, not a generic shared-detail engine, not a broad action-surface retrofit outside `selected_record_monitoring`, and not a rewrite of finding or audit domain truth. 
-- **Permanent complexity imported**: One small host-pattern contract for `selected_record_monitoring`, explicit decision criteria for split-pane vs canonical detail route, focused regression coverage for two surfaces, and a small amount of new vocabulary around host model choice. No new persisted truth, no new provider/runtime architecture, and no new generalized UI platform are justified. -- **Why now**: The product already has at least two real consumers of the same selected-record monitoring pattern, and one of them is visible enough that the UX gap is now obvious. Leaving the gap open means future monitoring surfaces will keep re-solving the same question locally, and the currently correct page-state work will continue to feel less enterprise than it should. -- **Why not local**: A one-off polish pass on `FindingExceptionsQueue` would not answer what `AuditLog` should do, nor would it define when a selected-record monitoring surface should stay list-first versus move to canonical detail. The missing artifact is not just layout polish; it is the host decision rule for a small but real surface family. -- **Approval class**: Workflow Compression -- **Red flags triggered**: One red flag: this introduces a cross-surface host-model rule. The scope must stay bounded to the already-real `selected_record_monitoring` family and must not grow into a general monitoring-shell framework. 
-- **Score**: Nutzen: 2 | Dringlichkeit: 1 | Scope: 2 | Komplexität: 1 | Produktnähe: 2 | Wiederverwendung: 2 | **Gesamt: 10/12** -- **Decision**: approve -- **Acceptance points**: - - Each `selected_record_monitoring` surface declares one deliberate host model instead of expressing active review as an ad hoc inline expansion - - Deep links, refresh, and invalid-selection fallback remain stable after the host upgrade - - Operators can keep queue/history context while reviewing a record, or return to it predictably when the chosen host model uses a dedicated detail route - - Close, back, related drilldowns, and "open in full detail" semantics become consistent enough that selected-record monitoring feels like a product pattern instead of a local layout choice -- **Dependencies**: Spec 193 (`monitoring-action-hierarchy`), Spec 198 (`monitoring-page-state`), and the existing Monitoring page-state guards already in the repo -- **Related specs / candidates**: Spec 197 (`shared-detail-contract`), Action Surface Contract v1.1, Admin Visual Language Canon, Record Page Header Discipline & Contextual Navigation (for return semantics only; not a direct dependency) -- **Priority**: medium - -### Admin Visual Language Canon — First-Party UI Convention Codification and Drift Prevention -- **Type**: foundation -- **Source**: admin UI consistency analysis 2026-03-17 -- **Problem**: TenantPilot has accumulated a strong set of first-party visual conventions across Filament resources, widgets, detail pages, badges, status indicators, action hierarchies, and operational surfaces. These conventions are emerging organically and are already broadly consistent — but they remain implicit. 
No canonical reference defines when to use native Filament patterns vs custom enterprise-detail compositions, which badge/status semantics apply to which domain states, how timestamps should render (`since()` vs absolute datetime vs contextual format), what the card/section/surface hierarchy rules are, which widget composition strategies are canonical, or where cross-panel visual divergence is intentional vs accidental. As the product's surface area grows — new policy families, new governance domains, new operational pages, new evidence/reporting surfaces — the risk is not current visual chaos but future drift caused by missing written selection criteria and decision rules. -- **Why it matters**: Without a codified visual language reference, each new surface is a local design decision made without canonical guidance. This produces slow, cumulative inconsistency that becomes expensive to correct retroactively and degrades enterprise UX credibility. The problem is amplified by multi-agent development: multiple contributors (human and AI) cannot converge on implicit conventions they haven't seen documented. The value is not aesthetic — it is architectural: a canonical reference prevents divergent local choices, reduces review friction, accelerates new surface development, and establishes a stable foundation for the product's long-term visual identity without introducing third-party theme dependencies. -- **Proposed direction**: - - Codify the existing first-party admin visual conventions as a canonical reference document (e.g. 
`docs/ui/admin-visual-language.md` or similar), covering: - - Badge/status semantics: color mapping rules, icon usage criteria, domain-specific badge extraction patterns, when to use Filament native badge vs custom status composition - - Timestamp rendering: decision rules for `since()` (relative) vs absolute datetime vs contextual format, with domain-specific overrides where justified - - Action hierarchy: primary action vs header actions vs row actions vs bulk actions presentation conventions (complementing the Action Surface Contract's interaction-level rules with visual-level guidance) - - Widget composition: selection criteria for stat cards, chart widgets, list widgets, and custom compositions; density and grouping rules - - Surface/card/section hierarchy: when to use native Filament sections vs custom detail cards vs grouped infoblocks; nesting and visual weight rules - - Enterprise-detail page composition: canonical structure for entity detail/view pages (header, metadata, status, content sections, related data) - - Cross-panel visual divergence: explicit rules for where admin-panel and system-panel styling may diverge and where they must converge - - Typography and spacing: canonical use of Filament's built-in text scales and spacing tokens; rules against ad hoc inline styles - - Establish guardrails against ad hoc local visual overrides (documented anti-patterns, PR review checklist items, or lightweight CI checks where practical) - - Explicitly state that native Filament v5 configuration and CSS hook classes remain the primary styling foundation; a thin first-party theme layer is only justified if native configuration proves insufficient for a documented, bounded set of requirements - - Explicitly reject third-party theme packages (e.g. 
Filament theme marketplace packages) as an architectural baseline unless separately justified by a dedicated evaluation spec with clear acceptance criteria - - Where existing conventions have already diverged, define the canonical choice and flag surfaces that need alignment (as future cleanup tasks, not as part of this spec's implementation scope) -- **In scope**: - - Inventory of existing visual conventions across tier-1 admin surfaces (resources, detail pages, dashboards, operational views) - - Canonical reference document with decision rules and examples - - Anti-pattern catalog (known visual drift patterns to avoid) - - Lightweight enforcement strategy (review checklist, optional CI, or validator approach) - - Explicit architectural position on theme dependencies -- **Out of scope**: - - Visual redesign of any existing surface (this is codification, not redesign) - - Aesthetic refresh or "make it look nicer" polish work - - Third-party theme evaluation, selection, or integration - - Broad Filament view publishing or deep customization layer - - Marketing/branding/identity work (this is internal admin UX, not external brand) - - Color palette redesign or new design-system creation - - Retrofitting all existing surfaces to strict compliance (alignment cleanup is tracked separately per surface) -- **Key architectural positions**: - - Native Filament v5 remains the primary visual foundation. The product's visual identity is expressed through intentional use of native Filament configuration, not through override layers. - - CSS hook classes are the canonical customization mechanism where native configuration is insufficient. No publishing of Filament internal views for styling purposes. - - The main gap is missing canonical reference and decision rules, not missing components or missing technology. - - The value proposition is preventing future UI drift as more surfaces are added, not correcting a current visual crisis. 
-- **Dependencies**: Action Surface Contract (Spec 082 / v1.1 candidate) for interaction-level conventions that this visual-level reference complements but does not duplicate. Operations Naming Harmonization candidate for operator-facing terminology alignment that is a distinct concern from visual conventions. -- **Related candidates**: Action Surface Contract v1.1, Operations Naming Harmonization, Help Center / Documentation Surface (the visual language reference could eventually link from contextual help) -- **Trigger / best time to do this**: Before the next wave of new governance domain surfaces (Entra Role Governance, Enterprise App Governance, SharePoint Sharing Governance, Evidence Domain) and before the Policy Setting Explorer UX, so those surfaces are built against documented canonical conventions rather than best-effort pattern matching. -- **Risks if ignored**: Slow visual drift across surfaces, increasing review friction for new surfaces, divergent local conventions that become expensive to reconcile, weakened enterprise UX credibility as surface count grows, and higher cost of eventual systematic alignment. -- **Priority**: medium - -### Surface Signal-to-Noise Optimization — Metadata Hierarchy and Information Density -- **Type**: hardening -- **Source**: UI/UX audit — consistency and noise reduction analysis -- **Problem**: Across TenantPilot's operator-facing list pages, detail views, and table surfaces, secondary metadata (timestamps, technical identifiers, raw provider keys, policy family labels, scope tag counts) often renders with the same visual prominence as primary content (policy name, status, outcome, tenant name). This creates a "wall of equal-weight information" where operators must mentally parse every row to find the signal that matters. 
Specific patterns: (1) date/time format inconsistency — some surfaces use `since()` (relative time), others use absolute datetime, with no clear rule for when each is appropriate; (2) technical identifiers (Graph API entity IDs, internal run IDs, provider-specific keys) surface in columns or info entries where they add no operator value; (3) badge/indicator density — some table rows have 3–4 badges (status, outcome, type, provider) where a simpler hierarchy would communicate the same information with less cognitive load; (4) column label and truncation inconsistency — overlong policy names, setting paths, or assignment descriptions push column layouts into horizontal scroll or wrap awkwardly without consistent truncation conventions; (5) metadata-to-action ratio — some detail/view pages dedicate more viewport space to metadata fields than to the actionable governance information the page exists to serve. -- **Why it matters**: Enterprise governance UX credibility depends on perceived information quality, not just data completeness. A surface that shows everything with equal visual weight communicates "we don't know what's important" to operators processing dozens of policies, hundreds of operation runs, or fleet-level tenant summaries daily. Noise reduction is not aesthetics — it is usability: faster scanning, fewer misreadings, lower cognitive fatigue, and higher confidence that the visible information is the information that matters. This is particularly important as TenantPilot adds more governance domains (Entra roles, enterprise apps, compliance baselines, evidence surfaces) — each new domain adds columns, badges, and metadata fields, and without a noise-reduction pass, information density will compound rather than degrade gracefully. 
-- **Proposed direction**: - - **Date/time format decision rules**: codify when to use relative time (`since()`, "2 hours ago"), when to use absolute datetime, and when to use contextual format (date only for old entries, time only for today). Apply consistently across all list and detail surfaces. Relative time is appropriate for recency-sensitive contexts (last sync, last backup, operation age); absolute datetime is appropriate for audit/evidence contexts (created_at, snapshot timestamp). Both should render with appropriate visual weight (secondary text style for timestamps in list columns, not primary text weight). - - **Technical identifier suppression**: audit list and detail surfaces for raw IDs, Graph API entity IDs, and internal identifiers that serve no operator purpose. Suppress or move to expandable/copyable detail panels. Operators should see human-readable labels, not internal keys, unless they explicitly request technical detail. - - **Badge density reduction**: establish a per-row badge budget or hierarchy rule — primary status badge, optional outcome badge, context labels where needed, but not every possible dimension as a visible badge. Secondary indicators can be tooltips, expandable detail, or column values instead of badges. - - **Truncation and column conventions**: define consistent truncation rules for long text fields in table columns (policy names, setting paths, assignment descriptions). Prefer tooltip-on-truncation over horizontal scroll. Define maximum comfortable column count for primary list surfaces. - - **Metadata visual weight hierarchy**: establish a 3-tier visual weight system for metadata: primary (name, status — full weight), secondary (type, updated_at, tenant — reduced weight, secondary text color), tertiary (ID, raw key, scope tag count — hidden by default, available on demand). Apply across list and detail surfaces. 
-- **In scope**: metadata visual weight hierarchy rules, date/time format conventions, technical identifier suppression audit, badge density guidelines, truncation conventions, affected surface inventory, implementation of conventions on highest-traffic surfaces -- **Out of scope**: visual redesign or aesthetic refresh (this is hierarchy and noise reduction, not a design overhaul), Admin Visual Language Canon codification (which writes down the full visual convention set — this candidate solves one specific problem within that space), empty states (covered by Spec 122), action hierarchy (covered by Action Surface Contract), new component development, third-party theme integration -- **Boundary with Admin Visual Language Canon**: The Canon is a codification and drift-prevention effort — it documents all visual conventions to prevent future divergence. This candidate identifies and resolves a specific, measurable UX problem (excess metadata noise) that exists today. The solutions from this candidate should feed into the Canon's documented conventions, but this candidate produces concrete UX improvements, not just documentation. The Canon defines the rules; this candidate fixes the violations that currently hurt operator experience. -- **Boundary with Operator Presentation & Lifecycle Action Hardening**: That candidate owns shared rendering conventions for operation labels and status badges via centralized abstractions (OperationCatalog, BadgeRenderer). This candidate owns the broader question of how much metadata should be visible and at what visual weight — a problem that extends beyond operations to all governance surfaces. -- **Boundary with Spec 122 (Empty State Consistency)**: Empty states address surfaces with no data. This candidate addresses surfaces with data that presents too much secondary information with too little hierarchy. Complementary problems at opposite ends of the information-density spectrum. 
-- **Dependencies**: Admin Visual Language Canon (soft dependency — conventions established here should align with or feed into the Canon), existing Filament table/infolist infrastructure, BadgeRenderer/BadgeCatalog (for badge rendering conventions) -- **Related candidates**: Admin Visual Language Canon (complementary — visual convention codification), Operator Presentation & Lifecycle Action Hardening (complementary — rendering conventions), Spec 122 (complementary — empty state consistency) -- **Priority**: medium - -### Infrastructure & Platform Debt — CI, Static Analysis, Test Parity, Release Process -- **Type**: hardening -- **Source**: roadmap-to-spec coverage audit 2026-03-18, Infrastructure & Platform Debt table in `docs/product/roadmap.md` -- **Problem**: TenantPilot's product architecture and governance domain have matured significantly, but the surrounding delivery infrastructure has not kept pace. The roadmap acknowledges six open infrastructure debt items — no CI pipeline, no static analysis (PHPStan/Larastan), SQLite-for-tests vs. PostgreSQL-in-production schema drift risk, no `.env.example`, no formal release process, and Dokploy configuration external to the repo — but none of these has a planning home or a specifiable path to resolution. Individually, each is a small-to-medium task. Collectively, they represent a real delivery confidence and maintainability gap: regressions are caught manually, schema drift between test and runtime is a known risk, deploys are manual, there is no static analysis baseline, and developer onboarding has unnecessary friction. As surface area and contributor count grow, this gap becomes more expensive and more dangerous. -- **Why it matters**: Delivery infrastructure is the foundation that makes product-level correctness sustainable. Without CI, regressions that product architecture hardening work has eliminated can silently return. 
Without static analysis, type-safety gains from PHP 8.4 and strict Filament/Livewire patterns are unenforced. Test/runtime environment parity gaps mean that passing tests do not prove production correctness — a particularly dangerous problem for a product that governs enterprise tenant configurations. No formal release process means deploy confidence depends on human discipline, which degrades as velocity increases. These are not individually urgent, but they are collectively a prerequisite for scaling the product safely. A platform that governs enterprise Intune tenants should have its own delivery governance in order. -- **Proposed direction**: - - **CI pipeline**: establish a CI configuration (compatible with Gitea runners or external CI) that runs the test suite, Pint formatting checks, and (once added) static analysis on every push/PR. Start with a minimal pipeline that provides a pass/fail quality gate rather than a complex multi-stage build system. The goal is "every merge request is automatically validated" — not a full platform engineering initiative. - - **Static analysis baseline**: introduce PHPStan or Larastan at a pragmatic starting level (e.g. level 5–6), baselined against the current codebase. Focus on catching type errors, undefined method calls, and incorrect return types. Do not aim for level-max compliance as a first step — establish the tool, baseline the noise, and raise the level incrementally. - - **Test/runtime environment parity**: resolve the SQLite-for-tests vs. PostgreSQL-in-production gap. The existing `phpunit.pgsql.xml` suggests this work is partially started. The goal is that the default test suite runs against the same database engine used in production, so that schema-level and query-level differences do not create silent correctness gaps. This is particularly important for JSONB-dependent domains (policy snapshots, backup payloads, operation context). 
- - **Developer onboarding hygiene**: add `.env.example` with documented defaults. Small but persistent friction item that affects new contributor experience and reduces setup-related support burden. - - **Release process formalization**: define a lightweight, documented release process covering version tagging, migration verification, asset compilation (`filament:assets`, `npm run build`), and staging-to-production promotion checks. Not a full release engineering overhaul — a minimal repeatable process that replaces purely manual deploys with a documented, verifiable workflow. - - **Deployment configuration traceability**: evaluate bringing essential Dokploy/deploy configuration into the repo (or at minimum documenting the external configuration surface) so that environment drift between staging and production is detectable rather than discovered after deployment. -- **Explicit non-goals**: Not a full platform engineering or DevOps transformation initiative. Not a rewrite of deployment architecture or infrastructure provisioning. Not a generic "clean up the repo" bucket for unrelated code quality tasks. Not a replacement for product-level architecture hardening work (queued execution reauthorization, Livewire context locking, etc. are distinct product-safety concerns). Not a mandate to achieve maximum static analysis strictness immediately. Not a CI/CD feature-flag or canary-deployment system. Not an internal developer tooling platform with custom CLIs, dashboards, or abstraction layers. The scope is bounded to the six concrete items identified in the roadmap's Infrastructure & Platform Debt table, plus the minimal CI/release process that connects them into an actionable delivery improvement. -- **Boundary with product architecture hardening (Queued Execution Reauthorization, Livewire Context Locking, etc.)**: Product hardening candidates address trust, authorization, and isolation correctness in the running application. 
Infrastructure debt addresses delivery confidence — the tooling and process that ensures correctness is verified continuously and shipped reliably. These are complementary layers: product hardening fixes what the code does; infrastructure maturity ensures the fixes stay fixed. -- **Boundary with Operations Naming Harmonization**: Operations naming is about operator-facing terminology consistency across product surfaces. Infrastructure debt is about developer-facing delivery tooling and process. Different audiences, different concerns. -- **Boundary with Admin Visual Language Canon**: The visual language canon mentions lightweight CI enforcement as a possible delivery mechanism for visual convention compliance. If this infrastructure candidate delivers CI, the visual canon can use it — but the CI pipeline itself is infrastructure scope, not visual-canon scope. -- **Dependencies**: None — this is foundational work that other candidates can build on. CI pipeline benefits every future spec by providing automated regression coverage. Static analysis benefits every future hardening spec by enforcing type-safety contractually. -- **Priority**: medium (high cumulative value for delivery confidence and maintainability, but individual items are execution-level tasks rather than product-architecture blockers; should be prioritized pragmatically alongside product work rather than treated as urgent or deferred indefinitely) - ---- - -## Covered / Absorbed - -> Candidates that were previously qualified but are now substantially covered by existing specs, or were umbrella labels whose children have been promoted individually. - -### Governance Architecture Hardening Wave (umbrella — dissolved) -- **Original source**: architecture audit 2026-03-15 -- **Status**: Dissolved into individual follow-ups. 
Queued Execution Reauthorization is now Spec 149, Livewire Context Locking is now Spec 152, Tenant-Owned Query Canon remains the only child still tracked as its own open candidate, and Findings Workflow Enforcement is absorbed below. -- **Reference**: [../audits/tenantpilot-architecture-audit-constitution.md](../audits/tenantpilot-architecture-audit-constitution.md), [../audits/2026-03-15-audit-spec-candidates.md](../audits/2026-03-15-audit-spec-candidates.md) - -### Evidence Completeness Reclassification -- **Original source**: semantic clarity & operator-language audit 2026-03-21 -- **Status**: Do not promote as a separate candidate. This follow-up is absorbed into existing Spec 153 (Evidence Domain Foundation), which should carry the semantic split between coverage, freshness, valid-empty states, and operator-facing completeness language. - -### Operation Outcome & Notification Language -- **Original source**: semantic clarity & operator-language audit 2026-03-21 -- **Status**: Prefer extending existing Spec 055 (Ops-UX Constitution Rollout) rather than creating a standalone semantic candidate, as long as Spec 055 is still the active vehicle for operation outcome presentation, partial-success messaging, blocked-cause guidance, and terminal notification language. - -### Tenant Review & Publication Readiness Semantics -- **Original source**: semantic clarity & operator-language audit 2026-03-21 -- **Status**: Do not create a standalone candidate. This follow-up is absorbed into existing Spec 155 (Tenant Review Layer), which should own review completeness vocabulary, publication-readiness blocker wording, freshness semantics, and review-layer next-action language. - -### Findings Workflow Enforcement and Audit Backstop -- **Original source**: architecture audit 2026-03-15, candidate C -- **Status**: Absorbed by Spec 111 (findings workflow v2) and the narrower hardening follow-up Spec 151 (`findings-workflow-backstop`). 
The remaining enforcement gap is no longer candidate-sized unless a new regression or audit finding re-opens it. - -### Workspace Chooser v2 -- **Original source**: Spec 107 deferred backlog -- **Status**: Workspace chooser v1 is covered by Spec 107 + semantic fix in Spec 121. The v2 polish items (search, sort, favorites, pins, environment badges) remain tracked as an Inbox entry. Not qualified as a standalone spec candidate at current priority. - -### Dashboard Polish (Enterprise-grade) -- **Original source**: Product review 2026-03-08 -- **Status**: Core tenant dashboard is covered by Spec 058 (drift-first KPIs, needs attention, recent lists). Workspace-level landing is in progress via Spec 129. The remaining polish items (sparklines, compliance gauge, progressive disclosure) are tracked in Inbox. This was demoted because the candidate lacked a bounded spec scope — it read as a wish list rather than a specifiable problem. - -### Scope & Navigation Semantics (UI/UX Audit) -- **Original source**: UI/UX audit — scope and navigation semantics analysis -- **Status**: Comprehensively covered by existing spec constellation. Spec 077 (implemented) established workspace-first navigation, monitoring hub IA, header context bar, and tenant-context default filters. Spec 103 (draft) addresses IA scope-vs-filter-vs-targeting semantics on monitoring pages. Spec 121 (draft) fixes workspace switch routing semantics. Spec 106 (draft) corrects sidebar navigation context visibility. Spec 107 (draft) covers workspace chooser v1. Spec 129 (draft) addresses workspace home and admin landing pages. Spec 143 (draft) covers tenant lifecycle, operability, and context semantics. Spec 144 (draft) addresses canonical operation viewer context decoupling. Spec 131 (draft) covers cross-resource navigation and drill-down cohesion. The audit's navigation/scope concerns are distributed across these specs as precisely bounded, spec-level problems — no new umbrella candidate is needed. 
-- **Reference specs**: 077, 103, 106, 107, 121, 129, 131, 143, 144 - -### Detail Page Hierarchy & Progressive Disclosure (UI/UX Audit) -- **Original source**: UI/UX audit — detail page hierarchy and progressive disclosure analysis -- **Status**: Directly covered by Spec 133 (View Page Template Standard for Enterprise Detail Screens). Spec 133 defines the shared enterprise detail-page composition standard including summary-first header, main-and-supporting layout, dedicated related-context section, secondary technical detail separation, optional section support, and degraded-state resilience. Spec.md, plan.md, research.md, data-model.md, and tasks.md (all tasks complete) exist for 4 initial target pages (BaselineSnapshot, BackupSet, EntraGroup, OperationRun). If additional pages require alignment beyond the initial 4 targets, that is a Spec 133 follow-up scope extension, not a new candidate. -- **Reference specs**: 133 - -### Record Page Header Discipline & Contextual Navigation -- **Original source**: Constitution compliance audit 2026-04 -- **Status**: Promoted to Spec 192 (`record-header-discipline`). No longer tracked as an open candidate. -- **Reference specs**: 192 - -### Monitoring Surface Action Hierarchy & Workbench Semantics -- **Original source**: Constitution compliance audit 2026-04 -- **Status**: Promoted to Spec 193 (`monitoring-action-hierarchy`). No longer tracked as an open candidate. -- **Reference specs**: 193 - -### Governance Friction & Operator Vocabulary Hardening -- **Original source**: Constitution compliance audit 2026-04 -- **Status**: Promoted to Spec 194 (`governance-friction-hardening`). No longer tracked as an open candidate. -- **Reference specs**: 194 - -> **UI Discipline Trilogy — Sequencing Note** -> -> These three candidates formed a coordinated trilogy and are now represented by Specs 192, 193, and 194: -> -> 1. 
**Record Page Header Discipline & Contextual Navigation** — largest visible lever; establishes the binding header-action contract for all Record/Detail pages -> 2. **Monitoring Surface Action Hierarchy & Workbench Semantics** — separates Workbench/Queue surfaces from Record page rules; defines the action hierarchy for Monitoring surfaces -> 3. **Governance Friction & Operator Vocabulary Hardening** — targeted finishing step for friction, reason capture, and vocabulary consistency -> -> **Why this order:** Record pages are the most numerous and most directly visible gap. Monitoring surfaces need their own rules (not a Record page derivative). Governance friction is the smallest scope and benefits from the architectural cleanup of the first two specs. -> -> **Why three specs instead of one:** Each has different affected surfaces, different interaction models, and different implementation patterns. Merging them would create an unshippable monolith. Keeping them sequenced preserves independent delivery while converging on one coherent UI discipline. - ---- - -## Planned - -> Ready for spec creation. Waiting for slot in active work. - -*(empty — move items here when prioritized for next sprint)* - ---- - -## Template - -```md -### Title -- **Type**: feature | polish | hardening | bug | research -- **Source**: chat | audit | coding discovery | customer feedback | spec N follow-up -- **Problem**: -- **Why it matters**: -- **Proposed direction**: -- **Dependencies**: -- **Priority**: low | medium | high -``` - -### Customer Review Workspace v1 -- **Type**: product / customer-facing governance review -- **Source**: platform strategy review 2026-04-24 — internal tenant reviews, evidence, findings, exceptions, and review packs are present, but customer-facing consumption remains under-specified -- **Problem**: TenantPilot already has internal governance artifacts such as tenant reviews, evidence snapshots, findings, accepted risks, and review packs. 
However, the value remains too operator-internal unless customer members or review recipients can safely consume released review results without admin access. -- **Why it matters**: For MSPs and audit-sensitive customers, a read-only review surface turns internal governance work into a sellable recurring service. Customers need to see baseline status, open findings, accepted risks, review history, and downloadable evidence or review packs without being exposed to admin actions or raw diagnostic internals. -- **Proposed direction**: - - provide a read-only customer/member review workspace per tenant - - show baseline status and latest review summary - - show open findings, resolved findings, and accepted risks - - expose released review packs and evidence downloads - - apply customer-safe redaction rules - - hide all admin, remediation, provider, and destructive actions - - preserve drilldown only into released, customer-safe artifacts -- **Scope boundaries**: - - **In scope**: read-only review dashboard, released tenant-review visibility, evidence-pack links, accepted-risk visibility, customer-safe copy, RBAC visibility rules - - **Out of scope**: customer policy changes, remediation actions, full collaboration/chat, bidirectional ticket portal, public anonymous sharing, external auditor portal -- **Dependencies**: Tenant Review Layer, Evidence Domain Foundation, Finding Risk Acceptance Lifecycle, Review Pack generation, RBAC, workspace/customer membership model -- **Acceptance points**: - - Customer members can only see released review data for their tenant - - Customer members cannot trigger admin or remediation actions - - Review packs and evidence links respect redaction and visibility rules - - Findings and accepted risks are understandable without raw operator diagnostics - - MSP/internal operators retain full admin surfaces separately -- **Roadmap fit**: Release 2 — Tenant Reviews, Evidence Packs & Control Foundation; sharpens the current Customer Read-only 
View into a concrete review-consumption surface. -- **Priority**: high +- Findings Notifications & Escalation v1 -> Spec 224 (`findings-notifications-escalation`) +- Assignment Hygiene & Stale Work Detection -> Spec 225 (`assignment-hygiene`) +- Findings Notification Presentation Convergence -> Spec 230 (`findings-notification-convergence`) +- Finding Outcome Taxonomy & Verification Semantics -> Spec 231 (`finding-outcome-taxonomy`) +- Operation Run Link Contract Enforcement -> Spec 232 (`operation-run-link-contract`) +- Operation Run Active-State Visibility & Stale Escalation -> Spec 233 (`stale-run-visibility`) +- Provider Boundary Hardening -> Spec 237 (`provider-boundary-hardening`) + +## Superseded / Removed From Active Queue + +These items were previously open candidates or roadmap-fit ideas, but should no longer stay in the active queue. + +- `R2.0 Canonical Control Catalog Foundation`: remove from active candidates because the ledger shows a repo-real catalog, config, bindings, review integration, and test coverage. This is no longer an open candidate; it is an implemented foundation. +- `Self-Service Tenant Onboarding & Connection Readiness`: remove from active candidates because it is already Spec 240 and the repo already shows meaningful adoption. +- `Support Diagnostic Pack`: remove from active candidates because it is already Spec 241 and repo-adopted. +- `Operational Controls & Feature Flags`: remove from active candidates because it is already Spec 242 and repo-adopted. +- `Product Usage & Adoption Telemetry`: remove from active candidates because it is already Spec 243 and repo-adopted. +- `Product Knowledge & Contextual Help`: remove from active candidates because it is already Spec 244; any remaining work should be narrower follow-ups, not a repeated top-level candidate. +- `Customer Health Score`: remove from active candidates because it is already Spec 245 and repo-adopted. 
+- `In-App Support Request with Context`: remove from active candidates because it is already Spec 246 and repo-implemented. +- `Plans, Entitlements & Billing Readiness`: remove as a broad active candidate because Spec 247 already exists and the remaining open gap is narrower commercial lifecycle maturity. +- `Private AI Execution & Policy Foundation`: remove from the active queue because Spec 248 already exists. +- Company-ops items such as `Lead Capture & CRM Pipeline`, `AVV / DPA / TOM / Legal Pack`, `Vendor Questionnaire Answer Bank`, `Business Continuity / Founder Backup Plan`, and similar operating artifacts should remain outside the active product-spec queue unless a concrete product slice emerges. diff --git a/specs/043-cross-tenant-compare-and-promotion/checklists/requirements.md b/specs/043-cross-tenant-compare-and-promotion/checklists/requirements.md new file mode 100644 index 00000000..df9f95c8 --- /dev/null +++ b/specs/043-cross-tenant-compare-and-promotion/checklists/requirements.md @@ -0,0 +1,57 @@ +# Specification Quality Checklist: Cross-Tenant Compare Preview and Promotion Preflight + +**Purpose**: Validate full preparation-package completeness and implementation readiness before the feature moves into the implementation loop +**Created**: 2026-04-27 +**Feature**: [spec.md](../spec.md) + +## Content Quality + +- [x] Business value and operator outcome stay explicit +- [x] The slice is tightly bounded to compare preview, promotion preflight, and portfolio launch continuity +- [x] Runtime-governance sections are present for an implementation-ready package +- [x] All mandatory sections are completed in `spec.md`, `plan.md`, and `tasks.md` + +## Requirement Completeness + +- [x] No `[NEEDS CLARIFICATION]` markers remain +- [x] Requirements are testable and unambiguous +- [x] Acceptance scenarios are defined for compare preview, read-only promotion preflight, and launch/return continuity +- [x] Edge cases are identified, including explicit rejection of 
same-tenant compare, cross-workspace attempts, lost entitlement, ambiguous identity, and stale target evidence +- [x] Scope is clearly bounded away from actual promotion execution, queues, persisted drafts, mapping automation, customer-facing compare, and multi-provider work +- [x] Dependencies, assumptions, risks, and follow-up candidates are identified + +## Feature Readiness + +- [x] The first slice is small enough for a bounded implementation loop +- [x] Concrete repo surfaces are named for compare reuse, portfolio launch, audit reuse, and likely new compare support files +- [x] Foundational work stays preparation-only and does not imply execution scope or new persistence +- [x] The tasks are ordered, testable, and grouped by user story +- [x] No unresolved product question blocks implementation once artifact analysis passes + +## Governance Readiness + +- [x] Workspace and tenant isolation rules are explicit, including `404` for non-members and out-of-scope tenants +- [x] The capability matrix is explicit: page access = `WORKSPACE_BASELINES_VIEW`, preview data = `TENANT_VIEW` on both tenants, preflight execution = `WORKSPACE_BASELINES_MANAGE`, and manage-denied members see a disabled preflight action with permission guidance +- [x] Promotion remains preflight-only, with no write execution, queue, or `OperationRun` +- [x] Audit remains bounded to promotion-preflight entry points with no new compare/promotion persistence truth +- [x] Livewire v4 and Filament v5 compliance, unchanged provider registration in `bootstrap/providers.php`, no new global-search resource, and no new asset strategy are explicit in the package + +## Test Governance Review + +- [x] Lane fit stays in focused `Unit` plus `Feature` validation only +- [x] Fixture and helper growth stays local to compare preview, preflight classification, and launch-context coverage +- [x] No browser, heavy-governance, or queue family is introduced implicitly +- [x] Minimal validation commands are explicit in 
the plan +- [x] The active feature PR close-out entry remains `Guardrail` + +## Review Outcome + +- [x] Review outcome class: `keep` +- [x] Workflow outcome: `keep` +- [x] Next command readiness: implementation prep is ready once artifact analysis is clear + +## Notes + +- This checklist validates the preparation package only: `spec.md`, `plan.md`, `tasks.md`, and this checklist artifact. It does not claim that runtime code or a promotion workflow already exists. +- The active slice stops before any target mutation, any queued execution, any persisted draft or compare snapshot, and any broader mapping automation. +- No new globally searchable resource is introduced, no new asset registration is expected, and deployment behavior remains unchanged unless a later implementation explicitly adds assets. \ No newline at end of file diff --git a/specs/043-cross-tenant-compare-and-promotion/plan.md b/specs/043-cross-tenant-compare-and-promotion/plan.md index 0ff77aea..2a1c7982 100644 --- a/specs/043-cross-tenant-compare-and-promotion/plan.md +++ b/specs/043-cross-tenant-compare-and-promotion/plan.md @@ -1,24 +1,210 @@ -# Implementation Plan: Cross-tenant Compare and Promotion +# Implementation Plan: Cross-Tenant Compare Preview and Promotion Preflight -**Date**: 2026-01-07 -**Spec**: `specs/043-cross-tenant-compare-and-promotion/spec.md` +**Branch**: `043-cross-tenant-compare-and-promotion` | **Date**: 2026-04-27 | **Spec**: [spec.md](spec.md) +**Input**: Feature specification from [spec.md](spec.md) ## Summary -Introduce read-only cross-tenant comparison views; optionally add promotion with strong safety gates. +Refresh Spec 043 into a narrow, implementation-ready workflow that adds one canonical workspace-context compare page under `/admin`, one reusable compare preview builder, and one read-only promotion preflight action. The slice reuses existing baseline compare subject identity, portfolio-triage context continuity, capability resolvers, and workspace audit logging. 
It deliberately stops before actual promotion execution, queueing, or persisted promotion drafts. -## Dependencies +Filament remains on Livewire v4, no panel-provider registration changes are required (`bootstrap/providers.php` remains the authoritative provider registration location), no globally searchable compare resource is added, and no new panel asset bundle is expected. -- Inventory core + UI (Specs 040–041) -- Strong authorization model for multi-tenant access +## Technical Context -## Deliverables +**Language/Version**: PHP 8.4, Laravel 12 +**Primary Dependencies**: Filament v5, Livewire v4, Pest v4, existing baseline compare services, portfolio-triage seams, audit services, and capability resolvers +**Storage**: PostgreSQL via existing inventory, policy-version, and audit tables; no new compare or promotion table +**Testing**: Pest v4 `Unit` and `Feature` coverage only +**Validation Lanes**: fast-feedback, confidence +**Target Platform**: Laravel monolith in `apps/platform`, admin panel only (`/admin`) +**Project Type**: Web application (Laravel monolith with Filament pages) +**Performance Goals**: compare preview and promotion preflight stay synchronous and derived from existing persisted truth; no background execution path in v1 +**Constraints**: no target mutation, no `OperationRun`, no queue, no new persisted draft, no cross-workspace compare, no raw payload view by default +**Scale/Scope**: 2 tenant selectors, 1 canonical compare page, 1 preflight action, 1 launch/return continuity path, focused reuse of existing compare builders -- Tenant selection + comparison view -- Safe diff output and export -- (Optional) gated promotion workflow +## UI / Surface Guardrail Plan -## Risks +- **Guardrail scope**: one new canonical compare page plus one launch action from existing tenant-registry/portfolio context +- **Native vs custom classification summary**: native Filament page with shared compare/audit/navigation primitives +- **Shared-family relevance**: 
canonical admin pages, compare drill-down patterns, launch actions, audit-backed modal/action copy +- **State layers in scope**: page, query state +- **Audience modes in scope**: operator-MSP only +- **Decision/diagnostic/raw hierarchy plan**: decision-first compare summary, diagnostics second, raw evidence stays on existing tenant/baseline surfaces +- **Raw/support gating plan**: no new raw/support surface; keep payload proof behind existing pages +- **One-primary-action / duplicate-truth control**: the compare page keeps one dominant next action, `Generate promotion preflight`; drill-down and return actions stay secondary +- **Launch default**: the tenant-registry launch action prefills the launched tenant as `target tenant`; the operator chooses the source tenant explicitly +- **Handling modes by drift class or surface**: review-mandatory; any actual promotion execution or queue path is exception-required and out of scope +- **Repository-signal treatment**: review-mandatory +- **Special surface test profiles**: standard-native-filament +- **Required tests or manual smoke**: functional-core, state-contract +- **Exception path and spread control**: none +- **Active feature PR close-out entry**: Guardrail -- Data leakage across tenants -- Over-scoping promotion beyond safe MVP +## Shared Pattern & System Fit + +- **Cross-cutting feature marker**: yes +- **Systems touched**: + - `App\Filament\Pages\BaselineCompareLanding` + - `App\Filament\Pages\BaselineCompareMatrix` + - `App\Filament\Resources\TenantResource` + - `App\Filament\Resources\TenantResource\Pages\ListTenants` + - `App\Services\Baselines\BaselineCompareService` + - `App\Support\Baselines\BaselineCompareMatrixBuilder` + - `App\Support\Baselines\Compare\CompareStrategyRegistry` + - `App\Services\PortfolioTriage\TenantTriageReviewService` + - `App\Services\Audit\WorkspaceAuditLogger` + - `App\Support\Audit\AuditActionId` + - `App\Support\Navigation\CanonicalNavigationContext` +- **Shared abstractions 
reused**: capability resolvers, baseline compare strategy selection, canonical navigation context, existing audit recorder/logger path, and tenant-registry return-state conventions +- **New abstraction introduced? why?**: one narrow compare preview builder and one narrow promotion preflight service, because no existing service accepts source+target tenant scope and computes promotion readiness without execution +- **Why the existing abstraction was sufficient or insufficient**: tenant-level baseline compare is sufficient for subject identity, evidence posture, and drill-down semantics, but insufficient for dual-tenant scope and promotion-readiness reasoning +- **Bounded deviation / spread control**: no local compare sidecars on tenant pages; future callers must route through the canonical compare page and its services + +## OperationRun UX Impact + +- **Touches OperationRun start/completion/link UX?**: no +- **Central contract reused**: `N/A` +- **Delegated UX behaviors**: `N/A` +- **Surface-owned behavior kept local**: compare preview and preflight remain synchronous and read-only +- **Queued DB-notification policy**: `N/A` +- **Terminal notification path**: `N/A` +- **Exception path**: none + +## Provider Boundary & Portability Fit + +- **Shared provider/platform boundary touched?**: yes +- **Provider-owned seams**: Microsoft-first inventory subject identity and policy-type mapping remain inside existing baseline compare strategy selection and inventory data +- **Platform-core seams**: source/target tenant scope, compare preview contract, promotion preflight contract, operator-facing readiness vocabulary +- **Neutral platform terms / contracts preserved**: `source tenant`, `target tenant`, `governed subject`, `compare preview`, `promotion preflight`, and `blocked reason` +- **Retained provider-specific semantics and why**: existing policy-type and inventory semantics remain Microsoft-first because this repo still has one real provider domain; the compare page 
should not invent fake provider-neutral mapping logic above that seam +- **Bounded extraction or follow-up path**: follow-up-spec only if later provider domains become current-release truth + +## Constitution Check + +*GATE: Must pass before implementation preparation continues.* + +- Inventory-first: PASS. Compare preview and preflight derive from existing inventory and policy-version truth rather than a new compare snapshot. +- Read/write separation: PASS. This slice stays read-only; no write execution is introduced. +- Graph contract path: PASS. No new Graph endpoint or direct provider call is added. +- Deterministic capabilities: PASS. Reuse existing capability registries such as `Capabilities::TENANT_VIEW`, `Capabilities::WORKSPACE_BASELINES_VIEW`, `Capabilities::WORKSPACE_BASELINES_MANAGE`, and existing tenant sync/manage seams. +- Workspace and tenant isolation: PASS. The compare page must resolve workspace membership first and source/target entitlement second, with `404` for inaccessible tenants. +- RBAC-UX plane separation: PASS. This slice lives only in `/admin`; no `/system` or cross-plane route is introduced. +- Destructive action discipline: PASS by non-use. The slice contains no destructive action. +- Global search: PASS. No new Resource or Global Search result is introduced. +- OperationRun / Ops-UX: PASS by non-use. Actual promotion execution is deferred. +- Data minimization: PASS. The compare page summarizes derived readiness and blocks; raw payloads stay on existing tenant/baseline pages. +- Test governance: PASS. Proof stays in `Unit` plus `Feature`; no browser or heavy-governance expansion is planned. +- Proportionality / no premature abstraction: PASS. One preview builder and one preflight service are justified by the dual-tenant workflow; no new persistence or framework layer is added. +- Persisted truth: PASS. No new compare or promotion table. +- Behavioral state: PASS. Readiness and blocked reasons remain derived, not persisted. 
+- Shared pattern first / UI semantics / Filament-native UI: PASS. Existing compare, navigation, and audit paths are extended rather than replaced. +- Provider boundary: PASS. Microsoft-shaped subject matching stays in existing strategy seams; the page contract stays platform-neutral. +- Filament/Laravel panel safety: PASS. Filament v5 remains on Livewire v4, no panel-provider registration change is required (`bootstrap/providers.php` remains the authoritative provider registration location), and no new assets are planned. + +**Gate evaluation**: PASS. + +## Test Governance Check + +- **Test purpose / classification by changed surface**: `Feature` for the compare page, launch context, auth, and audit; `Unit` for compare preview matching and promotion-preflight classification +- **Affected validation lanes**: fast-feedback, confidence +- **Why this lane mix is the narrowest sufficient proof**: feature tests prove the Filament page and launch path while unit tests keep preview/preflight rules cheap and isolated. Browser or heavy-governance coverage is not required for the first read-only slice. 
+- **Narrowest proving command(s)**: + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Unit/Support/PortfolioCompare/CrossTenantComparePreviewBuilderTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Unit/Support/PortfolioCompare/CrossTenantPromotionPreflightTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/PortfolioCompare/CrossTenantComparePageTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/PortfolioCompare/CrossTenantCompareAuthorizationTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/PortfolioCompare/CrossTenantPromotionPreflightAuditTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/PortfolioCompare/CrossTenantCompareLaunchContextTest.php` +- **Fixture / helper / factory / seed / context cost risks**: reuse existing inventory, baseline compare, tenant registry, and portfolio-triage fixtures; avoid browser setup, queue fixtures, or seeded promotion history +- **Expensive defaults or shared helper growth introduced?**: no +- **Heavy-family additions, promotions, or visibility changes**: none +- **Surface-class relief / special coverage rule**: standard-native-filament +- **Closing validation and reviewer handoff**: rerun the six focused commands above and confirm the slice remains read-only, deny-as-not-found-safe, and grounded on existing compare + portfolio seams +- **Budget / baseline / trend follow-up**: none expected +- **Review-stop questions**: lane fit, hidden fixture growth, accidental write execution, accidental queue/runtime scope +- 
**Escalation path**: `document-in-feature` for contained lane drift, `reject-or-split` for any attempt to add execution scope +- **Active feature PR close-out entry**: Guardrail +- **Why no dedicated follow-up spec is needed**: test upkeep remains feature-local; only actual promotion execution or multi-provider compare would warrant a separate follow-up spec + +## Project Structure + +### Documentation (this feature) + +```text +specs/043-cross-tenant-compare-and-promotion/ +├── checklists/ +│ └── requirements.md +├── spec.md +├── plan.md +└── tasks.md +``` + +This refresh intentionally limits itself to the core preparation package plus `checklists/requirements.md`. No additional research/data-model/contracts artifact is required to make the narrowed slice implementation-ready. + +### Source Code (repository root) + +```text +apps/platform/ +├── app/ +│ ├── Filament/Pages/ +│ │ ├── BaselineCompareLanding.php +│ │ ├── BaselineCompareMatrix.php +│ │ └── [new canonical compare page] +│ ├── Filament/Resources/TenantResource.php +│ ├── Filament/Resources/TenantResource/Pages/ListTenants.php +│ ├── Models/ +│ │ ├── InventoryItem.php +│ │ └── PolicyVersion.php +│ ├── Services/Audit/ +│ │ └── WorkspaceAuditLogger.php +│ ├── Services/Baselines/ +│ │ └── BaselineCompareService.php +│ ├── Services/PortfolioTriage/ +│ │ └── TenantTriageReviewService.php +│ ├── Support/Audit/AuditActionId.php +│ ├── Support/Baselines/ +│ │ ├── BaselineCompareMatrixBuilder.php +│ │ └── Compare/CompareStrategyRegistry.php +│ └── Support/PortfolioCompare/ or Services/PortfolioCompare/ +└── tests/ + ├── Feature/PortfolioCompare/ + └── Unit/Support/PortfolioCompare/ +``` + +**Structure Decision**: keep implementation inside `apps/platform`, reuse existing compare and portfolio seams, and introduce at most one small `PortfolioCompare` support/service namespace for the new dual-tenant preview/preflight logic. 
+ +## Complexity Tracking + +| Violation | Why Needed | Simpler Alternative Rejected Because | +|-----------|------------|-------------------------------------| +| New compare preview builder | dual-tenant compare needs one place to translate existing inventory/baseline truth into a canonical preview contract | page-local mapping would duplicate compare logic and drift from existing baseline compare seams | +| New promotion preflight service | readiness reasoning must stay read-only and auditable before any execution path exists | bolting readiness rules into the page would make later reuse and testing brittle | + +## Proportionality Review + +- **Current operator problem**: portfolio operators still lack one bounded surface that answers whether a target tenant can follow a source tenant. +- **Existing structure is insufficient because**: existing baseline compare is tenant-vs-reference, not tenant-vs-tenant, and portfolio triage does not compute promotion readiness. +- **Narrowest correct implementation**: one canonical page plus one preview builder and one preflight service, no new table, no execution path. +- **Ownership cost created**: maintain a small preview/preflight contract and a focused test family. +- **Alternative intentionally rejected**: actual promotion execution, persisted promotion drafts, and local compare sidecars were rejected as premature. +- **Release truth**: current-release gap, not speculative platform work. + +## Implementation Strategy + +### Suggested MVP Scope + +MVP = **US1 + US2 together**. A compare page without a promotion preflight leaves the core decision incomplete, and a preflight without a canonical compare page has no trustworthy operator context. + +### Incremental Delivery + +1. Reuse current compare, navigation, capability, and audit seams. +2. Deliver the canonical compare preview. +3. Add the read-only promotion preflight on top of the same page and services. +4. 
Add launch/return continuity from portfolio-triage and tenant-registry context. +5. Finish with narrow validation and formatting. + +### Team Strategy + +1. Settle the preview/preflight contracts first. +2. Parallelize unit tests for preview/preflight rules and feature tests for page/auth behavior. +3. Serialize merges around the canonical compare page and the shared `PortfolioCompare` service namespace so the page contract does not drift. diff --git a/specs/043-cross-tenant-compare-and-promotion/spec.md b/specs/043-cross-tenant-compare-and-promotion/spec.md index f7e416aa..3adb2752 100644 --- a/specs/043-cross-tenant-compare-and-promotion/spec.md +++ b/specs/043-cross-tenant-compare-and-promotion/spec.md @@ -1,59 +1,293 @@ -# Feature Specification: Cross-tenant Compare and Promotion +# Feature Specification: Cross-Tenant Compare Preview and Promotion Preflight -**Feature Branch**: `feat/043-cross-tenant-compare-and-promotion` +**Feature Branch**: `043-cross-tenant-compare-and-promotion` **Created**: 2026-01-07 -**Status**: Draft +**Updated**: 2026-04-27 +**Status**: Ready for implementation +**Input**: Refresh existing Spec 043 against `docs/product/spec-candidates.md`, `docs/product/implementation-ledger.md`, and `docs/product/roadmap.md` so the feature becomes a narrow, implementation-ready slice instead of a broad future ambition. -## Purpose +## Spec Candidate Check *(mandatory - SPEC-GATE-001)* -Enable safe cross-tenant comparison of inventory and, optionally, controlled promotion workflows. +- **Problem**: TenantPilot now has portfolio visibility, triage continuity, and strong tenant-level baseline compare surfaces, but operators still lack one canonical workspace-level path to compare a source tenant to a target tenant and prepare a safe promotion decision. +- **Today's failure**: Operators can see that tenants differ, but they still reconstruct cross-tenant decisions manually across tenant registry, baseline compare, and tenant detail surfaces. 
Promotion remains a roadmap phrase, not a bounded product workflow. +- **User-visible improvement**: An authorized workspace operator can select a source and target tenant, review a structured compare preview of governed subjects, and generate a read-only promotion preflight that shows what is ready, blocked, or requires manual mapping before any write path exists. +- **Smallest enterprise-capable version**: One canonical `/admin` compare surface, one compare preview builder, one read-only promotion preflight action, deep links back to existing tenant and baseline compare surfaces, and bounded audit metadata for preflight entry points. No actual promotion execution ships in this slice. +- **Explicit non-goals**: No cutover, no write execution, no queue or `OperationRun`, no automatic target remapping of groups/tags/named locations, no cross-workspace compare, no customer-facing compare workspace, no provider marketplace, and no new persisted promotion draft entity. +- **Permanent complexity imported**: One canonical compare page, one narrow compare scope contract, one preview/preflight builder pair, one small audit metadata shape, and focused unit plus feature coverage. +- **Why now**: The implementation ledger explicitly identifies cross-tenant compare and promotion as one of the remaining real product gaps. It is the missing bridge between portfolio visibility and portfolio action. +- **Why not local**: A local compare action on one tenant page would duplicate entitlement, matching, audit, and promotion-readiness logic and would not create a reusable, canonical workspace workflow. +- **Approval class**: Workflow Compression +- **Red flags triggered**: New page + new compare/preflight service pair. Defense: the slice stays read-only, introduces no new table, reuses existing baseline compare and portfolio triage seams, and defers actual execution. 
+- **Score**: Nutzen: 2 | Dringlichkeit: 2 | Scope: 2 | Komplexitaet: 1 | Produktnaehe: 1 | Wiederverwendung: 2 | **Gesamt: 10/12** +- **Decision**: approve -Comparison is read-only by default. Any write/promotion behavior must be explicitly gated, audited, and separately authorized. +## Spec Scope Fields *(mandatory)* -## User Scenarios & Testing +- **Scope**: canonical-view +- **Primary Routes**: + - new canonical admin compare page under `/admin` for cross-tenant compare preview and promotion preflight + - existing `/admin/tenants` portfolio/registry surfaces as launch and return context + - existing tenant detail and baseline compare pages as secondary drill-down targets rather than duplicated local detail panes +- **Data Ownership**: + - compare preview and promotion preflight remain derived from existing tenant-owned inventory, policy-version, and baseline-compare truth + - no new compare snapshot, promotion draft, or mapping table is introduced in v1 + - audit remains on the existing workspace audit log only +- **RBAC**: + - non-members or actors outside workspace scope receive `404` + - launch-action visibility requires established workspace context, `Capabilities::WORKSPACE_BASELINES_VIEW` on the workspace, and `Capabilities::TENANT_VIEW` on the launched tenant + - opening the compare page requires established workspace context and `Capabilities::WORKSPACE_BASELINES_VIEW` on the workspace + - loading preview data requires `Capabilities::TENANT_VIEW` on both source and target tenants + - executing promotion preflight requires the preview permissions plus `Capabilities::WORKSPACE_BASELINES_MANAGE` on the workspace + - for established members who can view compare but lack `Capabilities::WORKSPACE_BASELINES_MANAGE`, the preflight action remains visible but disabled with explicit permission help text; server-side attempts still return `403` + - the implementation must stay on existing capability registries instead of raw strings and must not introduce a new 
promotion capability family for this slice -### Scenario 1: Compare two tenants (read-only) -- Given the operator has access to Tenant A and Tenant B -- When they select two tenants and a set of policy types -- Then they can see differences in presence and key metadata +For canonical-view specs, the spec MUST define: -### Scenario 2: Compare with a stable reference -- Given a reference selection scope -- When the operator runs comparison -- Then results are stable and reproducible for that scope +- **Default filter behavior when tenant-context is active**: if launched from the tenant registry or portfolio-triage context, prefill the launched tenant as the `target tenant`, leave the `source tenant` intentionally user-selected, and preserve a return context token. +- **Explicit entitlement checks preventing cross-tenant leakage**: the compare surface must validate workspace membership first, then validate both source and target tenant entitlement before any preview data loads. Any inaccessible tenant input is treated as not found. 
-### Scenario 3: Promotion is explicitly gated (optional) -- Given promotion is enabled by policy -- When the operator initiates promotion -- Then the system requires explicit confirmation and records an audit event +## Cross-Cutting / Shared Pattern Reuse *(mandatory when the feature touches notifications, status messaging, action links, header actions, dashboard signals/cards, navigation entry points, evidence/report viewers, or any other existing shared operator interaction family; otherwise write `N/A - no shared interaction family touched`)* -## Functional Requirements +- **Cross-cutting feature?**: yes +- **Interaction class(es)**: navigation entry points, compare/drill-down actions, audit metadata, and canonical workspace-context pages +- **Systems touched**: `ListTenants`, portfolio-triage state, `CanonicalNavigationContext`, `BaselineCompareLanding`, `BaselineCompareMatrix`, `BaselineCompareService`, `CompareStrategyRegistry`, `WorkspaceAuditLogger`, and `AuditActionId` +- **Existing pattern(s) to extend**: canonical `/admin` workspace-context pages, baseline compare preview patterns, portfolio-triage return-state patterns, and existing workspace audit metadata patterns +- **Shared contract / presenter / builder / renderer to reuse**: `CanonicalNavigationContext`, `ActionSurfaceDeclaration`, `BaselineCompareService`, `BaselineCompareMatrixBuilder`, `CompareStrategyRegistry`, `TenantTriageReviewService`, and `WorkspaceAuditLogger` +- **Why the existing shared path is sufficient or insufficient**: existing tenant-level baseline compare surfaces already solve stable subject matching, result framing, and drill-down semantics, but they are insufficient for cross-tenant compare because they do not accept dual-tenant scope or produce a promotion-readiness preflight. +- **Allowed deviation and why**: none. The new surface should extend current compare and navigation patterns, not invent a parallel compare UX family. 
+- **Consistency impact**: source tenant, target tenant, compare preview, promotion preflight, blocked reason, and ready/manual mapping language must stay consistent across page copy, modal copy, audit prose, and deep links. +- **Review focus**: reviewers must block new local compare widgets or tenant-specific preflight sidecars that bypass the canonical compare page or its shared preview/preflight services. -- FR1: Support selecting two tenants within authorized scope. -- FR2: Provide read-only diff views based on inventory metadata and stable identifiers. -- FR3: Provide exportable comparison results. -- FR4: If promotion is included: - - require explicit enablement - - require explicit confirmation per operation - - record audit logs - - support dry-run/preview +## OperationRun UX Impact *(mandatory when the feature creates, queues, deduplicates, resumes, blocks, completes, or deep-links to an `OperationRun`; otherwise write `N/A - no OperationRun start or link semantics touched`)* -## Non-Functional Requirements +- **Touches OperationRun start/completion/link UX?**: no +- **Shared OperationRun UX contract/layer reused**: `N/A` +- **Delegated start/completion UX behaviors**: `N/A` +- **Local surface-owned behavior that remains**: compare preview and promotion preflight stay synchronous and read-only in v1 +- **Queued DB-notification policy**: `N/A` +- **Terminal notification path**: `N/A` +- **Exception required?**: none -- NFR1: Enforce tenant isolation and least privilege across tenant selection and data access. -- NFR2: Comparison must not expose secrets or unsafe payload fields. 
+## Provider Boundary / Platform Core Check *(mandatory when the feature changes shared provider/platform seams, identity scope, governed-subject taxonomy, compare strategy selection, provider connection descriptors, or operator vocabulary that may leak provider-specific semantics into platform-core truth; otherwise write `N/A - no shared provider/platform boundary touched`)* + +- **Shared provider/platform boundary touched?**: yes +- **Boundary classification**: mixed +- **Seams affected**: compare subject identity, compare strategy reuse, promotion preflight reason vocabulary, and operator-facing compare terminology +- **Neutral platform terms preserved or introduced**: `source tenant`, `target tenant`, `governed subject`, `compare preview`, `promotion preflight`, `mapping gap`, and `blocked reason` +- **Provider-specific semantics retained and why**: Microsoft-first policy-type and inventory semantics remain inside existing compare strategy and inventory seams because the repo currently has one real provider domain. They should not leak deeper into the page contract than necessary. +- **Why this does not deepen provider coupling accidentally**: the page and services stay anchored on existing compare registries and inventory identifiers instead of inventing Microsoft-specific page contracts or raw Graph payload handling. +- **Follow-up path**: future multi-provider compare remains a separate follow-up spec if it ever becomes current-release truth. + +## UI / Surface Guardrail Impact *(mandatory when operator-facing surfaces are changed; otherwise write `N/A`)* + +| Surface / Change | Operator-facing surface change? | Native vs Custom | Shared-Family Relevance | State Layers Touched | Exception Needed? 
| Low-Impact / `N/A` Note | +|---|---|---|---|---|---|---| +| Canonical cross-tenant compare page | yes | Native Filament page plus shared compare primitives | compare preview, navigation, audit-backed preflight action | page, query state, compare summary, modal/action state | no | Reuses baseline compare language and drill-down patterns instead of a custom standalone shell | +| Tenant registry / portfolio launch action | yes | Native Filament action | navigation entry point, contextual launch | table state, query/deep-link state | no | Extends existing portfolio-triage return-state handling | +| Actual promotion execution surface | no | N/A | none | none | no | `N/A - explicitly deferred` | + +## Decision-First Surface Role *(mandatory when operator-facing surfaces are changed)* + +| Surface | Decision Role | Human-in-the-loop Moment | Immediately Visible for First Decision | On-Demand Detail / Evidence | Why This Is Primary or Why Not | Workflow Alignment | Attention-load Reduction | +|---|---|---|---|---|---|---|---| +| Canonical cross-tenant compare page | Primary Decision Surface | Operator decides whether the target tenant is ready for promotion planning or still blocked by scope and mapping gaps | source/target summary, ready/blocked/manual counts, top blockers, and next action | tenant drill-down, baseline compare drill-down, subject-level diagnostics | Primary because it is the first canonical workspace place where cross-tenant action becomes decidable | Moves from portfolio triage into compare and preflight without manual reconstruction | Replaces cross-page mental diffing with one bounded decision surface | +| Tenant registry / portfolio launch action | Secondary Context | Operator chooses when to leave the tenant registry for compare | current tenant context and preserved return state | compare details live on the compare page | Secondary because it launches the decision surface rather than hosting it | Keeps portfolio review flow intact | Reduces 
repeated tenant re-selection and filter loss | + +## Audience-Aware Disclosure *(mandatory when operator-facing surfaces are changed)* + +| Surface | Audience Modes In Scope | Decision-First Default-Visible Content | Operator Diagnostics | Support / Raw Evidence | One Dominant Next Action | Hidden / Gated By Default | Duplicate-Truth Prevention | +|---|---|---|---|---|---|---|---| +| Canonical cross-tenant compare page | operator-MSP | source/target summary, compare counts, preflight readiness summary, top blocked reasons | subject-level mapping gaps and deep links to tenant-specific evidence | raw payloads remain on existing tenant/baseline pages, not this surface | `Generate promotion preflight` | raw JSON, provider IDs, and low-level evidence stay behind existing detail pages | compare page states the decision truth once; drill-down pages add proof rather than rephrasing the same blocker | +| Tenant registry / portfolio launch action | operator-MSP | current tenant context and compare launch intent | return-state token only | none | `Compare tenants` | any future write action remains absent | launch action does not duplicate compare summaries on the registry row | + +## UI/UX Surface Classification *(mandatory when operator-facing surfaces are changed)* + +| Surface | Action Surface Class | Surface Type | Likely Next Operator Action | Primary Inspect/Open Model | Row Click | Secondary Actions Placement | Destructive Actions Placement | Canonical Collection Route | Canonical Detail Route | Scope Signals | Canonical Noun | Critical Truth Visible by Default | Exception Type / Justification | +|---|---|---|---|---|---|---|---|---|---|---|---|---|---| +| Canonical cross-tenant compare page | Utility / Workspace Decision | Draft apply analysis | Generate promotion preflight or open drill-down evidence | explicit selectors plus focused compare/preflight panels | forbidden | drill-down links and secondary navigation stay below the summary/preflight sections | none in v1 
| new canonical `/admin` compare route | same page with shareable query state | workspace context plus source/target tenant chips | Cross-tenant compare | whether the target is ready, blocked, or needs manual mapping | none | +| Tenant registry / portfolio launch action | List / Table / Launch Context | Launch context support | Open compare with current tenant prefilled | explicit action from tenant list or triage context | preserved existing row behavior | compare entry is a safe secondary action | none | `/admin/tenants` | compare route | current workspace and tenant | Tenant registry | why the action launches compare, not promotion | existing tenant registry action hierarchy remains valid | + +## Operator Surface Contract *(mandatory when operator-facing surfaces are changed)* + +| Surface | Primary Persona | Decision / Operator Action Supported | Surface Type | Primary Operator Question | Default-visible Information | Diagnostics-only Information | Status Dimensions Used | Mutation Scope | Primary Actions | Dangerous Actions | +|---|---|---|---|---|---|---|---|---|---|---| +| Canonical cross-tenant compare page | Workspace operator / MSP operator | Decide whether a target tenant is ready for a later promotion workflow | Canonical decision page | Can this target tenant safely follow the selected source tenant for the chosen governed subjects? | source/target summary, compare counts, blocked reasons, ready/manual counts, and next action | subject-level mappings, stale evidence signals, and deep links to existing tenant compare/detail surfaces | compare state, readiness, mapping confidence, evidence freshness | TenantPilot only in v1 | Generate promotion preflight, open source tenant, open target tenant | none | +| Tenant registry / portfolio launch action | Workspace operator / MSP operator | Start compare from an existing portfolio review path | Registry action | Which tenant should I compare next without losing context? 
| current tenant identity and compare launch intent | preserved triage filters and return token | launch context only | none | Compare tenants | none | + +## Proportionality Review *(mandatory when structural complexity is introduced)* + +- **New source of truth?**: no +- **New persisted entity/table/artifact?**: no +- **New abstraction?**: yes - one narrow compare preview builder and one narrow promotion preflight service +- **New enum/state/reason family?**: no new persisted state family; readiness and blocked reasons remain derived from compare/preflight results +- **New cross-domain UI framework/taxonomy?**: no +- **Current operator problem**: operators can identify tenants that need attention but cannot reach a trustworthy cross-tenant decision without manual reconstruction. +- **Existing structure is insufficient because**: existing tenant-level baseline compare pages and portfolio triage state do not support dual-tenant scope or promotion-readiness reasoning. +- **Narrowest correct implementation**: derive compare preview and promotion preflight from existing inventory/baseline truth, keep the page canonical and read-only, and audit only the preflight entry points. +- **Ownership cost**: maintain one compare page, one preview builder, one preflight service, and a handful of focused tests. +- **Alternative intentionally rejected**: actual promotion execution and persisted draft plans were rejected because they would add write risk, queue semantics, and new truth before the compare/preflight workflow is proven. +- **Release truth**: current-release workflow gap, not future-release platform speculation + +### Compatibility posture + +This feature assumes a pre-production environment. + +Backward compatibility, legacy aliases, migration shims, historical fixtures, and compatibility-specific tests are out of scope unless explicitly required by this spec. + +Canonical replacement is preferred over preservation. 
+ +## Testing / Lane / Runtime Impact *(mandatory for runtime behavior changes)* + +- **Test purpose / classification**: Unit, Feature +- **Validation lane(s)**: fast-feedback, confidence +- **Why this classification and these lanes are sufficient**: unit coverage proves preview matching and promotion-preflight classification without Filament overhead, while focused feature coverage proves page rendering, launch context, audit, and `404`/`403` semantics on the canonical compare surface. +- **New or expanded test families**: one focused `PortfolioCompare` feature family and one focused `Unit/Support/PortfolioCompare` family +- **Fixture / helper cost impact**: moderate; reuse existing tenant, workspace, inventory, baseline compare, and portfolio-triage fixtures instead of adding browser setup or queue scaffolding +- **Heavy-family visibility / justification**: none; do not widen this slice into browser or heavy-governance lanes by default +- **Special surface test profile**: standard-native-filament +- **Standard-native relief or required special coverage**: ordinary feature coverage is sufficient for the page and launch actions; a small unit test set must prove preflight classification and no-write semantics +- **Reviewer handoff**: reviewers must confirm that the slice stays read-only, reuses baseline compare and portfolio seams, preserves deny-as-not-found semantics for inaccessible tenants, and does not smuggle in actual promotion execution +- **Budget / baseline / trend impact**: low increase in unit + feature only +- **Escalation needed**: none +- **Active feature PR close-out entry**: Guardrail +- **Planned validation commands**: + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Unit/Support/PortfolioCompare/CrossTenantComparePreviewBuilderTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact 
tests/Unit/Support/PortfolioCompare/CrossTenantPromotionPreflightTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/PortfolioCompare/CrossTenantComparePageTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/PortfolioCompare/CrossTenantCompareAuthorizationTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/PortfolioCompare/CrossTenantPromotionPreflightAuditTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/PortfolioCompare/CrossTenantCompareLaunchContextTest.php` + +## Scope Boundaries + +### In Scope + +- one canonical workspace-context compare page for source/target tenant selection +- read-only compare preview using stable governed-subject identity and existing compare strategy patterns +- one read-only promotion preflight action that classifies ready, blocked, and manual-mapping subjects +- workspace audit metadata for preflight entry points +- launch and return continuity from portfolio-triage/tenant-registry context +- deep links to existing tenant and baseline compare detail pages instead of duplicated proof surfaces + +### Non-Goals + +- actual promotion execution or target mutation +- queueing, retries, or `OperationRun` +- persisted compare snapshots or promotion draft tables +- automatic mapping writers for groups, scope tags, filters, named locations, or app references +- customer-facing review or compare surfaces +- cross-workspace compare +- multi-provider compare frameworks + +## Assumptions + +- existing inventory and baseline compare seams already provide enough stable subject identity to drive a first compare preview +- current portfolio-triage return-state patterns are sufficient for launch 
and back-navigation continuity +- a read-only preflight is valuable before any write path exists and can be audited without introducing a second persistence truth + +## Risks + +- some compare subjects may still need provider-specific mapping logic before they can produce a trustworthy readiness result +- target inventory freshness or missing evidence may block preflight more often than expected and needs explicit reasoning on the page +- a later implementation could try to add actual promotion execution inside this slice; that must be rejected as scope growth + +## Follow-up Candidates + +- Cross-tenant promotion execution with preview -> confirmation -> queued run -> verify +- Managed mapping workflows for named locations, assignments, groups, and filters +- Cross-tenant decision inbox integration after compare/preflight exists + +## User Scenarios & Testing *(mandatory)* + +### User Story 1 - Compare two authorized tenants (Priority: P1) + +As a workspace operator, I want to compare one source tenant to one target tenant from a canonical workspace surface so I can see where governed subjects match, differ, or are missing without reconstructing the answer manually. + +**Why this priority**: This is the smallest valuable slice that turns portfolio visibility into a concrete operator decision surface. + +**Independent Test**: Open the compare page with two authorized tenants, choose governed-subject filters, and verify that the compare preview shows reproducible match/different/missing results and drill-down links. + +**Acceptance Scenarios**: + +1. **Given** an operator has access to both selected tenants, **When** they open the compare page and run the preview, **Then** they see a structured compare summary grouped by governed-subject state rather than a raw payload diff. +2. **Given** the same source and target selection, **When** the operator reloads or shares the preview URL, **Then** the compare state is reproducible for the same scoped selection. +3. 
**Given** the operator selects the same tenant as both source and target, **When** they try to run the preview, **Then** the page rejects the selection as invalid and does not produce compare or preflight output. + +--- + +### User Story 2 - Generate a promotion preflight without writing (Priority: P1) + +As a workspace operator, I want a read-only promotion preflight that tells me what is ready, blocked, or needs manual mapping before any cross-tenant write path exists. + +**Why this priority**: Promotion language is not trustworthy until the product can explain why a target is or is not ready in a bounded, auditable way. + +**Independent Test**: From an authorized compare preview, trigger the preflight action and verify that the page shows readiness counts, blocked reasons, and manual-mapping requirements without mutating source or target tenants. + +**Acceptance Scenarios**: + +1. **Given** a compare preview contains subjects with stable identity and usable target conditions, **When** the operator generates a promotion preflight, **Then** those subjects appear as ready with a clear explanation. +2. **Given** some subjects are missing identifiers, stale, or blocked by target conditions, **When** the operator generates the preflight, **Then** those subjects appear as blocked or manual-mapping-required with explicit reasons. +3. **Given** the operator generates a preflight, **When** the action completes, **Then** no target mutation, queued run, or provider write occurs. +4. **Given** the operator can view compare but lacks `WORKSPACE_BASELINES_MANAGE`, **When** they reach the compare page, **Then** the preflight action is visibly disabled with permission guidance and any forced request is rejected server-side. 
+ +--- + +### User Story 3 - Launch compare from portfolio context without losing return state (Priority: P2) + +As a workspace operator, I want to enter compare from the tenant registry or portfolio-triage context and return without losing my working filters so compare becomes part of the portfolio workflow instead of a detached utility. + +**Why this priority**: The workflow is much less useful if compare starts from scratch and breaks the operator's portfolio-review context. + +**Independent Test**: Launch compare from the tenant registry with active triage filters, verify one tenant is prefilled, and verify the return path restores the prior registry state. + +**Acceptance Scenarios**: + +1. **Given** the tenant registry has active portfolio-triage filters, **When** the operator launches compare from a tenant row or contextual action, **Then** the compare page preserves a return token and prefills the launched tenant as the `target tenant`. +2. **Given** the operator returns from compare, **When** the registry reloads, **Then** the prior triage filters are restored. + +### Edge Cases + +- source and target tenant are the same tenant: reject the selection as invalid input and do not compute preview or preflight +- source and target tenants belong to different workspaces +- one selected tenant is no longer visible or never belonged to the actor's scope +- compare subjects have ambiguous identity or duplicate matches +- target evidence is stale or missing, making readiness impossible to prove + +## Requirements *(mandatory)* + +### Functional Requirements + +- **FR1**: The feature MUST provide one canonical workspace-context compare surface for selecting source and target tenants. +- **FR2**: The feature MUST enforce workspace membership and source/target tenant entitlement before loading compare data; inaccessible tenants resolve as `404`. 
+- **FR3**: The compare preview MUST use stable governed-subject identity and existing inventory/baseline compare seams rather than raw JSON diffing. +- **FR4**: The compare preview MUST stay read-only and MUST deep-link to existing tenant or baseline detail surfaces for proof instead of duplicating raw diagnostics locally. +- **FR5**: The feature MUST provide a read-only promotion preflight action that classifies subjects as ready, blocked, or manual-mapping-required. +- **FR6**: The preflight MUST NOT execute a target write, queue a run, or persist a promotion draft artifact. +- **FR7**: The preflight MUST explain blocked and manual states with explicit operator-readable reasons. +- **FR8**: The feature MUST reuse existing capability registries with this exact split: page access = `WORKSPACE_BASELINES_VIEW`, preview data = `TENANT_VIEW` on both tenants, preflight execution = `WORKSPACE_BASELINES_MANAGE`. +- **FR9**: The feature MUST preserve launch and return continuity from the tenant registry / portfolio-triage path. +- **FR10**: The feature MUST record bounded workspace audit metadata for promotion-preflight entry points only. +- **FR11**: The compare page MUST reject same-tenant selection before preview or preflight runs. + +### Non-Functional Requirements + +- **NFR1**: The feature MUST preserve workspace and tenant isolation and MUST NOT leak source or target hints to unauthorized actors. +- **NFR2**: The compare page MUST remain operator-first, decision-first, and MUST NOT expose raw payloads by default. +- **NFR3**: The implementation MUST remain Filament-native on Livewire v4 and MUST NOT introduce a second compare shell or custom status framework. +- **NFR4**: The slice MUST NOT introduce new assets or new globally searchable resources. ## Success Criteria -- SC1: Operators can identify which tenant differs for a given policy type in under 2 minutes. -- SC2: Read-only comparisons are reproducible when run again with the same scope. 
- -## Out of Scope - -- Bulk remediation without preview/confirmation. +- **SC1**: An authorized operator can produce a cross-tenant compare preview from one canonical page without switching across multiple tenant detail surfaces. +- **SC2**: The same source, target, and filter selection produces reproducible compare output. +- **SC3**: A promotion preflight clearly separates ready, blocked, and manual subjects without performing any write. +- **SC4**: Unauthorized source/target combinations remain deny-as-not-found. +- **SC5**: View-only members can inspect compare results but cannot execute preflight, and the UI makes that boundary explicit. ## Related Specs - Program: `specs/039-inventory-program/spec.md` - Core: `specs/040-inventory-core/spec.md` +- UI: `specs/041-inventory-ui/spec.md` - Drift: `specs/044-drift-mvp/spec.md` +- Foundation follow-up context: `docs/product/spec-candidates.md` (`Cross-Tenant Compare and Promotion v1`) diff --git a/specs/043-cross-tenant-compare-and-promotion/tasks.md b/specs/043-cross-tenant-compare-and-promotion/tasks.md index bed50126..f3176f9d 100644 --- a/specs/043-cross-tenant-compare-and-promotion/tasks.md +++ b/specs/043-cross-tenant-compare-and-promotion/tasks.md @@ -1,7 +1,190 @@ -# Tasks: Cross-tenant Compare and Promotion +--- -- [ ] T001 Define authorized tenant selection rules -- [ ] T002 Read-only compare UI and diff rules -- [ ] T003 Export capability for comparison results -- [ ] T004 If enabled: promotion workflow with preview + confirm + audit -- [ ] T005 Tests: tenant isolation, authorization, reproducibility +description: "Task list for Cross-Tenant Compare Preview and Promotion Preflight" + +--- + +# Tasks: Cross-Tenant Compare Preview and Promotion Preflight + +**Input**: Design documents from `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/043-cross-tenant-compare-and-promotion/` +**Prerequisites**: 
`/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/043-cross-tenant-compare-and-promotion/plan.md` (required), `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/043-cross-tenant-compare-and-promotion/spec.md` (required) + +**Tests**: REQUIRED (Pest) for runtime behavior changes. Keep proof in narrow `Unit` plus `Feature` lanes only; do not add browser or heavy-governance coverage by default for this first read-only slice. +**Operations**: No new `OperationRun`, queue, retry, monitoring page, or execution ledger is introduced. Promotion remains preflight-only. +**RBAC**: Existing workspace and tenant membership semantics remain authoritative. Non-members or actors lacking source or target tenant entitlement receive `404`; members who reach the canonical compare surface but lack the required capability receive `403`. Page access uses `Capabilities::WORKSPACE_BASELINES_VIEW`, preview data uses `Capabilities::TENANT_VIEW` on both tenants, and preflight execution adds `Capabilities::WORKSPACE_BASELINES_MANAGE`. +**Provider Boundary**: The page contract stays platform-neutral (`source tenant`, `target tenant`, `governed subject`, `promotion preflight`) while reusing Microsoft-first inventory and baseline compare seams under the hood. +**Organization**: Tasks are grouped by user story so compare preview, promotion preflight, and portfolio launch continuity remain independently testable once the shared contracts exist. + +## Test Governance Checklist + +- [ ] Lane assignment stays `Unit` plus `Feature` and remains the narrowest sufficient proof for the changed behavior. +- [ ] New or changed tests stay in `apps/platform/tests/Unit/Support/PortfolioCompare/` and `apps/platform/tests/Feature/PortfolioCompare/` only. +- [ ] Shared helpers, fixtures, and context defaults stay cheap by default; do not add browser setup, queue scaffolding, or seeded promotion history. 
+- [ ] Planned validation commands cover compare preview, promotion preflight, launch continuity, audit, and authorization without widening scope. +- [ ] The declared surface test profile remains `standard-native-filament` because the slice adds one canonical page and one launch action on existing surfaces. +- [ ] Any deferred execution, mapping automation, or multi-provider follow-up resolves as `document-in-feature` or `follow-up-spec`, not hidden scope growth. + +## Phase 1: Setup (Shared Context) + +**Purpose**: Confirm the narrowed slice, the reusable compare seams, and the reviewer stop conditions before implementation begins. + +- [ ] T001 Review the narrowed compare-preview and promotion-preflight slice in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/043-cross-tenant-compare-and-promotion/spec.md` and `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/043-cross-tenant-compare-and-promotion/plan.md` together with the current candidate and ledger references. +- [ ] T002 [P] Confirm the compare and subject-identity seams that this slice must reuse in `apps/platform/app/Filament/Pages/BaselineCompareLanding.php`, `apps/platform/app/Filament/Pages/BaselineCompareMatrix.php`, `apps/platform/app/Services/Baselines/BaselineCompareService.php`, `apps/platform/app/Support/Baselines/BaselineCompareMatrixBuilder.php`, `apps/platform/app/Support/Baselines/Compare/CompareStrategyRegistry.php`, `apps/platform/app/Models/InventoryItem.php`, and `apps/platform/app/Models/PolicyVersion.php`. 
+- [ ] T003 [P] Confirm the portfolio launch, authorization, and audit seams that this slice must reuse in `apps/platform/app/Filament/Resources/TenantResource.php`, `apps/platform/app/Filament/Resources/TenantResource/Pages/ListTenants.php`, `apps/platform/app/Services/PortfolioTriage/TenantTriageReviewService.php`, `apps/platform/app/Services/Audit/WorkspaceAuditLogger.php`, `apps/platform/app/Support/Audit/AuditActionId.php`, `apps/platform/app/Services/Auth/CapabilityResolver.php`, and `apps/platform/app/Services/Auth/WorkspaceCapabilityResolver.php`. + +--- + +## Phase 2: Foundational (Blocking Prerequisites) + +**Purpose**: Add the shared compare scope and promotion-preflight primitives that every user story depends on. + +**Critical**: No user-story work should begin until this phase is complete. + +- [ ] T004 [P] Define the minimal compare-page input/output shape inside `apps/platform/app/Support/PortfolioCompare/` or `apps/platform/app/Services/PortfolioCompare/` for source tenant, target tenant, governed-subject filters, and preflight output without adding a wider DTO or resolver framework. +- [ ] T005 [P] Implement source-plus-target entitlement checks inside the canonical compare page and shared preview/preflight services using the existing capability resolvers so workspace membership, source entitlement, target entitlement, and capability denial all follow existing `404`/`403` semantics. +- [ ] T006 Implement the compare preview builder so it reuses stable governed-subject identity from existing inventory and baseline compare seams and produces a canonical preview summary without storing new compare truth. +- [ ] T007 Implement the promotion-preflight service so it classifies governed subjects as ready, blocked, or manual-mapping-required and explicitly performs no target mutation, queue dispatch, or `OperationRun` creation. 
+- [ ] T008 [P] Add bounded preflight audit action IDs and metadata shaping in `apps/platform/app/Support/Audit/AuditActionId.php` and `apps/platform/app/Services/Audit/WorkspaceAuditLogger.php` for promotion-preflight entry points only. + +**Checkpoint**: Shared compare scope, entitlement resolution, preview building, preflight classification, and audit metadata exist; user stories can proceed independently. + +--- + +## Phase 3: User Story 1 - Compare Two Authorized Tenants (Priority: P1) MVP + +**Goal**: Give an authorized workspace operator one canonical compare page that shows a reproducible source-vs-target preview without cross-page reconstruction. + +**Independent Test**: Open the compare page with two authorized tenants, apply governed-subject filters, and verify that the preview shows match/difference/missing states plus drill-down links. + +### Tests for User Story 1 + +- [ ] T009 [P] [US1] Add feature coverage for rendering the canonical compare page, selecting source and target tenants, rejecting same-tenant selection, and showing one default-visible compare summary with no duplicate decision truth or raw/support evidence in `apps/platform/tests/Feature/PortfolioCompare/CrossTenantComparePageTest.php`. +- [ ] T010 [P] [US1] Add feature coverage for `404` vs `403` semantics across source/target entitlement, workspace `WORKSPACE_BASELINES_VIEW`, tenant `TENANT_VIEW`, visible-disabled preflight UX for members lacking `WORKSPACE_BASELINES_MANAGE`, and server-side denial of forced preflight requests in `apps/platform/tests/Feature/PortfolioCompare/CrossTenantCompareAuthorizationTest.php`. +- [ ] T011 [P] [US1] Add unit coverage for compare preview subject matching and reproducible summary output in `apps/platform/tests/Unit/Support/PortfolioCompare/CrossTenantComparePreviewBuilderTest.php`. 
+ +### Implementation for User Story 1 + +- [ ] T012 [US1] Add the canonical compare page under `apps/platform/app/Filament/Pages/` with source/target selectors, governed-subject filters, shareable query state, and compare preview summary built from the shared preview builder. +- [ ] T013 [US1] Reuse existing baseline compare and inventory seams so the compare page deep-links to tenant-level proof surfaces instead of duplicating raw diagnostics. +- [ ] T014 [US1] Keep page copy, chips, and summary wording aligned to `source tenant`, `target tenant`, `governed subject`, and `compare preview` rather than Microsoft-first or execution-first vocabulary. + +**Checkpoint**: User Story 1 is independently functional when the canonical page produces a reproducible compare preview for two authorized tenants. + +--- + +## Phase 4: User Story 2 - Generate a Read-Only Promotion Preflight (Priority: P1) + +**Goal**: Let the operator ask whether the chosen target is ready for a later promotion workflow without performing any write. + +**Independent Test**: From an authorized compare preview, trigger the preflight action and verify that the page shows ready, blocked, and manual-mapping-required groups without mutating target data. + +### Tests for User Story 2 + +- [ ] T015 [P] [US2] Add unit coverage for preflight classification across ready, blocked, manual-mapping, stale-evidence, and missing-identifier cases in `apps/platform/tests/Unit/Support/PortfolioCompare/CrossTenantPromotionPreflightTest.php`. +- [ ] T016 [P] [US2] Add feature coverage for the compare page's `Generate promotion preflight` action, visible-disabled manage-denial UX, one dominant next action, visible readiness summary, and no default-visible raw/support evidence in `apps/platform/tests/Feature/PortfolioCompare/CrossTenantComparePageTest.php`. 
+- [ ] T017 [P] [US2] Add feature coverage for preflight audit metadata and explicit no-write semantics in `apps/platform/tests/Feature/PortfolioCompare/CrossTenantPromotionPreflightAuditTest.php`. + +### Implementation for User Story 2 + +- [ ] T018 [US2] Add the read-only `Generate promotion preflight` action to the canonical compare page, keeping it distinct from any future execution action and free of queue/runtime side effects. +- [ ] T019 [US2] Render a promotion-preflight summary that groups governed subjects into ready, blocked, and manual-mapping-required buckets with explicit operator-readable reasons. +- [ ] T020 [US2] Route preflight entry-point audit through the existing workspace audit pipeline with source tenant, target tenant, subject counts, and blocked-reason metadata only. + +**Checkpoint**: User Story 2 is independently functional when the operator can generate an audited, read-only readiness decision from the compare page. + +--- + +## Phase 5: User Story 3 - Launch Compare from Portfolio Context Without Losing State (Priority: P2) + +**Goal**: Make compare part of the portfolio workflow by preserving the launch tenant and return state from the tenant registry / portfolio-triage path. + +**Independent Test**: Launch compare from the tenant registry with active triage filters, verify the launched tenant is prefilled, and verify the return path restores the prior registry state. + +### Tests for User Story 3 + +- [ ] T021 [P] [US3] Add feature coverage for compare launch and return continuity from the tenant registry / portfolio-triage path in `apps/platform/tests/Feature/PortfolioCompare/CrossTenantCompareLaunchContextTest.php`. +- [ ] T022 [P] [US3] Extend authorization coverage so launch actions only appear or resolve when the current actor is entitled to the launched tenant and the compare surface in `apps/platform/tests/Feature/PortfolioCompare/CrossTenantCompareAuthorizationTest.php`. 
+ +### Implementation for User Story 3 + +- [ ] T023 [US3] Add a bounded launch action from `apps/platform/app/Filament/Resources/TenantResource.php` or `apps/platform/app/Filament/Resources/TenantResource/Pages/ListTenants.php` that opens the canonical compare page with the current tenant prefilled as the `target tenant`. +- [ ] T024 [US3] Preserve and restore portfolio-triage return state using the existing navigation-context pattern rather than a page-local custom token format. + +**Checkpoint**: User Story 3 is independently functional when compare can be launched from portfolio context and the operator can return without losing triage filters. + +--- + +## Phase 6: Polish & Cross-Cutting Concerns + +**Purpose**: Finish narrow validation and reviewer close-out without widening scope. + +- [ ] T025 [P] Run the focused unit validation commands for `apps/platform/tests/Unit/Support/PortfolioCompare/CrossTenantComparePreviewBuilderTest.php` and `apps/platform/tests/Unit/Support/PortfolioCompare/CrossTenantPromotionPreflightTest.php`. +- [ ] T026 [P] Run the focused feature validation commands for `apps/platform/tests/Feature/PortfolioCompare/CrossTenantComparePageTest.php`, `apps/platform/tests/Feature/PortfolioCompare/CrossTenantCompareAuthorizationTest.php`, `apps/platform/tests/Feature/PortfolioCompare/CrossTenantPromotionPreflightAuditTest.php`, and `apps/platform/tests/Feature/PortfolioCompare/CrossTenantCompareLaunchContextTest.php`. +- [ ] T027 Run dirty-only formatting for touched platform files with `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail bin pint --dirty --format agent`. +- [ ] T028 [P] Add or update the checklist/reviewer guard confirming that this slice introduces no new asset registration and no globally searchable resource. 
+- [ ] T029 Record TEST-GOV-001 close-out and any `document-in-feature` or `follow-up-spec` deferrals for actual execution, mapping automation, or multi-provider compare in the active feature PR or implementation notes. + +--- + +## Dependencies & Execution Order + +### Phase Dependencies + +- **Phase 1 (Setup)**: no dependencies; start immediately. +- **Phase 2 (Foundational)**: depends on Phase 1 and blocks all user stories. +- **Phase 3 (US1)**: depends on Phase 2 and establishes the canonical compare truth. +- **Phase 4 (US2)**: depends on Phase 2 and should ship with US1 because compare without readiness reasoning leaves promotion language vague. +- **Phase 5 (US3)**: depends on Phase 2 and is safest after US1 because the canonical compare page must exist before launch continuity can target it. +- **Phase 6 (Polish)**: depends on all desired user stories being complete. + +### User Story Dependencies + +- **US1 (P1)**: independently testable after Phase 2 and forms the MVP decision surface. +- **US2 (P1)**: independently testable after Phase 2 and should ship with US1 for a complete P1 slice. +- **US3 (P2)**: independently testable after Phase 2 and improves portfolio workflow continuity once the canonical page exists. + +### Within Each User Story + +- Write the listed Pest coverage first and make it fail for the intended behavior gap. +- Settle the shared preview/preflight service contract before adding or widening page wiring. +- Re-run the narrowest affected validation command after each story checkpoint before moving to the next story. + +--- + +## Parallel Execution Examples + +### User Story 1 + +- T009, T010, and T011 can run in parallel before runtime edits begin. +- After the preview contract settles, T012 and T013 can proceed in parallel because page wiring and compare-service reuse touch different seams; T014 should follow both. 
+ +### User Story 2 + +- T015, T016, and T017 can run in parallel because they cover separate unit, page, and audit concerns. +- After T018 settles the action shape, T019 and T020 can proceed in parallel because UI rendering and audit metadata touch different seams. + +### User Story 3 + +- T021 and T022 can run in parallel before implementation starts. +- T023 should land before T024 so return-state handling can target the final launch route. + +--- + +## Implementation Strategy + +### Suggested MVP Scope + +- MVP = **US1 + US2 together**. The feature is only product-complete when the operator can compare two tenants and immediately ask whether that comparison is promotion-ready. + +### Incremental Delivery + +1. Complete Phase 1 and Phase 2. +2. Deliver US1 and US2 together. +3. Add US3 launch and return continuity. +4. Finish with narrow validation and formatting in Phase 6. + +### Team Strategy + +1. Finish the preview/preflight contracts together before splitting page work. +2. Parallelize unit and feature test authoring inside each story first. +3. Serialize merges around the canonical compare page and shared `PortfolioCompare` service namespace so the workflow language stays coherent. 
diff --git a/specs/248-private-ai-policy-foundation/checklists/requirements.md b/specs/248-private-ai-policy-foundation/checklists/requirements.md new file mode 100644 index 00000000..6e0f025f --- /dev/null +++ b/specs/248-private-ai-policy-foundation/checklists/requirements.md @@ -0,0 +1,57 @@ +# Specification Quality Checklist: Private AI Execution & Policy Foundation + +**Purpose**: Validate full preparation-package completeness and implementation readiness before the feature moves into the implementation loop +**Created**: 2026-04-27 +**Feature**: [spec.md](../spec.md) + +## Content Quality + +- [x] Business value and operator outcomes stay explicit +- [x] The first slice is bounded to one governed decision boundary, two approved internal-only use cases, one workspace AI policy section, and one reused operational control +- [x] Runtime-governance sections are present for an implementation-ready package, not treated as docs-only +- [x] All mandatory sections are completed + +## Requirement Completeness + +- [x] No `[NEEDS CLARIFICATION]` markers remain +- [x] Requirements are testable and unambiguous +- [x] Acceptance scenarios are defined for workspace policy, governed allow-or-block decisions, and central pause/resume handling +- [x] Edge cases are identified, including missing workspace context, unregistered use cases, blocked data classes, and active `ai.execution` control +- [x] Scope is clearly bounded away from customer-facing AI, external public-provider execution, queue or `OperationRun` work, and prompt or result persistence +- [x] Dependencies, assumptions, risks, and follow-up candidates are identified + +## Feature Readiness + +- [x] The first slice is small enough for a bounded implementation loop +- [x] Concrete repo surfaces are named for workspace settings, system ops controls, audit reuse, and the new in-process AI support namespace +- [x] Foundational work stays preparation-only and does not imply model runtime, customer UI, or a new AI table 
or result store +- [x] The tasks are ordered, testable, and grouped by user story +- [x] No unresolved product question blocks `/speckit.implement` once artifact analysis passes + +## Governance Readiness + +- [x] Workspace-owned AI policy truth is explicitly kept in existing settings persistence with no new AI table or result ledger +- [x] The approved-use-case catalog remains locked to two internal-only consumers and keeps provider vocabulary vendor-neutral +- [x] The package explicitly forbids customer-facing AI, external public-provider execution, and queue or `OperationRun` semantics in v1 +- [x] Existing workspace and platform authorization paths remain authoritative, with confirmation-protected `Pause AI execution` and `Resume AI execution` as the only destructive-like mutations in scope +- [x] Livewire v4 and Filament v5 compliance, unchanged provider registration in `bootstrap/providers.php`, no new global-search resource, and no asset-strategy changes are explicit in the package + +## Test Governance Review + +- [x] Lane fit stays in focused unit plus feature validation with one architecture guard only +- [x] Fixture and helper growth stays local to AI support, workspace settings, operational controls, and guard coverage +- [x] No browser, heavy-governance, queue, or provider-emulator family is introduced implicitly +- [x] Minimal validation commands are explicit in the plan and quickstart +- [x] The active feature PR close-out entry remains `Guardrail` + +## Review Outcome + +- [x] Review outcome class: `keep` +- [x] Workflow outcome: `keep` +- [x] Next command readiness: `/speckit.implement` after artifact analysis is clear + +## Notes + +- This checklist validates the preparation package only: `spec.md`, `plan.md`, supporting artifacts, and `tasks.md`. It does not claim that application code or an AI execution runtime already exists. 
+- The active slice stops before customer-facing AI, external-public provider execution, queue or `OperationRun` orchestration, prompt or result persistence, and any broader provider marketplace or budgeting work. +- Provider registration remains unchanged in `bootstrap/providers.php`, no new global-search resource is introduced, and no new asset strategy is needed for this package. \ No newline at end of file diff --git a/specs/248-private-ai-policy-foundation/contracts/private-ai-governance.openapi.yaml b/specs/248-private-ai-policy-foundation/contracts/private-ai-governance.openapi.yaml new file mode 100644 index 00000000..80797c7c --- /dev/null +++ b/specs/248-private-ai-policy-foundation/contracts/private-ai-governance.openapi.yaml @@ -0,0 +1,277 @@ +openapi: 3.0.3 +info: + title: TenantPilot AI Governance Foundation (Conceptual) + version: 0.1.0 + description: | + Conceptual contract for the existing workspace settings page, the existing + system operational-controls page, and the in-process governed AI decision + schema planned by Spec 248. + + NOTE: The settings and controls actions are implemented as existing Filament + (Livewire) pages/actions. No new customer-facing AI route or external + provider execution endpoint is introduced in v1. +servers: + - url: / +paths: + /admin/settings/workspace: + get: + summary: View workspace settings page + description: | + Existing singleton workspace settings route. + The AI policy section is planned to render on this page without adding a + second AI admin surface. + responses: + '200': + description: Workspace settings page rendered + content: + text/html: + schema: + type: string + '404': + description: Not found (wrong workspace or non-member) + '403': + description: Forbidden (member without view capability) + + /admin/settings/workspace/ai-policy: + post: + summary: Save workspace AI policy + description: | + Logical action on the existing Filament workspace settings page. 
+ Non-members or wrong-workspace actors receive 404 semantics before any + policy detail is revealed. Members without + `workspace_settings.manage` receive 403 on mutation. + requestBody: + required: true + content: + application/json: + schema: + type: object + required: [policy_mode] + properties: + policy_mode: + $ref: '#/components/schemas/WorkspaceAiPolicyMode' + responses: + '204': + description: Policy saved + '403': + description: Forbidden (member lacks manage capability) + '404': + description: Not found (wrong workspace or non-member) + + /system/ops/controls: + get: + summary: View system operational controls page + description: | + Existing system control-center route. The AI execution control is added + here rather than on a new AI console. Wrong-plane or non-platform + actors keep deny-as-not-found semantics before any system control detail + is revealed. + responses: + '200': + description: Controls page rendered + content: + text/html: + schema: + type: string + '404': + description: Not found (wrong plane or non-platform actor) + '403': + description: Forbidden (platform actor lacks required system capability) + + /system/ops/controls/ai.execution/pause: + post: + summary: Pause AI execution globally + description: | + Logical control action on the existing system controls page. + Wrong-plane or non-platform actors receive 404 semantics before any + control detail is revealed. + Must require confirmation in the UI and enforce + `platform.access_system_panel` plus `platform.ops.controls.manage` + server-side. Spec 248 keeps `ai.execution` global-only in v1. 
+ requestBody: + required: true + content: + application/json: + schema: + type: object + required: [reason_text] + properties: + reason_text: + type: string + expires_at: + type: string + format: date-time + nullable: true + responses: + '204': + description: Control activated + '404': + description: Not found (wrong plane or non-platform actor) + '403': + description: Forbidden (platform actor lacks required control capability) + + /system/ops/controls/ai.execution/resume: + post: + summary: Resume AI execution globally + description: | + Logical control action on the existing system controls page. + Wrong-plane or non-platform actors receive 404 semantics before any + control detail is revealed. + Removes an active `ai.execution` pause using the existing control-center + confirmation and audit flow. Spec 248 keeps `ai.execution` + global-only in v1. + responses: + '204': + description: Control resumed + '404': + description: Not found (wrong plane or non-platform actor) + '403': + description: Forbidden (platform actor lacks required control capability) + +components: + schemas: + WorkspaceAiPolicyMode: + type: string + enum: [disabled, private_only] + + ProviderClass: + type: string + enum: [local_private, external_public] + + AiDataClassification: + type: string + enum: + - product_knowledge + - operational_metadata + - redacted_support_summary + - personal_data + - customer_confidential + - raw_provider_payload + + ApprovedAiUseCaseKey: + type: string + enum: + - product_knowledge.answer_draft + - support_diagnostics.summary_draft + + GovernedAiExecutionRequest: + type: object + description: | + In-process service contract, not a public HTTP endpoint in v1. + This is the preflight envelope evaluated before any provider resolution + or model execution is attempted. The host surface must already have + resolved authorization and scope entitlement before this request is + constructed. 
+ required: + - workspace_id + - actor_type + - actor_id + - use_case_key + - requested_provider_class + - data_classifications + - source_family + properties: + workspace_id: + type: integer + tenant_id: + type: integer + nullable: true + actor_type: + type: string + actor_id: + type: integer + use_case_key: + $ref: '#/components/schemas/ApprovedAiUseCaseKey' + requested_provider_class: + $ref: '#/components/schemas/ProviderClass' + data_classifications: + type: array + items: + $ref: '#/components/schemas/AiDataClassification' + source_family: + type: string + caller_surface: + type: string + nullable: true + context_fingerprint: + type: string + nullable: true + + GovernedAiExecutionDecision: + type: object + required: + - outcome + - reason_code + - workspace_ai_policy_mode + - use_case_key + - requested_provider_class + - data_classifications + - source_family + properties: + outcome: + type: string + enum: [allowed, blocked] + reason_code: + type: string + workspace_ai_policy_mode: + $ref: '#/components/schemas/WorkspaceAiPolicyMode' + matched_operational_control_scope: + type: string + enum: [global] + nullable: true + use_case_key: + $ref: '#/components/schemas/ApprovedAiUseCaseKey' + requested_provider_class: + $ref: '#/components/schemas/ProviderClass' + data_classifications: + type: array + items: + $ref: '#/components/schemas/AiDataClassification' + source_family: + type: string + audit_action: + type: string + audit_metadata: + $ref: '#/components/schemas/AiDecisionAuditMetadata' + + AiDecisionAuditMetadata: + type: object + required: + - use_case_key + - decision_outcome + - decision_reason + - workspace_ai_policy_mode + - requested_provider_class + - data_classifications + - source_family + - workspace_id + properties: + use_case_key: + $ref: '#/components/schemas/ApprovedAiUseCaseKey' + decision_outcome: + type: string + enum: [allowed, blocked] + decision_reason: + type: string + workspace_ai_policy_mode: + $ref: 
'#/components/schemas/WorkspaceAiPolicyMode' + requested_provider_class: + $ref: '#/components/schemas/ProviderClass' + data_classifications: + type: array + items: + $ref: '#/components/schemas/AiDataClassification' + source_family: + type: string + workspace_id: + type: integer + tenant_id: + type: integer + nullable: true + context_fingerprint: + type: string + nullable: true + matched_operational_control_scope: + type: string + enum: [global] + nullable: true \ No newline at end of file diff --git a/specs/248-private-ai-policy-foundation/data-model.md b/specs/248-private-ai-policy-foundation/data-model.md new file mode 100644 index 00000000..a304eb72 --- /dev/null +++ b/specs/248-private-ai-policy-foundation/data-model.md @@ -0,0 +1,209 @@ +# Data Model — Private AI Execution & Policy Foundation + +**Spec**: [spec.md](spec.md) + +No new persistent tables or AI artifact stores are required for v1. The feature reuses existing workspace settings, operational controls, and audit logs. New AI-specific structures are code-owned or request-scoped. + +## Persisted Truth Reused + +### Workspace AI Policy (`workspace_settings` carrier) + +**Purpose**: Workspace-owned policy truth that determines whether AI is disabled entirely or limited to approved private-only use cases. 
+ +**Persisted carrier**: existing `workspace_settings` row via `WorkspaceSetting` + +**Planned definition**: +- `domain`: `ai` +- `key`: `policy_mode` +- `type`: `string` +- `system_default`: `disabled` +- `allowed values`: `disabled`, `private_only` +- `scope`: workspace only; no tenant override in v1 + +**Validation rules**: +- required +- string +- `in:disabled,private_only` + +**Authorization**: +- view: existing `workspace_settings.view` +- mutation: existing `workspace_settings.manage` + +**Audit strategy**: +- reuse `workspace_setting.updated` and `workspace_setting.reset` +- include AI-specific metadata in the existing workspace-settings audit context + +**State transitions**: +- `disabled` -> `private_only` +- `private_only` -> `disabled` + +### AI Execution Control (`operational_control_activations` carrier) + +**Purpose**: Platform-owned runtime stop for new AI execution attempts. + +**Persisted carrier**: existing `OperationalControlActivation` + +**Planned definition**: +- `control_key`: `ai.execution` +- `label`: `AI execution` +- `supported_scopes`: `global` +- `affected_surfaces`: governed AI decision callers only + +**Behavior**: +- a matching active control blocks new AI execution decisions before provider resolution +- global pause is the required v1 incident path +- workspace-specific pause or tenant-specific pause is out of scope for v1 and remains a follow-up concern if future incident handling genuinely requires it + +**State transitions**: +- `enabled` -> `paused` +- `paused` -> `enabled` + +### AI Decision Audit (`audit_logs` carrier) + +**Purpose**: Stable record of governed AI allow/block evaluations without storing raw prompt or output content. + +**Persisted carrier**: existing `audit_logs` rows through `WorkspaceAuditLogger` / `AuditRecorder` + +**Planned action strategy**: +- reuse existing workspace-setting actions for policy mutation +- add one bounded AI decision action ID, e.g. 
`ai_execution.decision_evaluated`, for governed decision evaluations + +**Planned metadata**: +- `use_case_key` +- `decision_outcome` (`allowed` or `blocked`) +- `decision_reason` +- `workspace_ai_policy_mode` +- `requested_provider_class` +- `data_classifications` +- `source_family` +- `workspace_id` +- optional `tenant_id` +- optional `context_fingerprint` +- optional `matched_operational_control_scope` + +**Explicit exclusions**: +- raw prompt text +- raw source payloads +- raw provider payloads +- full model output text + +## Code-Owned Truth + +### Approved AI Use Case Definition + +**Purpose**: Code-owned allowlist entry that defines one approved AI purpose and its trust constraints. + +**Fields**: +- `key` +- `future_consumer` +- `visibility` +- `allowed_provider_classes` +- `allowed_data_classifications` +- `source_family` +- `tenant_context_permitted` + +**v1 catalog is locked to exactly two entries**: + +| Key | Future Consumer | Visibility | Allowed Provider Classes | Allowed Data Classifications | Source Family | Tenant Context Permitted | +|---|---|---|---|---|---|---| +| `product_knowledge.answer_draft` | `ContextualHelpResolver` and related code-owned knowledge sources | `internal_only_draft` | `local_private` | `product_knowledge`, `operational_metadata` | `product_knowledge` | no | +| `support_diagnostics.summary_draft` | redacted summary derived from `SupportDiagnosticBundleBuilder` | `internal_only_draft` | `local_private` | `redacted_support_summary` | `support_diagnostics` | yes | + +**Validation rules**: +- key must be registered in the catalog +- no third use case may appear in v1 without a spec update +- `external_public` is never allowed for these entries in v1 + +### Provider Class + +**Purpose**: Vendor-neutral trust boundary for AI routing decisions. 
+ +**Allowed values**: +- `local_private` +- `external_public` + +**Behavioral consequence**: +- `external_public` is always blocked in v1 +- `local_private` may be allowed only when the use case and data classifications permit it + +### AI Data Classification + +**Purpose**: Declarative label that determines whether a data family may cross the governed AI boundary. + +**Values**: +- `product_knowledge` +- `operational_metadata` +- `redacted_support_summary` +- `personal_data` +- `customer_confidential` +- `raw_provider_payload` + +**Behavioral consequence**: +- `personal_data`, `customer_confidential`, and `raw_provider_payload` are always blocked in v1 +- allowed classifications vary by use case + +## Request-Scoped Contracts + +### AI Execution Request + +**Purpose**: In-process request envelope passed to the governed decision boundary before any provider resolution or model execution is attempted. + +**Fields**: +- `workspace_id` +- optional `tenant_id` +- `actor_type` +- `actor_id` +- `use_case_key` +- `requested_provider_class` +- `data_classifications` (list) +- `source_family` +- optional `caller_surface` +- optional `context_fingerprint` + +**Validation rules**: +- `workspace_id` is required +- `use_case_key` must be registered +- `requested_provider_class` must be declared by the registered use case +- every declared data classification must be allowed for the use case +- host-surface authorization must already be resolved before evaluation + +**Important v1 boundary**: +- the request is a preflight contract and does not need to carry raw prompt or payload text in v1 +- future runtime/provider work can extend around this envelope later, but not inside this spec + +### AI Execution Decision + +**Purpose**: Terminal allow/block result returned by the governed boundary. 
+ +**Fields**: +- `outcome` (`allowed` or `blocked`) +- `reason_code` +- `workspace_ai_policy_mode` +- `matched_operational_control_scope` (nullable) +- `use_case_key` +- `requested_provider_class` +- `data_classifications` +- `source_family` +- `audit_action` +- `audit_metadata` + +**Behavioral consequence**: +- `blocked`: provider resolution must not occur +- `allowed`: returns an approved handoff envelope only; v1 still does not execute a provider call or create a persisted result + +## State Transitions Summary + +### Workspace AI Policy + +- `disabled` <-> `private_only` + +### Operational Control + +- `enabled` <-> `paused` + +### AI Execution Decision + +- `evaluating` -> `allowed` +- `evaluating` -> `blocked` + +There is no queued, running, retrying, completed, or persisted-result lifecycle in v1. \ No newline at end of file diff --git a/specs/248-private-ai-policy-foundation/plan.md b/specs/248-private-ai-policy-foundation/plan.md new file mode 100644 index 00000000..bbfa3df3 --- /dev/null +++ b/specs/248-private-ai-policy-foundation/plan.md @@ -0,0 +1,282 @@ +# Implementation Plan: Private AI Execution & Policy Foundation + +**Branch**: `248-private-ai-policy-foundation` | **Date**: 2026-04-27 | **Spec**: [spec.md](spec.md) +**Input**: Feature specification from [spec.md](spec.md) + +**Note**: This template is filled in by the `/speckit.plan` command. See `.specify/scripts/` for helper scripts. + +## Summary + +Introduce a narrow AI governance foundation inside the existing Laravel monolith by reusing the workspace settings page for workspace-owned AI posture, reusing the system operational-controls page for a global `ai.execution` stop, and adding one in-process governed AI decision boundary plus a code-owned allowlist for exactly two internal-only use cases. Host-surface authorization remains a precondition; the AI boundary begins only after caller-side entitlement has already succeeded. 
The first slice is a preflight allow/block contract with audit-ready metadata, not a customer-facing AI workflow and not a model-provider runtime. + +Filament v5 remains on Livewire v4, no panel-provider registration changes are needed (`bootstrap/providers.php` remains the authoritative registration location), no new globally searchable AI resource is introduced, and no new panel-only asset bundle is expected for v1. + +## Technical Context + +**Language/Version**: PHP 8.4, Laravel 12 +**Primary Dependencies**: Filament v5, Livewire v4, Pest v4, existing Settings/Audit/OperationalControls support services +**Storage**: PostgreSQL via existing `workspace_settings`, `operational_control_activations`, and `audit_logs` persistence; no new AI tables +**Testing**: Pest v4 (PHPUnit 12 runner), narrow unit + feature + architecture-guard coverage +**Validation Lanes**: fast-feedback, confidence +**Target Platform**: Laravel monolith in `apps/platform` running via Sail; admin `/admin` and platform `/system` panels +**Project Type**: Web application (Laravel monolith with Filament panels) +**Performance Goals**: decision evaluation remains synchronous and DB-only in v1; no outbound provider call or queue handoff is required to compute allow/block +**Constraints**: no direct external provider calls with tenant data; no `OperationRun`; no result or prompt persistence; reuse existing workspace settings and ops controls; keep `/admin` and `/system` auth planes separate; no new asset bundle or second AI admin surface +**Scale/Scope**: 2 approved use cases, 2 policy modes, 2 provider classes, 6 data classifications, 2 existing operator surfaces, 1 new governed in-process decision seam + +## UI / Surface Guardrail Plan + +> **Fill for operator-facing or guardrail-relevant workflow changes. Docs-only or template-only work may use concise `N/A`. 
Copy the spec classification forward; do not rename or expand it here.** + +- **Guardrail scope**: changed surfaces on the existing workspace settings and system operational-controls pages +- **Native vs custom classification summary**: native Filament +- **Shared-family relevance**: workspace settings, operational safety controls, audit/status copy +- **State layers in scope**: page +- **Audience modes in scope**: operator-MSP, operator-platform, support-platform +- **Decision/diagnostic/raw hierarchy plan**: decision-first; diagnostics remain secondary on the control history path; no support-raw surface is introduced in v1 +- **Raw/support gating plan**: collapsed; raw prompt, source, and provider payload detail are excluded from the slice entirely +- **One-primary-action / duplicate-truth control**: workspace settings keep `Save` as the single primary mutation action; the system controls card keeps `Pause AI execution` / `Resume AI execution`; workspace policy truth and runtime-stop truth stay on separate surfaces +- **Handling modes by drift class or surface**: review-mandatory; any extra AI page, direct `Run AI` action, or evidence viewer is exception-required +- **Repository-signal treatment**: review-mandatory now, future hard-stop candidate once the no-direct-provider guard exists +- **Special surface test profiles**: standard-native-filament +- **Required tests or manual smoke**: functional-core, state-contract +- **Exception path and spread control**: none; v1 remains inside the two existing pages +- **Active feature PR close-out entry**: Guardrail + +## Shared Pattern & System Fit + +> **Fill when the feature touches notifications, status messaging, action links, header actions, dashboard signals/cards, navigation entry points, alerts, evidence/report viewers, or any other shared interaction family. Docs-only or template-only work may use concise `N/A`. 
Carry the same decision forward from the spec instead of renaming it here.** + +- **Cross-cutting feature marker**: yes +- **Systems touched**: `WorkspaceSettings`, `SettingsRegistry`, `SettingsResolver`, `SettingsWriter`, `Controls`, `OperationalControlCatalog`, `OperationalControlEvaluator`, `AuditActionId`, `AuditRecorder`, `WorkspaceAuditLogger`, `ContextualHelpResolver`, and `SupportDiagnosticBundleBuilder` +- **Shared abstractions reused**: existing workspace settings persistence + audit flow, existing operational-control evaluator/catalog, existing audit recorder/logger pipeline, existing product-knowledge resolver, and existing support-diagnostics bundle builder path +- **New abstraction introduced? why?**: one in-process governed AI decision boundary and one code-owned use-case catalog, because the current shared settings/ops/audit services do not own AI allow/block semantics +- **Why the existing abstraction was sufficient or insufficient**: settings, ops controls, and audit are already sufficient for persistence, emergency stop, and logging; they are insufficient for AI decision evaluation because the repo currently has no app-level AI seam at all +- **Bounded deviation / spread control**: none; future callers must depend on the new boundary rather than page-local AI helpers + +## OperationRun UX Impact + +> **Fill when the feature creates, queues, deduplicates, resumes, blocks, completes, or deep-links to an `OperationRun`. 
Docs-only or template-only work may use concise `N/A`.** + +- **Touches OperationRun start/completion/link UX?**: no +- **Central contract reused**: `N/A` +- **Delegated UX behaviors**: `N/A` +- **Surface-owned behavior kept local**: initiation remains on the existing settings and controls pages only; no queued start UX is introduced +- **Queued DB-notification policy**: `N/A` +- **Terminal notification path**: `N/A` +- **Exception path**: none + +## Provider Boundary & Portability Fit + +> **Fill when the feature touches shared provider/platform seams, identity scope, governed-subject taxonomy, compare strategy selection, provider connection descriptors, or operator vocabulary that may leak provider-specific semantics into platform-core truth. Docs-only or template-only work may use concise `N/A`.** + +- **Shared provider/platform boundary touched?**: yes +- **Provider-owned seams**: none in v1; no vendor adapters, credentials, or model-selection UI are introduced +- **Platform-core seams**: AI use-case key, provider class, data classification, workspace AI policy, and governed decision contract +- **Neutral platform terms / contracts preserved**: `AI use case`, `provider class`, `data classification`, `source family`, `workspace AI policy`, and `execution decision` +- **Retained provider-specific semantics and why**: none; `local_private` and `external_public` are trust classes, not vendor names +- **Bounded extraction or follow-up path**: follow-up-spec for provider integration and usage governance; do not widen inside v1 + +## Constitution Check + +*GATE: Must pass before Phase 0 research. Re-check after Phase 1 design.* + +- Inventory-first / snapshot truth: N/A. This slice adds no inventory or backup truth and does not change the Intune source-of-truth model. +- Read/write separation: PASS. Workspace policy writes stay on the existing settings flow, and pause/resume actions stay on the existing controls flow with confirmation + audit. 
+- Graph contract path: PASS. No Microsoft Graph contract or outbound provider call is introduced. +- Deterministic capabilities: PASS. Reuses `Capabilities::WORKSPACE_SETTINGS_VIEW`, `Capabilities::WORKSPACE_SETTINGS_MANAGE`, `PlatformCapabilities::ACCESS_SYSTEM_PANEL`, and `PlatformCapabilities::OPS_CONTROLS_MANAGE`; no raw capability strings are planned. +- Workspace isolation + tenant isolation: PASS. AI decision requests require a host surface that already resolved workspace context and optional tenant entitlement; the boundary does not become a cross-tenant shortcut. +- RBAC-UX plane separation: PASS. `/admin/settings/workspace` stays tenant-plane/workspace-scoped, `/system/ops/controls` stays platform-scoped, and wrong-plane access keeps the existing deny-as-not-found (404) semantics. +- Destructive confirmation standard: PASS. `Pause AI execution` and `Resume AI execution` remain confirmation-protected actions on the existing controls page. +- Global search safety: PASS / N/A. No new Resource, Global Search entry, or tenantless AI list is introduced. +- OperationRun and Ops-UX: PASS by non-use. This slice creates no `OperationRun`, queue, notification lifecycle, or Monitoring link. +- Data minimization: PASS. Audit stores decision metadata only; raw prompt, source payload, and output text remain excluded. +- Test governance (TEST-GOV-001): PASS. Proof stays in narrow unit + feature + architecture-guard coverage; no browser or heavy-governance family is required by default. +- Proportionality / no premature abstraction: PASS with bounded exception. One governed AI boundary and one bounded use-case catalog are justified by two concrete future consumers and safety needs; no provider marketplace, queue pipeline, or persistence layer is introduced. +- Persisted truth (PERSIST-001): PASS. Workspace AI policy reuses existing workspace settings; no AI table, cache, result store, or prompt ledger is added. +- Behavioral state (STATE-001): PASS.
`disabled` and `private_only` directly change execution eligibility; provider classes and data classifications directly change allow/block behavior. +- Shared pattern first / UI semantics / Filament native UI: PASS. Existing settings, controls, and audit primitives are reused; no custom AI shell, second status framework, or duplicate truth surface is introduced. +- Provider boundary (PROV-001): PASS. Shared terms stay vendor-neutral (`provider class`, `data classification`, `AI use case`), and direct provider-specific seams are deferred. +- Filament/Laravel panel safety: PASS. Livewire v4 remains the Filament v5 runtime, `SystemPanelProvider` stays on the existing `/system` panel, and provider registration in `bootstrap/providers.php` remains unchanged. +- Asset strategy: PASS. No new panel-only or shared asset registration is planned; deployment keeps the normal `cd apps/platform && php artisan filament:assets` step if implementation later registers assets. + +**Gate evaluation**: PASS (no constitution violation is required to deliver the narrow v1 slice). + +- The governed boundary is an in-process decision seam only; it does not create provider execution, queueing, or result persistence. +- Workspace policy truth stays inside the existing settings stack and reuses existing audit behavior. +- The system kill switch reuses the existing operational-control evaluator and controls page rather than creating a second AI control surface. + +**Post-design re-check**: PASS (design artifacts: [research.md](research.md), [data-model.md](data-model.md), [quickstart.md](quickstart.md), [contracts/private-ai-governance.openapi.yaml](contracts/private-ai-governance.openapi.yaml)). + +## Test Governance Check + +> **Fill for any runtime-changing or test-affecting feature.
Docs-only or template-only work may state concise `N/A` or `none`.** + +- **Test purpose / classification by changed surface**: Unit for the catalog, request/decision contract, operational-control precedence, and audit metadata shaping; Feature for the workspace settings and system controls surfaces; Feature/Guard for the no-direct-provider invariant +- **Affected validation lanes**: fast-feedback, confidence +- **Why this lane mix is the narrowest sufficient proof**: unit coverage proves the decision matrix without Filament boot cost, feature coverage proves the two existing operator surfaces plus authorization/audit integration, and one architecture guard protects against local provider bypasses; browser and heavy-governance coverage add cost without proving new business truth +- **Narrowest proving command(s)**: + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Unit/Support/Ai/AiUseCaseCatalogTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Unit/Support/Ai/AiDecisionAuditMetadataTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Unit/Support/Ai/GovernedAiExecutionBoundaryTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/SettingsFoundation/WorkspaceAiPolicySettingsTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/SettingsFoundation/WorkspaceSettingsManageTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/SettingsFoundation/WorkspaceSettingsViewOnlyTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail 
artisan test --compact tests/Feature/SettingsFoundation/WorkspaceSettingsNonMemberNotFoundTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/SettingsFoundation/WorkspaceSettingsAuditTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/System/OpsControls/AiExecutionOperationalControlTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/System/OpsControls/OperationalControlManagementTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/OperationalControls/OperationalControlAuthorizationSemanticsTest.php` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/Guards/NoDirectAiProviderBypassTest.php` +- **Fixture / helper / factory / seed / context cost risks**: low-to-moderate; reuse existing workspace settings, membership, platform-user, and operational-control fixtures, but avoid browser harnesses, provider emulators, or seeded AI history +- **Expensive defaults or shared helper growth introduced?**: no; the AI boundary should accept simple value objects/arrays, and feature tests should avoid broad `WorkspaceSettingsManageTest.php` workflow setup unless an implementation change genuinely needs that depth +- **Heavy-family additions, promotions, or visibility changes**: none expected; do not promote this slice into browser or heavy-governance families by default +- **Surface-class relief / special coverage rule**: standard-native-filament relief for the two existing pages, plus one direct service-level rule that blocked requests produce no provider resolution +- **Closing validation and reviewer handoff**: rerun the twelve focused test 
commands above, verify that `ai.execution` uses the existing operational-control path, verify that workspace policy changes still reuse the existing settings authorization and audit behavior, and verify that no app-level AI provider client exists outside the governed boundary +- **Budget / baseline / trend follow-up**: none expected; if workspace settings coverage broadens into the existing heavy-governance family, document the lane cost in-feature rather than hiding it +- **Review-stop questions**: lane fit, breadth, hidden setup cost, architecture-guard coverage, accidental provider/runtime scope growth +- **Escalation path**: `document-in-feature` for contained lane drift; `reject-or-split` if implementation introduces browser/heavy-governance cost, queue semantics, or provider integration +- **Active feature PR close-out entry**: Guardrail +- **Why no dedicated follow-up spec is needed**: routine narrow test upkeep stays inside this feature; broader AI runtime and provider workflows are already deferred to follow-up candidates + +## Project Structure + +### Documentation (this feature) + +```text +specs/248-private-ai-policy-foundation/ +├── plan.md +├── research.md +├── data-model.md +├── quickstart.md +├── contracts/ +│ └── private-ai-governance.openapi.yaml +└── tasks.md # Created later by /speckit.tasks, not by this plan step +``` + +### Source Code (repository root) + +```text +apps/platform/ +├── app/ +│ ├── Filament/Pages/Settings/WorkspaceSettings.php +│ ├── Filament/System/Pages/Ops/Controls.php +│ ├── Providers/Filament/SystemPanelProvider.php +│ ├── Services/Audit/ +│ │ ├── AuditRecorder.php +│ │ └── WorkspaceAuditLogger.php +│ ├── Services/Settings/ +│ │ ├── SettingsResolver.php +│ │ └── SettingsWriter.php +│ ├── Support/Audit/AuditActionId.php +│ ├── Support/Auth/ +│ │ ├── Capabilities.php +│ │ └── PlatformCapabilities.php +│ ├── Support/OperationalControls/ +│ │ ├── OperationalControlCatalog.php +│ │ └── OperationalControlEvaluator.php +│ ├── 
Support/ProductKnowledge/ContextualHelpResolver.php +│ ├── Support/SupportDiagnostics/SupportDiagnosticBundleBuilder.php +│ └── Support/Ai/ # likely new narrow namespace if implementation proceeds +└── tests/ + ├── Feature/SettingsFoundation/ + ├── Feature/OperationalControls/ + ├── Feature/System/OpsControls/ + ├── Feature/Guards/ + ├── Unit/Support/OperationalControls/ + ├── Unit/Support/ProductKnowledge/ + └── Unit/Support/Ai/ +``` + +**Structure Decision**: Laravel monolith. Implementation stays entirely inside `apps/platform`, reusing existing settings, audit, and operational-control seams while adding only one narrow AI support namespace if code work later proceeds. + +## Complexity Tracking + +> **Fill when Constitution Check has violations that must be justified OR when BLOAT-001 is triggered by new persistence, abstractions, states, or semantic frameworks.** + +| Violation | Why Needed | Simpler Alternative Rejected Because | +|-----------|------------|-------------------------------------| +| BLOAT-001 — governed AI decision boundary | One central allow/block seam is the smallest safe place to enforce workspace policy, operational controls, provider class gating, and audit metadata before any future AI caller can reach a model | Per-surface AI helpers would duplicate policy/control/audit logic and create bypass risk across product knowledge and diagnostics | +| BLOAT-001 — code-owned AI use-case catalog | Two concrete future adopters need a single allowlist and stable vocabulary now | Free-form string keys spread across callers would drift and be difficult to guard or audit consistently | +| STATE-001 — AI policy / provider / data-classification families | These values directly change whether execution is allowed and what may cross the trust boundary | Vendor names or presentation-only labels would not be enforceable, portable, or sufficiently reviewable | + +## Proportionality Review + +> **Fill when the feature introduces a new enum/status family, 
DTO/presenter/envelope, persisted entity/table/artifact, interface/contract/registry/resolver, taxonomy/classification system, or cross-domain UI framework.** + +- **Current operator problem**: TenantPilot has no safe app-level AI seam today, so future AI work would otherwise begin as local provider calls and local prompt/policy logic that bypass workspace isolation, runtime controls, and auditability. +- **Existing structure is insufficient because**: the repo already has settings, operational controls, and audit infrastructure, but it has no place to classify AI use cases, provider trust classes, or data classifications, and no single decision service that every caller must use. +- **Narrowest correct implementation**: add one workspace setting (`ai.policy_mode`), one operational control key (`ai.execution`), one code-owned use-case catalog for exactly two internal-only consumers, one request/decision contract, and one audit metadata shape. Do not add provider adapters, queue semantics, result persistence, or customer-visible AI surfaces. +- **Ownership cost created**: maintain 2 use-case entries, 2 policy values, 2 provider classes, 6 data classifications, one bounded audit action/metadata shape, and one architecture guard. +- **Alternative intentionally rejected**: local AI helpers on each future surface and a broader multi-provider AI platform were both rejected because they either create safety drift or import speculative architecture before the first real runtime need exists. +- **Release truth**: current-release governance foundation and future-feature preflight seam; not a full AI execution product. + +## Phase 0 — Research (output: research.md) + +Research resolved the remaining implementation-shaping decisions: + +- Reuse `WorkspaceSettings` plus `SettingsRegistry` / `SettingsWriter` for workspace-owned AI policy truth. 
+- Reuse `OperationalControlCatalog` / `OperationalControlEvaluator` and the existing `Controls` page for `ai.execution` rather than creating a second AI control surface. +- Model v1 as a governed decision boundary, not a provider runtime, queue, or result store. +- Lock the first slice to two code-owned internal use cases tied to `ContextualHelpResolver` and the support-diagnostics bundle path. +- Reuse existing audit infrastructure and keep the AI audit family minimal. + +**Output**: [research.md](research.md) + +## Phase 1 — Design (outputs: data-model.md, contracts/, quickstart.md) + +Design artifacts capture the narrow implementation shape: + +- Existing persisted truth reused: `workspace_settings`, `operational_control_activations`, and `audit_logs`. +- New code-owned truth: AI policy mode, provider class, data classification, approved use-case definitions, and request/decision envelopes. +- Conceptual contracts cover the existing workspace settings page, the existing system controls page, and the in-process governed decision schema. +- Quickstart documents the intended slice order, validation commands, Filament/Livewire assumptions, and the no-new-assets posture. + +**Artifacts**: + +- [data-model.md](data-model.md) +- [contracts/private-ai-governance.openapi.yaml](contracts/private-ai-governance.openapi.yaml) +- [quickstart.md](quickstart.md) + +## Phase 2 — Planning (for tasks.md) + +Dependency-ordered implementation outline for the later `tasks.md` step: + +1. Extend the existing settings registry and workspace settings page with `ai.policy_mode` and plain-language explanation content, without broadening the singleton settings workflow. +2. Add `ai.execution` to the operational-control catalog and controls page, keeping pause/resume confirmation-protected and audit-backed. +3. Introduce a narrow `Support/Ai` namespace containing the use-case catalog, request/decision value objects, and the governed decision boundary only. +4. 
Reuse the existing audit pipeline for workspace policy mutations and add one bounded AI decision action/metadata shape for allow/block evaluations. +5. Name `ContextualHelpResolver` and `SupportDiagnosticBundleBuilder` as the first adopters, but do not ship customer-facing AI UI, model-provider runtime code, or direct caller wiring beyond what the boundary contract itself requires. +6. Add focused unit, feature, and architecture-guard tests while keeping browser and heavy-governance families out of scope by default. +7. Run focused tests and Pint after implementation; no asset build is expected unless implementation later registers Filament assets. + +## Post-Implementation Close-Out + +- **Implementation status**: Implemented and validated on 2026-04-27. +- **TEST-GOV-001 outcome**: PASS. Proof stayed in focused Pest `Unit` and `Feature` lanes plus one architecture guard, with no browser or heavy-governance suite expansion. +- **Executed validation summary**: + - AI boundary unit lane: 8 tests, 83 assertions passed. + - AI execution controls feature lane: 1 test, 34 assertions passed. + - Operational controls regression lane: 11 tests, 167 assertions passed. + - Workspace settings lane: 20 tests, 267 assertions passed. + - Platform authorization semantics lane: 6 tests, 26 assertions passed. + - No-direct-provider guard lane: 1 test, 1 assertion passed. + - Approved source-input lane: 2 tests, 30 assertions passed. + - Adjacent product-knowledge/support-diagnostics regression lane: 14 tests, 107 assertions passed. + - Final targeted feature validation rollup: 42 tests, 530 assertions passed. + - Formatting: `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail bin pint --dirty --format agent` passed. 
+- **Catalog lock and tenant-context declaration**: + - `product_knowledge.answer_draft`: `tenant_context_permitted = false` + - `support_diagnostics.summary_draft`: `tenant_context_permitted = true` + - Boundary coverage plus the approved source adapters preserved that split. +- **Browser smoke result**: PASS. + - `/admin/settings/workspace`: authenticated as a workspace manager, changed `Workspace AI policy` from the default effective disabled state to `Private only`, saved successfully, and confirmed the effective summary plus approved-use-case/provider-class copy updated on the real page. + - `/system/ops/controls`: authenticated as a platform operator, opened the `AI execution` card, paused execution with confirmation and reason text, confirmed the `Paused globally` state and success notification, then resumed execution and confirmed the enabled state returned. +- **Environment note**: the integrated browser carried a stale or poisoned `localhost` system-panel session during smoke work. The product routes themselves were healthy; the system-panel smoke path completed successfully on `127.0.0.1` to get a clean host-scoped browser session. This was an environment/browser-session workaround, not a feature bug. +- **Guardrail close-out**: no confirmed in-scope findings remained after the code, validation, browser smoke, and artifact analysis loop. No new provider runtime, queue, result persistence, or customer-facing AI surface was introduced. 
+- **Follow-up-spec deferrals retained**: + - public or external-provider execution + - result persistence, cache, or prompt/output history + - AI budgeting, credits, or cost controls + - queued AI execution or `OperationRun` semantics + - customer-facing AI workflows or approval flows diff --git a/specs/248-private-ai-policy-foundation/quickstart.md b/specs/248-private-ai-policy-foundation/quickstart.md new file mode 100644 index 00000000..b136b10e --- /dev/null +++ b/specs/248-private-ai-policy-foundation/quickstart.md @@ -0,0 +1,76 @@ +# Quickstart — Private AI Execution & Policy Foundation + +## Preconditions + +- Docker is running. +- `apps/platform` dependencies are installed. +- This slice stays inside the existing Laravel / Filament runtime and does not introduce a second AI service. + +## Intended Implementation Order + +1. Add `ai.policy_mode` to the existing settings registry and workspace settings page. +2. Add `ai.execution` to the existing operational-control catalog and controls page. +3. Add a narrow `app/Support/Ai/` namespace containing the use-case catalog, request/decision value objects, and the governed decision boundary only. +4. Reuse the existing audit pipeline for workspace policy mutation and AI decision logging. +5. Add the no-direct-provider architecture guard and the focused unit/feature tests. 
+ +## Targeted Validation Commands (after implementation) + +- `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Unit/Support/Ai/AiUseCaseCatalogTest.php` +- `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Unit/Support/Ai/AiDecisionAuditMetadataTest.php` +- `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Unit/Support/Ai/GovernedAiExecutionBoundaryTest.php` +- `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/SettingsFoundation/WorkspaceAiPolicySettingsTest.php` +- `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/SettingsFoundation/WorkspaceSettingsManageTest.php` +- `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/SettingsFoundation/WorkspaceSettingsViewOnlyTest.php` +- `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/SettingsFoundation/WorkspaceSettingsNonMemberNotFoundTest.php` +- `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/SettingsFoundation/WorkspaceSettingsAuditTest.php` +- `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/System/OpsControls/AiExecutionOperationalControlTest.php` +- `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/System/OpsControls/OperationalControlManagementTest.php` +- `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact 
tests/Feature/OperationalControls/OperationalControlAuthorizationSemanticsTest.php` +- `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact tests/Feature/Guards/NoDirectAiProviderBypassTest.php` +- `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail bin pint --dirty --format agent` + +## Manual Smoke (after implementation) + +1. Sign in to `/admin`, select a workspace, and open `/admin/settings/workspace`. +2. As a workspace manager, switch the AI policy between `Disabled` and `Private only` and confirm the page shows the allowed use cases, provider classes, and blocked data classes in plain language. +3. Sign in to `/system` as a platform operator with `platform.access_system_panel` and `platform.ops.controls.manage`, then open `/system/ops/controls`. +4. Pause `AI execution`, confirm the global reason/expiry flow, and verify that the control state is visible before resuming it. +5. Exercise the governed AI boundary through focused tests or a narrow internal stub caller only; no customer-facing AI route or UI is part of v1. + +## Implementation Outcome (2026-04-27) + +- `TEST-GOV-001`: PASS. +- Focused validation stayed in Pest `Unit` plus `Feature` lanes with one architecture guard only. +- Executed validation summary: + - AI boundary unit lane: 8 tests, 83 assertions passed. + - AI execution controls feature lane: 1 test, 34 assertions passed. + - Operational controls regression lane: 11 tests, 167 assertions passed. + - Workspace settings lane: 20 tests, 267 assertions passed. + - Platform authorization semantics lane: 6 tests, 26 assertions passed. + - No-direct-provider guard lane: 1 test, 1 assertion passed. + - Approved source-input lane: 2 tests, 30 assertions passed. + - Adjacent product-knowledge/support-diagnostics regression lane: 14 tests, 107 assertions passed. + - Final targeted feature validation rollup: 42 tests, 530 assertions passed. 
+ - Pint: `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail bin pint --dirty --format agent` passed. +- Catalog lock and tenant-context declaration: + - `product_knowledge.answer_draft`: `tenant_context_permitted = false` + - `support_diagnostics.summary_draft`: `tenant_context_permitted = true` +- Browser smoke completed: + 1. `/admin/settings/workspace`: saved `Workspace AI policy = Private only` and confirmed the effective summary updated on the real page. + 2. `/system/ops/controls`: paused and resumed `AI execution` through the confirmation flow and confirmed both state changes plus success notifications. +- Environment note: the integrated browser's `localhost` system-panel session became stale during smoke work, so the system-panel step completed on `127.0.0.1` with a fresh host-scoped session. Route health and product behavior were otherwise unchanged. +- Deferred to follow-up specs only: + - external-public or broader provider execution + - result persistence, caching, or prompt/output history + - budgeting, credits, or cost controls + - queued AI work or `OperationRun` semantics + - customer-facing AI surfaces or approval workflows + +## Notes + +- Filament v5 already runs on Livewire v4 in this repo. +- Panel providers remain registered through `bootstrap/providers.php`; this slice does not add or move providers. +- No new globally searchable AI resource is part of v1, so global search behavior stays unchanged. +- `Pause AI execution` and `Resume AI execution` are the only destructive-like actions in scope and must stay confirmation-protected. +- No new registered assets are expected. If implementation later registers a Filament asset anyway, deployment still needs the normal `cd apps/platform && php artisan filament:assets` step. 
\ No newline at end of file diff --git a/specs/248-private-ai-policy-foundation/research.md b/specs/248-private-ai-policy-foundation/research.md new file mode 100644 index 00000000..36df5d6f --- /dev/null +++ b/specs/248-private-ai-policy-foundation/research.md @@ -0,0 +1,142 @@ +# Research — Private AI Execution & Policy Foundation + +**Date**: 2026-04-27 +**Spec**: [spec.md](spec.md) + +This document resolves planning unknowns and records the repo-backed decisions that keep Spec 248 narrow. + +## Decision 1 — Reuse workspace settings for AI policy truth + +**Decision**: Store workspace AI posture as a workspace setting at `ai.policy_mode` on the existing [WorkspaceSettings](../../apps/platform/app/Filament/Pages/Settings/WorkspaceSettings.php) page, with validation registered through [SettingsRegistry](../../apps/platform/app/Support/Settings/SettingsRegistry.php) and persistence/audit handled by [SettingsWriter](../../apps/platform/app/Services/Settings/SettingsWriter.php). + +**Rationale**: +- The repo already has a singleton workspace settings surface, a central settings registry, and an audited writer path. +- Reusing that stack preserves workspace ownership and avoids inventing a second admin surface or a new AI persistence table. +- The existing workspace settings capabilities already separate view and manage permissions. + +**Evidence**: +- [WorkspaceSettings](../../apps/platform/app/Filament/Pages/Settings/WorkspaceSettings.php) already owns the `/admin/settings/workspace` singleton route and uses `Capabilities::WORKSPACE_SETTINGS_VIEW` / `Capabilities::WORKSPACE_SETTINGS_MANAGE`. +- [SettingsRegistry](../../apps/platform/app/Support/Settings/SettingsRegistry.php) is the canonical place for setting definitions and validation. +- [SettingsWriter](../../apps/platform/app/Services/Settings/SettingsWriter.php) already persists workspace settings and records `workspace_setting.updated` / `workspace_setting.reset` audit events. 
+ +**Alternatives considered**: +- Add a dedicated `workspace_ai_policies` table. + - Rejected: new persisted truth is unnecessary for a single workspace-owned mode and would violate the narrow v1 scope. +- Hide AI posture in environment config or feature flags. + - Rejected: not workspace-owned, not operator-auditable, and not compatible with the product requirement for explicit workspace policy. + +## Decision 2 — Reuse the existing operational-controls path for the runtime stop + +**Decision**: Add `ai.execution` to [OperationalControlCatalog](../../apps/platform/app/Support/OperationalControls/OperationalControlCatalog.php), evaluate it through [OperationalControlEvaluator](../../apps/platform/app/Support/OperationalControls/OperationalControlEvaluator.php), and expose it only on the existing [Controls](../../apps/platform/app/Filament/System/Pages/Ops/Controls.php) page under the current `/system` panel. + +**Rationale**: +- The repo already has a platform-only control-center pattern with confirmation, scope previews, and audit logging. +- Reusing it avoids a second AI-specific emergency-stop mechanism or a new system AI console. +- The platform plane auth guard and capability checks are already in place for this page. + +**Evidence**: +- [Controls](../../apps/platform/app/Filament/System/Pages/Ops/Controls.php) already owns confirmation-protected pause/resume actions and history for operational controls. +- [OperationalControlCatalog](../../apps/platform/app/Support/OperationalControls/OperationalControlCatalog.php) is the existing source of control keys, labels, and supported scopes. +- [OperationalControlEvaluator](../../apps/platform/app/Support/OperationalControls/OperationalControlEvaluator.php) is the existing runtime lookup path. 
+- [SystemPanelProvider](../../apps/platform/app/Providers/Filament/SystemPanelProvider.php) and [PlatformCapabilities](../../apps/platform/app/Support/Auth/PlatformCapabilities.php) already enforce the `/system` plane and `platform.ops.controls.manage` capability. + +**Alternatives considered**: +- Add an AI-specific console or admin page under `/system`. + - Rejected: duplicates the existing ops-controls pattern and broadens v1 without adding new product truth. +- Use a deploy-time environment flag as the emergency stop. + - Rejected: not operator-owned, not auditable, and not aligned with the current control-center workflow. + +## Decision 3 — Treat v1 as a governed decision boundary, not an AI provider runtime + +**Decision**: The new AI seam should be an in-process governed decision boundary that accepts a registered use-case request and returns an allow/block decision plus audit-ready metadata. It must not include provider adapters, outbound model execution, queue orchestration, or result persistence in this slice. + +**Rationale**: +- The spec explicitly avoids direct external provider calls with tenant data, `OperationRun` semantics, result persistence, and a broad marketplace. +- The repo has no existing AI execution layer, so the smallest safe first step is the allow/block contract itself. +- A decision-first seam is enough to stop local provider calls from appearing feature by feature. + +**Evidence**: +- There is no app-level AI support namespace in `apps/platform/app/**` today. +- Existing shared seams cover settings, ops controls, audit, product knowledge, and support diagnostics, but none of them own AI allow/block semantics. + +**Alternatives considered**: +- Add feature-local AI helpers in product knowledge and diagnostics first. + - Rejected: duplicates policy, provider-class, and data-classification rules across surfaces. +- Build a full provider abstraction layer now. 
+ - Rejected: speculative architecture before the first concrete provider runtime is even in scope. + +## Decision 4 — Lock v1 to two approved internal-only use cases and derive them from existing seams + +**Decision**: Keep the v1 catalog locked to exactly two use cases: + +- `product_knowledge.answer_draft`, anchored to [ContextualHelpResolver](../../apps/platform/app/Support/ProductKnowledge/ContextualHelpResolver.php) and its code-owned knowledge source +- `support_diagnostics.summary_draft`, anchored to [SupportDiagnosticBundleBuilder](../../apps/platform/app/Support/SupportDiagnostics/SupportDiagnosticBundleBuilder.php) as a derived summary path + +**Rationale**: +- These are the two named likely adopters from the spec and both already exist as internal-only seams. +- Limiting the catalog to two concrete consumers satisfies ABSTR-001 while still proving the shared decision vocabulary is reusable. +- Open-ended catalog growth would silently widen scope into a general AI platform. + +**Evidence**: +- [ContextualHelpResolver](../../apps/platform/app/Support/ProductKnowledge/ContextualHelpResolver.php) already exposes `knowledgeSource()` for code-owned product knowledge. +- [SupportDiagnosticBundleBuilder](../../apps/platform/app/Support/SupportDiagnostics/SupportDiagnosticBundleBuilder.php) already produces the diagnostics data family used from the tenant dashboard and the tenantless operation viewer. + +**Alternatives considered**: +- Allow any caller to register arbitrary AI use cases at runtime. + - Rejected: creates speculative platform scope and weakens governance. +- Ship only one adopter in v1. + - Rejected: the safety justification for the central catalog is stronger with the two real future consumers already identified by the spec. 
+ +## Decision 5 — Support diagnostics input must be a derived redacted summary, not the raw bundle + +**Decision**: `support_diagnostics.summary_draft` should consume a derived redacted summary of the support-diagnostics bundle, not the raw `sections` array or the raw provider/context payloads already present in the bundle structure. + +**Rationale**: +- The current support-diagnostics bundle is broad, structured, and designed for operator inspection, not AI transport. +- Passing the raw bundle would violate the explicit v1 ban on raw provider payloads, customer-confidential data, and raw evidence excerpts. +- A derived summary keeps the AI boundary honest: if the summary cannot be produced safely, the use case should stay blocked. + +**Evidence**: +- [SupportDiagnosticBundleBuilder](../../apps/platform/app/Support/SupportDiagnostics/SupportDiagnosticBundleBuilder.php) currently produces a rich `sections` structure plus contextual help and redaction notes, not a purpose-built AI summary. + +**Alternatives considered**: +- Feed the full support-diagnostics bundle into AI with field-level filtering. + - Rejected: still too broad for v1, easier to get wrong, and unnecessary for the first governed foundation slice. + +## Decision 6 — Reuse the existing audit pipeline and keep the AI audit family minimal + +**Decision**: Reuse [WorkspaceAuditLogger](../../apps/platform/app/Services/Audit/WorkspaceAuditLogger.php) and the underlying [AuditActionId](../../apps/platform/app/Support/Audit/AuditActionId.php) / `AuditRecorder` path. Keep workspace policy mutations on the existing `workspace_setting.updated` / `workspace_setting.reset` actions and add one bounded AI decision action ID for governed decision evaluations with structured metadata only. + +**Rationale**: +- Policy changes already flow through the workspace settings audit path and should not create a second mutation pattern. 
+- AI decision evaluations need a stable audit record, but the narrowest shape is one action ID plus metadata, not a full AI run ledger. +- The spec explicitly bans raw prompt, raw source payload, and output persistence. + +**Evidence**: +- [SettingsWriter](../../apps/platform/app/Services/Settings/SettingsWriter.php) already logs workspace-setting updates and resets. +- [WorkspaceAuditLogger](../../apps/platform/app/Services/Audit/WorkspaceAuditLogger.php) already records workspace-scoped and tenant-scoped audit entries. +- [AuditActionId](../../apps/platform/app/Support/Audit/AuditActionId.php) is the canonical action registry. + +**Alternatives considered**: +- Add a dedicated AI audit table or prompt history store. + - Rejected: violates the v1 no-new-persistence constraint and imports a second source of truth. +- Split AI decisions into many action IDs (`allowed`, `blocked`, `control_blocked`, etc.). + - Rejected for v1: one bounded decision action plus metadata is the smaller audit family. + +## Decision 7 — Keep proof narrow: unit + feature + architecture guard + +**Decision**: Prove the slice with narrow unit tests for the decision matrix, focused feature tests for the two existing operator surfaces, and one architecture guard that fails if direct AI-provider access appears outside the governed boundary. + +**Rationale**: +- Unit coverage is the cheapest place to prove the allow/block matrix. +- Feature coverage is still needed because the slice touches the existing workspace settings and system controls surfaces. +- Browser and heavy-governance workflows would add cost without proving additional v1 truth. + +**Evidence**: +- Existing settings and operational-controls tests already show the repo prefers focused Pest feature tests plus targeted unit tests over browser coverage for this class of work. + +**Alternatives considered**: +- Add automated browser-suite coverage in v1. + - Rejected: unnecessary for the narrow foundation slice and not the cheapest proof; a manual browser smoke at close-out is enough. 
+- Reuse the broad `WorkspaceSettingsManageTest.php` family as the primary proof. + - Rejected: it is workflow-heavy and should not become the default proving lane for a narrow AI policy field. \ No newline at end of file diff --git a/specs/248-private-ai-policy-foundation/spec.md b/specs/248-private-ai-policy-foundation/spec.md new file mode 100644 index 00000000..9389320b --- /dev/null +++ b/specs/248-private-ai-policy-foundation/spec.md @@ -0,0 +1,348 @@ +# Feature Specification: Private AI Execution & Policy Foundation + +**Feature Branch**: `248-private-ai-policy-foundation` +**Created**: 2026-04-27 +**Status**: Implemented +**Input**: User description: "Promote the roadmap-fit candidate Private AI Execution & Policy Foundation as a narrow, implementation-ready slice that introduces a governed central AI execution boundary for approved use cases, workspace policy modes, provider-class gating, and audit-ready decision metadata, while stopping before customer-facing AI features, direct external provider calls with tenant data, or a broad multi-provider marketplace." + +## Spec Candidate Check *(mandatory — SPEC-GATE-001)* + +- **Problem**: TenantPilot now has roadmap pressure to add AI-assisted support and operator workflows, but the repo still has no app-level AI execution seam, no workspace-owned AI policy truth, and no central place to classify which AI inputs are ever allowed to leave a bounded trust boundary. +- **Today's failure**: If AI work starts feature-by-feature, it will likely appear as local provider calls, local prompt assembly, and local allow/block logic that bypass workspace policy, provider trust boundaries, operational controls, and audit-ready decision metadata. That would create privacy drift, provider coupling, and rework before the first real customer-facing AI workflow even lands. 
+- **User-visible improvement**: Workspace operators can set an explicit workspace AI posture on the existing workspace settings surface, platform operators can pause all AI execution through the existing operational-controls path, and future AI-assisted internal workflows get one auditable allow-or-block decision before any model execution begins. +- **Smallest enterprise-capable version**: Add one concrete governed AI execution boundary, one code-owned approved use-case catalog locked to two internal-only future consumers (`product_knowledge.answer_draft` and `support_diagnostics.summary_draft`), one workspace AI policy section with the modes `disabled` and `private_only`, one bounded provider-class and data-classification contract, one reused operational-control key for emergency stop, and one audit metadata shape on the existing audit infrastructure. +- **Explicit non-goals**: No customer-facing AI surface, no chatbot, no customer communication drafting, no autonomous remediation, no human-approval workflow, no broad provider marketplace, no provider credential-management UI, no usage budgeting, no result cache/store, no prompt/template CMS, no queueing/OperationRun layer for AI, and no external public-provider execution with tenant or customer data. +- **Permanent complexity imported**: One workspace-owned AI policy truth inside the existing settings stack, one bounded AI use-case catalog, one bounded provider-class catalog, one bounded AI data-classification family, one concrete execution-decision service, one operational-control catalog entry, one bounded AI decision audit action ID with its metadata fields, and focused unit plus feature guard coverage. +- **Why now**: This is the next roadmap-fit foundation after Specs 242-247 and the provider-vocabulary hardening lane. It directly reduces the current risk that private AI arrives through ungoverned local feature calls before the product has safe workspace isolation, provider gating, and audit semantics. 
+- **Why not local**: A local AI helper per surface would duplicate policy checks, duplicate data-classification choices, and teach parallel provider semantics across product knowledge, diagnostics, and later customer workflows. The trust boundary needs to exist once before those consumers start shipping. +- **Approval class**: Core Enterprise +- **Red flags triggered**: New axes, new meta-infrastructure, and foundation-sounding scope. Defense: the slice is tightly limited to two approved use cases, two policy modes, one existing admin settings surface, one existing system control surface, no new table, no result persistence, and no customer-visible AI workflow. +- **Score**: Nutzen: 2 | Dringlichkeit: 2 | Scope: 2 | Komplexitaet: 1 | Produktnaehe: 1 | Wiederverwendung: 2 | **Gesamt: 10/12** +- **Decision**: approve + +## Spec Scope Fields *(mandatory)* + +- **Scope**: workspace, platform +- **Primary Routes**: + - `/admin/settings/workspace` on the existing workspace settings page for workspace-owned AI policy + - `/system/ops/controls` on the existing system operational-controls page for a platform emergency stop of AI execution + - No new tenant/admin AI output route, customer-facing AI page, or system AI console is introduced in v1 +- **Data Ownership**: + - Workspace AI policy truth is workspace-owned and stored through the existing workspace settings mechanism rather than a new AI table + - Approved AI use cases, provider classes, and AI data classifications remain code-owned repository truth + - AI execution decisions and policy mutations are recorded on the existing audit infrastructure; no AI result ledger, cache store, or prompt history table is introduced in this slice + - Tenant-scoped AI requests may carry workspace and tenant identifiers for authorization and audit context, but tenant/customer content remains derived input only and is not persisted as a new AI-owned record family +- **RBAC**: + - Workspace AI policy visibility and mutation stay on the 
existing workspace settings authorization path and reuse the current workspace settings capabilities + - Platform pause/resume of AI execution stays on the existing system panel and requires `PlatformCapabilities::ACCESS_SYSTEM_PANEL` plus `PlatformCapabilities::OPS_CONTROLS_MANAGE` + - The governed AI execution boundary accepts requests only after the caller has already resolved workspace and optional tenant entitlement on the host surface; it does not create a new cross-plane shortcut from `/system` into tenant data + - This slice introduces no new customer-facing or operator-facing `run AI` capability string because it intentionally stops before any new AI action surface is exposed + +For canonical-view specs, the spec MUST define: + +- **Default filter behavior when tenant-context is active**: N/A - this slice does not add a canonical cross-tenant AI list or detail route +- **Explicit entitlement checks preventing cross-tenant leakage**: AI decision evaluation never runs before the host surface has already resolved workspace and tenant entitlement. A non-member or wrong-scope actor receives the existing 404 semantics before any AI policy or data-classification detail is revealed. 
+ +## Cross-Cutting / Shared Pattern Reuse *(mandatory when the feature touches notifications, status messaging, action links, header actions, dashboard signals/cards, alerts, navigation entry points, evidence/report viewers, or any other existing shared operator interaction family; otherwise write `N/A - no shared interaction family touched`)* + +- **Cross-cutting feature?**: yes +- **Interaction class(es)**: workspace settings, operational safety controls, audit logging, future support-diagnostic and product-knowledge source reuse +- **Systems touched**: existing workspace settings persistence and audit flow, `App\Support\OperationalControls\OperationalControlEvaluator`, `App\Filament\System\Pages\Ops\Controls`, `App\Support\ProductKnowledge\ContextualHelpResolver`, existing support-diagnostic bundle builders, and `App\Support\Audit\AuditActionId` +- **Existing pattern(s) to extend**: workspace settings update/reset audit path, operational-controls evaluation path, platform system-panel capability enforcement, and stable audit action ID conventions +- **Shared contract / presenter / builder / renderer to reuse**: `SettingsResolver`, `SettingsWriter`, `WorkspaceAuditLogger`, `AuditRecorder`, `OperationalControlEvaluator`, `AuditActionId`, `ContextualHelpResolver`, and the existing support-diagnostic summary pipeline +- **Why the existing shared path is sufficient or insufficient**: the existing settings, ops-controls, and audit paths are already sufficient for policy storage, emergency stop, and audit ownership. They are insufficient for AI itself because no central execution boundary or AI-specific allow/block decision contract exists yet. +- **Allowed deviation and why**: none. The first slice must not introduce page-local AI policy checks, page-local provider labels, or page-local audit payloads. 
+- **Consistency impact**: the same vocabulary for `AI policy mode`, `provider class`, `data classification`, `approved use case`, `blocked reason`, and `private-only` must appear consistently across workspace settings, system controls, audit prose, and all future AI decision callers. +- **Review focus**: reviewers must block any direct provider call, raw feature-level AI helper, or local data-classification rule that bypasses the central AI execution boundary, the workspace AI policy, or the reused operational-control decision. + +## OperationRun UX Impact *(mandatory when the feature creates, queues, deduplicates, resumes, blocks, completes, or deep-links to an `OperationRun`; otherwise write `N/A - no OperationRun start or link semantics touched`)* + +- **Touches OperationRun start/completion/link UX?**: no +- **Shared OperationRun UX contract/layer reused**: N/A - this slice intentionally stops before queueing, background AI work, or customer/operator-facing AI runs +- **Delegated start/completion UX behaviors**: N/A +- **Local surface-owned behavior that remains**: N/A +- **Queued DB-notification policy**: N/A +- **Terminal notification path**: N/A +- **Exception required?**: none + +## Provider Boundary / Platform Core Check *(mandatory when the feature changes shared provider/platform seams, identity scope, governed-subject taxonomy, compare strategy selection, provider connection descriptors, or operator vocabulary that may leak provider-specific semantics into platform-core truth; otherwise write `N/A - no shared provider/platform boundary touched`)* + +- **Shared provider/platform boundary touched?**: yes +- **Boundary classification**: platform-core +- **Seams affected**: AI use-case keys, workspace AI policy vocabulary, provider-class gating, data-classification gating, and the governed execution decision contract +- **Neutral platform terms preserved or introduced**: `AI use case`, `provider class`, `workspace AI policy`, `data classification`, 
`execution decision`, `source family`, and `private-only` +- **Provider-specific semantics retained and why**: none in v1. The slice intentionally classifies trust boundaries by provider class rather than naming vendors, endpoints, SDKs, or model marketplaces. +- **Why this does not deepen provider coupling accidentally**: the spec keeps provider truth at the class level (`local_private` versus `external_public`) and forbids feature code from depending on vendor-specific semantics or credentials in this foundation slice. +- **Follow-up path**: later provider expansion belongs in follow-up specs, primarily `AI Usage Budgeting, Context & Result Governance` and then `AI-Assisted Customer Operations`, rather than inside this foundation slice + +## UI / Surface Guardrail Impact *(mandatory when operator-facing surfaces are changed; otherwise write `N/A`)* + +| Surface / Change | Operator-facing surface change? | Native vs Custom | Shared-Family Relevance | State Layers Touched | Exception Needed? 
| Low-Impact / `N/A` Note | +|---|---|---|---|---|---|---| +| Workspace settings AI policy section | yes | Native Filament + existing singleton settings page | settings, status messaging, helper text | page, settings section, resolved policy summary | no | Extends the existing workspace settings page instead of creating a separate AI admin surface | +| System ops controls AI execution control card | yes | Native Filament + existing operational-controls page | operational safety controls, audit-backed state messaging | page, card/action state, confirmation modal | no | Reuses the current control-center pattern for a single new AI execution kill switch | +| Customer-facing or tenant-facing AI output surfaces | no | N/A | none | none | no | `N/A - explicitly out of scope for v1` | + +## Decision-First Surface Role *(mandatory when operator-facing surfaces are changed)* + +| Surface | Decision Role | Human-in-the-loop Moment | Immediately Visible for First Decision | On-Demand Detail / Evidence | Why This Is Primary or Why Not | Workflow Alignment | Attention-load Reduction | +|---|---|---|---|---|---|---|---| +| Workspace settings AI policy section | Primary Decision Surface | Workspace owner or manager decides whether the workspace allows no AI use at all or only private-only AI for approved internal use cases | current policy mode, plain-language effect, approved use cases, allowed provider classes, and blocked data classes | audit attribution, source-family notes, and future-consumer explanation | Primary because this is the one workspace-owned product decision that changes later AI allow/block behavior | Follows configuration-first governance instead of hidden feature flags | Replaces founder memory or code comments with one explicit workspace truth | +| System ops controls AI execution control card | Primary Decision Surface | Platform operator decides whether all new AI execution must be paused during an incident or rollout concern | global control state, 
reason, expiry, and effect on new AI starts | audit history and affected-use-case summary | Primary because it is the runtime safety stop for the whole AI boundary, not a secondary diagnostic | Follows incident and rollout operations workflow | Removes the need for deploy-time or environment-level emergency stop behavior | + +## Audience-Aware Disclosure *(mandatory when operator-facing surfaces are changed)* + +| Surface | Audience Modes In Scope | Decision-First Default-Visible Content | Operator Diagnostics | Support / Raw Evidence | One Dominant Next Action | Hidden / Gated By Default | Duplicate-Truth Prevention | +|---|---|---|---|---|---|---|---| +| Workspace settings AI policy section | operator-MSP | policy mode, approved use cases, allowed provider classes, blocked data classes, and plain-language effect | last changed attribution and policy-source notes | none | `Save` | vendor-specific credentials, raw prompt examples, raw diagnostic inputs, and future budgeting fields stay out of scope | The same policy vocabulary is reused by the execution boundary and audit prose instead of being restated differently on future surfaces | +| System ops controls AI execution control card | support-platform, operator-platform | control state, reason, expiry, and whether new AI execution is paused | audit history and affected-use-case count | none | `Pause AI execution` or `Resume AI execution` | no prompt content, no provider payload preview, and no workspace content samples appear on the control surface | The control surface owns only runtime stop/start truth; workspace policy detail stays on workspace settings | + +## UI/UX Surface Classification *(mandatory when operator-facing surfaces are changed)* + +| Surface | Action Surface Class | Surface Type | Likely Next Operator Action | Primary Inspect/Open Model | Row Click | Secondary Actions Placement | Destructive Actions Placement | Canonical Collection Route | Canonical Detail Route | Scope Signals | Canonical Noun 
| Critical Truth Visible by Default | Exception Type / Justification | +|---|---|---|---|---|---|---|---|---|---|---|---|---|---| +| Workspace settings AI policy section | Config / Settings / Singleton | Workspace configuration section | Save or reset the workspace AI policy | In-page settings section on the existing singleton route | forbidden | Helper text and policy explanation stay inside the section | none | `/admin/settings/workspace` | `/admin/settings/workspace` | Active workspace context | Workspace AI policy | Whether AI is disabled or private-only, and what that means | existing singleton-settings exception remains valid | +| System ops controls AI execution control card | Utility / System | Operational safety control center | Pause or resume AI execution | Same-page card actions and confirmation modal | forbidden | Audit/history detail remains secondary inside the page | pause/resume stays on the card with confirmation | `/system/ops/controls` | `/system/ops/controls` | Platform-global control scope | AI execution control | Whether new AI execution is allowed right now and why | none | + +## Operator Surface Contract *(mandatory when operator-facing surfaces are changed)* + +| Surface | Primary Persona | Decision / Operator Action Supported | Surface Type | Primary Operator Question | Default-visible Information | Diagnostics-only Information | Status Dimensions Used | Mutation Scope | Primary Actions | Dangerous Actions | +|---|---|---|---|---|---|---|---|---|---|---| +| Workspace settings AI policy section | Workspace owner or manager | Decide whether the workspace allows private-only AI for approved internal use cases | Singleton settings page | What AI posture applies to this workspace right now? 
| policy mode, approved use cases, allowed provider classes, blocked data classes, and plain-language effect | last changed attribution and source-family notes | AI policy mode, provider trust boundary, allowed data scope | TenantPilot only | Save, Reset policy | none | +| System ops controls AI execution control card | Platform operator | Decide whether all new AI execution must be paused or resumed | System control center | Should any new AI execution proceed right now? | global control state, reason, expiry, and effect on new starts | audit history and affected use-case summary | global runtime safety state | TenantPilot only | Pause AI execution, Resume AI execution | Pause AI execution, Resume AI execution | + +## Proportionality Review *(mandatory when structural complexity is introduced)* + +- **New source of truth?**: yes - workspace-owned AI policy becomes current-release product truth +- **New persisted entity/table/artifact?**: no - workspace AI policy reuses existing workspace settings persistence and audit paths +- **New abstraction?**: yes - one concrete governed AI execution boundary and one bounded use-case catalog +- **New enum/state/reason family?**: yes - AI policy modes, provider classes, data classifications, and execution decision reasons +- **New cross-domain UI framework/taxonomy?**: no +- **Current operator problem**: TenantPilot needs a safe way to add AI later without letting support, diagnostics, or customer workflows bypass workspace isolation, private-only trust posture, and auditability. +- **Existing structure is insufficient because**: there is currently no app-level AI seam at all. Existing settings, ops controls, and audit paths can store policy and stop work, but they cannot classify AI input, bind use cases to approved data, or force all future AI callers through one decision. 
+- **Narrowest correct implementation**: keep persistence inside existing workspace settings, reuse existing system ops controls for the emergency stop, lock the use-case catalog to two internal-only future consumers, classify only the first-slice provider/data families, and write audit metadata to the existing audit log instead of building a second AI record system. +- **Ownership cost**: ongoing review of use-case keys, provider-class vocabulary, data classifications, audit metadata shape, and one architecture guard against direct provider calls +- **Alternative intentionally rejected**: direct feature-level AI helpers were rejected as unsafe; a broad provider registry or marketplace was rejected as speculative; a result ledger, cache, or budgeting system was rejected because the first slice does not yet need those truths. +- **Release truth**: current-release truth that deliberately prepares later AI features without shipping them yet + +### Compatibility posture + +This feature assumes a pre-production environment. + +Backward compatibility, legacy aliases, migration shims, historical fixtures, and compatibility-specific tests are out of scope unless explicitly required by this spec. + +Canonical replacement is preferred over preservation. + +## Testing / Lane / Runtime Impact *(mandatory for runtime behavior changes)* + +- **Test purpose / classification**: Unit, Feature +- **Validation lane(s)**: fast-feedback, confidence +- **Why this classification and these lanes are sufficient**: unit coverage proves the approved use-case catalog, workspace AI policy resolution, provider-class and data-classification gating, operational-control precedence, and audit-metadata shaping. Focused feature coverage proves the existing workspace settings and system controls surfaces, plus one architecture guard proving that blocked requests never reach a direct provider call path. 
+- **New or expanded test families**: focused AI policy and execution-decision unit coverage, workspace settings feature coverage, operational-control integration feature coverage, and one architecture guard that blocks direct AI provider calls outside the governed boundary +- **Fixture / helper cost impact**: low-to-moderate. Reuse existing workspace, membership, settings, platform-user, and system control fixtures. Avoid browser harnesses, provider-emulator suites, or any seeded AI result history. +- **Heavy-family visibility / justification**: none +- **Special surface test profile**: standard-native-filament +- **Standard-native relief or required special coverage**: ordinary Filament feature coverage is sufficient for workspace settings and system ops controls. The central AI execution boundary also needs direct service-level tests proving that blocked requests produce no provider call and no raw audit payload. +- **Reviewer handoff**: reviewers must confirm that `ai.execution` uses the existing operational-control path, workspace policy changes reuse the existing settings audit path, unregistered use cases or blocked data classes never reach provider resolution, and no result store, queue, or customer-facing AI surface slipped into the slice. 
+- **Budget / baseline / trend impact**: low increase in narrow unit and feature coverage only +- **Escalation needed**: none +- **Active feature PR close-out entry**: Guardrail +- **Planned validation commands**: + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact --filter=WorkspaceAiPolicy` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact --filter=GovernedAiExecution` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && cd apps/platform && ./vendor/bin/sail artisan test --compact --filter=NoDirectAiProviderBypass` + +## First-Slice Approved AI Use Case Inventory *(implementation lock-in for v1)* + +The first slice is locked to the following approved use cases. Adding a third use case requires an explicit spec update. + +| Use Case Key | Intended Future Consumer | Allowed Provider Class(es) | Allowed Data Classification(s) | Visibility | Tenant Context Permitted | Explicitly Excluded Inputs | +|---|---|---|---|---|---|---| +| `product_knowledge.answer_draft` | Product knowledge and contextual help from `ContextualHelpResolver` and related code-owned knowledge sources | `local_private` | `product_knowledge`, `operational_metadata` | internal-only draft | no | tenant policy JSON, raw provider payloads, customer-confidential notes, personal data | +| `support_diagnostics.summary_draft` | Support diagnostics using a redacted summary derived from existing support-diagnostic bundle builders | `local_private` | `redacted_support_summary` | internal-only draft | yes | raw diagnostic bundle sections, raw provider payloads, customer-confidential notes, personal data | + +## First-Slice AI Data Classification Contract *(implementation lock-in for v1)* + +| Data Classification | Meaning In This Slice | V1 Consequence | +|---|---|---| +| `product_knowledge` | Code-owned glossary, contextual-help, and product documentation source content 
with no tenant/customer payload | Allowed only for approved use cases on `local_private` | +| `operational_metadata` | Minimal non-secret metadata such as safe surface family, route family, or internal workflow context that does not contain tenant/customer content | Allowed only when the approved use case explicitly opts in | +| `redacted_support_summary` | Sanitized support-diagnostic summary content derived from existing product truth without raw provider payloads or customer-confidential detail | Allowed only for `support_diagnostics.summary_draft` on `local_private` | +| `personal_data` | End-user or operator personal data | Blocked for all AI execution in v1 | +| `customer_confidential` | Tenant/customer-confidential narrative, sensitive configuration detail, or customer-owned context that is not reduced to the approved redacted summary | Blocked for all AI execution in v1 | +| `raw_provider_payload` | Raw provider payloads, raw policy JSON, raw Graph/API responses, or equivalent source material | Blocked for all AI execution in v1 | + +## Scope Boundaries *(required for this slice)* + +### In Scope + +- One concrete governed AI execution boundary that all future AI callers must use +- One code-owned approved-use-case catalog locked to `product_knowledge.answer_draft` and `support_diagnostics.summary_draft` +- One workspace-owned AI policy section on the existing workspace settings page with the modes `disabled` and `private_only` +- One bounded provider-class contract with `local_private` and `external_public`, where `external_public` exists only as a blocked trust class in v1 +- One bounded AI data-classification contract as defined above +- One reused operational-control key `ai.execution` on the existing system ops controls surface +- AI decision audit metadata written to the existing audit infrastructure with no prompt/output persistence +- Architecture guardrails that prevent direct provider calls outside the governed boundary + +### Non-Goals + +- 
Customer-facing AI features, tenant-facing AI summaries, or support-response drafting surfaces +- Broad provider marketplace, vendor credential management, or multi-provider routing UI +- Token or cost budgeting, credits, rate limits, or queue priority rules +- Result cache, prompt store, output history, or reusable AI artifact persistence +- Autonomous remediation, legal/customer communications, or human-approval workflow for AI outputs +- External public-provider execution with tenant/customer data +- Queueing, retries, or `OperationRun` semantics for AI execution in this slice + +## Assumptions + +- The existing workspace settings persistence and audit path are sufficient for storing one workspace AI policy mode without introducing a new table. +- The operational-controls foundation from the existing controls page can safely absorb one additional control key for AI execution. +- `ContextualHelpResolver` and support-diagnostic builders can provide code-owned or redacted source inputs without requiring raw provider payloads to cross the AI boundary. +- The first slice remains internal-only and draft-only, so no customer-visible AI wording, approval queue, or outbound communication contract is needed yet. + +## Risks + +- If the support-diagnostic pipeline cannot produce a clearly redacted summary without raw provider payloads or customer-confidential detail, `support_diagnostics.summary_draft` may need a tighter pre-step before implementation proceeds. +- If the operational-controls slice is unavailable or materially different at implementation time, the `ai.execution` emergency stop may need sequencing adjustment before this feature can land safely. +- A later implementer could still try to add a vendor-specific provider seam or prompt history while wiring the first private model. The architecture guard must stay explicit so the slice does not widen silently. 
+- A workspace policy surface without an enforced central execution boundary would create false confidence. The execution guard and architecture guard are both mandatory for safe implementation. + +## Follow-up Candidates + +- AI Usage Budgeting, Context & Result Governance +- AI-Assisted Customer Operations +- Decision-pack or review-workspace AI draft assistance after explicit human-approval and evidence-governance rules exist + +## User Scenarios & Testing *(mandatory)* + +### User Story 1 - Set workspace AI posture once (Priority: P1) + +As a workspace owner or manager, I want to choose whether the workspace disables AI entirely or allows only private-only AI for approved internal use cases so the product has one explicit trust posture before any AI feature is added. + +**Why this priority**: The foundation is not safe unless workspace-owned AI posture is explicit, auditable, and visible before later AI use cases appear. + +**Independent Test**: Open the existing workspace settings page, change the AI policy between `disabled` and `private_only`, and verify that the resolved policy explanation updates and is attributable without touching application code or environment flags. + +**Acceptance Scenarios**: + +1. **Given** a workspace manager opens workspace settings, **When** they save the AI policy mode as `private_only`, **Then** the page shows that only approved private-only use cases may proceed and the change is attributable through the existing workspace settings audit path. +2. **Given** the same workspace changes the mode back to `disabled`, **When** the page reloads, **Then** the page shows that no AI execution is allowed for the workspace and future approved use cases would block before execution. 
+ +--- + +### User Story 2 - Block unsafe AI requests before any provider call (Priority: P1) + +As the product owner responsible for later AI-assisted operator workflows, I want every in-scope AI request to pass through one governed allow-or-block decision so unapproved use cases, external-public trust classes, or disallowed data classes never reach a provider call. + +**Why this priority**: This is the core safety outcome of the foundation. If requests can still bypass the boundary, the slice fails even if the settings UI exists. + +**Independent Test**: Exercise the governed AI boundary with the two approved use cases and several blocked combinations, and verify that allowed requests accept only the approved private input shape while blocked requests never resolve a provider call. + +**Acceptance Scenarios**: + +1. **Given** a workspace is set to `private_only` and a request uses `support_diagnostics.summary_draft` with `redacted_support_summary`, **When** the governed AI boundary evaluates the request for `local_private`, **Then** it allows the request and records an audit-ready decision without persisting prompt or output text. +2. **Given** the same workspace and use case, **When** a request declares `external_public` as the provider class, **Then** the boundary blocks the request before any provider resolution or outbound call occurs. +3. **Given** any workspace AI mode other than `disabled`, **When** a request includes `raw_provider_payload`, `customer_confidential`, or `personal_data`, **Then** the boundary blocks the request before execution even if the requested provider class is `local_private`. +4. **Given** a request uses an unregistered AI use case key or lacks workspace context, **When** the boundary evaluates it, **Then** the request is rejected and no AI provider call is attempted. 
+ +--- + +### User Story 3 - Pause all AI execution centrally during an incident (Priority: P2) + +As a platform operator, I want to pause all new AI execution from the existing system ops controls surface so rollout problems or privacy concerns can be contained without a deployment. + +**Why this priority**: Reusing the operational-controls pattern is the smallest safe incident stop for a cross-cutting AI boundary. + +**Independent Test**: Pause `ai.execution` from the existing controls page, send an otherwise valid AI request through the governed boundary, and verify that it blocks with the operational-control reason until the control is resumed. + +**Acceptance Scenarios**: + +1. **Given** `ai.execution` is paused from `/system/ops/controls`, **When** an otherwise valid approved AI request is evaluated, **Then** the request is blocked before execution and the block reason identifies the active operational control. +2. **Given** the same control is resumed, **When** the same approved request is retried, **Then** the request follows normal workspace policy and data-classification evaluation again. + +### Edge Cases + +- A request may arrive without workspace context or with tenant context from an unauthorized actor; the host authorization boundary must fail first so the AI layer does not leak tenant or policy detail. +- A support-diagnostic request may contain mixed safe and unsafe source material; if the source cannot be reduced to `redacted_support_summary`, the entire AI request is blocked. +- A workspace may be set to `private_only` while the platform-level `ai.execution` control is paused; the pause control wins and blocks all new starts. +- An AI request may be accepted just before `ai.execution` is paused; the control governs new starts only and does not retroactively mutate any in-flight private execution. 
+- A later feature may try to introduce a third use case or a new data classification in the same implementation PR; that is out of scope unless the active spec is updated explicitly. + +## Requirements *(mandatory)* + +**Constitution alignment (required):** This feature introduces no Microsoft Graph contract change, no tenant-changing provider write, and no new queued workflow family. It creates a governed decision boundary that must run before any future AI provider execution, while reusing the existing workspace settings, operational-controls, and audit infrastructure. + +**Constitution alignment (PROP-001 / ABSTR-001 / PERSIST-001 / STATE-001 / BLOAT-001):** The slice introduces new AI-specific vocabulary and one new execution boundary because the current-release product now needs a safe first truth for AI policy, provider trust class, and allowed data before broader AI features land. It stays narrow by avoiding new tables, queues, result persistence, or provider-marketplace abstractions. + +**Constitution alignment (XCUT-001):** This slice is cross-cutting across workspace settings, operational controls, audit logging, product-knowledge input, and support-diagnostic input. It must reuse the existing settings and ops-controls paths rather than creating page-local AI settings or emergency-stop logic. + +**Constitution alignment (PROV-001):** AI provider trust is classified through neutral provider classes, not vendor-specific names. Provider-specific semantics and provider credential management remain out of scope. + +**Constitution alignment (TEST-GOV-001):** Proof stays in focused unit and feature lanes. The feature must add one explicit architecture guard proving that AI provider access cannot be called directly outside the governed boundary. + +**Constitution alignment (OPS-UX):** This slice does not create or reuse an `OperationRun`. 
If a later AI feature becomes queued or operationally relevant, that behavior belongs in a follow-up spec and must adopt the canonical Ops-UX contract then. + +**Constitution alignment (RBAC-UX):** The slice spans workspace `/admin` settings and platform `/system` operational controls. Wrong-plane or non-member access remains 404. Existing workspace settings authorization stays authoritative for policy mutation. Existing system-panel capability enforcement stays authoritative for the emergency stop. The governed AI boundary must not become an authorization bypass for tenant-scoped content. + +**Constitution alignment (BADGE-001):** If policy mode or control state is shown with a badge or status chip, the rendering must reuse existing settings/control status semantics rather than introduce page-local AI color language. + +**Constitution alignment (UI-FIL-001):** The only operator-facing surfaces in scope are existing Filament pages. The feature must use native sections, helper text, callouts, actions, and control cards rather than a custom AI admin shell. + +**Constitution alignment (UI-NAMING-001):** Primary operator-facing labels must stay implementation-light and product-truthful: `Workspace AI policy`, `Disabled`, `Private only`, `Approved AI use cases`, `Blocked data classes`, and `AI execution`. Terms such as vendor names, SDK names, or low-level model endpoint jargon stay out of primary labels. + +**Constitution alignment (DECIDE-001):** Workspace settings and system ops controls are the only decision surfaces in scope. No new decision inbox, AI draft viewer, or evidence-heavy AI result page is introduced. + +**Constitution alignment (UI-CONST-001 / UI-SURF-001 / ACTSURF-001 / UI-HARD-001 / UI-EX-001 / UI-REVIEW-001 / HDR-001):** The feature must preserve the existing singleton settings and control-center page patterns. It may not add redundant inspect actions, shadow routes, or mixed action groups for AI management in this first slice. 
+ +**Constitution alignment (ACTSURF-001 - action hierarchy):** Workspace policy mutation stays on the workspace settings page. Platform-wide pause/resume stays on the existing controls page. No other visible AI mutation action is introduced. + +**Constitution alignment (OPSURF-001):** Default-visible content must stay operator-first: whether AI is disabled or private-only for a workspace, and whether all new AI execution is paused globally. No raw prompt content, model internals, or tenant payload excerpts belong on the default surfaces. + +**Constitution alignment (UI-SEM-001 / LAYER-001 / TEST-TRUTH-001):** One decision layer is justified because direct reads from raw settings or local feature flags would still force each future AI surface to duplicate provider-class, data-classification, and policy logic. Tests must target business outcomes such as allowed versus blocked execution and clean audit payloads instead of cosmetic rendering alone. + +**Constitution alignment (Filament Action Surfaces):** The action-surface contract remains satisfied. Workspace settings keep a single in-page save model. System ops controls keep confirmation-protected state-change actions on the same surface. No redundant inspect action or empty action group is introduced. + +**Constitution alignment (UX-001 - Layout & Information Architecture):** The workspace AI policy stays inside the existing settings layout with sectioned content and plain-language guidance. The system AI execution stop stays inside the existing controls page. No new custom layout family is introduced. + +### Functional Requirements + +- **FR-248-001 Approved use-case catalog**: The system MUST define a code-owned AI use-case catalog locked to exactly two first-slice keys: `product_knowledge.answer_draft` and `support_diagnostics.summary_draft`. 
+- **FR-248-002 Use-case declaration contract**: Each first-slice use case MUST declare its allowed provider class, allowed data classifications, source family, visibility (`internal-only draft`), and whether tenant context is permitted. +- **FR-248-003 Workspace AI policy truth**: The system MUST store workspace AI posture through the existing workspace settings mechanism and audit policy changes through the existing workspace settings audit path. +- **FR-248-004 First-slice policy modes**: The first slice MUST support exactly two workspace AI policy modes: `disabled` and `private_only`. +- **FR-248-005 Provider-class contract**: The system MUST define a bounded provider-class contract containing `local_private` and `external_public`, where `external_public` exists only as a blocked trust class in v1. +- **FR-248-006 Data-classification contract**: The system MUST classify AI inputs using the first-slice data classifications defined in this spec and MUST block `personal_data`, `customer_confidential`, and `raw_provider_payload` for all AI execution in v1. +- **FR-248-007 Central execution boundary**: The system MUST route every future AI execution request through one governed execution boundary that requires a registered use case key, actor context, workspace context, requested provider class, declared data classification, and source family before execution is attempted. +- **FR-248-008 Block precedence**: After the host surface has already resolved authorization and scope entitlement, the governed boundary MUST evaluate `ai.execution` operational control, workspace AI policy mode, use-case registration, provider-class allowance, and data-classification allowance before resolving any AI provider call. +- **FR-248-009 Operational-control reuse**: The feature MUST reuse the existing operational-controls pattern through a new in-scope control key `ai.execution` on `/system/ops/controls` rather than introducing a second AI-specific emergency stop mechanism. 
+- **FR-248-010 Approved source inputs only**: `product_knowledge.answer_draft` MUST consume only code-owned product-knowledge sources, and `support_diagnostics.summary_draft` MUST consume only redacted support-diagnostic summary content. Raw provider payloads, raw policy JSON, and customer-confidential notes are out of scope. +- **FR-248-011 Audit metadata shape**: The system MUST write stable AI-related audit entries for workspace policy changes and AI execution decisions, including at minimum use case key, provider class, workspace AI policy mode, data classification, decision outcome, decision reason, workspace scope, tenant scope when present, source family, and an optional context fingerprint; audit entries MUST NOT store raw prompt text, raw source payloads, or full output text. +- **FR-248-012 No direct provider calls**: Feature code MUST NOT call AI providers directly. A guard test or equivalent architecture check MUST fail if AI provider access appears outside the central governed boundary. +- **FR-248-013 Workspace settings UX**: The existing workspace settings page MUST show the selected AI policy mode, plain-language effect, approved use cases, allowed provider classes, and blocked data classes without introducing vendor-specific admin UI. +- **FR-248-014 Pause semantics**: When `ai.execution` is paused, all new AI execution requests MUST block before provider resolution, while in-flight work already accepted before the pause MAY complete unchanged. +- **FR-248-015 No hidden scope growth**: The first slice MUST NOT introduce customer-facing AI output surfaces, external public-provider execution with tenant/customer data, AI result persistence, cost budgeting, queue/retry behavior, or a provider marketplace. 
+ +## UI Action Matrix *(mandatory when Filament is changed)* + +| Surface | Location | Header Actions | Inspect Affordance (List/Table) | Row Actions (max 2 visible) | Bulk Actions (grouped) | Empty-State CTA(s) | View Header Actions | Create/Edit Save+Cancel | Audit log? | Notes / Exemptions | +|---|---|---|---|---|---|---|---|---|---|---| +| Workspace settings AI policy section | `app/Filament/Pages/Settings/WorkspaceSettings.php` | `Save` | N/A - singleton settings page | none | none | N/A | N/A | `Save`; optional `Reset policy` if the page already supports per-setting reset interactions | yes | Reuses the existing workspace settings mutation and audit path; no new AI execution action appears here | +| System ops controls AI execution control card | `app/Filament/System/Pages/Ops/Controls.php` | `Pause AI execution`, `Resume AI execution`, `View history` | Same-page control card or confirmation modal | none | none | none | same-page actions only | `Review impact`, `Save changes`, `Cancel` inside the existing control modal flow | yes | Reuses `PlatformCapabilities::OPS_CONTROLS_MANAGE` and the existing operational-controls action pattern; no new system AI console is introduced | + +### Key Entities *(include if feature involves data)* + +- **Workspace AI Policy**: The workspace-owned policy truth that resolves whether AI is `disabled` or `private_only` for the workspace. +- **Approved AI Use Case Definition**: The code-owned catalog entry that defines one allowed AI purpose, its allowed provider class, allowed data classifications, source family, and visibility. +- **AI Execution Request**: The derived request envelope passed into the governed boundary containing actor, workspace, optional tenant, use case key, provider class, data classification, and source family. +- **AI Execution Decision**: The allow-or-block result returned by the governed boundary, including policy mode, matched operational-control state, decision reason, and audit-ready metadata.
+ +## Success Criteria *(mandatory)* + +### Measurable Outcomes + +- **SC-248-001**: In validation scenarios, 100% of in-scope AI requests with an unregistered use case, blocked provider class, blocked data classification, missing workspace context, or a paused `ai.execution` control are stopped before any provider resolution or outbound call occurs. +- **SC-248-002**: Workspace owners can set and review the workspace AI policy on the existing workspace settings page in under 2 minutes without editing environment variables or code. +- **SC-248-003**: In validation coverage, 0 external-public AI executions occur for tenant/customer data in the first slice. +- **SC-248-004**: The two approved first-slice AI use cases resolve through the same governed decision vocabulary and audit metadata shape, with no direct provider call sites outside the central boundary in guard coverage. diff --git a/specs/248-private-ai-policy-foundation/tasks.md b/specs/248-private-ai-policy-foundation/tasks.md new file mode 100644 index 00000000..797cbd23 --- /dev/null +++ b/specs/248-private-ai-policy-foundation/tasks.md @@ -0,0 +1,194 @@ +--- + +description: "Task list for Private AI Execution & Policy Foundation" + +--- + +# Tasks: Private AI Execution & Policy Foundation + +**Input**: Design documents from `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/248-private-ai-policy-foundation/` +**Prerequisites**: `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/248-private-ai-policy-foundation/plan.md` (required), `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/248-private-ai-policy-foundation/spec.md` (required), `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/248-private-ai-policy-foundation/research.md`, `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/248-private-ai-policy-foundation/data-model.md`, `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/248-private-ai-policy-foundation/contracts/private-ai-governance.openapi.yaml`,
`/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/248-private-ai-policy-foundation/quickstart.md` + +**Tests**: REQUIRED (Pest) for runtime behavior changes. Keep proof in focused `Unit` and `Feature` lanes, plus one architecture guard, using the targeted Sail commands captured in the feature artifacts. +**Operations**: No new `OperationRun`, queue, retry, monitoring page, or result ledger is introduced. This slice remains DB-backed settings, operational-control, and audit work only. +**RBAC**: Existing workspace settings authorization and platform ops-control authorization remain authoritative. Non-members or wrong-plane actors keep `404` deny-as-not-found semantics where applicable; members missing the required capability receive `403`. +**Provider Boundary**: AI trust vocabulary stays platform-core and vendor-neutral (`AI use case`, `provider class`, `data classification`). `external_public` remains blocked in v1. +**Organization**: Tasks are grouped by user story so workspace AI policy, governed decision enforcement, and operational-stop controls remain independently testable once the shared foundation exists. + +## Test Governance Checklist + +- [x] Lane assignment stays `Unit` plus `Feature` and remains the narrowest sufficient proof for the changed behavior. +- [x] New or changed tests stay in `apps/platform/tests/Unit/Support/Ai/`, `apps/platform/tests/Feature/SettingsFoundation/`, `apps/platform/tests/Feature/OperationalControls/`, `apps/platform/tests/Feature/System/OpsControls/`, and `apps/platform/tests/Feature/Guards/` only; no browser or heavy-governance lane is added. +- [x] Shared helpers, factories, fixtures, and context defaults stay cheap by default; do not add provider emulators, queue scaffolding, or seeded AI history. +- [x] Planned validation commands cover workspace settings, governed AI decision logic, audit metadata, operational controls, and the no-direct-provider guard without widening scope. 
+- [x] The declared surface test profile remains `standard-native-filament` because the slice only extends existing workspace settings and system controls pages. +- [x] Any deferred public-provider execution, result persistence, budgeting, or queued AI follow-up resolves as `document-in-feature` or `follow-up-spec`, not as hidden scope growth. + +## Phase 1: Setup (Shared Context) + +**Purpose**: Confirm the bounded first slice, repo seams, and reviewer stop conditions before runtime implementation begins. + +- [x] T001 Review the bounded slice, explicit non-goals, approved use cases, validation lanes, and guardrail expectations in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/248-private-ai-policy-foundation/spec.md`, `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/248-private-ai-policy-foundation/plan.md`, `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/248-private-ai-policy-foundation/research.md`, `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/248-private-ai-policy-foundation/data-model.md`, `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/248-private-ai-policy-foundation/contracts/private-ai-governance.openapi.yaml`, and `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/248-private-ai-policy-foundation/quickstart.md` +- [x] T002 [P] Confirm the existing workspace settings persistence, resolver, and audit seams that this slice must reuse in `apps/platform/app/Filament/Pages/Settings/WorkspaceSettings.php`, `apps/platform/app/Support/Settings/SettingsRegistry.php`, `apps/platform/app/Services/Settings/SettingsResolver.php`, `apps/platform/app/Services/Settings/SettingsWriter.php`, `apps/platform/app/Support/Audit/AuditActionId.php`, and `apps/platform/app/Services/Audit/WorkspaceAuditLogger.php` +- [x] T003 [P] Confirm the existing operational-control, platform authorization, and guard-test seams that this slice must extend in `apps/platform/app/Filament/System/Pages/Ops/Controls.php`, 
`apps/platform/app/Support/OperationalControls/OperationalControlCatalog.php`, `apps/platform/app/Support/OperationalControls/OperationalControlEvaluator.php`, `apps/platform/app/Support/Auth/Capabilities.php`, `apps/platform/app/Support/Auth/PlatformCapabilities.php`, `apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsManageTest.php`, `apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsViewOnlyTest.php`, `apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsNonMemberNotFoundTest.php`, `apps/platform/tests/Feature/OperationalControls/OperationalControlAuthorizationSemanticsTest.php`, and `apps/platform/tests/Feature/System/OpsControls/OperationalControlManagementTest.php` + +--- + +## Phase 2: Foundational (Blocking Prerequisites) + +**Purpose**: Add the shared AI policy, decision, audit, and operational-stop primitives that every user story depends on. + +**Critical**: No user story work should begin until this phase is complete. + +- [x] T004 [P] Add the `ai.policy_mode` setting definition, allowed values, system default, and resolver plumbing in `apps/platform/app/Support/Settings/SettingsRegistry.php` and `apps/platform/app/Services/Settings/SettingsResolver.php` +- [x] T005 [P] Create the bounded AI support namespace for policy modes, provider classes, data classifications, and request/decision value objects under `apps/platform/app/Support/Ai/` +- [x] T006 Implement the code-owned approved-use-case catalog locked to `product_knowledge.answer_draft` and `support_diagnostics.summary_draft` in `apps/platform/app/Support/Ai/AiUseCaseCatalog.php` and companion definition files under `apps/platform/app/Support/Ai/` +- [x] T007 Implement the governed AI execution boundary so host-surface authorization stays a caller-side precondition, then evaluate `ai.execution`, workspace policy, use-case registration, provider class, and data-classification allowance in `apps/platform/app/Support/Ai/GovernedAiExecutionBoundary.php` +- [x] 
T008 [P] Add the bounded AI decision audit action and metadata-shaping support without prompt, source-payload, or output persistence in `apps/platform/app/Support/Audit/AuditActionId.php`, `apps/platform/app/Services/Audit/WorkspaceAuditLogger.php`, and `apps/platform/app/Support/Ai/` +- [x] T009 [P] Add the `ai.execution` operational-control definition and evaluator lookup path in `apps/platform/app/Support/OperationalControls/OperationalControlCatalog.php` and `apps/platform/app/Support/OperationalControls/OperationalControlEvaluator.php` + +**Checkpoint**: Shared workspace policy, governed AI decision, audit metadata, and runtime stop primitives exist; user stories can now proceed independently. + +--- + +## Phase 3: User Story 1 - Set Workspace AI Posture Once (Priority: P1) MVP + +**Goal**: Let a workspace owner or manager set one explicit workspace AI posture on the existing settings surface before any later AI-assisted workflow is added. + +**Independent Test**: Open `/admin/settings/workspace`, save `disabled` and `private_only`, verify the resolved explanation and approved-use-case summary update on the existing settings page, and confirm authorized and unauthorized actors still get the expected settings semantics. 
+ +### Tests for User Story 1 + +- [x] T010 [P] [US1] Add feature coverage for saving, resetting, and rendering the workspace AI policy section on the existing settings page in `apps/platform/tests/Feature/SettingsFoundation/WorkspaceAiPolicySettingsTest.php` +- [x] T011 [P] [US1] Extend positive and negative workspace-settings authorization coverage so non-members stay `404`, members without manage capability stay `403`, and authorized managers can mutate `ai.policy_mode` in `apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsManageTest.php`, `apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsViewOnlyTest.php`, and `apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsNonMemberNotFoundTest.php` +- [x] T012 [P] [US1] Extend workspace-settings audit coverage for AI policy mode updates and resets in `apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsAuditTest.php` + +### Implementation for User Story 1 + +- [x] T013 [US1] Add the `Workspace AI policy` section, approved use-case summary, allowed provider-class summary, and blocked data-class explanation to `apps/platform/app/Filament/Pages/Settings/WorkspaceSettings.php` +- [x] T014 [US1] Persist `ai.policy_mode` through the existing audited settings stack in `apps/platform/app/Support/Settings/SettingsRegistry.php`, `apps/platform/app/Services/Settings/SettingsResolver.php`, and `apps/platform/app/Services/Settings/SettingsWriter.php` +- [x] T015 [US1] Keep page-level save and reset behavior, helper text, and default-visible policy explanation derived from the central AI catalog instead of page-local strings in `apps/platform/app/Filament/Pages/Settings/WorkspaceSettings.php` and `apps/platform/app/Support/Ai/` + +**Checkpoint**: User Story 1 is independently functional when the workspace settings page owns one explicit AI posture with correct audit and authorization behavior. 
+ +--- + +## Phase 4: User Story 2 - Block Unsafe AI Requests Before Provider Resolution (Priority: P1) + +**Goal**: Force every in-scope AI request through one governed allow-or-block decision so unregistered use cases, blocked trust classes, and blocked data classifications never reach provider resolution. + +**Independent Test**: Exercise the governed AI boundary with approved and blocked request combinations, verify allowed private-only requests use only approved source families, and prove blocked requests never resolve a provider call. + +### Tests for User Story 2 + +- [x] T016 [P] [US2] Add unit coverage for the approved-use-case catalog and declared provider-class and data-classification rules in `apps/platform/tests/Unit/Support/Ai/AiUseCaseCatalogTest.php` +- [x] T017 [P] [US2] Add unit coverage for boundary precedence across missing workspace context, unregistered use cases, blocked provider classes, blocked data classifications, `disabled`, `private_only`, and allowed private-only requests in `apps/platform/tests/Unit/Support/Ai/GovernedAiExecutionBoundaryTest.php` +- [x] T018 [P] [US2] Add unit coverage for AI decision audit metadata shape and explicit exclusion of prompt text, raw source payloads, raw provider payloads, and output text in `apps/platform/tests/Unit/Support/Ai/AiDecisionAuditMetadataTest.php` +- [x] T019 [P] [US2] Add architecture-guard coverage that no direct AI provider call or vendor-specific runtime entry appears outside the governed boundary in `apps/platform/tests/Feature/Guards/NoDirectAiProviderBypassTest.php` + +### Implementation for User Story 2 + +- [x] T020 [US2] Finalize the governed request and decision contract plus no-provider-resolution behavior inside `apps/platform/app/Support/Ai/GovernedAiExecutionBoundary.php` and its request/decision collaborators under `apps/platform/app/Support/Ai/` +- [x] T021 [US2] Expose only approved source-family inputs for `product_knowledge.answer_draft` and 
`support_diagnostics.summary_draft` from `apps/platform/app/Support/ProductKnowledge/ContextualHelpResolver.php` and `apps/platform/app/Support/SupportDiagnostics/SupportDiagnosticBundleBuilder.php` without adding customer-facing AI UI, public-provider execution, or result persistence +- [x] T022 [US2] Route governed AI decision evaluation through the existing audit pipeline with stable allow-or-block metadata and no prompt/output persistence in `apps/platform/app/Support/Audit/AuditActionId.php`, `apps/platform/app/Services/Audit/WorkspaceAuditLogger.php`, and `apps/platform/app/Support/Ai/` + +**Checkpoint**: User Story 2 is independently functional when the central AI boundary blocks unsafe requests before provider resolution and records bounded audit metadata only. + +--- + +## Phase 5: User Story 3 - Pause All AI Execution Centrally During An Incident (Priority: P2) + +**Goal**: Let a platform operator pause and resume new AI execution from the existing system operational-controls surface without introducing a second AI admin console. + +**Independent Test**: Pause `ai.execution` on `/system/ops/controls`, verify an otherwise valid governed AI request blocks with the operational-control reason, then resume the control and verify normal policy evaluation resumes. 
+ +### Tests for User Story 3 + +- [x] T023 [P] [US3] Add feature coverage for pausing and resuming `ai.execution` on the existing controls page, including confirmation-backed state changes and visible control history, in `apps/platform/tests/Feature/System/OpsControls/AiExecutionOperationalControlTest.php` and `apps/platform/tests/Feature/System/OpsControls/OperationalControlManagementTest.php` +- [x] T024 [P] [US3] Extend positive and negative platform authorization coverage so `platform.access_system_panel` plus `platform.ops.controls.manage` remain authoritative for `ai.execution` pause/resume in `apps/platform/tests/Feature/OperationalControls/OperationalControlAuthorizationSemanticsTest.php` +- [x] T025 [P] [US3] Extend governed-boundary coverage so an active `ai.execution` control blocks otherwise valid requests until the control is resumed in `apps/platform/tests/Unit/Support/Ai/GovernedAiExecutionBoundaryTest.php` + +### Implementation for User Story 3 + +- [x] T026 [US3] Add the `ai.execution` control definition, operator-facing label, global-only scope, and evaluator lookup semantics in `apps/platform/app/Support/OperationalControls/OperationalControlCatalog.php` and `apps/platform/app/Support/OperationalControls/OperationalControlEvaluator.php` +- [x] T027 [US3] Add the AI execution control card plus confirmation-protected `Pause AI execution` and `Resume AI execution` actions to the existing system controls surface in `apps/platform/app/Filament/System/Pages/Ops/Controls.php` +- [x] T028 [US3] Keep operational-control copy, blocked-reason vocabulary, and control-history presentation aligned across `apps/platform/app/Filament/System/Pages/Ops/Controls.php` and `apps/platform/app/Support/Ai/GovernedAiExecutionBoundary.php` without introducing a new AI capability string or system AI console + +**Checkpoint**: User Story 3 is independently functional when the existing system controls page can pause and resume new AI execution and the boundary honors that 
stop immediately for new requests. + +--- + +## Phase 6: Polish & Cross-Cutting Concerns + +**Purpose**: Finish narrow validation, formatting, and reviewer close-out without widening scope. + +- [x] T029 [P] Run the focused unit validation commands recorded in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/248-private-ai-policy-foundation/plan.md` and `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/248-private-ai-policy-foundation/quickstart.md` for `apps/platform/tests/Unit/Support/Ai/AiUseCaseCatalogTest.php`, `apps/platform/tests/Unit/Support/Ai/AiDecisionAuditMetadataTest.php`, and `apps/platform/tests/Unit/Support/Ai/GovernedAiExecutionBoundaryTest.php` +- [x] T030 [P] Run the focused workspace-settings validation commands recorded in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/248-private-ai-policy-foundation/plan.md` and `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/248-private-ai-policy-foundation/quickstart.md` for `apps/platform/tests/Feature/SettingsFoundation/WorkspaceAiPolicySettingsTest.php`, `apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsManageTest.php`, `apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsViewOnlyTest.php`, `apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsNonMemberNotFoundTest.php`, and `apps/platform/tests/Feature/SettingsFoundation/WorkspaceSettingsAuditTest.php` +- [x] T031 [P] Run the focused system-control and architecture-guard validation commands recorded in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/248-private-ai-policy-foundation/plan.md` and `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/248-private-ai-policy-foundation/quickstart.md` for `apps/platform/tests/Feature/System/OpsControls/AiExecutionOperationalControlTest.php`, `apps/platform/tests/Feature/System/OpsControls/OperationalControlManagementTest.php`, 
`apps/platform/tests/Feature/OperationalControls/OperationalControlAuthorizationSemanticsTest.php`, and `apps/platform/tests/Feature/Guards/NoDirectAiProviderBypassTest.php` +- [x] T032 Run dirty-only formatting for touched platform files through `apps/platform/vendor/bin/sail` using the Pint command recorded in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/248-private-ai-policy-foundation/quickstart.md` +- [x] T033 Record the TEST-GOV-001 outcome, guardrail close-out, and any `document-in-feature` or `follow-up-spec` deferrals for public-provider execution, result persistence, budgeting, or queued AI work in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/248-private-ai-policy-foundation/plan.md` and `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/248-private-ai-policy-foundation/quickstart.md` + +--- + +## Dependencies & Execution Order + +### Phase Dependencies + +- **Phase 1 (Setup)**: no dependencies; start immediately. +- **Phase 2 (Foundational)**: depends on Phase 1 and blocks all user stories. +- **Phase 3 (US1)**: depends on Phase 2 and establishes the workspace-owned policy truth. +- **Phase 4 (US2)**: depends on Phase 2 and should ship with US1 because policy without a governed boundary would create false confidence. +- **Phase 5 (US3)**: depends on Phase 2 and is safest after US2 because the boundary must already honor `ai.execution` for the system control to be meaningful. +- **Phase 6 (Polish)**: depends on all desired user stories being complete. + +### User Story Dependencies + +- **US1 (P1)**: independently testable after Phase 2, but not safe to ship alone. +- **US2 (P1)**: independently testable after Phase 2 and must pair with US1 for a safe MVP. +- **US3 (P2)**: independently testable after Phase 2, but depends on the governed boundary from US2 to prove runtime stop behavior. + +### Within Each User Story + +- Write the listed Pest coverage first and make it fail for the intended behavior gap. 
+- Complete shared service enforcement before wiring the corresponding Filament surface. +- Re-run the narrowest affected validation command after each story checkpoint before moving to the next story. + +--- + +## Parallel Execution Examples + +### User Story 1 + +- T010, T011, and T012 can run in parallel before runtime edits begin. +- After test scaffolding exists, T013 and T014 can proceed in parallel because the page wiring and settings-stack persistence touch different files; T015 should follow both. + +### User Story 2 + +- T016, T017, T018, and T019 can run in parallel because they cover separate unit and guard files. +- After T020 settles the governed contract, T021 and T022 can proceed in parallel because source-family helpers and audit plumbing live on separate seams. + +### User Story 3 + +- T023, T024, and T025 can run in parallel before implementation starts. +- T026 should land before T027, and T028 should follow both so control-surface wording and boundary reason vocabulary stay consistent. + +--- + +## Implementation Strategy + +### Suggested MVP Scope + +- MVP = **US1 + US2 together**. Workspace policy alone is not safe to ship because the spec explicitly requires the governed boundary that enforces the policy before any provider resolution can occur. + +### Incremental Delivery + +1. Complete Phase 1 and Phase 2. +2. Deliver US1 and US2 together, then validate the settings-backed policy plus governed boundary behavior. +3. Deliver US3 to add the runtime stop on the existing system controls surface. +4. Finish with narrow validation, formatting, and feature-level close-out in Phase 6. + +### Team Strategy + +1. Finish Phase 2 together before splitting story work. +2. Parallelize test authoring inside each story first. +3. Serialize merges around `apps/platform/app/Support/Ai/` and `apps/platform/app/Filament/System/Pages/Ops/Controls.php`, because those seams are shared by multiple story tasks. \ No newline at end of file