diff --git a/apps/platform/app/Filament/Pages/Monitoring/Operations.php b/apps/platform/app/Filament/Pages/Monitoring/Operations.php index 5e5059e7..8d19c3cc 100644 --- a/apps/platform/app/Filament/Pages/Monitoring/Operations.php +++ b/apps/platform/app/Filament/Pages/Monitoring/Operations.php @@ -41,7 +41,6 @@ use Filament\Tables\Contracts\HasTable; use Filament\Tables\Table; use Illuminate\Database\Eloquent\Builder; -use Illuminate\Support\Collection; use Illuminate\Support\Str; use UnitEnum; @@ -115,6 +114,21 @@ class Operations extends Page implements HasForms, HasTable 'localOnlyStateKeys' => [], ]; + private const WORKBENCH_CANDIDATE_COLUMNS = [ + 'id', + 'workspace_id', + 'managed_environment_id', + 'user_id', + 'initiator_name', + 'type', + 'status', + 'outcome', + 'started_at', + 'completed_at', + 'created_at', + 'updated_at', + ]; + public string $activeTab = 'all'; /** @@ -378,66 +392,65 @@ private function topOperationFromQuery(?callable $scope = null, bool $sortByAtte return null; } - $query - ->with('tenant') - ->latest('id') - ->limit(50); - if ($scope !== null) { $query = $scope($query); } - /** @var Collection $runs */ - $runs = $query->get(); + $query->select(self::WORKBENCH_CANDIDATE_COLUMNS); - if ($runs->isEmpty()) { + if ($sortByAttention) { + $this->orderWorkbenchCandidateByAttention($query); + } else { + $query->latest('id'); + } + + $candidate = $query->first(); + + return $candidate instanceof OperationRun + ? $this->hydrateWorkbenchOperation((int) $candidate->getKey()) + : null; + } + + private function orderWorkbenchCandidateByAttention(Builder $query): Builder + { + return $query + ->orderByRaw( + 'case + when outcome = ? then 50 + when outcome = ? then 40 + when outcome = ? then 30 + when status in (?, ?) then 20 + when status = ? 
then 10 + else 0 + end desc', + [ + OperationRunOutcome::Blocked->value, + OperationRunOutcome::Failed->value, + OperationRunOutcome::PartiallySucceeded->value, + OperationRunStatus::Queued->value, + OperationRunStatus::Running->value, + OperationRunStatus::Completed->value, + ], + ) + ->orderByRaw('created_at is null asc') + ->latest('created_at') + ->latest('id'); + } + + private function hydrateWorkbenchOperation(int $operationRunId): ?OperationRun + { + $query = $this->scopedSummaryQuery(); + + if (! $query instanceof Builder) { return null; } - if (! $sortByAttention) { - return $runs->first(); - } - - return $runs - ->sort(function (OperationRun $left, OperationRun $right): int { - return [ - $this->attentionPriority($right), - $right->created_at?->getTimestamp() ?? 0, - (int) $right->getKey(), - ] <=> [ - $this->attentionPriority($left), - $left->created_at?->getTimestamp() ?? 0, - (int) $left->getKey(), - ]; - }) + return $query + ->with(['tenant', 'user']) + ->whereKey($operationRunId) ->first(); } - private function attentionPriority(OperationRun $run): int - { - if ((string) $run->outcome === OperationRunOutcome::Blocked->value) { - return 50; - } - - if ((string) $run->outcome === OperationRunOutcome::Failed->value) { - return 40; - } - - if ((string) $run->outcome === OperationRunOutcome::PartiallySucceeded->value) { - return 30; - } - - if ($run->problemClass() === OperationRun::PROBLEM_CLASS_ACTIVE_STALE_ATTENTION) { - return 20; - } - - if ($run->problemClass() === OperationRun::PROBLEM_CLASS_TERMINAL_FOLLOW_UP) { - return 10; - } - - return 0; - } - /** * @return array */ diff --git a/apps/platform/app/Filament/Resources/OperationRunResource.php b/apps/platform/app/Filament/Resources/OperationRunResource.php index 2c13c789..2b9a25b6 100644 --- a/apps/platform/app/Filament/Resources/OperationRunResource.php +++ b/apps/platform/app/Filament/Resources/OperationRunResource.php @@ -9,6 +9,7 @@ use App\Models\RestoreRun; use App\Models\User; use 
App\Models\VerificationCheckAcknowledgement; +use App\Services\Auth\ManagedEnvironmentAccessScopeResolver; use App\Support\Badges\BadgeCatalog; use App\Support\Badges\BadgeDomain; use App\Support\Badges\BadgeRenderer; @@ -214,6 +215,13 @@ public static function table(Table $table): Table } return collect($user->getTenants(Filament::getCurrentOrDefaultPanel())) + ->filter(function (ManagedEnvironment $tenant): bool { + $workspaceId = app(WorkspaceContext::class)->currentWorkspaceId(); + + return $workspaceId !== null + && (int) $tenant->workspace_id === (int) $workspaceId + && $tenant->isActive(); + }) ->mapWithKeys(static fn (ManagedEnvironment $tenant): array => [ (string) $tenant->getKey() => $tenant->getFilamentName(), ]) @@ -237,17 +245,11 @@ public static function table(Table $table): Table ->searchable(), Tables\Filters\SelectFilter::make('type') ->options(function (): array { - $workspaceId = app(WorkspaceContext::class)->currentWorkspaceId(); - - if ($workspaceId === null) { - return []; - } - - $types = OperationRun::query() - ->where('workspace_id', (int) $workspaceId) + $types = static::workspaceScopedFilterOptionQuery() ->select('type') ->distinct() ->orderBy('type') + ->limit(100) ->pluck('type', 'type') ->all(); @@ -271,18 +273,12 @@ public static function table(Table $table): Table Tables\Filters\SelectFilter::make('initiator_name') ->label('Initiator') ->options(function (): array { - $workspaceId = app(WorkspaceContext::class)->currentWorkspaceId(); - - if ($workspaceId === null) { - return []; - } - - return OperationRun::query() - ->where('workspace_id', (int) $workspaceId) + return static::workspaceScopedFilterOptionQuery() ->whereNotNull('initiator_name') ->select('initiator_name') ->distinct() ->orderBy('initiator_name') + ->limit(100) ->pluck('initiator_name', 'initiator_name') ->all(); }) @@ -299,6 +295,56 @@ public static function table(Table $table): Table ->emptyStateIcon('heroicon-o-queue-list'); } + private static function 
workspaceScopedFilterOptionQuery(): Builder + { + $workspaceId = app(WorkspaceContext::class)->currentWorkspaceId(); + $user = auth()->user(); + + $query = OperationRun::query(); + + if (! is_int($workspaceId) || ! $user instanceof User) { + return $query->whereRaw('1 = 0'); + } + + $query->where('workspace_id', $workspaceId); + + $allowedEnvironmentIds = app(ManagedEnvironmentAccessScopeResolver::class) + ->allowedManagedEnvironmentIdsForWorkspace($user, $workspaceId); + + $environmentFilterId = static::requestedEnvironmentFilterId(); + + if ($environmentFilterId !== null) { + if ($allowedEnvironmentIds !== null && ! in_array($environmentFilterId, array_map('intval', $allowedEnvironmentIds), true)) { /* strict check must compare like types, so ids are int-normalised here the same way as below */ + return $query->whereRaw('1 = 0'); + } + + return $query->where('managed_environment_id', $environmentFilterId); + } + + if ($allowedEnvironmentIds === null) { + return $query; + } + + $allowedEnvironmentIds = array_values(array_unique(array_map('intval', $allowedEnvironmentIds))); + + if ($allowedEnvironmentIds === []) { + return $query->whereRaw('1 = 0'); + } + + return $query->where(function (Builder $query) use ($allowedEnvironmentIds): void { + $query + ->whereNull('managed_environment_id') + ->orWhereIn('managed_environment_id', $allowedEnvironmentIds); + }); + } + + private static function requestedEnvironmentFilterId(): ?int + { + $value = request()->query('environment_id'); + + return is_numeric($value) ? 
(int) $value : null; + } + private static function enterpriseDetailPage(OperationRun $record): \App\Support\Ui\EnterpriseDetail\EnterpriseDetailPageData { $factory = new \App\Support\Ui\EnterpriseDetail\EnterpriseDetailSectionFactory; diff --git a/apps/platform/tests/Browser/Spec391OperationsHubStabilitySmokeTest.php b/apps/platform/tests/Browser/Spec391OperationsHubStabilitySmokeTest.php new file mode 100644 index 00000000..02e37ee1 --- /dev/null +++ b/apps/platform/tests/Browser/Spec391OperationsHubStabilitySmokeTest.php @@ -0,0 +1,150 @@ +browser()->timeout(60_000); + +it('Spec391 smokes the environment-filtered Operations Hub without debug or asset failure signatures', function (): void { + [$user, $environment] = spec391OperationsHubSmokeFixture(); + + spec391AuthenticateOperationsHubBrowser($this, $user, $environment); + + visit(OperationRunLinks::index($environment)) + ->resize(1440, 1100) + ->waitForText('Operations Hub') + ->assertSee('Environment filter: '.$environment->name) + ->assertSee('Which operation needs attention now?') + ->assertSee('Inventory sync') + ->assertSee('Recent runs') + ->assertDontSee('Spec391 browser hidden sibling operator') + ->assertDontSee('Maximum execution time') + ->assertDontSee('HasAttributes.php') + ->assertDontSee('Stack trace') + ->assertDontSee('spec391 browser raw payload should stay hidden') + ->assertDontSee('spec391 browser stack trace should stay hidden') + ->assertDontSee('filamentSchema is not defined') + ->assertScript('typeof window.Livewire !== "undefined"', true) + ->assertScript('typeof window.Alpine !== "undefined"', true) + ->assertScript('(() => { + const html = document.documentElement.outerHTML; + const urls = Array.from(document.querySelectorAll("a[href], script[src], link[href]")) + .map((element) => element.getAttribute("href") || element.getAttribute("src") || ""); + + return ! html.includes("_debugbar") + && ! html.includes("phpstorm://") + && urls.every((url) => ! url.includes("_debugbar") && ! 
url.startsWith("phpstorm://")); + })()', true) + ->assertNoJavaScriptErrors() + ->assertNoConsoleLogs() + ->screenshot(true, spec391OperationsHubScreenshot('operations-hub-stability')); + + expect(base_path('tests/Browser/Screenshots/'.spec391OperationsHubScreenshot('operations-hub-stability').'.png')) + ->toBeFile(); +}); + +/** + * @return array{0: User, 1: ManagedEnvironment} + */ +function spec391OperationsHubSmokeFixture(): array +{ + bindFailHardGraphClient(); + + $environment = ManagedEnvironment::factory()->active()->create([ + 'name' => 'Spec391 Browser Stable Environment', + 'external_id' => 'spec391-browser-stable-environment', + ]); + + [$user, $environment] = createUserWithTenant( + tenant: $environment, + role: 'owner', + workspaceRole: 'owner', + ); + + $siblingEnvironment = ManagedEnvironment::factory()->active()->create([ + 'workspace_id' => (int) $environment->workspace_id, + 'name' => 'Spec391 Browser Hidden Sibling', + 'external_id' => 'spec391-browser-hidden-sibling', + ]); + + createUserWithTenant( + tenant: $siblingEnvironment, + user: $user, + role: 'owner', + workspaceRole: 'owner', + ); + + OperationRun::factory()->forTenant($environment)->create([ + 'type' => 'inventory_sync', + 'status' => OperationRunStatus::Completed->value, + 'outcome' => OperationRunOutcome::Blocked->value, + 'initiator_name' => 'Spec391 browser visible operator', + 'context' => [ + 'reason_code' => 'write_gate_blocked', + 'raw_payload' => 'spec391 browser raw payload should stay hidden', + 'stack_trace' => 'spec391 browser stack trace should stay hidden', + 'target_scope' => [ + 'scope_display_name' => 'Spec391 Browser Stable Environment', + ], + ], + 'completed_at' => null, + ]); + + foreach (range(1, 8) as $index) { + OperationRun::factory()->forTenant($environment)->create([ + 'type' => $index % 2 === 0 ? 'policy.sync' : 'backup.schedule.execute', + 'status' => OperationRunStatus::Completed->value, + 'outcome' => $index % 3 === 0 + ? 
OperationRunOutcome::Failed->value + : OperationRunOutcome::Succeeded->value, + 'initiator_name' => 'Spec391 browser bulk operator '.$index, + 'completed_at' => now()->subMinutes($index), + ]); + } + + OperationRun::factory()->forTenant($siblingEnvironment)->create([ + 'type' => 'restore.execute', + 'status' => OperationRunStatus::Completed->value, + 'outcome' => OperationRunOutcome::Failed->value, + 'initiator_name' => 'Spec391 browser hidden sibling operator', + ]); + + return [$user, $environment]; +} + +function spec391AuthenticateOperationsHubBrowser( + mixed $test, + User $user, + ManagedEnvironment $environment, +): void { + $workspaceId = (int) $environment->workspace_id; + + $session = [ + WorkspaceContext::SESSION_KEY => $workspaceId, + WorkspaceContext::LAST_ENVIRONMENT_IDS_SESSION_KEY => [ + (string) $workspaceId => (int) $environment->getKey(), + ], + SuppressDebugbarForSmokeRequests::SESSION_KEY => SuppressDebugbarForSmokeRequests::COOKIE_VALUE, + ]; + + $test->actingAs($user)->withSession($session); + + foreach ($session as $key => $value) { + session()->put($key, $value); + } + + setAdminPanelContext($environment); +} + +function spec391OperationsHubScreenshot(string $name): string +{ + return 'spec391-'.$name; +} diff --git a/apps/platform/tests/Feature/Monitoring/Spec391OperationsHubStabilityTest.php b/apps/platform/tests/Feature/Monitoring/Spec391OperationsHubStabilityTest.php new file mode 100644 index 00000000..c92d63d4 --- /dev/null +++ b/apps/platform/tests/Feature/Monitoring/Spec391OperationsHubStabilityTest.php @@ -0,0 +1,249 @@ +active()->create([ + 'name' => 'Spec391 Stable Environment', + ]); + [$user, $environment] = createUserWithTenant($environment, role: 'owner', workspaceRole: 'owner'); + + $siblingEnvironment = ManagedEnvironment::factory()->active()->create([ + 'workspace_id' => (int) $environment->workspace_id, + 'name' => 'Spec391 Hidden Sibling Environment', + ]); + createUserWithTenant(tenant: $siblingEnvironment, user: $user, 
role: 'owner', workspaceRole: 'owner'); + + OperationRun::factory()->forTenant($environment)->create([ + 'type' => 'inventory_sync', + 'status' => OperationRunStatus::Completed->value, + 'outcome' => OperationRunOutcome::Blocked->value, + 'initiator_name' => 'Spec391 allowed operator', + 'context' => [ + 'reason_code' => 'write_gate_blocked', + 'raw_payload' => 'spec391 raw payload should stay hidden', + 'stack_trace' => 'spec391 stack trace should stay hidden', + 'debug_metadata' => 'spec391 debug metadata should stay hidden', + 'target_scope' => [ + 'scope_display_name' => 'Spec391 Stable Environment', + ], + ], + 'failure_summary' => [ + 'exception' => 'spec391 exception detail should stay hidden', + ], + 'completed_at' => null, + ]); + + foreach (range(1, 54) as $index) { + OperationRun::factory()->forTenant($environment)->create([ + 'type' => $index % 2 === 0 ? 'policy.sync' : 'backup.schedule.execute', + 'status' => OperationRunStatus::Completed->value, + 'outcome' => $index % 3 === 0 + ? 
OperationRunOutcome::Failed->value + : OperationRunOutcome::Succeeded->value, + 'initiator_name' => 'Spec391 bulk operator '.$index, + 'context' => [ + 'target_scope' => [ + 'scope_display_name' => 'Spec391 Stable Environment', + 'scope_identifier' => 'spec391-'.$index, + ], + 'raw_payload' => str_repeat('large-spec391-payload-', 200), + ], + 'completed_at' => now()->subMinutes($index), + ]); + } + + OperationRun::factory()->forTenant($siblingEnvironment)->create([ + 'type' => 'restore.execute', + 'status' => OperationRunStatus::Completed->value, + 'outcome' => OperationRunOutcome::Failed->value, + 'initiator_name' => 'Spec391 hidden sibling operator', + ]); + + $operationsUrl = OperationRunLinks::index($environment); + + expect($operationsUrl) + ->toContain('environment_id='.(int) $environment->getKey()) + ->toContain('/operations'); + + $queries = []; + + DB::flushQueryLog(); + DB::enableQueryLog(); + + try { + $response = assertNoOutboundHttp(function () use ($environment, $operationsUrl, $user) { + return $this + ->actingAs($user) + ->withSession([WorkspaceContext::SESSION_KEY => (int) $environment->workspace_id]) + ->get($operationsUrl); + }); + } finally { + $queries = DB::getQueryLog(); + DB::disableQueryLog(); + } + + $response + ->assertOk() + ->assertSee('Operations Hub') + ->assertSee('Environment filter:') + ->assertSee('Spec391 Stable Environment') + ->assertSee('Inventory sync'); + + foreach ([ + 'Maximum execution time', + 'HasAttributes.php', + 'Stack trace', + '_debugbar', + 'phpstorm://', + 'filamentSchema is not defined', + 'spec391 raw payload should stay hidden', + 'spec391 stack trace should stay hidden', + 'spec391 debug metadata should stay hidden', + 'spec391 exception detail should stay hidden', + 'Spec391 hidden sibling operator', + ] as $signature) { + $response->assertDontSee($signature, false); + } + + $operationRunQueries = collect($queries) + ->pluck('query') + ->map(static fn (string $query): string => mb_strtolower($query)); + + 
$candidateQueries = $operationRunQueries + ->filter(static fn (string $query): bool => str_contains($query, 'operation_runs') + && str_contains($query, 'case') + && str_contains($query, 'limit 1')) + ->values(); + + expect($candidateQueries)->not->toBeEmpty() + ->and($candidateQueries->first())->not->toContain('select *') + ->and($operationRunQueries->contains( + static fn (string $query): bool => str_contains($query, 'operation_runs') + && str_contains($query, 'limit 50'), + ))->toBeFalse(); +}); + +it('Spec391 renders a controlled empty Operations state for an entitled environment', function (): void { + bindFailHardGraphClient(); + + $environment = ManagedEnvironment::factory()->active()->create([ + 'name' => 'Spec391 Empty Environment', + ]); + [$user, $environment] = createUserWithTenant($environment, role: 'owner', workspaceRole: 'owner'); + + $response = assertNoOutboundHttp(function () use ($environment, $user) { + return $this + ->actingAs($user) + ->withSession([WorkspaceContext::SESSION_KEY => (int) $environment->workspace_id]) + ->get(OperationRunLinks::index($environment)); + }); + + $response + ->assertOk() + ->assertSee('Operations Hub') + ->assertSee('Environment filter:') + ->assertSee('Spec391 Empty Environment') + ->assertSee('No operations need follow-up') + ->assertSee('No failed, blocked, partial, or stale OperationRuns are visible in this scope.') + ->assertSee('No operations found') + ->assertDontSee('environment is healthy') + ->assertDontSee('governance health is complete') + ->assertDontSee('Maximum execution time') + ->assertDontSee('Stack trace'); +}); + +it('Spec391 bounds Operations Hub filter option catalogs to the requested environment scope', function (): void { + $environment = ManagedEnvironment::factory()->active()->create([ + 'name' => 'Spec391 Entitled Environment', + ]); + [$user, $environment] = createUserWithTenant($environment, role: 'owner', workspaceRole: 'owner'); + + $unentitledSibling = 
ManagedEnvironment::factory()->active()->create([ + 'workspace_id' => (int) $environment->workspace_id, + 'name' => 'Spec391 Unentitled Sibling', + ]); + + $foreignEnvironment = ManagedEnvironment::factory()->active()->create([ + 'name' => 'Spec391 Foreign Environment', + ]); + + OperationRun::factory()->forTenant($environment)->create([ + 'type' => 'inventory_sync', + 'initiator_name' => 'Spec391 entitled operator', + ]); + + OperationRun::factory()->tenantlessForWorkspace($environment->workspace()->firstOrFail())->create([ + 'type' => 'backup.schedule.execute', + 'initiator_name' => 'Spec391 workspace operator', + ]); + + OperationRun::factory()->forTenant($unentitledSibling)->create([ + 'type' => 'policy.sync', + 'initiator_name' => 'Spec391 unentitled operator', + ]); + + OperationRun::factory()->forTenant($foreignEnvironment)->create([ + 'type' => 'restore.execute', + 'initiator_name' => 'Spec391 foreign operator', + ]); + + $this->actingAs($user); + setAdminPanelContext($environment); + session()->put(WorkspaceContext::SESSION_KEY, (int) $environment->workspace_id); + + $component = Livewire::withQueryParams(['environment_id' => (int) $environment->getKey()]) + ->actingAs($user) + ->test(Operations::class); + + $environmentFilter = $component->instance()->getTable()->getFilter('managed_environment_id'); + $typeFilter = $component->instance()->getTable()->getFilter('type'); + $initiatorFilter = $component->instance()->getTable()->getFilter('initiator_name'); + + expect($environmentFilter?->getOptions()) + ->toHaveKey((string) $environment->getKey()) + ->not->toHaveKey((string) $unentitledSibling->getKey()) + ->not->toHaveKey((string) $foreignEnvironment->getKey()); + + expect($typeFilter?->getOptions()) + ->toHaveKey('inventory.sync') + ->not->toHaveKey('backup.schedule.execute') + ->not->toHaveKey('policy.sync') + ->not->toHaveKey('restore.execute'); + + expect($initiatorFilter?->getOptions()) + ->toHaveKey('Spec391 entitled operator') + 
->not->toHaveKey('Spec391 workspace operator') + ->not->toHaveKey('Spec391 unentitled operator') + ->not->toHaveKey('Spec391 foreign operator'); +}); + +it('Spec391 rejects same-workspace environment filters when the environment is not entitled', function (): void { + $environment = ManagedEnvironment::factory()->active()->create(); + [$user, $environment] = createUserWithTenant($environment, role: 'owner', workspaceRole: 'owner'); + + $unentitledSibling = ManagedEnvironment::factory()->active()->create([ + 'workspace_id' => (int) $environment->workspace_id, + ]); + + $this + ->actingAs($user) + ->withSession([WorkspaceContext::SESSION_KEY => (int) $environment->workspace_id]) + ->get(route('admin.operations.index', [ + 'workspace' => $environment->workspace, + 'environment_id' => (int) $unentitledSibling->getKey(), + ])) + ->assertNotFound(); +}); diff --git a/specs/391-operations-hub-stability-debug-safe-runtime/artifacts/screenshots/spec391-operations-hub-stability.png b/specs/391-operations-hub-stability-debug-safe-runtime/artifacts/screenshots/spec391-operations-hub-stability.png new file mode 100644 index 00000000..72df2dc0 Binary files /dev/null and b/specs/391-operations-hub-stability-debug-safe-runtime/artifacts/screenshots/spec391-operations-hub-stability.png differ diff --git a/specs/391-operations-hub-stability-debug-safe-runtime/artifacts/verification.md b/specs/391-operations-hub-stability-debug-safe-runtime/artifacts/verification.md new file mode 100644 index 00000000..a1a80790 --- /dev/null +++ b/specs/391-operations-hub-stability-debug-safe-runtime/artifacts/verification.md @@ -0,0 +1,106 @@ +# Spec 391 Verification Notes + +## Preparation Status + +- Created: 2026-06-20. +- Initial branch before Spec Kit script: `platform-dev`. +- Initial latest commit: `c0c3286a feat: add restore readiness resolution adapter improvements (#461)`. +- Initial `git status --short`: clean. 
+- Spec Kit branch after script: `391-operations-hub-stability-debug-safe-runtime`. +- Preparation only: no application implementation, tests, migrations, seeders, provider syncs, restore jobs, exports, deletes, archives, notifications, or destructive commands were run. +- Implementation start branch: `391-operations-hub-stability-debug-safe-runtime`. +- Implementation start latest commit: `c0c3286a feat: add restore readiness resolution adapter improvements (#461)`. +- Implementation start `git status --short --branch`: branch plus untracked `specs/391-operations-hub-stability-debug-safe-runtime/`. + +## Audit Evidence Source + +- BUG-001: `specs/browser-productization-bug-audit/browser-bug-report.md`. +- BUG-009: `specs/browser-productization-bug-audit/browser-bug-report.md`. +- Known audited route: `http://localhost/admin/workspaces/3/operations?environment_id=4`. +- Observed audit failure: HTTP 500/timeout/debug page with `Maximum execution time of 30 seconds exceeded`. +- Observed runtime leakage: missing Filament globals, Vite dev-client failures, Debugbar requests/source links, and `phpstorm://open` links. +- Latest Laravel error inspected before implementation still showed `Maximum execution time of 30 seconds exceeded` at `Illuminate\Database\Eloquent\Concerns\HasAttributes.php:1577`. + +## Root Cause + +- `Operations::topOperationFromQuery()` selected up to 50 full `OperationRun` models, eager-loaded `tenant`, hydrated JSON-cast attributes, then sorted in PHP with `problemClass()`/freshness-derived accessors. +- `OperationRunResource` type and initiator filter options were workspace-only scans and did not narrow on the durable `environment_id` route prefilter. +- The fix selects one bounded workbench candidate in SQL priority order, hydrates only that selected run for presentation, and bounds filter option scans to workspace entitlement plus the requested `environment_id` where present. 
+ +## Implementation Verification + +| Check | Result | +|---|---| +| Route checked | Generated canonical `/admin/workspaces/{workspace}/operations?environment_id={environment}` with safe fixture ids | +| HTTP status | 200 in feature request and browser smoke | +| Browser render time after authentication | Browser smoke command duration 4.79s / 5.54s including test harness; deterministic feature guard asserts bounded SQL candidate query instead of a hard browser threshold | +| Page title/header | `Operations Hub` visible | +| Visible table/empty state | Seeded table renders `Inventory sync`; no-run environment renders controlled `No operations need follow-up` and `No operations found` states | +| Workspace/environment context | `Environment filter: Spec391 ... Environment` visible and canonical `environment_id` query present | +| Console errors | `assertNoJavaScriptErrors()` and `assertNoConsoleLogs()` passed in browser smoke | +| Network 500s | Operations route returned 200; no direct Pest Browser network log assertion available in this harness | +| Laravel debug page visible | Not visible in feature/browser assertions | +| Stack trace / `Maximum execution time` visible | Not visible in feature/browser assertions | +| Debugbar/source-link leakage in productization-smoke mode | `_debugbar`, `phpstorm://`, source-link/debug signatures not visible; smoke session uses `SuppressDebugbarForSmokeRequests` | +| Missing Filament globals | `window.Livewire` and `window.Alpine` present; `filamentSchema is not defined` not visible; no JS/console errors | +| Vite dev-client failure in productization-smoke mode | No console errors/logs in browser smoke; `PanelThemeAsset` behavior unchanged | +| Safe OperationRun detail action | Existing Operations link tests still pass; Spec 391 render test asserts canonical Operations URL and bounded detail/action surface remains available through existing table behavior | +| Provider mutations / restore jobs / exports / deletes / archives / 
notifications | None executed | +| Screenshot artifact | Browser test asserts `apps/platform/tests/Browser/Screenshots/spec391-operations-hub-stability.png`; spec artifact copy is stored at `specs/391-operations-hub-stability-debug-safe-runtime/artifacts/screenshots/spec391-operations-hub-stability.png` | + +## Commands Run + +```bash +cd apps/platform && ./vendor/bin/sail php -l app/Filament/Pages/Monitoring/Operations.php +cd apps/platform && ./vendor/bin/sail php -l app/Filament/Resources/OperationRunResource.php +cd apps/platform && ./vendor/bin/sail php -l tests/Feature/Monitoring/Spec391OperationsHubStabilityTest.php +cd apps/platform && ./vendor/bin/sail php -l tests/Browser/Spec391OperationsHubStabilitySmokeTest.php +cd apps/platform && ./vendor/bin/sail artisan test tests/Feature/Monitoring/Spec391OperationsHubStabilityTest.php +cd apps/platform && ./vendor/bin/sail artisan test tests/Browser/Spec391OperationsHubStabilitySmokeTest.php +cd apps/platform && ./vendor/bin/sail artisan test tests/Feature/Monitoring/OperationsHubProductizationTest.php tests/Feature/Monitoring/OperationsTenantScopeTest.php tests/Feature/Filament/OperationRunListFiltersTest.php +cd apps/platform && ./vendor/bin/sail pint app/Filament/Pages/Monitoring/Operations.php app/Filament/Resources/OperationRunResource.php tests/Feature/Monitoring/Spec391OperationsHubStabilityTest.php tests/Browser/Spec391OperationsHubStabilitySmokeTest.php +git diff --check +``` + +## Final Command Results + +- `Spec391OperationsHubStabilityTest`: 4 passed, 46 assertions. +- `Spec391OperationsHubStabilitySmokeTest`: 1 passed, 17 assertions. +- Adjacent feature suite (`OperationsHubProductizationTest`, `OperationsTenantScopeTest`, `OperationRunListFiltersTest`): 25 passed, 234 assertions. +- `pint`: PASS, 4 files. +- `git diff --check`: PASS. 
+ +## Changed Files + +- `apps/platform/app/Filament/Pages/Monitoring/Operations.php` +- `apps/platform/app/Filament/Resources/OperationRunResource.php` +- `apps/platform/tests/Feature/Monitoring/Spec391OperationsHubStabilityTest.php` +- `apps/platform/tests/Browser/Spec391OperationsHubStabilitySmokeTest.php` +- `specs/391-operations-hub-stability-debug-safe-runtime/tasks.md` +- `specs/391-operations-hub-stability-debug-safe-runtime/artifacts/verification.md` +- `specs/391-operations-hub-stability-debug-safe-runtime/artifacts/screenshots/spec391-operations-hub-stability.png` + +## Final Git Status + +```text +## 391-operations-hub-stability-debug-safe-runtime + M apps/platform/app/Filament/Pages/Monitoring/Operations.php + M apps/platform/app/Filament/Resources/OperationRunResource.php +?? apps/platform/tests/Browser/Spec391OperationsHubStabilitySmokeTest.php +?? apps/platform/tests/Feature/Monitoring/Spec391OperationsHubStabilityTest.php +?? specs/391-operations-hub-stability-debug-safe-runtime/ +``` + +## Safety Confirmation + +- No migrations, seeders, provider syncs, provider mutations, restore jobs, exports, deletes, archives, force-deletes, notifications, customer-facing delivery actions, or destructive commands were executed. +- No PHP `max_execution_time` increase. +- No new persisted entity, enum/status family, operation type, summary-count key, lifecycle semantic, cache layer, provider registration, panel path, or global-search posture change. +- Filament v5 / Livewire v4.0+ compliance preserved; no Livewire v3 or Filament legacy APIs introduced. +- Panel provider registration remains `apps/platform/bootstrap/providers.php`. +- `OperationRunResource` remains non-globally-searchable. 
+ +## Known Limitations + +- The browser test runs inside Sail with the repo mounted read-only at `/var/www/repo`, so it validates the generated Pest Browser screenshot under `apps/platform/tests/Browser/Screenshots/` instead of attempting an unreliable write into repo-level `specs/`. The spec artifact screenshot was copied from that generated browser screenshot on the host side. +- Pest Browser in this harness was used for DOM, JS, and console assertions; direct network-request introspection was not available, so route 200 plus absence of explicit debug/source-link signatures is the recorded network-adjacent proof. diff --git a/specs/391-operations-hub-stability-debug-safe-runtime/checklists/requirements.md b/specs/391-operations-hub-stability-debug-safe-runtime/checklists/requirements.md new file mode 100644 index 00000000..de72e053 --- /dev/null +++ b/specs/391-operations-hub-stability-debug-safe-runtime/checklists/requirements.md @@ -0,0 +1,54 @@ +# Requirements Checklist: Spec 391 - Operations Hub Stability and Debug-Safe Runtime + +**Purpose**: Validate that Spec 391 is scoped, implementable, constitution-aligned, and ready for a later implementation loop. +**Created**: 2026-06-20 +**Feature**: `specs/391-operations-hub-stability-debug-safe-runtime/spec.md` + +## Scope And Candidate Gate + +- [ ] CHK001 The selected candidate is directly backed by browser audit BUG-001 and BUG-009. +- [ ] CHK002 The package does not reopen completed Operations productization specs except as context. +- [ ] CHK003 Out-of-scope Evidence, Provider, Review Pack, Restore, dashboard, system-login branding, and customer-facing delivery topics remain excluded. +- [ ] CHK004 No application implementation is included in the preparation package. + +## UI And Operations Surface + +- [ ] CHK005 The UI Surface Impact decision is coherent and names the existing Operations hub surface. 
+- [ ] CHK006 Existing UI-016 coverage is treated as still valid unless implementation materially changes route/archetype. +- [ ] CHK007 The Operations hub remains a read-only monitoring/registry surface with no new dangerous actions. +- [ ] CHK008 The environment filter is explicit route/page state, not hidden global context. +- [ ] CHK009 Controlled empty/error/loading states are required without masking the expensive render path or flashing raw framework/debug output. + +## RBAC, Isolation, And OperationRun Truth + +- [ ] CHK010 Workspace membership and environment entitlement are required before rows, counts, filters, or links reveal tenant-bound runs. +- [ ] CHK011 Non-member/non-entitled access remains deny-as-not-found according to existing route contract. +- [ ] CHK012 OperationRun execution truth is preserved; no status/outcome/lifecycle semantics are changed to satisfy performance. +- [ ] CHK013 Existing OperationRun detail/view links remain delegated to `OperationRunLinks` / tenantless viewer paths. + +## Runtime And Test Governance + +- [ ] CHK014 Feature/Livewire tests cover route render, scope, bounded index behavior, no debug page, no Graph calls, empty state, and safe detail links. +- [ ] CHK015 Browser tests cover HTTP success, render timing, console/runtime globals, Vite dev-client failure, Debugbar/source links, stack traces, and network 500s. +- [ ] CHK016 Productization-smoke controls are opt-in and do not disable normal local Debugbar/Vite workflow. +- [ ] CHK017 Fixtures do not require seeders, real provider access, provider syncs, restore jobs, exports, deletes, archives, or notifications. +- [ ] CHK018 Browser lane addition is explicit and bounded to Spec 391. + +## Proportionality And Architecture + +- [ ] CHK019 No new persisted entity, migration, enum/status family, taxonomy, domain abstraction, or cross-domain framework is introduced by the spec. 
+- [ ] CHK020 Any migration/index need discovered later must update spec/plan before implementation continues. +- [ ] CHK021 The plan requires identifying the actual expensive render path instead of increasing timeouts or adding a catch-all. +- [ ] CHK022 Existing shared OperationRun, badge, table pagination, asset, and smoke-login patterns are reused first. + +## Review Outcome + +- [ ] CHK023 Review outcome class selected: `acceptable-special-case` if artifacts remain bounded; otherwise document finding. +- [ ] CHK024 Workflow outcome selected: `keep` if artifacts remain implementation-ready; otherwise `split` or `document-in-feature`. +- [ ] CHK025 Manual reviewer confirms Spec Readiness Gate before implementation begins. + +## Notes + +- Intended review outcome after preparation: `acceptable-special-case`. +- Intended workflow outcome after preparation: `keep`. +- Final implementation close-out target: `Guardrail / Exception / Smoke Coverage`. diff --git a/specs/391-operations-hub-stability-debug-safe-runtime/plan.md b/specs/391-operations-hub-stability-debug-safe-runtime/plan.md new file mode 100644 index 00000000..77b53cc9 --- /dev/null +++ b/specs/391-operations-hub-stability-debug-safe-runtime/plan.md @@ -0,0 +1,247 @@ +# Implementation Plan: Spec 391 - Operations Hub Stability and Debug-Safe Runtime + +**Branch**: `391-operations-hub-stability-debug-safe-runtime` | **Date**: 2026-06-20 | **Spec**: `specs/391-operations-hub-stability-debug-safe-runtime/spec.md` +**Input**: Feature specification from `/specs/391-operations-hub-stability-debug-safe-runtime/spec.md` + +## Summary + +Stabilize the existing admin Operations hub so the environment-filtered route renders quickly and safely, then add focused productization browser-smoke guardrails for the exact debug/runtime leakage observed in BUG-001 and BUG-009. 
The work stays inside the Operations render/query/runtime-smoke surface and must not change Evidence, Provider, Review Pack, Restore, dashboard, provider mutation, export, or customer delivery semantics. + +## Technical Context + +**Language/Version**: PHP 8.4.15, Laravel 12.52, Filament 5.2.1, Livewire 4.1.4. +**Primary Dependencies**: Filament v5, Livewire v4, Pest 4, PostgreSQL, existing browser smoke helpers. +**Storage**: Existing PostgreSQL `operation_runs`, `workspaces`, and `managed_environments`; no new storage expected. +**Testing**: Pest 4 feature/Livewire/browser tests. +**Validation Lanes**: fast-feedback/confidence for feature tests; browser for productization smoke; targeted formatting. +**Target Platform**: Laravel admin panel at `/admin`, local Sail/Dokploy-style container runtime. +**Project Type**: Laravel monolith under `apps/platform`. +**Performance Goals**: Operations route under 3 seconds after auth for audited data shape; bounded/paginated index render. +**Constraints**: No migrations unless proven and spec/plan updated first; no seeders; no queues/jobs that mutate provider/customer state; no Graph/provider calls in render; do not increase PHP max execution time. +**Scale/Scope**: Existing Operations hub, environment-filtered route, runtime-smoke checks. + +## UI / Surface Guardrail Plan + +- **Guardrail scope**: changed existing operator-facing Operations surface plus workflow-only productization browser smoke guardrail. +- **Affected routes/pages/actions/states/navigation/panel/provider surfaces**: + - `/admin/workspaces/{workspace}/operations` + - `/admin/workspaces/{workspace}/operations?environment_id={managedEnvironment}` + - `App\Filament\Pages\Monitoring\Operations` + - `App\Filament\Resources\OperationRunResource` + - Existing dashboard/workspace drilldowns that link to Operations + - Productization-smoke browser route checks +- **No-impact class, if applicable**: N/A. 
+- **Native vs custom classification summary**: Native Filament page/table/resource plus existing Operations Blade composition; no new visual system. +- **Shared-family relevance**: OperationRun monitoring/detail family, action links, status badges, browser-smoke runtime guard. +- **State layers in scope**: URL-query `environment_id`, page/table filters, session filter state where already used, browser console/network/DOM assertions. +- **Audience modes in scope**: operator-MSP, manager, support-platform. +- **Decision/diagnostic/raw hierarchy plan**: Operations default-visible list/workbench remains decision-first; raw context, stack traces, provider payloads, and source links remain diagnostic-only or absent from productization-smoke output. +- **Raw/support gating plan**: no new raw/support exposure; smoke must fail if debug pages/source links/raw stack traces become visible. +- **One-primary-action / duplicate-truth control**: preserve existing open/detail action as the dominant safe next step; do not add competing retry/export/destructive actions. +- **Handling modes by drift class or surface**: review-mandatory for Operations render path and runtime-smoke guard; report-only for existing UI-016 coverage unless implementation materially changes route/archetype. +- **Repository-signal treatment**: review-mandatory because this touches a strategic monitoring surface and adds Browser lane proof. +- **Special surface test profiles**: `monitoring-state-page` and `global-context-shell`. +- **Required tests or manual smoke**: Feature/Livewire render/scoping/bounded tests plus Browser productization smoke. +- **Exception path and spread control**: none expected. +- **Active feature PR close-out entry**: Guardrail / Exception / Smoke Coverage. +- **UI/Productization coverage decision**: Existing UI-016 coverage remains valid; implementation must update audit registry only if visible archetype/route changes exceed stability-state changes. 
+- **Coverage artifacts to update**: none by default; screenshots under the spec artifacts folder for final browser verification. +- **No-impact rationale**: N/A. +- **Navigation / Filament provider-panel handling**: no panel provider changes; provider registration remains `apps/platform/bootstrap/providers.php`. +- **Screenshot or page-report need**: screenshot required for final smoke evidence; no full page report unless implementation changes the Operations page archetype. + +## Shared Pattern & System Fit + +- **Cross-cutting feature marker**: yes, bounded. +- **Systems touched**: Operations hub, OperationRunResource table/list rendering, OperationRun links/presenters, productization browser smoke, Debugbar/Vite asset-smoke controls. +- **Shared abstractions reused**: `OperationRunLinks`, `OperationUxPresenter`, `BadgeCatalog`, `BadgeRenderer`, `TablePaginationProfiles`, `SuppressDebugbarForSmokeRequests`, `PanelThemeAsset`, existing Pest Browser smoke patterns. +- **New abstraction introduced? why?**: none expected. If needed, add only a small test/support helper for productization-smoke runtime assertions. +- **Why the existing abstraction was sufficient or insufficient**: Existing OperationRun UI semantics are sufficient; existing smoke coverage missed BUG-001/BUG-009 under the audited route and runtime mode. +- **Bounded deviation / spread control**: Any new smoke helper must be test/support-local, explicitly opt-in, and must not disable normal local Debugbar/Vite behavior. + +## OperationRun UX Impact + +- **Touches OperationRun start/completion/link UX?**: yes, link/render path only. +- **Central contract reused**: `OperationRunLinks`, existing tenantless OperationRun detail viewer, OperationRunResource table conventions. +- **Delegated UX behaviors**: `Open operation` / `View run` URL resolution stays delegated to existing helpers; no queued toast or terminal notification change. 
+- **Surface-owned behavior kept local**: environment filter application, bounded list rendering, controlled empty/error/loading state, browser runtime assertions. +- **Queued DB-notification policy**: N/A. +- **Terminal notification path**: N/A. +- **Exception path**: none. + +## Provider Boundary & Portability Fit + +- **Shared provider/platform boundary touched?**: no. +- **Provider-owned seams**: none. +- **Platform-core seams**: OperationRun execution truth and Operations monitoring view only. +- **Neutral platform terms / contracts preserved**: workspace, managed environment, operation, OperationRun, execution truth. +- **Retained provider-specific semantics and why**: none added. +- **Bounded extraction or follow-up path**: none. + +## Constitution Check + +- Inventory-first: N/A, no inventory truth changes. +- Read/write separation: read-only render/smoke work only; no provider/customer mutations. +- Graph contract path: no Graph calls; render path must remain DB-only. +- Deterministic capabilities: existing entitlement/capability paths retained. +- RBAC-UX: admin plane route, workspace membership, environment entitlement, 404 not-found semantics for non-entitled scopes; UI visibility is not authorization. +- Workspace isolation: Operations query and summary/filter options must scope by current workspace before rows render. +- Tenant isolation: tenant-bound runs must be visible only when actor is entitled to referenced managed environment. +- Run observability: no new OperationRun creation/status transition; existing OperationRun truth remains the source. +- OperationRun start UX: no start UX change; links reuse central helpers. +- Ops-UX lifecycle: no `OperationRun.status` / `OperationRun.outcome` transitions. +- Ops-UX summary counts: no new keys; default list render must not parse large summary/context payloads unnecessarily. +- Automation: no queues/jobs are triggered by this spec. 
+- **Data minimization**: debug pages, stack traces, raw context, provider payloads, `_debugbar`, and source links must not appear in productization-smoke mode. +- **Test governance**: Feature + Browser lanes are explicit and bounded. +- **Proportionality**: no new persistence, domain abstraction, status family, taxonomy, or cross-domain framework. +- **Filament-native UI**: preserve native Filament table/page/resource semantics; no new ad-hoc status styling. +- **UI/Productization coverage**: existing UI-016 coverage remains valid unless implementation discovers material route/archetype change. + +## Test Governance Check + +- **Test purpose / classification by changed surface**: Feature/Livewire for render/scoping/bounded query proof; Browser for runtime/debug leakage; Unit only if a helper is introduced. +- **Affected validation lanes**: fast-feedback/confidence and browser. +- **Why this lane mix is the narrowest sufficient proof**: Feature tests catch deterministic server render/scoping/performance issues; Browser test catches JS globals, Vite dev-client, Debugbar/source-link, and visible debug page regressions. +- **Narrowest proving command(s)**: + - `cd apps/platform && php vendor/bin/pest tests/Feature/Monitoring/Spec391OperationsHubRendersWithEnvironmentFilterTest.php` + - `cd apps/platform && php vendor/bin/pest tests/Feature/Monitoring/Spec391OperationRunResourceIndexPerformanceTest.php` + - `cd apps/platform && php artisan test --compact tests/Browser/Spec391OperationsHubProductizationSmokeTest.php` + - `cd apps/platform && php vendor/bin/pint --test <changed-files>` + - `git diff --check` +- **Fixture / helper / factory / seed / context cost risks**: Use factories and smoke-login helpers; no seeders; no provider setup; no real Graph; no queue mutation. +- **Expensive defaults or shared helper growth introduced?**: no; any browser helper must be explicit and local. +- **Heavy-family additions, promotions, or visibility changes**: one explicit browser smoke file.
+- **Surface-class relief / special coverage rule**: special `monitoring-state-page` / `global-context-shell` coverage required. +- **Closing validation and reviewer handoff**: reviewers should check render timing/query bounds, runtime smoke assertions, and no unrelated semantic changes. +- **Budget / baseline / trend follow-up**: document actual render timing and whether lower-level guard substitutes for CI browser timing. +- **Review-stop questions**: Did implementation fix the expensive path, or merely catch/mask it? Did any helper widen browser/default setup? Did any provider/evidence/review/restore semantics change? +- **Escalation path**: document-in-feature. +- **Active feature PR close-out entry**: Guardrail / Exception / Smoke Coverage. +- **Why no dedicated follow-up spec is needed**: This is a direct audit-regression fix with bounded smoke guardrails; broader BUG-009/system branding follow-up remains separate if needed. + +## Project Structure + +### Documentation (this feature) + +```text +specs/391-operations-hub-stability-debug-safe-runtime/ +├── spec.md +├── plan.md +├── tasks.md +├── checklists/ +│ └── requirements.md +└── artifacts/ + ├── verification.md + └── screenshots/ +``` + +### Source Code (repository root) + +Implementation is expected to remain in existing Laravel app and test paths: + +```text +apps/platform/app/Filament/Pages/Monitoring/Operations.php +apps/platform/app/Filament/Resources/OperationRunResource.php +apps/platform/app/Models/OperationRun.php +apps/platform/app/Http/Middleware/SuppressDebugbarForSmokeRequests.php +apps/platform/app/Support/Filament/PanelThemeAsset.php +apps/platform/tests/Feature/Monitoring/ +apps/platform/tests/Browser/ +apps/platform/tests/Unit/Filament/ +``` + +**Structure Decision**: Existing Laravel/Filament app structure under `apps/platform`; no new base folders and no migrations expected. 
+ +## Complexity Tracking + +| Violation | Why Needed | Simpler Alternative Rejected Because | +|-----------|------------|-------------------------------------| +| N/A | No constitution violation planned | N/A | + +## Proportionality Review + +- **Current operator problem**: A common Operations drilldown fails with timeout/500/debug page and productization browser validation is polluted by debug/runtime leakage. +- **Existing structure is insufficient because**: Existing route/tests did not catch environment-filtered render-path cost or productization-smoke runtime leakage. +- **Narrowest correct implementation**: Stabilize existing query/render path and add focused runtime leak assertions. +- **Ownership cost created**: Small targeted test/browser smoke upkeep. +- **Alternative intentionally rejected**: Increase timeout, hide route, broad catch-all, broad UI redesign, broad productization infrastructure rewrite. +- **Release truth**: Current productization blocker. + +## Technical Approach + +1. Reproduce or confirm BUG-001 in browser/Playwright or by targeted route render before editing. +2. Inspect the current render path: + - `Operations::decisionWorkbench()` + - `Operations::selectedWorkbenchOperation()` + - `Operations::topOperationFromQuery()` + - `Operations::summaryCount()` + - `Operations::table()` + - `OperationRunResource::table()` + - OperationRun accessors/casts used by status/outcome/next-action/scope columns. +3. Identify the expensive path rather than masking it. Likely investigation areas: + - `dashboardNeedsFollowUp()` and current terminal/actionability scopes. + - `topOperationFromQuery()` fetching up to 50 full rows and sorting with `requiresOperatorReview()` / `problemClass()` in PHP. + - Table columns invoking `actionDecision()`, `primaryActionUrl()`, `targetScopeDisplay()`, `history*Description()`, or badge renderers for every visible row. + - `context`, `failure_summary`, and `summary_counts` JSON casts hydrated by `select *`. 
+ - Filter option queries for type/initiator scanning historical rows. + - Relationship access for tenant/user/related artifacts. +4. Fix by bounding and scoping: + - Apply workspace/environment entitlement in base queries. + - Keep pagination and page-size profile. + - Use selective eager loading only for relationships actually displayed. + - Avoid full JSON hydration on index rows where possible. + - Move heavy proof/diagnostic work to detail or collapsed/support surfaces. + - Replace PHP sorting over hydrated runs with query-level ordering or a smaller deterministic candidate set when possible. +5. Add controlled states: + - No-runs empty state for active scope. + - Productization-safe non-debug failure assertions. + - No false health claims. +6. Add productization-smoke path: + - Prefer existing smoke-login and `SuppressDebugbarForSmokeRequests`. + - Prefer existing `PanelThemeAsset` / built asset fallback behavior. + - Fail on the exact BUG-009 signatures in smoke mode only. + +## Data / Migration Implications + +- No migrations are expected. +- If an index becomes necessary to meet the render budget, stop and update `spec.md` and `plan.md` with the proven query plan, migration safety, rollback/forward notes, and PostgreSQL lane coverage before implementing the migration. + +## Rollout Considerations + +- No environment variables are expected unless implementation proves a narrow productization-smoke-only flag is needed. +- No queue, scheduler, storage, or provider credential changes. +- Normal local Debugbar/Vite developer workflow must remain unchanged outside explicit productization-smoke sessions. +- Deployment asset strategy remains normal Filament/Vite deployment; if assets are registered or changed, include `cd apps/platform && php artisan filament:assets` in deploy notes. + +## Risk Controls + +- Do not change OperationRun lifecycle/status/outcome semantics. +- Do not add new operation types or summary-count keys. +- Do not add unscoped cache. 
+- Do not call Graph or remote provider clients from render. +- Do not dispatch provider/restore/export jobs. +- Do not rewrite completed Operations productization specs. +- Use browser as final source of truth for route status/runtime leakage. + +## Implementation Phases + +### Phase 1 - Baseline and focused regression tests + +Confirm current failure or relevant logs, then add failing feature/browser tests around environment-filtered render, scoping, bounded rows, and runtime leakage. + +### Phase 2 - Operations render-path stabilization + +Optimize only the existing Operations query/table/workbench path. Preserve user-visible workbench semantics while eliminating unbounded scans, heavy per-row JSON/accessor work, and unrelated relationship traversal. + +### Phase 3 - Controlled states and safe detail links + +Ensure empty/error/loading states are clear and that safe OperationRun detail links still work for authorized records. + +### Phase 4 - Productization-smoke runtime guardrail + +Make the browser smoke fail on BUG-009 signatures in productization-smoke mode without breaking normal local development. + +### Phase 5 - Verification and close-out + +Run targeted tests, formatting checks, browser smoke, direct route verification, and complete `artifacts/verification.md`. diff --git a/specs/391-operations-hub-stability-debug-safe-runtime/spec.md b/specs/391-operations-hub-stability-debug-safe-runtime/spec.md new file mode 100644 index 00000000..7c23c0cf --- /dev/null +++ b/specs/391-operations-hub-stability-debug-safe-runtime/spec.md @@ -0,0 +1,397 @@ +# Feature Specification: Spec 391 - Operations Hub Stability and Debug-Safe Runtime + +**Feature Branch**: `391-operations-hub-stability-debug-safe-runtime` +**Created**: 2026-06-20 +**Status**: Draft +**Input**: User-provided Spec 391 prompt plus browser productization audit BUG-001 and BUG-009. 
+ +## Problem + +The Operations hub times out under environment filtering and exposes debug/runtime leakage during productization audit. + +## Goals + +- Stable Operations render for the audited environment-filtered route. +- Scoped environment filtering with bounded, paginated index rendering. +- Controlled empty, error, and loading states. +- Browser-smoke guard for debug/runtime leakage. +- No destructive, provider-mutating, restore, export, or customer-delivery side effects. + +## Non-Goals + +- Evidence/provider/review-pack semantics. +- Restore workflow redesign. +- System login branding. +- Broad app-wide UI overhaul. +- Broad productization infrastructure redesign beyond the narrow smoke path required for this regression. + +## Spec Candidate Check *(mandatory - SPEC-GATE-001)* + +- **Problem**: The admin Operations hub can time out under an environment filter and expose a raw Laravel debug page, while productization browser audits are polluted by Debugbar/source links, missing Filament globals, and Vite dev-client failures. +- **Today's failure**: `/admin/workspaces/3/operations?environment_id=4` was observed taking roughly 40 seconds, returning HTTP 500, and showing a debug page with `Maximum execution time of 30 seconds exceeded`; browser logs also showed missing Filament/Alpine globals and debug/source-link leakage. +- **User-visible improvement**: Operators can open the Operations hub from dashboard/workspace drilldowns and receive a bounded, scoped, customer-ready operations list or controlled empty/error state without debug/runtime pollution. +- **Smallest enterprise-capable version**: Stabilize only the existing admin Operations hub and productization-smoke runtime checks needed to catch the audited regression; keep evidence, provider readiness, review pack, restore, and dashboard semantics out of scope. 
+- **Explicit non-goals**: No Evidence anchor changes, provider permission/readiness semantics, review-pack download gating, Customer Review Workspace labeling, system login branding, restore readiness redesign, broad UI redesign, production infrastructure overhaul, provider mutations, restore jobs, exports, deletes, archives, or notifications. +- **Permanent complexity imported**: No new persisted entity, table, enum/status family, domain abstraction, taxonomy, or operation lifecycle truth is intended. Some focused tests/browser-smoke helpers may be added if existing smoke controls are insufficient. +- **Why now**: The route is a common drilldown from operations/workspace surfaces and currently blocks productization browser validation with a P1 500/timeout. +- **Why not local**: The fix should stay local to Operations render/query/runtime-smoke paths, but it still requires a spec because the route is a strategic operator surface, uses OperationRun execution truth, and adds explicit Browser lane guardrails. +- **Approval class**: Core Enterprise. +- **Red flags triggered**: None requiring defense. Browser-smoke guardrails are bounded to productization validation and do not create a general UI/runtime framework. +- **Score**: Nutzen: 2 | Dringlichkeit: 2 | Scope: 2 | Komplexitaet: 2 | Produktnaehe: 2 | Wiederverwendung: 1 | **Gesamt: 11/12** +- **Decision**: approve. + +## Spec Scope Fields *(mandatory)* + +- **Scope**: canonical-view. +- **Primary Routes**: `/admin/workspaces/{workspace}/operations`, including `?environment_id={managedEnvironment}`. +- **Data Ownership**: `operation_runs` are tenant-owned execution records with `workspace_id` and nullable `managed_environment_id`; the Operations hub is a workspace-context canonical view and must enforce workspace and environment entitlement before revealing rows. +- **RBAC**: Admin plane only. 
A workspace member may view the workspace Operations route; environment-filtered data must be limited to environments the actor is entitled to view. Non-member or non-entitled workspace/environment access remains deny-as-not-found (404). Member-without-capability semantics for any existing detail/action links remain unchanged. + +For canonical-view specs: + +- **Default filter behavior when tenant-context is active**: `environment_id` is an explicit URL/table filter owned by the Operations page; it must not rely on hidden global environment context, legacy aliases, or remembered tenant state. +- **Explicit entitlement checks preventing cross-tenant leakage**: The query must constrain by current workspace and permitted managed environment ids before rendering rows, summary counts, filter options, or drilldown links. + +## UI Surface Impact *(mandatory - UI-COV-001)* + +Does this spec add, remove, rename, or materially change any reachable UI surface? + +- [ ] No UI surface impact +- [x] Existing page changed +- [ ] New page/route added +- [ ] Navigation changed +- [ ] Filament panel/provider surface changed +- [ ] New modal/drawer/wizard/action added +- [x] New table/form/state added +- [ ] Customer-facing surface changed +- [ ] Dangerous action changed +- [x] Status/evidence/review presentation changed +- [x] Workspace/environment context presentation changed + +## UI/Productization Coverage *(mandatory when UI Surface Impact is not "No UI surface impact"; otherwise write `N/A - no reachable UI surface impact` plus rationale)* + +- **Route/page/surface**: Admin Operations hub, `App\Filament\Pages\Monitoring\Operations`, backed by `App\Filament\Resources\OperationRunResource`. +- **Current or new page archetype**: Existing Operations Hub strategic surface, UI-016. +- **Design depth**: Strategic Surface, but this spec is a stability/runtime guardrail pass rather than a visual redesign. 
+- **Repo-truth level**: repo-verified for route, page class, resource, OperationRun model, existing browser tests, and audit evidence. +- **Existing pattern reused**: Existing Spec 328 Operations Hub workbench, OperationRun monitoring/detail family, `OperationRunLinks`, `OperationUxPresenter`, `BadgeCatalog` / `BadgeRenderer`, `TablePaginationProfiles`, `SuppressDebugbarForSmokeRequests`, and `PanelThemeAsset` patterns. +- **New pattern required**: none expected; add only narrow productization-smoke assertions/helpers if existing smoke controls cannot express BUG-009 checks. +- **Screenshot required**: yes for final browser smoke if implementation changes visible Operations states; store under `specs/391-operations-hub-stability-debug-safe-runtime/artifacts/screenshots/`. +- **Page audit required**: no new full page audit by default; this is a regression-stability pass over an existing audited strategic surface. Escalate only if implementation materially changes the page archetype. +- **Customer-safe review required**: no, this route is admin/operator-facing. It still must avoid raw debug pages, stack traces, raw provider secrets, and customer-facing artifact leakage in productization-smoke mode. +- **Dangerous-action review required**: no new dangerous actions. Existing detail/actions must retain existing authorization, confirmation, and audit behavior. +- **Coverage files updated or explicitly not needed**: + - [ ] `docs/ui-ux-enterprise-audit/route-inventory.md` + - [ ] `docs/ui-ux-enterprise-audit/design-coverage-matrix.md` + - [ ] `docs/ui-ux-enterprise-audit/page-reports/...` + - [ ] `docs/ui-ux-enterprise-audit/strategic-surfaces.md` + - [ ] `docs/ui-ux-enterprise-audit/grouped-follow-up-candidates.md` + - [ ] `docs/ui-ux-enterprise-audit/unresolved-pages.md` + - [x] `N/A - existing UI-016 Operations route coverage remains valid unless implementation discovers a material archetype or route change` +- **No-impact rationale when applicable**: N/A. 
+ +## Cross-Cutting / Shared Pattern Reuse *(mandatory when the feature touches notifications, status messaging, action links, header actions, dashboard signals/cards, alerts, navigation entry points, evidence/report viewers, or any other existing shared operator interaction family; otherwise write `N/A - no shared interaction family touched`)* + +- **Cross-cutting feature?**: yes, bounded. +- **Interaction class(es)**: status messaging, table/list rendering, action links, navigation/drilldown, browser-smoke runtime guardrails. +- **Systems touched**: Operations hub, OperationRun list/detail links, productization browser smoke, Debugbar/Vite/Filament runtime checks. +- **Existing pattern(s) to extend**: OperationRun monitoring family, `OperationRunLinks`, `OperationUxPresenter`, `BadgeCatalog` / `BadgeRenderer`, `TablePaginationProfiles`, `SuppressDebugbarForSmokeRequests`, `PanelThemeAsset`, existing Pest Browser smoke tests. +- **Shared contract / presenter / builder / renderer to reuse**: Existing OperationRun and Filament-native presentation paths; no new shared runtime framework unless a tiny helper is required to keep smoke assertions deterministic. +- **Why the existing shared path is sufficient or insufficient**: Existing paths already own status/action/link semantics; the gap is bounded render performance and runtime-smoke coverage, not a missing domain contract. +- **Allowed deviation and why**: none expected. Any productization-smoke helper must remain test/support-local and not change normal local developer workflow. +- **Consistency impact**: Operations list/detail language must continue to use OperationRun execution truth and existing run-link vocabulary. Debug/runtime checks must not fail arbitrary local development warnings outside smoke mode. +- **Review focus**: Verify no parallel status language, action-link path, or broad runtime framework is introduced. 
+ +## OperationRun UX Impact *(mandatory when the feature creates, queues, deduplicates, resumes, blocks, completes, or deep-links to an `OperationRun`; otherwise write `N/A - no OperationRun start or link semantics touched`)* + +- **Touches OperationRun start/completion/link UX?**: yes, link/render semantics only. No OperationRun creation, queueing, status transition, completion, deduplication, or reconciliation write is in scope. +- **Shared OperationRun UX contract/layer reused**: `OperationRunLinks`, `OperationUxPresenter`, `OperationRunResource`, existing tenantless OperationRun viewer/detail routes. +- **Delegated start/completion UX behaviors**: `Open operation` / `View run` links and tenant/workspace-safe URL resolution stay delegated to existing OperationRun link helpers. Start/completion messaging is N/A. +- **Local surface-owned behavior that remains**: Query scoping, environment filter display, bounded list rendering, empty/error/loading state copy, and smoke-regression checks. +- **Queued DB-notification policy**: N/A - no queued operation starts or notifications. +- **Terminal notification path**: N/A - no terminal lifecycle notification changes. +- **Exception required?**: none. + +## Provider Boundary / Platform Core Check *(mandatory when the feature changes shared provider/platform seams, identity scope, governed-subject taxonomy, compare strategy selection, provider connection descriptors, or operator vocabulary that may leak provider-specific semantics into platform-core truth; otherwise write `N/A - no shared provider/platform boundary touched`)* + +- **Shared provider/platform boundary touched?**: no. +- **Boundary classification**: N/A. +- **Seams affected**: N/A. +- **Neutral platform terms preserved or introduced**: Operations, OperationRun, workspace, managed environment, execution truth. +- **Provider-specific semantics retained and why**: none added. 
+- **Why this does not deepen provider coupling accidentally**: The feature must not call Graph, mutate provider state, or add provider-specific filters/labels beyond existing recorded run context. +- **Follow-up path**: none. + +## UI / Surface Guardrail Impact *(mandatory when operator-facing surfaces are changed; otherwise write `N/A`)* + +| Surface / Change | Operator-facing surface change? | Native vs Custom | Shared-Family Relevance | State Layers Touched | Exception Needed? | Low-Impact / `N/A` Note | +|---|---|---|---|---|---|---| +| Operations hub environment-filtered index stability | yes | Native Filament page/resource plus existing Blade composition | OperationRun monitoring family | page, table, URL-query, browser runtime | no | Existing surface; stability and bounded states only | +| Productization browser smoke runtime checks | yes, validation workflow only | Pest Browser / existing smoke helpers | Browser smoke guardrail | browser session, console/network/DOM assertions | no | Smoke mode must not alter normal local dev workflow | + +## Decision-First Surface Role *(mandatory when operator-facing surfaces are changed)* + +| Surface | Decision Role | Human-in-the-loop Moment | Immediately Visible for First Decision | On-Demand Detail / Evidence | Why This Is Primary or Why Not | Workflow Alignment | Attention-load Reduction | +|---|---|---|---|---|---|---|---| +| Operations hub | Primary Decision Surface for execution follow-up | Operator decides whether an operation needs inspection or whether the current filtered scope has no runs | Page title, workspace/environment context, bounded table or empty state, status/outcome, time, safe next action | Operation detail, diagnostics, raw context, stack traces, provider payloads | Primary because it is the canonical execution monitoring hub | Follows operations triage and drilldown from dashboard/workspace surfaces | Removes blocker caused by timeout/debug page and keeps rows bounded | + +## Audience-Aware 
Disclosure *(mandatory when operator-facing surfaces are changed)* + +| Surface | Audience Modes In Scope | Decision-First Default-Visible Content | Operator Diagnostics | Support / Raw Evidence | One Dominant Next Action | Hidden / Gated By Default | Duplicate-Truth Prevention | +|---|---|---|---|---|---|---|---| +| Operations hub | operator-MSP, manager, support-platform | operation type/name, status/outcome, environment, started/updated time, duration if available, attention/error indicator, empty/error state | run detail and collapsed diagnostics | raw context, failure summary, stack trace, provider payloads | Open operation/detail for safe records | raw debug pages, stack traces, provider secrets, source links, Debugbar | list states the run outcome once; detail adds proof only | + +## UI/UX Surface Classification *(mandatory when operator-facing surfaces are changed)* + +| Surface | Action Surface Class | Surface Type | Likely Next Operator Action | Primary Inspect/Open Model | Row Click | Secondary Actions Placement | Destructive Actions Placement | Canonical Collection Route | Canonical Detail Route | Scope Signals | Canonical Noun | Critical Truth Visible by Default | Exception Type / Justification | +|---|---|---|---|---|---|---|---|---|---|---|---|---|---| +| Operations hub | List / Table / Monitoring | Read-only Registry / Report Surface | Open an operation or clear/adjust filter | row/detail route | allowed | existing filters/contextual links only | none introduced | `/admin/workspaces/{workspace}/operations` | `/admin/workspaces/{workspace}/operations/{run}` | workspace route plus explicit environment filter chip | Operations / Operation | successful render, scoped rows, status/outcome, environment, time, safe next action | none | + +## Operator Surface Contract *(mandatory when operator-facing surfaces are changed)* + +| Surface | Primary Persona | Decision / Operator Action Supported | Surface Type | Primary Operator Question | Default-visible 
Information | Diagnostics-only Information | Status Dimensions Used | Mutation Scope | Primary Actions | Dangerous Actions | +|---|---|---|---|---|---|---|---|---|---|---| +| Operations hub | Operations responder / MSP operator | Determine whether filtered operations need attention and open safe detail | Monitoring list/workbench | Did the selected workspace/environment operations route load successfully, and what run needs attention? | page title, context, active environment filter, bounded rows or empty state, status/outcome, timing, duration, next action | raw context, stack traces, debug/source links, provider payloads, support diagnostics | execution status, terminal outcome, environment scope, lifecycle/freshness where already supported | none in this spec | open operation/detail; clear filter | none introduced | + +## Proportionality Review *(mandatory when structural complexity is introduced)* + +- **New source of truth?**: no. +- **New persisted entity/table/artifact?**: no. +- **New abstraction?**: no domain abstraction expected. Test/support helpers are allowed only if existing smoke controls cannot express the checks. +- **New enum/state/reason family?**: no. +- **New cross-domain UI framework/taxonomy?**: no. +- **Current operator problem**: Operations route fails to render and productization smoke cannot distinguish real UX issues from debug/runtime leakage. +- **Existing structure is insufficient because**: Existing tests did not catch the environment-filtered timeout/debug-page regression or the BUG-009 runtime pollution path. +- **Narrowest correct implementation**: Optimize existing Operations query/render path and add focused smoke assertions for the affected route/runtime conditions. +- **Ownership cost**: A small feature/browser test family and possibly a productization-smoke test helper; no new runtime truth. 
+- **Alternative intentionally rejected**: Increasing PHP `max_execution_time`, hiding/removing the route, generic catch-all masking, broad UI redesign, or app-wide debug infrastructure rewrite. +- **Release truth**: Current-release productization blocker. + +### Compatibility posture + +This feature assumes a pre-production environment. Backward compatibility, legacy aliases, migration shims, historical fixtures, and compatibility-specific tests are out of scope unless implementation proves an existing contract requires them. + +## Testing / Lane / Runtime Impact *(mandatory for runtime behavior changes)* + +- **Test purpose / classification**: Feature/Livewire for route/render/scoping/query guards; Browser for authenticated productization smoke and JS/runtime leak checks; Unit only if a small asset/debug helper is introduced. +- **Validation lane(s)**: fast-feedback/confidence for targeted Pest feature tests; browser for productization runtime smoke; profiling only if implementation needs query/render measurement. +- **Why this classification and these lanes are sufficient**: The regression is both server-render and browser-runtime visible; a feature-only test would miss console/Vite/Debugbar leakage, while browser-only proof would be too slow and less deterministic for query/scoping guards. +- **New or expanded test families**: One explicit Spec 391 Operations Hub feature/Livewire family and one explicit Spec 391 browser smoke file. +- **Fixture / helper cost impact**: Must use factories or existing browser smoke-login helpers, no seeders, no provider setup, no real Graph access, no queues/jobs that mutate provider/customer state. +- **Heavy-family visibility / justification**: Browser smoke is explicit because BUG-009 is browser/runtime-specific. It must remain named and scoped to Operations/productization smoke. +- **Special surface test profile**: `monitoring-state-page` plus `global-context-shell`. 
+- **Standard-native relief or required special coverage**: Special coverage required for environment-filtered render budget, debug-page absence, missing Filament globals, Vite client failures, Debugbar/source-link leakage, and network 500s. +- **Reviewer handoff**: Reviewers must confirm lane fit, no hidden seed/provider setup, no broad browser suite drift, and exact proof commands. +- **Budget / baseline / trend impact**: The browser smoke should fail when render time exceeds a reasonable threshold; target is under 3 seconds for the audited local data shape. If CI timing is flaky, keep a lower-level query/render guard and record measured browser timing in verification. +- **Escalation needed**: document-in-feature. +- **Active feature PR close-out entry**: Guardrail / Exception / Smoke Coverage. +- **Planned validation commands**: + - `cd apps/platform && php vendor/bin/pest tests/Feature/Monitoring/Spec391OperationsHubRendersWithEnvironmentFilterTest.php` + - `cd apps/platform && php vendor/bin/pest tests/Feature/Monitoring/Spec391OperationRunResourceIndexPerformanceTest.php` + - `cd apps/platform && php artisan test --compact tests/Browser/Spec391OperationsHubProductizationSmokeTest.php` + - `cd apps/platform && php vendor/bin/pint --test <touched-php-files>` + - `git diff --check` + +## User Scenarios & Testing *(mandatory)* + +### User Story 1 - Environment-filtered Operations route renders (Priority: P1) + +As an admin operator, I want `/admin/workspaces/{workspace}/operations?environment_id={id}` to render a bounded Operations hub for an entitled environment so dashboard/workspace drilldowns do not land on a 500 or timeout. + +**Why this priority**: This is the audited P1 blocker. + +**Independent Test**: Authenticate as a workspace/environment-entitled admin and open the route with fixture operation runs; assert HTTP success, no debug page text, bounded rows, active environment context, and safe detail link for a record. + +**Acceptance Scenarios**: + +1. 
**Given** an entitled workspace user and an environment filter, **When** the Operations hub opens, **Then** the response is successful and renders within the agreed budget. +2. **Given** another environment exists in the same or another workspace, **When** the filter is applied, **Then** rows, counts, and filter options remain scoped to the permitted workspace/environment. + +--- + +### User Story 2 - Bounded Operations index rendering (Priority: P1) + +As an operator, I want the Operations index to paginate and avoid per-row heavy accessors so the page stays responsive even when many operation runs exist. + +**Why this priority**: The observed max-execution error points to render-path cost rather than missing infrastructure. + +**Independent Test**: Create many OperationRun rows with large context/failure payloads and assert the index route/table render does not hydrate unbounded rows or scan expensive per-row details for every record. + +**Acceptance Scenarios**: + +1. **Given** more operation runs than one table page, **When** the Operations index renders, **Then** only the bounded page/list context is evaluated. +2. **Given** operation rows contain large JSON context, **When** the list renders, **Then** default columns do not parse or present raw detail payloads per row. + +--- + +### User Story 3 - Controlled empty/error/loading states (Priority: P2) + +As an operator, I want filtered Operations states to be understandable and customer-ready so no-data or recoverable render problems do not look like application crashes. + +**Why this priority**: The route should fail closed and explain the current scope without masking the real performance issue. + +**Independent Test**: Open Operations with no rows for an entitled environment and with a safely simulated render failure where applicable; assert the page shows controlled copy and no raw Laravel stack trace. + +**Acceptance Scenarios**: + +1. 
**Given** no operation runs exist for the active environment filter, **When** the page loads, **Then** a specific empty state is visible and no false health claim appears. +2. **Given** a non-debug productization-smoke browser session, **When** Operations encounters a handled display-only state, **Then** no raw Laravel debug page, stack trace, or `Maximum execution time` text is visible. + +--- + +### User Story 4 - Productization-safe browser smoke catches runtime leakage (Priority: P2) + +As a productization reviewer, I want the Operations smoke path to fail on debug/runtime leakage so future audits are not polluted by Debugbar, Vite dev-client failures, or missing Filament globals. + +**Why this priority**: BUG-009 directly affected audit signal quality and Filament table/action reliability. + +**Independent Test**: Run the Spec 391 browser smoke in productization-smoke mode and assert no missing Filament globals, Vite client connection failures, `_debugbar` requests/DOM, `phpstorm://open` links, visible stack traces, network 500s, or debug page text. + +**Acceptance Scenarios**: + +1. **Given** productization-smoke mode is active, **When** the browser opens Operations, **Then** Debugbar/source links are absent and compiled/stable assets or existing test asset fallbacks are used. +2. **Given** Filament/Livewire runtime is missing, **When** the smoke runs, **Then** the test fails with a specific console/global/runtime assertion. + +### Edge Cases + +- The audited workspace/environment ids may not exist in every test database; automated browser tests must discover or create a safe fixture instead of hardcoding ids unless the audited fixture is explicitly present. +- Environment filter values from another workspace must not leak rows or options. +- Empty filters must render workspace-wide entitled rows only. +- Invalid `environment_id` must be discarded or rejected according to existing Operations route contract without leaking existence. 
+- Large `context`, `failure_summary`, or summary count payloads must not become default-visible list content. +- Productization-smoke mode must not disable normal local developer Debugbar/Vite behavior outside the explicit smoke session. + +## Requirements *(mandatory)* + +### Functional Requirements + +- **FR-391-001**: The admin Operations hub MUST render successfully for an authenticated, workspace-entitled user with an entitled `environment_id` filter. +- **FR-391-002**: The Operations hub MUST NOT return HTTP 500 or expose a raw Laravel debug page for the audited environment-filtered path. +- **FR-391-003**: The Operations hub MUST apply workspace and environment filters at the query level before rows, summary counts, filter options, or links are rendered. +- **FR-391-004**: The Operations list MUST remain paginated and bounded using the existing table pagination profile or a narrower documented equivalent. +- **FR-391-005**: The Operations index MUST avoid expensive per-row work during render, including Graph calls, unbounded relationship traversal, and default parsing/presentation of large JSON context/failure payloads. +- **FR-391-006**: Visible Operations columns MUST remain useful for operation type/name, status/outcome, environment/scope, started/updated time, duration when available, and attention/error/next-action signal. +- **FR-391-007**: Filter option queries MUST be scoped and bounded enough not to scan unrelated workspaces or unbounded historical rows during normal index render. +- **FR-391-008**: The route MUST show a controlled empty state when no operation runs exist for the active workspace/environment filter. +- **FR-391-009**: The route MUST preserve filters/context during normal loading states and MUST NOT flash raw framework/debug output while loading. +- **FR-391-010**: The route MUST show controlled error/notice states only for appropriate display conditions and MUST NOT hide an expensive render path behind a catch-all. 
+- **FR-391-011**: Existing safe operation detail/view links from the list MUST still route to the canonical tenantless OperationRun detail viewer for authorized records. +- **FR-391-012**: Dashboard/workspace navigation links that point to Operations MUST no longer lead to a broken Operations page. +- **FR-391-013**: Productization browser smoke MUST fail on visible Laravel debug pages, stack traces, `Maximum execution time`, network 500s, missing Filament globals, missing Livewire/Alpine runtime needed by the page, Vite dev-client connection failures in smoke mode, `_debugbar` leakage, or `phpstorm://open` source links. +- **FR-391-014**: Productization-smoke mode MUST use existing environment/test controls when possible and MUST NOT disable Debugbar or Vite globally for ordinary local development. +- **FR-391-015**: Tests MUST be deterministic and MUST NOT require real provider access, seeders, provider syncs, restore execution, exports, deletes, archives, or queued customer/provider mutations. + +### Non-Functional Requirements + +- **NFR-391-001**: Target browser render budget is under 3 seconds after authentication for the audited local data shape. +- **NFR-391-002**: If browser timing is too flaky for CI, implementation MUST add a lower-level query/render guard and record observed browser timing in verification. +- **NFR-391-003**: Operations render must remain DB-only and must not invoke `GraphClientInterface` or external provider clients. +- **NFR-391-004**: No migrations are expected. If implementation proves a migration or index is required, update this spec and plan before continuing. +- **NFR-391-005**: No new operation type, status, outcome, reason family, summary-count key, or persisted truth is allowed. + +### Acceptance Criteria + +1. `/admin/workspaces/3/operations?environment_id=4` renders successfully for the audited workspace/environment or the implementation browser test discovers an equivalent safe fixture when exact ids differ. +2. 
The route does not return 500. +3. The route does not expose a raw Laravel debug page or stack trace. +4. The route renders in a bounded time under normal local productization validation conditions. +5. Target browser render budget is under 3 seconds after authentication for the audited data shape. +6. Operations list is paginated/bounded. +7. Environment filtering does not trigger N+1-heavy presenter/model accessor work. +8. Table columns/actions do not perform expensive per-row work during render. +9. Empty states are controlled and customer-ready. +10. Error states are controlled and customer-ready. +11. Existing operation detail/view actions still work for safe records. +12. Navigation links from dashboard/workspace surfaces to Operations no longer lead to a broken page. +13. Browser smoke test catches a future Operations 500/timeout regression. +14. Browser smoke test catches raw Laravel debug-page exposure on Operations. +15. Browser/runtime smoke check fails on missing Filament JS globals on the Operations route. +16. Browser/runtime smoke check fails on Vite dev-client connection failures when running in productization-smoke mode. +17. Browser/runtime smoke check fails on visible Debugbar/source-link leakage when running in productization-smoke mode. +18. Tests are deterministic and do not require real provider access. +19. No destructive operations are performed. +20. No unrelated Evidence/Provider/Review/Restore semantics are changed. + +## UI Action Matrix *(mandatory when Filament is changed)* + +| Surface | Location | Header Actions | Inspect Affordance (List/Table) | Row Actions (max 2 visible) | Bulk Actions (grouped) | Empty-State CTA(s) | View Header Actions | Create/Edit Save+Cancel | Audit log? 
| Notes / Exemptions | +|---|---|---|---|---|---|---|---|---|---|---| +| Operations hub | `apps/platform/app/Filament/Pages/Monitoring/Operations.php`, `apps/platform/app/Filament/Resources/OperationRunResource.php` | Existing scope/back/filter-reset navigation only | Existing row/detail route | Existing safe detail/open links only | none | controlled no-runs state; no mutation CTA unless already permitted and existing | owned by tenantless OperationRun detail viewer | N/A | existing run lifecycle audit only | No destructive action added; no retry/cancel/start/export/delete/archive behavior in scope | + +### Key Entities *(include if feature involves data)* + +- **OperationRun**: Existing execution-truth record used for list rows, detail links, status/outcome, timing, and scoped monitoring. +- **ManagedEnvironment**: Existing environment filter target; filter values must be workspace-entitled and active according to existing route rules. +- **Workspace**: Existing primary route/session context for admin Operations. +- **Productization smoke session**: Test/browser-mode behavior, not persisted product truth, used to suppress Debugbar/source-link leakage and use stable assets where supported. + +## Success Criteria *(mandatory)* + +### Measurable Outcomes + +- **SC-391-001**: Environment-filtered Operations route returns a successful response in targeted feature/Livewire coverage. +- **SC-391-002**: Browser smoke opens the Operations hub and observes no network 500s, debug page text, stack trace text, missing Filament globals, Vite dev-client failures, `_debugbar` leakage, or `phpstorm://open` source links in productization-smoke mode. +- **SC-391-003**: Browser verification records render timing, with target under 3 seconds for the audited local data shape or a documented lower-level guard if browser timing is unsuitable for CI. +- **SC-391-004**: Feature/performance guard proves index rendering stays bounded when more operation rows exist than a single table page. 
+- **SC-391-005**: No provider mutations, restore jobs, exports, deletes, archives, or customer-facing delivery actions are executed during tests or verification. + +## Expected UX + +The Operations hub should present a clear title, workspace/environment context, visible active environment filter, bounded table or controlled empty state, useful operation status columns, safe detail actions, and no raw stack traces, Debugbar/source links, missing runtime globals, or framework/debug branding in productization-smoke validation. + +## Risks + +- The root cause may be a combination of table filter option scans, summary/top-run queries, `OperationRunResource` column/action helpers, JSON casts, and actionability/freshness accessors. Implementation must profile or instrument enough to fix the render path rather than masking it. +- Browser timing can be flaky in local/CI environments. If so, keep browser leak assertions and add deterministic lower-level query/render guards. +- Productization-smoke mode could accidentally disable normal local debugging if implemented too broadly; keep it explicit to smoke requests. +- Existing completed Operations specs contain validated productization behavior and must not be rewritten. + +## Assumptions + +- Spec 391 is a fresh regression/stability package, not a continuation of Spec 328 productization redesign. +- The audited ids `workspace_id=3` and `environment_id=4` may be available locally, but tests should create/discover safe fixtures when they are not. +- No schema migration is required unless implementation proves the current query path cannot be bounded without an index or schema change. +- Existing `SuppressDebugbarForSmokeRequests` and `PanelThemeAsset` patterns are the preferred starting point for BUG-009 smoke controls. + +## Open Questions + +- None blocking preparation. Implementation must confirm the exact render-path root cause before changing code. + +## Out Of Scope + +- Evidence anchor selection. 
+- Provider permission/readiness semantics. +- Review pack download gating. +- Customer Review Workspace evidence labeling. +- System login branding. +- Restore readiness behavior unless the Operations hub directly depends on it. +- Broad app-wide UI redesign. +- Broad production infrastructure configuration changes unrelated to this spec. +- Real provider mutations, provider syncs, restore jobs, destructive actions, exports, notifications, customer-facing delivery actions, archives, deletes, or force-deletes. +- Increasing PHP `max_execution_time`. +- Hiding/removing the Operations route or links. +- Generic catch-all error masking while leaving the expensive render path intact. + +## Follow-up Spec Candidates + +- System login branding and cross-panel debug-safe branding from BUG-008/BUG-009 if productization audit keeps it separate. +- Evidence/current-vs-anchored follow-up from BUG-002/BUG-003. +- Review pack/customer download gating from BUG-004/BUG-007. +- Provider readiness semantics from BUG-005/BUG-006 if separately promoted. + +## Verification + +Planned verification details live in `specs/391-operations-hub-stability-debug-safe-runtime/artifacts/verification.md`. + +Implementation verification must capture: + +- HTTP status. +- Render time. +- Page title/header. +- Visible table/empty state. +- Active workspace/environment context. +- Console errors. +- Network 500s. +- Absence of Laravel debug page. +- Absence of Debugbar/source-link leakage in productization-smoke mode. +- Confirmation that no provider mutations, restore jobs, exports, deletes, archives, notifications, or customer-facing delivery actions were executed. 
diff --git a/specs/391-operations-hub-stability-debug-safe-runtime/tasks.md b/specs/391-operations-hub-stability-debug-safe-runtime/tasks.md new file mode 100644 index 00000000..ee42266f --- /dev/null +++ b/specs/391-operations-hub-stability-debug-safe-runtime/tasks.md @@ -0,0 +1,100 @@ +# Tasks: Spec 391 - Operations Hub Stability and Debug-Safe Runtime + +**Input**: Design documents from `/specs/391-operations-hub-stability-debug-safe-runtime/` +**Prerequisites**: `plan.md`, `spec.md` +**Tests**: Required. Use Pest 4 feature/Livewire/browser coverage. No seeders, provider syncs, restore execution, exports, deletes, archives, force-deletes, notifications, or customer-facing delivery actions. + +## Test Governance Checklist + +- [x] Lane assignment is named and is the narrowest sufficient proof for the changed behavior. +- [x] New or changed tests stay in the smallest honest family, and the browser addition is explicit. +- [x] Shared helpers, factories, seeds, fixtures, and context defaults stay cheap by default; any widening is isolated or documented. +- [x] Planned validation commands cover the change without pulling in unrelated lane cost. +- [x] The declared surface test profile (`monitoring-state-page` plus `global-context-shell`) is explicit. +- [x] Any material budget, baseline, trend, or escalation note is recorded in the active spec or PR. + +## Phase 1: Setup and Safety Boundary + +- [x] T001 Record initial `git status --short`, current branch, and latest commit in `specs/391-operations-hub-stability-debug-safe-runtime/artifacts/verification.md`. +- [x] T002 Re-read `specs/391-operations-hub-stability-debug-safe-runtime/spec.md`, `plan.md`, `tasks.md`, `specs/browser-productization-bug-audit/browser-bug-report.md`, and completed context-only Specs 328, 361, 362, 364, 367, and 377 before editing runtime code. 
+- [x] T003 Confirm the implementation scope excludes Evidence, Provider, Review Pack, Restore, dashboard semantics, provider mutations, restore jobs, exports, deletes, archives, force-deletes, notifications, customer-facing delivery actions, migrations, seeders, and `max_execution_time` changes. +- [x] T004 Confirm Filament v5 / Livewire v4.0+ compliance and no Livewire v3/Filament legacy API use in touched code. +- [x] T005 Confirm panel provider registration remains `apps/platform/bootstrap/providers.php` and no panel provider path changes are required. +- [x] T006 Confirm `OperationRunResource` remains non-globally-searchable, or update this spec before changing global-search posture. +- [x] T007 Confirm no new persisted entity, migration, enum/status family, operation type, summary-count key, or domain abstraction is needed; if one appears necessary, stop and update `spec.md` and `plan.md` first. + +## Phase 2: Reproduce and Locate Root Cause + +- [x] T008 Reproduce or confirm BUG-001 with the browser/Playwright or a targeted route request for `/admin/workspaces/3/operations?environment_id=4`, recording HTTP status, elapsed time, and visible/debug output in `artifacts/verification.md`. +- [x] T009 Inspect the latest Laravel error/log context for the audited max-execution failure without mutating data; record whether `HasAttributes.php:1577` still appears. +- [x] T010 Inspect `apps/platform/app/Filament/Pages/Monitoring/Operations.php` render methods, especially `decisionWorkbench()`, `selectedWorkbenchOperation()`, `topOperationFromQuery()`, `summaryCount()`, `table()`, `scopedSummaryQuery()`, filter handling, and environment entitlement helpers. +- [x] T011 Inspect `apps/platform/app/Filament/Resources/OperationRunResource.php` table columns, filters, actions, URL builders, status/outcome descriptions, target-scope helpers, and any helpers used per visible row. 
+- [x] T012 Inspect `apps/platform/app/Models/OperationRun.php` accessors/casts used by the list and workbench, including `context`, `failure_summary`, `summary_counts`, `problemClass()`, `freshnessState()`, `requiresOperatorReview()`, and actionability-related helpers. +- [x] T013 Identify whether the render cost comes from unbounded row hydration, query option scans, relationship N+1, JSON casts/accessors, PHP sorting over hydrated rows, actionability/freshness evaluation, or table column/action helper work; record the confirmed root cause in `artifacts/verification.md`. + +## Phase 3: Automated Regression Tests First + +- [x] T014 Add `apps/platform/tests/Feature/Monitoring/Spec391OperationsHubRendersWithEnvironmentFilterTest.php` proving an authenticated admin can open the Operations route with an entitled environment filter, receives a successful response, sees Operations title/context/table or empty state, and does not see Laravel debug-page, stack-trace, or `Maximum execution time` text. +- [x] T015 Add a test in the same feature file proving the environment filter remains scoped: rows/counts/filter context for another environment or workspace do not appear, and non-entitled environment filters fail closed according to existing 404/filter-discard contract. +- [x] T016 Add a test proving dashboard/workspace links that target Operations with `environment_id` produce the canonical Operations URL and the target route renders. +- [x] T017 Add `apps/platform/tests/Feature/Monitoring/Spec391OperationRunResourceIndexPerformanceTest.php` with more operation runs than a table page and large `context`/`failure_summary` payloads, asserting the index remains bounded and does not require unbounded rows to render. +- [x] T018 Add or extend a no-Graph render guard proving Operations index/workbench rendering never invokes `GraphClientInterface` or provider clients. 
+- [x] T019 Add a focused empty-state test proving no-runs for an entitled environment displays controlled copy and no false health claim. +- [x] T020 Add a loading-state/context test where feasible, or a browser assertion, proving the Operations route preserves the active workspace/environment filter and does not flash raw framework/debug output while loading. +- [x] T021 Add a safe detail-link test proving at least one authorized row still opens the tenantless OperationRun detail route. +- [x] T022 If a smoke/runtime helper is introduced, add a Unit or Feature test proving it is opt-in and does not disable Debugbar/Vite behavior for normal local requests. + +## Phase 4: Browser/Productization Smoke Tests + +- [x] T023 Add `apps/platform/tests/Browser/Spec391OperationsHubProductizationSmokeTest.php` using existing browser smoke-login/auth fixture patterns where possible. +- [x] T024 Make the browser test discover or create a safe workspace/environment fixture instead of hardcoding ids, unless the audited workspace 3/environment 4 fixture is explicitly present and safe to use. +- [x] T025 Browser-smoke the authenticated route `/admin/workspaces/{workspace}/operations?environment_id={environment}` and assert page renders successfully with `Operations`/`Operations Hub`, active environment context, and bounded table or controlled empty state. +- [x] T026 Add a browser render-time guard targeting under 3 seconds after authentication for the audited local data shape; if too flaky for CI, keep browser timing recorded and rely on a deterministic lower-level render/query guard. +- [x] T027 Add browser assertions that no visible Laravel debug page, stack trace, `Maximum execution time`, `_debugbar`, `phpstorm://open`, raw source links, or debug exception text is visible in productization-smoke mode. 
+- [x] T028 Add browser console assertions that fail on missing Filament/Livewire/Alpine runtime globals needed by the route, including `filamentSchema is not defined`, `filamentSchemaComponent is not defined`, `filamentTable is not defined`, and `selectFormComponent is not defined`. +- [x] T029 Add browser network/console assertions that fail on Vite dev-client connection failures for `http://localhost:5173/@vite/client` when running in productization-smoke mode. +- [x] T030 Add browser network assertions that fail on Operations HTTP 500s and `_debugbar` requests in productization-smoke mode. +- [x] T031 Capture the final screenshot under `specs/391-operations-hub-stability-debug-safe-runtime/artifacts/screenshots/` or record why screenshot capture is unavailable. + +## Phase 5: Operations Render-Path Stabilization + +- [x] T032 Update `apps/platform/app/Filament/Pages/Monitoring/Operations.php` so workspace and environment entitlement filters apply at the query level before list rows, summary counts, selected workbench operation, and filter state render. +- [x] T033 Keep the Operations table paginated with `TablePaginationProfiles::resource()` or a narrower documented equivalent. +- [x] T034 Bound `selectedWorkbenchOperation()` / `topOperationFromQuery()` so it does not hydrate unbounded rows or sort expensive accessor-derived state across large result sets. +- [x] T035 Replace or defer expensive per-row work in `OperationRunResource::table()` columns/actions; keep default list columns useful without parsing raw context/failure payloads for every visible row. +- [x] T036 Restrict eager loading to relationships actually rendered on the index (`tenant`, `user`, or narrower selected columns) and avoid N+1 relationship traversal for status/scope/next-action display. 
+- [x] T037 Avoid default index hydration/presentation of large JSON payloads (`context`, `failure_summary`, `summary_counts`) unless a visible column truly needs them; move heavy diagnostics to detail/collapsed support paths. +- [x] T038 Scope and bound filter option queries for type and initiator so they do not scan unrelated workspaces or unbounded historical rows during normal index render. +- [x] T039 Preserve existing OperationRun status/outcome/actionability semantics; do not change lifecycle truth to make the list faster. +- [x] T040 Preserve existing canonical detail/view links through `OperationRunLinks` and tenantless OperationRun viewer routes. + +## Phase 6: Controlled States and Runtime Smoke Mode + +- [x] T041 Ensure the Operations empty state is specific to the active workspace/environment scope, customer-ready, and avoids false health claims. +- [x] T042 Ensure loading behavior preserves the active workspace/environment filter and does not expose framework/debug output. +- [x] T043 Add a controlled display-only error/notice state only if implementation proves one is appropriate; do not use a catch-all to hide the expensive path or raw exceptions. +- [x] T044 Reuse `App\Http\Middleware\SuppressDebugbarForSmokeRequests` for smoke-cookie/session suppression where possible. +- [x] T045 Reuse or extend `App\Support\Filament\PanelThemeAsset` behavior so productization-smoke mode can run without requiring the Vite dev client when built assets are available. +- [x] T046 If a new env/config flag is required, name it narrowly for productization/browser smoke, document it in this spec's verification artifact, and ensure normal local developer Debugbar/Vite workflow remains unchanged. +- [x] T047 Ensure productization-smoke assertions do not fail on arbitrary local warnings; they must fail only on the explicit runtime/debug leakage signatures from this spec.
+ +## Phase 7: Validation and Formatting + +- [x] T048 Run targeted feature tests for Spec 391 render/scoping/bounded behavior. +- [x] T049 Run targeted browser smoke for Spec 391. +- [x] T050 Run targeted formatting for touched PHP files with `php vendor/bin/pint --test <touched-files>` or the project-equivalent narrow formatting command. +- [x] T051 Run `git diff --check` from the repository root. +- [x] T052 Open the Operations route in the browser after implementation and record route, HTTP status, render time, page title/header, table/empty state, workspace/environment context, console errors, network errors, absence of debug page, and absence of Debugbar/source-link leakage in `artifacts/verification.md`. +- [x] T053 Confirm in `artifacts/verification.md` that no provider mutations, restore jobs, exports, deletes, archives, force-deletes, notifications, customer-facing delivery actions, migrations, seeders, or destructive commands were executed. +- [x] T054 Record final `git status --short`, intentionally changed files, pre-existing unrelated dirty files if any, and known limitations in `artifacts/verification.md`. + +## Non-Tasks / Guardrails + +- [x] NT001 Do not increase PHP `max_execution_time`. +- [x] NT002 Do not hide or remove the Operations route or links. +- [x] NT003 Do not mask the error with a generic catch-all while leaving the expensive render path intact. +- [x] NT004 Do not change Evidence, Provider, Review Pack, Restore, dashboard, or customer-facing artifact semantics. +- [x] NT005 Do not run provider syncs, provider mutations, restore jobs, exports, deletes, archives, force-deletes, seeders, or destructive commands. +- [x] NT006 Do not add migrations unless spec/plan are updated first with proof. +- [x] NT007 Do not add new OperationRun types, statuses, outcomes, summary-count keys, lifecycle semantics, or unscoped caching. +- [x] NT008 Do not rewrite or normalize completed Operations/productization specs.