From e6550ee1c6fe84b40009f091583d413b1e2fc264 Mon Sep 17 00:00:00 2001 From: Ahmed Darrazi Date: Mon, 11 May 2026 13:11:06 +0200 Subject: [PATCH] test: capture spec 295 full suite ci baseline --- .../Guards/TestLaneCommandContractTest.php | 13 + scripts/platform-test-artifacts | 15 +- .../checklists/requirements.md | 45 +++ .../295-full-suite-ci-baseline/data-model.md | 67 ++++ .../failure-classification.md | 122 +++++++ specs/295-full-suite-ci-baseline/plan.md | 181 +++++++++ .../295-full-suite-ci-baseline/quickstart.md | 90 +++++ specs/295-full-suite-ci-baseline/research.md | 58 +++ specs/295-full-suite-ci-baseline/spec.md | 342 ++++++++++++++++++ specs/295-full-suite-ci-baseline/tasks.md | 173 +++++++++ 10 files changed, 1097 insertions(+), 9 deletions(-) create mode 100644 specs/295-full-suite-ci-baseline/checklists/requirements.md create mode 100644 specs/295-full-suite-ci-baseline/data-model.md create mode 100644 specs/295-full-suite-ci-baseline/failure-classification.md create mode 100644 specs/295-full-suite-ci-baseline/plan.md create mode 100644 specs/295-full-suite-ci-baseline/quickstart.md create mode 100644 specs/295-full-suite-ci-baseline/research.md create mode 100644 specs/295-full-suite-ci-baseline/spec.md create mode 100644 specs/295-full-suite-ci-baseline/tasks.md diff --git a/apps/platform/tests/Feature/Guards/TestLaneCommandContractTest.php b/apps/platform/tests/Feature/Guards/TestLaneCommandContractTest.php index 7e3f0f23..1e8ae1db 100644 --- a/apps/platform/tests/Feature/Guards/TestLaneCommandContractTest.php +++ b/apps/platform/tests/Feature/Guards/TestLaneCommandContractTest.php @@ -38,6 +38,19 @@ ->and(file_exists(repo_path('scripts/platform-test-artifacts')))->toBeTrue(); }); +it('passes artifact staging inputs through php argv for sail execution', function (): void { + $artifactRunner = (string) file_get_contents(repo_path('scripts/platform-test-artifacts')); + + expect($artifactRunner) + ->toContain('./vendor/bin/sail php -- "${LANE}" 
"${STAGING_DIRECTORY}" "${ARTIFACT_DIRECTORY}"') + ->and($artifactRunner)->toContain('$laneId = (string) ($argv[1] ?? \'\');') + ->and($artifactRunner)->toContain('$stagingDirectory = (string) ($argv[2] ?? \'\');') + ->and($artifactRunner)->toContain('$artifactDirectory = (string) ($argv[3] ?? \'\');') + ->and($artifactRunner)->not->toContain("getenv('LANE_ID')") + ->and($artifactRunner)->not->toContain("getenv('STAGING_DIRECTORY')") + ->and($artifactRunner)->not->toContain("getenv('ARTIFACT_DIRECTORY')"); +}); + it('keeps heavy-governance baseline capture support inside the checked-in wrappers', function (): void { $laneRunner = (string) file_get_contents(repo_path('scripts/platform-test-lane')); $reportRunner = (string) file_get_contents(repo_path('scripts/platform-test-report')); diff --git a/scripts/platform-test-artifacts b/scripts/platform-test-artifacts index df974095..c6e512bf 100755 --- a/scripts/platform-test-artifacts +++ b/scripts/platform-test-artifacts @@ -48,20 +48,17 @@ fi cd "${APP_DIR}" -LANE_ID="${LANE}" \ -STAGING_DIRECTORY="${STAGING_DIRECTORY}" \ -ARTIFACT_DIRECTORY="${ARTIFACT_DIRECTORY}" \ -./vendor/bin/sail php <<'PHP' +./vendor/bin/sail php -- "${LANE}" "${STAGING_DIRECTORY}" "${ARTIFACT_DIRECTORY}" <<'PHP' ` commands for report/artifact classification +- **Fixture / helper / factory / seed / context cost risks**: no new defaults; classify fixture-heavy failures instead of widening setup by default +- **Expensive defaults or shared helper growth introduced?**: no +- **Heavy-family additions, promotions, or visibility changes**: none by default +- **Surface-class relief / special coverage rule**: browser/heavy lane output is classification-only unless active fix scope explicitly owns it +- **Closing validation and reviewer handoff**: reviewers should confirm no unclassified failing group, no hidden budget relaxation, no new lane family, and no legacy cutover behavior restoration +- **Budget / baseline / trend follow-up**: classify in 
`failure-classification.md`; only adjust a baseline when the row explains why current evidence supports it +- **Review-stop questions**: lane fit, hidden fixture cost, product repair scope creep, browser scope creep, budget baseline relaxation +- **Escalation path**: `document-in-feature` for CI/lane contract corrections, `follow-up-spec` for product/runtime failures +- **Active feature PR close-out entry**: `FullSuiteClassification` +- **Why no dedicated follow-up spec is needed**: this spec is itself the bounded classification pass. Follow-up specs are created only for classified product/runtime groups. + +## Project Structure + +### Documentation (this feature) + +```text +specs/295-full-suite-ci-baseline/ +├── checklists/ +│ └── requirements.md +├── data-model.md +├── failure-classification.md +├── plan.md +├── quickstart.md +├── research.md +├── spec.md +└── tasks.md +``` + +### Source Code (repository root) + +```text +scripts/ +├── platform-test-artifacts +├── platform-test-lane +└── platform-test-report + +apps/platform/ +├── composer.json +└── tests/ + ├── Feature/Guards/ + └── Support/ +``` + +**Structure Decision**: implementation should touch only the documentation artifacts above unless classification proves a small CI/lane contract defect in the listed scripts/support/guard-test surfaces. Runtime application code, migrations, models, Filament resources, routes, views, and provider services are out of scope. + +## Complexity Tracking + +| Violation | Why Needed | Simpler Alternative Rejected Because | +|---|---|---| +| Spec-local failure-classification vocabulary | The full-suite readiness decision needs one bounded way to classify all red groups after Specs `293` and `294` | Raw terminal notes would not preserve ownership, lane, or follow-up decisions | + +## Proportionality Review + +- **Current operator problem**: maintainers cannot safely decide whether CI is restored without a classified full-suite baseline. 
+- **Existing structure is insufficient because**: targeted green lanes and raw full-suite output answer different questions; neither alone assigns follow-up ownership. +- **Narrowest correct implementation**: one spec-local classification artifact and existing lane wrappers. +- **Ownership cost**: temporary classification upkeep during implementation and possibly small lane contract guard adjustments. +- **Alternative intentionally rejected**: new full-suite CI framework or fix-all suite cleanup. +- **Release truth**: current-release test governance and CI readiness. + +## Phase 0: Research Output + +See `specs/295-full-suite-ci-baseline/research.md`. + +## Phase 1: Design Output + +- `specs/295-full-suite-ci-baseline/data-model.md` +- `specs/295-full-suite-ci-baseline/quickstart.md` +- `specs/295-full-suite-ci-baseline/failure-classification.md` + +## Phase 2: Task Planning Output + +See `specs/295-full-suite-ci-baseline/tasks.md`. diff --git a/specs/295-full-suite-ci-baseline/quickstart.md b/specs/295-full-suite-ci-baseline/quickstart.md new file mode 100644 index 00000000..6612d771 --- /dev/null +++ b/specs/295-full-suite-ci-baseline/quickstart.md @@ -0,0 +1,90 @@ +# Quickstart: Full Suite Failure Classification & CI Lane Baseline + +## Purpose + +Use this package to classify whether the complete platform test suite is a reliable CI signal after Specs `293` and `294`. + +## Before Implementation + +1. 
Review: + - `specs/295-full-suite-ci-baseline/spec.md` + - `specs/295-full-suite-ci-baseline/plan.md` + - `specs/295-full-suite-ci-baseline/data-model.md` + - `specs/295-full-suite-ci-baseline/failure-classification.md` +2. Confirm the branch is clean. +3. Confirm no implementation step is about restoring TenantPanelProvider, `/admin/t/...`, or tenant-scoped legacy fallbacks. + +## Primary Classification Flow + +Use only the pinned categories and seams from `failure-classification.md`: `ci-signal-restored`, `ci-wrapper-or-manifest-regression`, `artifact-publication-regression`, `budget-or-trend-baseline-drift`, `product-runtime-or-test-regression`, `browser-lane-regression`, `flaky-or-environment`, `follow-up-spec-required`, `resolved-or-not-needed`; and `raw-full-suite`, `fast-feedback-lane`, `confidence-lane`, `heavy-governance-lane`, `browser-lane`, `profiling-or-junit-support`, `lane-reporting`, `artifact-publication`, `budget-trend-baseline`, `legacy-cutover-regression-guard`, `provider-verification-regression-guard`. + +Run the raw full suite when feasible: + +```bash +export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && (cd apps/platform && ./vendor/bin/sail artisan test --compact) +``` + +Record the outcome in `failure-classification.md`. 
+ +If the raw full suite is too slow, noisy, or environment-blocked to classify, run the explicit lane split: + +```bash +export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-lane fast-feedback +export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-lane confidence +export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-lane heavy-governance +export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-lane browser +``` + +## CI Report and Artifact Flow + +After lane runs, generate lane reports when needed: + +```bash +export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-report fast-feedback +export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-report confidence +export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-report heavy-governance +export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-report browser +``` + +Use artifact staging only if artifact publication itself is being validated: + +```bash +export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-artifacts fast-feedback /tmp/tenantpilot-fast-feedback-artifacts +``` + +## Fix Rules + +Fix in `295` only when the failure is directly and narrowly caused by: + +- `scripts/platform-test-lane` +- `scripts/platform-test-report` +- `scripts/platform-test-artifacts` +- `apps/platform/tests/Support/TestLaneManifest.php` +- `apps/platform/tests/Support/TestLaneReport.php` +- `apps/platform/tests/Support/TestLaneBudget.php` +- directly related CI guard tests under `apps/platform/tests/Feature/Guards/` + +Do not fix in `295` when the failure requires: + +- application runtime behavior changes +- Filament page/resource changes +- routes, middleware, policies, services, jobs, migrations, views, or models +- provider/verification runtime changes beyond the completed Spec `294` +- browser UI repair +- tenant-cutover compatibility 
restoration + +Classify those as follow-up work instead. + +## Expected Close-Out + +Close out with exactly one final readiness decision: + +- `restored-ci-signal` +- `classified-follow-up-required` +- `blocked-by-environment` + +Then run formatting for any changed PHP files: + +```bash +export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && (cd apps/platform && ./vendor/bin/sail bin pint --dirty --format agent) +``` diff --git a/specs/295-full-suite-ci-baseline/research.md b/specs/295-full-suite-ci-baseline/research.md new file mode 100644 index 00000000..7f298064 --- /dev/null +++ b/specs/295-full-suite-ci-baseline/research.md @@ -0,0 +1,58 @@ +# Research: Full Suite Failure Classification & CI Lane Baseline + +## Decision: Use classification-first implementation + +**Rationale**: The user explicitly asked not to blindly repair the full suite. Specs `293` and `294` already handled known focused stabilization slices. `295` must first answer whether the full suite is a reliable signal and only then allow small CI/lane fixes. + +**Alternatives considered**: + +- **Fix every failing test immediately**: rejected because it hides ownership, scope-creeps into unrelated features, and violates the requested goal. +- **Run only targeted lanes**: rejected because the central question is the complete suite signal after the targeted lanes were stabilized. +- **Skip full-suite run and rely on CI lanes**: rejected because lane split can hide cross-lane fallout or raw-suite issues. + +## Decision: Prefer raw full suite, then explicit lane split fallback + +**Rationale**: The raw command `cd apps/platform && ./vendor/bin/sail artisan test --compact` is the most direct answer to the full-suite readiness question. If it times out, produces output too large to classify, or is environment-blocked, the existing wrappers provide explicit fallback segmentation: `fast-feedback`, `confidence`, `heavy-governance`, and `browser`. 
+ +**Alternatives considered**: + +- **Create a new full-suite wrapper**: rejected as premature CI framework growth. +- **Use only `confidence`**: rejected because confidence intentionally excludes browser, heavy-governance, and some discovery-heavy families. + +## Decision: Reuse existing lane and failure-class contracts + +**Rationale**: `TestLaneManifest` already defines lanes, workflow profiles, budgets, artifact contracts, and lane scope notes. `TestLaneReport` already classifies CI failures as `test-failure`, `wrapper-failure`, `budget-breach`, `artifact-publication-failure`, or `infrastructure-failure`. Spec `295` should verify and minimally correct those contracts rather than inventing another taxonomy. + +**Pinned Spec 295 categories**: `ci-signal-restored`, `ci-wrapper-or-manifest-regression`, `artifact-publication-regression`, `budget-or-trend-baseline-drift`, `product-runtime-or-test-regression`, `browser-lane-regression`, `flaky-or-environment`, `follow-up-spec-required`, `resolved-or-not-needed`. + +**Pinned Spec 295 seams**: `raw-full-suite`, `fast-feedback-lane`, `confidence-lane`, `heavy-governance-lane`, `browser-lane`, `profiling-or-junit-support`, `lane-reporting`, `artifact-publication`, `budget-trend-baseline`, `legacy-cutover-regression-guard`, `provider-verification-regression-guard`. + +**Alternatives considered**: + +- **Add a separate CI readiness model**: rejected because the existing support classes already own this truth. +- **Record only plain-text notes**: rejected because future maintainers need stable categories, seams, and follow-up decisions. + +## Decision: Allow only small CI/lane contract fixes + +**Rationale**: In-scope fixes are limited to wrappers, manifest/report support, artifact publication, budget/report contract drift, and their direct guard tests. This keeps the package focused on CI signal readiness. 
+ +**Alternatives considered**: + +- **Fix application/runtime failures discovered by the suite**: rejected unless a failure is proven to be a small CI/lane contract defect. +- **Update historical Specs `293` or `294`**: rejected by completed-spec guardrail and user scope. + +## Decision: Preserve legacy cutover retirement + +**Rationale**: The request explicitly forbids reopening tenant cutover, legacy `/admin/t/...`, or TenantPanelProvider. Any failure that appears to depend on those retired paths must be classified without restoring them. + +**Alternatives considered**: + +- **Add temporary route aliases to make old tests pass**: rejected as direct conflict with the cutover baseline. + +## Decision: Browser output is classification input, not automatic repair ownership + +**Rationale**: The browser lane is intentionally isolated and may expose environment or smoke fallout. Spec `295` should classify browser failures and only repair browser-specific contract issues if they are lane/report artifacts, not product UI behavior. + +**Alternatives considered**: + +- **Run a browser smoke fix loop inside 295**: rejected because this is not a UI implementation spec. diff --git a/specs/295-full-suite-ci-baseline/spec.md b/specs/295-full-suite-ci-baseline/spec.md new file mode 100644 index 00000000..8ca947ec --- /dev/null +++ b/specs/295-full-suite-ci-baseline/spec.md @@ -0,0 +1,342 @@ +# Feature Specification: Full Suite Failure Classification & CI Lane Baseline + +**Feature Branch**: `295-full-suite-ci-baseline` +**Created**: 2026-05-11 +**Status**: Ready +**Input**: User description: "Spec 295 - Full Suite Failure Classification & CI Lane Baseline. After Specs 293 and 294, run a full-suite classification to determine whether the full platform suite is again a reliable CI signal or whether remaining failures must be classified into separate follow-up specs or lanes. 
Do not blindly fix the full suite, do not scope-creep, do not reopen tenant cutover, do not restore legacy `/admin/t/...` or TenantPanelProvider behavior, and perform only small clearly in-scope fixes." + +## Spec Candidate Check *(mandatory - SPEC-GATE-001)* + +- **Problem**: Specs `293` and `294` closed the known post-cutover route/action-surface and ProviderConnections/Verification failure blocks, but the complete platform suite has not yet been classified as a restored CI signal. Maintainers need one bounded pass that distinguishes green signal, CI wrapper or lane baseline failures, remaining product regressions, flaky or environment failures, and follow-up-spec debt. +- **Today's failure**: targeted lanes can be green while the raw full suite or CI lane wrappers may still fail for unrelated product debt, wrapper/report/artifact drift, budget baseline changes, browser-specific fallout, or environment-only failures. Without classification, future work cannot tell whether a red run means "fix this PR", "rerun because infrastructure failed", "update lane baseline", or "open a follow-up spec". +- **User-visible improvement**: maintainers get an attributable CI readiness decision: either the complete platform suite is a reliable blocking signal again, or every remaining red group is explicitly assigned to the right lane, owner, and follow-up path without reviving retired tenant routes or reopening Specs `293` and `294`. +- **Smallest enterprise-capable version**: one classification-first package that runs the raw full suite or its explicit fallback lane split, records every failing group in `failure-classification.md`, validates existing lane wrappers/report/artifact contracts, applies only small CI-signal fixes when the failure is clearly in scope, and records all product/runtime failures as follow-up candidates instead of absorbing them. 
+- **Explicit non-goals**: no broad full-suite repair, no tenant-cutover rework, no TenantPanelProvider reactivation, no `/admin/t/...` route restoration, no provider/verification runtime expansion beyond Spec `294`, no new CI framework, no new permanent test lane by default, no new browser family, no new runtime persistence, no UI redesign, no product feature work, no unrelated failing-test cleanup, and no historical-spec rewrites. +- **Permanent complexity imported**: one spec-local `failure-classification.md` artifact, one bounded failure-category inventory, one bounded CI/lane seam inventory, and focused tasks against existing test lane scripts, lane manifest/report support, and current Pest lane commands. No runtime table, model, enum, provider abstraction, Filament resource, or product surface is introduced. +- **Why now**: after `293` and `294`, the next quality question is no longer one known red cluster. It is whether CI can be trusted again as a whole. If this is not classified now, later specs will either over-trust a partially red suite or keep rediscovering unrelated failures as local surprises. +- **Why not local**: the signal spans raw Pest execution, `scripts/platform-test-lane`, `scripts/platform-test-report`, `scripts/platform-test-artifacts`, `Tests\Support\TestLaneManifest`, `Tests\Support\TestLaneReport`, browser isolation, heavy-governance budget/reporting, and current workflow profiles. A one-file patch would not prove CI readiness. +- **Approval class**: Cleanup +- **Red flags triggered**: full-suite scope, cross-cutting test governance, and possible temptation to repair unrelated product failures. Defense: this spec is classification-first, uses existing lane/failure-class contracts, imports only a spec-local artifact, and forbids broad repair or legacy route restoration. 
+- **Score**: Nutzen: 2 | Dringlichkeit: 2 | Scope: 2 | Komplexitaet: 1 | Produktnaehe: 1 | Wiederverwendung: 2 | **Gesamt: 10/12** +- **Decision**: approve + +## Review Outcome + +- **Outcome class**: `acceptable-special-case` +- **Workflow outcome**: `keep` +- **Test-governance outcome**: `keep` +- **Reason**: full-suite work is normally too broad, but this package is justified because it is a classification and CI-signal baseline pass after two completed stabilization slices, not a fix-all implementation. +- **Workflow result**: Ready for implementation as one bounded suite-signal classification package after Specs `293` and `294`. + +## Candidate Selection Gate + +- **Selected candidate**: Full Suite Failure Classification & CI Lane Baseline +- **Source location**: explicit user-provided manual follow-up after `specs/293-post-cutover-suite-stabilization/` and `specs/294-provider-verification-runtime-semantics/` +- **Why selected now**: the known cutover and provider/verification red blocks have been stabilized, so the remaining decision is whether the full platform suite and lane wrappers now form a trustworthy CI signal. +- **Why close alternatives were deferred**: + - reopening Spec `293` would blur route/action-surface cutover cleanup with full-suite CI readiness + - reopening Spec `294` would blur provider/verification runtime semantics with unrelated suite failures + - starting Package Execution, Guided Operations, Microsoft Starter Pack, or Virtual Consultant would hide CI uncertainty under new product work + - creating a new permanent full-suite lane would import CI framework complexity before proving the existing lanes are insufficient + - fixing every failing test in one pass would scope-creep beyond classification and make follow-up ownership unclear +- **Roadmap relationship**: test-governance and platform quality follow-through under `TEST-GOV-001`; this is not a new product roadmap lane and not an automatic active queue promotion. 
+- **Completed-spec guardrail result**: Specs `293` and `294` are context only and are excluded from refresh. Spec `294` carries implementation close-out evidence. Spec `293` is treated as the completed post-cutover baseline described by the user and its failure-classification history is preserved; this spec does not rewrite 293 tasks or close-out history. Specs `287` and `288` remain prior cutover and no-legacy guard context only. +- **Smallest viable implementation slice**: run the full suite or explicit lane split, classify every remaining failure group, validate CI wrapper/report/artifact contracts, and perform only small CI-signal fixes that do not change product behavior. +- **Proposed concise feature description to feed into specify**: Classify the full platform test suite after Specs 293 and 294 and establish whether existing CI lanes provide a trustworthy baseline, while splitting unrelated failures into explicit follow-up ownership instead of repairing the suite blindly. + +## Pinned Failure-Classification Categories + +- `ci-signal-restored` +- `ci-wrapper-or-manifest-regression` +- `artifact-publication-regression` +- `budget-or-trend-baseline-drift` +- `product-runtime-or-test-regression` +- `browser-lane-regression` +- `flaky-or-environment` +- `follow-up-spec-required` +- `resolved-or-not-needed` + +## Pinned CI / Suite Seams + +- `raw-full-suite` +- `fast-feedback-lane` +- `confidence-lane` +- `heavy-governance-lane` +- `browser-lane` +- `profiling-or-junit-support` +- `lane-reporting` +- `artifact-publication` +- `budget-trend-baseline` +- `legacy-cutover-regression-guard` +- `provider-verification-regression-guard` + +## Spec Scope Fields *(mandatory)* + +- **Scope**: repository / CI test-governance workflow +- **Primary Routes**: N/A - no application routes or operator-facing navigation are added or restored. Retired `/admin/t/...` routes and TenantPanelProvider behavior remain forbidden. 
+- **Data Ownership**: + - no new application persistence is introduced + - no runtime source of truth is introduced + - `failure-classification.md` is a spec-local implementation artifact and is not product/runtime truth + - existing test lane truth remains in `apps/platform/tests/Support/TestLaneManifest.php`, `apps/platform/tests/Support/TestLaneReport.php`, and the wrapper scripts under `scripts/` +- **RBAC**: + - no authorization model changes are introduced + - existing workspace and managed-environment isolation tests remain ordinary suite participants + - if a failing group concerns RBAC, it must be classified as product/runtime debt or a follow-up spec unless it is clearly only a stale CI/lane assertion + +For canonical-view specs, the spec MUST define: + +- **Default filter behavior when tenant-context is active**: N/A - no canonical-view application surface is added or changed. +- **Explicit entitlement checks preventing cross-tenant leakage**: N/A for this prep package. Any suite failure suggesting leakage must be classified as product-runtime debt and not hidden as a lane issue. 
+ +## Cross-Cutting / Shared Pattern Reuse *(mandatory when the feature touches notifications, status messaging, action links, header actions, dashboard signals/cards, alerts, navigation entry points, evidence/report viewers, or any other existing shared operator interaction family; otherwise write `N/A - no shared interaction family touched`)* + +- **Cross-cutting feature?**: yes +- **Interaction class(es)**: CI lane execution, full-suite signal classification, lane report generation, artifact publication, budget/trend baseline review, and follow-up-spec routing +- **Systems touched**: + - `scripts/platform-test-lane` + - `scripts/platform-test-report` + - `scripts/platform-test-artifacts` + - `apps/platform/composer.json` + - `apps/platform/tests/Support/TestLaneManifest.php` + - `apps/platform/tests/Support/TestLaneReport.php` + - `apps/platform/tests/Support/TestLaneBudget.php` + - `apps/platform/tests/Feature/Guards/TestLaneManifestTest.php` + - `apps/platform/tests/Feature/Guards/CiLaneFailureClassificationContractTest.php` + - `apps/platform/tests/Feature/Guards/CiFastFeedbackWorkflowContractTest.php` + - `apps/platform/tests/Feature/Guards/CiConfidenceWorkflowContractTest.php` + - `apps/platform/tests/Feature/Guards/CiHeavyBrowserWorkflowContractTest.php` + - existing lane-selected Pest tests and browser smoke files only as classification inputs unless a small CI-signal fix is proven +- **Existing pattern(s) to extend**: existing `TestLaneManifest` lane definitions, existing `TestLaneReport` failure classes, existing lane wrapper scripts, existing Gitea workflow profile metadata, existing report/artifact publication contracts +- **Shared contract / presenter / builder / renderer to reuse**: `TestLaneManifest::lanes()`, `TestLaneManifest::workflowProfiles()`, `TestLaneManifest::failureClasses()`, `TestLaneReport::classifyPrimaryFailure()`, `TestLaneReport::buildCiSummary()`, `TestLaneReport::artifactPublicationStatus()`, and `scripts/platform-test-*` +- 
**Why the existing shared path is sufficient or insufficient**: the repo already has explicit lane, failure-class, artifact, and budget contracts. Spec `295` must prove whether they are currently enough and fix only small contract drift; it must not create a new CI orchestration layer before existing contracts are classified. +- **Allowed deviation and why**: only a bounded CI/lane contract correction is allowed when a wrapper, manifest, report, artifact, or budget baseline defect prevents classification. Product/runtime failures must be classified and split instead of fixed here. +- **Consistency impact**: raw suite output, lane wrapper output, report artifacts, budget/trend summaries, and final follow-up classification must tell the same story about whether the suite is green, blocked, flaky, or split. +- **Review focus**: reviewers must verify that this spec does not become a general failing-test cleanup, does not restore tenant-cutover legacy behavior, and does not add a new permanent lane unless the artifacts explicitly prove existing lanes are insufficient. 
+ +## OperationRun UX Impact *(mandatory when the feature creates, queues, deduplicates, resumes, blocks, completes, or deep-links to an `OperationRun`; otherwise write `N/A - no OperationRun start or link semantics touched`)* + +- **Touches OperationRun start/completion/link UX?**: no +- **Shared OperationRun UX contract/layer reused**: N/A +- **Delegated start/completion UX behaviors**: N/A +- **Local surface-owned behavior that remains**: N/A +- **Queued DB-notification policy**: N/A +- **Terminal notification path**: N/A +- **Exception required?**: none + +## Provider Boundary / Platform Core Check *(mandatory when the feature changes shared provider/platform seams, identity scope, governed-subject taxonomy, compare strategy selection, provider connection descriptors, or operator vocabulary that may leak provider-specific semantics into platform-core truth; otherwise write `N/A - no shared provider/platform boundary touched`)* + +- **Shared provider/platform boundary touched?**: no product boundary change +- **Boundary classification**: N/A +- **Seams affected**: provider and verification tests may fail during classification, but this spec may only classify them as regression or follow-up debt unless the failure is purely a CI/lane contract issue. +- **Neutral platform terms preserved or introduced**: `workspace`, `managed environment`, `provider connection`, `operation`, `lane`, `failure group`, `CI signal` +- **Provider-specific semantics retained and why**: N/A +- **Why this does not deepen provider coupling accidentally**: Spec `295` does not change provider runtime, provider identity, target-scope semantics, or provider copy. It treats provider-specific failures as test/runtime debt requiring explicit follow-up unless they are already covered by the completed Spec `294` seam and proven to be a small regression in the CI contract. 
+- **Follow-up path**: any real provider/verification product failure after Spec `294` must become a follow-up spec or explicitly named failure group, not hidden in this classification pass. + +## UI / Surface Guardrail Impact *(mandatory when operator-facing surfaces are changed; otherwise write `N/A`)* + +N/A - no operator-facing surface change. Browser tests may be run as a lane signal only; visible UI repair is out of scope unless a later implementation explicitly stops and opens a follow-up spec. + +## Decision-First Surface Role *(mandatory when operator-facing surfaces are changed)* + +N/A - no application decision surface is added or changed. + +## Audience-Aware Disclosure *(mandatory when operator-facing surfaces are changed)* + +N/A - no application disclosure layer is added or changed. + +## UI/UX Surface Classification *(mandatory when operator-facing surfaces are changed)* + +N/A - no Filament screen, table, widget, relation manager, or resource is added or materially refactored. + +## Operator Surface Contract *(mandatory when operator-facing surfaces are changed)* + +N/A - no operator-facing page contract is introduced. + +## Proportionality Review *(mandatory when structural complexity is introduced)* + +- **New source of truth?**: no runtime source of truth +- **New persisted entity/table/artifact?**: no application persistence; one spec-local `failure-classification.md` artifact is added for implementation tracking only +- **New abstraction?**: no +- **New enum/state/reason family?**: yes, one spec-local failure-classification category set used only inside this spec package +- **New cross-domain UI framework/taxonomy?**: no +- **Current operator problem**: maintainers need one reliable answer to whether the full suite is a usable CI signal after Specs `293` and `294`, and if not, exactly which lane or follow-up owns the remaining failures. 
+- **Existing structure is insufficient because**: targeted green lanes do not prove full-suite readiness, while raw red output without classification does not tell maintainers whether to fix, split, rerun, or update lane baseline artifacts. +- **Narrowest correct implementation**: add one spec-local failure-classification artifact, use existing lane wrappers and support classes, classify all remaining groups, and fix only small CI-signal defects that block classification. +- **Ownership cost**: low to moderate; maintain one temporary classification artifact and any small lane contract correction made during implementation. +- **Alternative intentionally rejected**: a new full-suite framework, broad test rewrite, or permanent new lane. Those options import durable complexity before the existing lane system is proven insufficient. +- **Release truth**: current-release CI/test-governance readiness only + +### Compatibility posture + +This feature assumes a pre-production environment. + +Backward compatibility, legacy aliases, route shims, TenantPanelProvider restoration, and compatibility-specific tests are out of scope. Canonical replacement remains preferred over preservation. + +## Testing / Lane / Runtime Impact *(mandatory for runtime behavior changes)* + +- **Test purpose / classification**: Heavy-Governance, Feature, Browser, Support/JUnit, and full-suite classification +- **Validation lane(s)**: raw full suite, fast-feedback, confidence, heavy-governance, browser, profiling/support when needed, junit/report/artifact publication when needed +- **Why this classification and these lanes are sufficient**: the goal is not to prove a single feature behavior; the proving purpose is to establish whether the complete platform suite and existing CI lanes produce a trustworthy pass/fail signal after the known stabilization work. +- **New or expanded test families**: none by default. Any new test must be limited to a small CI/lane contract guard if a wrapper/report/artifact regression is proven.
+- **Fixture / helper cost impact**: no new expensive fixture defaults are allowed. If fixture drift appears in the full suite, classify it by failing family and split to follow-up unless a one-line lane/guard baseline is the direct cause. +- **Heavy-family visibility / justification**: explicit. Heavy-governance and browser lanes are signal inputs, not automatic repair ownership. +- **Special surface test profile**: `global-context-shell`, `standard-native-filament`, `shared-detail-family`, `browser-smoke`, `surface-guard`, `discovery-heavy` +- **Standard-native relief or required special coverage**: no UI coverage expansion; browser lane reruns are used only to classify the existing smoke baseline. +- **Reviewer handoff**: reviewers must confirm that Livewire remains v4.0+, Filament remains v5, provider registration stays in `apps/platform/bootstrap/providers.php`, globally searchable resources are not changed, destructive actions are not changed, no assets are registered, every remaining failure is classified, and any in-scope fix is tied directly to a CI/lane contract defect. +- **Budget / baseline / trend impact**: the classification may update the documented status of budget or trend baseline drift, but it must not silently relax lane budgets or create a new baseline without an explicit row in `failure-classification.md`. +- **Escalation needed**: `document-in-feature` for contained lane baseline findings; `follow-up-spec` for product/runtime failures, fixture-family debt, new heavy cost centers, browser fallout, or any repair that exceeds CI/lane contract correction. 
+- **Active feature PR close-out entry**: `FullSuiteClassification` +- **Planned validation commands**: + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && git status --short --branch` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && git diff --stat` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && (cd apps/platform && ./vendor/bin/sail artisan test --compact)` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-lane fast-feedback` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-lane confidence` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-lane heavy-governance` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-lane browser` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-report fast-feedback` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-report confidence` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-report heavy-governance` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-report browser` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-lane junit` + - `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && (cd apps/platform && ./vendor/bin/sail bin pint --dirty --format agent)` + +## User Scenarios & Testing *(mandatory)* + +### User Story 1 - Classify the Full Suite Before Any Repair (Priority: P1) + +As a maintainer, I want the complete platform suite run, or the explicit fallback lane split, classified before any fixes are attempted, so that the project knows whether CI is green, blocked, flaky, or split into follow-up work. + +**Why this priority**: without classification first, Spec `295` would become an uncontrolled full-suite repair pass.
+ +**Independent Test**: Run the raw full suite or fallback lane split and prove every failing group has exactly one category, one seam, one owner/follow-up decision, and one status row in `failure-classification.md`. + +**Acceptance Scenarios**: + +1. **Given** the repo after Specs `293` and `294`, **When** the raw full suite passes, **Then** `failure-classification.md` records `ci-signal-restored` with the command, date, and pass counts. +2. **Given** the raw full suite fails, **When** the failure groups are reviewed, **Then** each group is classified before any repair is attempted. +3. **Given** a failing group points at `/admin/t/...`, TenantPanelProvider, or legacy tenant route behavior, **When** it is classified, **Then** the remedy must not restore that behavior and must be split or fixed only through current workspace-first truth. + +--- + +### User Story 2 - Validate CI Lane and Artifact Signal (Priority: P1) + +As a maintainer, I want each existing CI lane wrapper, report, artifact, and failure class to produce a trustworthy signal so Gitea CI failures can be interpreted without reading raw terminal output first. + +**Why this priority**: a green or red Pest run is not enough if wrapper, report, artifact, budget, or failure-class summaries are stale. + +**Independent Test**: Run the existing lane wrappers and report commands, then verify each lane either passes with complete artifacts or fails with the correct primary failure class. + +**Acceptance Scenarios**: + +1. **Given** a lane fails because tests fail, **When** its report summary is generated, **Then** the primary failure class is `test-failure` rather than wrapper, artifact, or infrastructure failure. +2. **Given** a lane wrapper or manifest no longer resolves to the intended lane, **When** the lane is classified, **Then** it is marked `ci-wrapper-or-manifest-regression` and may be fixed in `295`. +3. 
**Given** required report artifacts are missing after a lane run, **When** publication is checked, **Then** it is classified as `artifact-publication-regression` and may be fixed in `295`. + +--- + +### User Story 3 - Split Product Failures Instead of Absorbing Them (Priority: P1) + +As a maintainer, I want remaining product/runtime failures to become explicit follow-up ownership instead of being silently fixed under a CI-baseline spec. + +**Why this priority**: this protects scope discipline and keeps test-governance decisions attributable. + +**Independent Test**: Review every non-CI failure group and prove it either has a targeted follow-up recommendation or is demonstrably flaky/environmental. + +**Acceptance Scenarios**: + +1. **Given** a failing group requires a runtime product fix, **When** classification finishes, **Then** it is marked `follow-up-spec-required` or `product-runtime-or-test-regression` and not repaired under `295` unless the user explicitly starts that implementation scope later. +2. **Given** a failing group belongs to browser-only behavior, **When** classification finishes, **Then** it is marked `browser-lane-regression` with the existing smoke file and follow-up path. +3. **Given** a failing group disappears on rerun or is environment-specific, **When** classification finishes, **Then** it is marked `flaky-or-environment` with rerun evidence instead of treated as restored CI. + +--- + +### User Story 4 - Publish the Final CI Readiness Decision (Priority: P2) + +As a maintainer, I want a final readiness statement that says whether the full suite can be used as a CI baseline now, and what exact follow-up remains if it cannot. + +**Why this priority**: the output must be actionable for future specs and Gitea workflows, not just a local debugging note. 
+ +**Independent Test**: Inspect `failure-classification.md`, lane report outputs, and final validation commands to confirm there are no unclassified failure groups and no hidden scope expansion. + +**Acceptance Scenarios**: + +1. **Given** all raw suite and lane signals pass, **When** close-out is prepared, **Then** the readiness decision is `restored-ci-signal`. +2. **Given** any group remains red, **When** close-out is prepared, **Then** the readiness decision is `classified-follow-up-required` and each group has an owner/follow-up. +3. **Given** a small CI/lane contract fix was applied, **When** final validation runs, **Then** the directly affected lane/report/artifact guard passes and unrelated failures remain classified rather than hidden. + +### Edge Cases + +- The raw full suite times out or produces output too large to classify directly. +- A lane passes tests but fails report or artifact publication. +- A lane fails only because budget/trend baselines drifted, not because tests failed. +- Browser lane failures expose stale screenshots or environment-specific browser state. +- A failure appears to touch Spec `293` or `294` seams but would require reopening retired legacy behavior. +- A failure disappears on rerun, suggesting flaky or environment-only behavior. +- A small lane manifest fix changes which tests run in a lane, which could accidentally widen CI cost. + +## Requirements *(mandatory)* + +**Constitution alignment (required):** This spec introduces no Microsoft Graph calls, no write/change behavior, no long-running application work, and no new `OperationRun`. It must preserve workspace/tenant isolation expectations while classifying test failures. Any failure suggesting isolation, RBAC, or audit regressions must be classified as product/runtime debt and not hidden as a CI wrapper issue. 
+ +**Constitution alignment (PROP-001 / ABSTR-001 / PERSIST-001 / STATE-001 / BLOAT-001):** The only structural addition is one spec-local failure-classification vocabulary and artifact. It solves the current CI readiness problem after two stabilization specs; no runtime persistence, CI framework, test engine, or new lane abstraction is introduced. + +**Constitution alignment (TEST-GOV-001):** Spec `295` must explicitly classify the proving purpose of every lane run, preserve the existing lane family boundaries, keep expensive fixture/context setup opt-in, and end with one review outcome: `keep`, `split`, `document-in-feature`, `follow-up-spec`, or `reject-or-split`. + +### Functional Requirements + +- **FR-295-001**: The implementation MUST run the raw full suite once when feasible using `cd apps/platform && ./vendor/bin/sail artisan test --compact`. +- **FR-295-002**: If the raw full suite is too slow, noisy, or environment-blocked to classify reliably, the implementation MUST run the explicit fallback lane split: `fast-feedback`, `confidence`, `heavy-governance`, and `browser`. +- **FR-295-003**: Every failing group MUST be recorded in `failure-classification.md` with exactly one pinned category, one pinned seam, observed command, candidate owner, fix-in-295 decision, follow-up decision, and status. +- **FR-295-004**: Lane wrapper, report, artifact, budget, and failure-class problems MAY be fixed in `295` only when the failure is clearly isolated to `scripts/platform-test-lane`, `scripts/platform-test-report`, `scripts/platform-test-artifacts`, `TestLaneManifest`, `TestLaneReport`, `TestLaneBudget`, or their guard tests. +- **FR-295-005**: Product/runtime failures MUST NOT be repaired under `295` unless they are also a small, proven CI/lane contract defect; otherwise they must be assigned to a follow-up spec or classified as unrelated existing debt. 
+- **FR-295-006**: Any failure related to Specs `293` or `294` MUST be classified without rewriting those completed specs or restoring legacy behavior. +- **FR-295-007**: The implementation MUST NOT restore TenantPanelProvider, `/admin/t/...`, tenant-scoped provider fallback routes, or other retired cutover behavior. +- **FR-295-008**: The implementation MUST validate existing lane failure classes: `test-failure`, `wrapper-failure`, `budget-breach`, `artifact-publication-failure`, and `infrastructure-failure`. +- **FR-295-009**: The implementation MUST produce a final CI readiness decision in `failure-classification.md`, set to exactly one of: `restored-ci-signal`, `classified-follow-up-required`, or `blocked-by-environment`. +- **FR-295-010**: Any new or changed tests MUST be limited to CI/lane contract proof and MUST use Pest. + +### Non-Functional Requirements + +- **NFR-295-001**: No new runtime persistence, queue, model, service abstraction, provider registry, Filament resource, or browser family is introduced. +- **NFR-295-002**: Test lane classification must follow actual proving purpose, not file location. +- **NFR-295-003**: Existing lane budget and trend baselines must not be relaxed silently. +- **NFR-295-004**: Classification output must be concise enough for future implementers to route work without re-running the entire suite first. +- **NFR-295-005**: The final package must preserve Filament v5 / Livewire v4 compatibility and must not change panel provider registration. + +## Key Entities *(include if feature involves data)* + +- **Failure Group**: one failing test file, failing assertion cluster, wrapper error, artifact error, budget breach, or environment failure sharing one cause and one owner. +- **CI Lane Signal**: the pass/fail/report/artifact/budget outcome for one lane in `TestLaneManifest`. +- **Classification Decision**: the spec-local row assigning one category, seam, owner, fix-in-295 decision, and follow-up path.
+- **Readiness Decision**: the final status of the full suite and lane baseline after classification. + +## Success Criteria *(mandatory)* + +- **SC-295-001**: `failure-classification.md` exists and contains the pinned category and seam definitions. +- **SC-295-002**: Raw full suite output or fallback lane split output is represented by classified groups with no unclassified red group remaining. +- **SC-295-003**: Existing lane wrappers and report/artifact contracts either pass or have a classified failure class and fix/follow-up decision. +- **SC-295-004**: No implementation step restores TenantPanelProvider, `/admin/t/...`, or retired tenant-scoped fallback behavior. +- **SC-295-005**: The final readiness decision is explicit and actionable: `restored-ci-signal`, `classified-follow-up-required`, or `blocked-by-environment`. +- **SC-295-006**: If a product/runtime failure remains, the classification identifies a separate follow-up owner instead of treating the full suite as green. + +## Assumptions + +- Specs `293` and `294` have completed the targeted stabilization work described by the user and are context only. +- The repo's existing Gitea-compatible lane system remains the preferred CI shape. +- Local implementation will use Sail-first commands unless a non-Docker fallback is explicitly needed. +- Full-suite execution may be expensive; lane split is an allowed fallback only when the raw full suite is not classifiable. + +## Risks + +- Full-suite output may be too large or slow to classify directly. +- Environment-specific Sail/browser failures may obscure real suite status. +- A tempting product fix may be small locally but still outside this CI-baseline scope. +- Budget/trend drift may be real but not appropriate to fix by silently raising thresholds. +- Multiple failing groups may share a fixture root cause and need careful grouping to avoid duplicate follow-up specs. + +## Open Questions + +- None blocking preparation. 
During implementation, actual failing groups determine whether follow-up specs are needed. diff --git a/specs/295-full-suite-ci-baseline/tasks.md b/specs/295-full-suite-ci-baseline/tasks.md new file mode 100644 index 00000000..d6d2a29f --- /dev/null +++ b/specs/295-full-suite-ci-baseline/tasks.md @@ -0,0 +1,173 @@ +# Tasks: Full Suite Failure Classification & CI Lane Baseline + +**Input**: Design documents from `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/` +**Prerequisites**: `spec.md`, `plan.md`, `research.md`, `data-model.md`, `quickstart.md`, `failure-classification.md`, `checklists/requirements.md` + +**Review Artifact**: `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/checklists/requirements.md` +**Failure Inventory**: `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/failure-classification.md` + +## Review Metadata + +- **Review outcome class**: `acceptable-special-case` +- **Workflow outcome**: `keep` +- **Test-governance outcome**: `keep` +- **Stop / split triggers**: broad product/runtime repair, new CI framework, new permanent lane, new browser family, new heavy-governance family, runtime application changes, Filament resource/page changes, route restoration, TenantPanelProvider restoration, `/admin/t/...` restoration, provider/verification runtime expansion, historical-spec rewrite, or budget relaxation without classification evidence + +## Pinned Failure-Classification Categories + +- `ci-signal-restored` +- `ci-wrapper-or-manifest-regression` +- `artifact-publication-regression` +- `budget-or-trend-baseline-drift` +- `product-runtime-or-test-regression` +- `browser-lane-regression` +- `flaky-or-environment` +- `follow-up-spec-required` +- `resolved-or-not-needed` + +## Pinned CI / Suite Seams + +- `raw-full-suite` +- `fast-feedback-lane` +- `confidence-lane` +- `heavy-governance-lane` +- `browser-lane` +- `profiling-or-junit-support` +- 
`lane-reporting` +- `artifact-publication` +- `budget-trend-baseline` +- `legacy-cutover-regression-guard` +- `provider-verification-regression-guard` + +## Test Governance Checklist + +- [x] Lane assignment is named and is the narrowest sufficient proof for each observed failure group. +- [x] New or changed tests stay in the smallest honest family, and any heavy-governance or browser addition is explicit. +- [x] Shared helpers, factories, seeds, fixtures, and context defaults stay cheap by default; any widening is isolated or documented. +- [x] Planned validation commands cover the change without pulling in unrelated lane cost beyond classification. +- [x] The declared surface test profile or `standard-native-filament` relief is explicit. +- [x] Any material budget, baseline, trend, or escalation note is recorded in `failure-classification.md`. + +## Phase 1: Setup and Scope Lock + +**Purpose**: Confirm Spec `295` remains a classification and CI lane baseline package before any suite command runs. 
+ +- [x] T001 Review `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/spec.md`, `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/plan.md`, `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/research.md`, `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/data-model.md`, `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/quickstart.md`, `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/failure-classification.md`, and `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/checklists/requirements.md` before changing runtime or tests +- [x] T002 [P] Confirm current branch, working tree, and baseline diff using `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && git status --short --branch` and `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && git diff --stat`, then record any pre-existing changes in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/failure-classification.md` +- [x] T003 [P] Inspect `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/293-post-cutover-suite-stabilization/failure-classification.md` and `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/294-provider-verification-runtime-semantics/failure-classification.md` as context only, confirming no task edits are made to Specs `293` or `294` +- [x] T004 [P] Inspect `/Users/ahmeddarrazi/Documents/projects/wt-plattform/scripts/platform-test-lane`, `/Users/ahmeddarrazi/Documents/projects/wt-plattform/scripts/platform-test-report`, `/Users/ahmeddarrazi/Documents/projects/wt-plattform/scripts/platform-test-artifacts`, `/Users/ahmeddarrazi/Documents/projects/wt-plattform/apps/platform/composer.json`, `/Users/ahmeddarrazi/Documents/projects/wt-plattform/apps/platform/tests/Support/TestLaneManifest.php`, and 
`/Users/ahmeddarrazi/Documents/projects/wt-plattform/apps/platform/tests/Support/TestLaneReport.php` to confirm current lane entry points and failure classes +- [x] T005 Confirm the explicit forbidden scope in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/failure-classification.md`: no TenantPanelProvider restoration, no `/admin/t/...` restoration, no broad product repair, and no historical-spec rewrite + +--- + +## Phase 2: User Story 1 - Classify the Full Suite Before Any Repair (Priority: P1) + +**Goal**: Establish the raw full-suite readiness signal or an explicit fallback split before any fix work begins. + +**Independent Test**: the raw full-suite result or fallback lane split is represented by classified rows in `failure-classification.md`, with no red group left unclassified. + +- [x] T006 [US1] Run `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && (cd apps/platform && ./vendor/bin/sail artisan test --compact)` and record pass/fail counts, failing files, and any timeout/noisy-output reason in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/failure-classification.md` +- [x] T007 [US1] If T006 cannot produce a classifiable result, run `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-lane fast-feedback`, `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-lane confidence`, `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-lane heavy-governance`, and `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-lane browser`, then record each lane outcome in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/failure-classification.md` +- [x] T008 [US1] Group every failing test file, assertion cluster, wrapper error, report error, artifact error, budget breach, or environment issue into one row in 
`/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/failure-classification.md` with exactly one pinned category and one pinned seam +- [x] T009 [US1] Classify any legacy route or panel-related group under `legacy-cutover-regression-guard` without restoring `/admin/t/...`, TenantPanelProvider, tenant-scoped provider fallback routes, or historical compatibility behavior +- [x] T010 [US1] Classify any provider/verification group under `provider-verification-regression-guard` without rewriting Spec `294`; only mark it in-scope if the failure is a direct CI/lane contract defect rather than provider runtime behavior + +--- + +## Phase 3: User Story 2 - Validate CI Lane and Artifact Signal (Priority: P1) + +**Goal**: Prove existing CI wrappers, reports, artifacts, budgets, and failure classes are interpretable after the suite run. + +**Independent Test**: every lane either passes with complete report/artifact output or fails with the correct primary failure class. 
+ +- [x] T011 [US2] Run `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-report fast-feedback` and classify report, budget, trend, and artifact status in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/failure-classification.md` +- [x] T012 [US2] Run `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-report confidence` and classify report, budget, trend, and artifact status in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/failure-classification.md` +- [x] T013 [US2] Run `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-report heavy-governance` and classify report, budget, trend, and artifact status in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/failure-classification.md` +- [x] T014 [US2] Run `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-report browser` and classify report, budget, trend, and artifact status in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/failure-classification.md` +- [x] T015 [P] [US2] If machine-readable confidence output is needed for follow-up ownership, run `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-lane junit` and classify the JUnit support result in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/failure-classification.md` (not run separately because the lane wrappers produced the needed JUnit artifacts) +- [x] T016 [P] [US2] If artifact publication is suspected, run `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && ./scripts/platform-test-artifacts fast-feedback /tmp/tenantpilot-fast-feedback-artifacts` or the matching affected lane and classify any missing required artifacts under `artifact-publication-regression` +- [x] T017 [US2] Verify existing failure classes from 
`/Users/ahmeddarrazi/Documents/projects/wt-plattform/apps/platform/tests/Support/TestLaneReport.php` classify lane outcomes as `test-failure`, `wrapper-failure`, `budget-breach`, `artifact-publication-failure`, or `infrastructure-failure`, and record mismatches in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/failure-classification.md` + +--- + +## Phase 4: User Story 3 - Split Product Failures Instead of Absorbing Them (Priority: P1) + +**Goal**: Keep Spec `295` limited to CI signal readiness by splitting product/runtime failures into explicit follow-up ownership. + +**Independent Test**: every non-CI failure group has a follow-up recommendation, owner, or environment disposition. + +- [x] T018 [US3] For each row classified as `product-runtime-or-test-regression`, decide whether it is a follow-up spec, lane-specific debt, or active feature blocker, then record the decision in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/failure-classification.md` +- [x] T019 [US3] For each row classified as `browser-lane-regression`, record the affected browser file under `/Users/ahmeddarrazi/Documents/projects/wt-plattform/apps/platform/tests/Browser/`, whether the failure is smoke/environment/product behavior, and the follow-up path in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/failure-classification.md` +- [x] T020 [US3] For each row classified as `flaky-or-environment`, rerun the narrowest affected command once when safe and record the rerun evidence or environment blocker in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/failure-classification.md` (no flaky/environment row was identified) +- [x] T021 [US3] Confirm no failure group is being fixed under `295` solely because it is small or nearby; it must be directly tied to CI wrapper, manifest, report, artifact, or budget/trend contract drift + +--- + +## Phase 5: 
User Story 4 - Apply Only Small CI-Signal Fixes (Priority: P2) + +**Goal**: Correct narrow CI/lane contract defects only when classification proves they block a trustworthy CI signal. + +**Independent Test**: the directly affected lane/report/artifact guard passes after the minimal fix, and unrelated red groups remain classified. + +- [x] T022 [US4] If a `ci-wrapper-or-manifest-regression` row is proven, apply the minimal correction in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/scripts/platform-test-lane`, `/Users/ahmeddarrazi/Documents/projects/wt-plattform/apps/platform/composer.json`, `/Users/ahmeddarrazi/Documents/projects/wt-plattform/apps/platform/tests/Support/TestLaneManifest.php`, or the directly affected guard test under `/Users/ahmeddarrazi/Documents/projects/wt-plattform/apps/platform/tests/Feature/Guards/` (not applicable: no `ci-wrapper-or-manifest-regression` row was proven) +- [x] T023 [US4] If an `artifact-publication-regression` row is proven, apply the minimal correction in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/scripts/platform-test-artifacts`, `/Users/ahmeddarrazi/Documents/projects/wt-plattform/apps/platform/tests/Support/TestLaneReport.php`, `/Users/ahmeddarrazi/Documents/projects/wt-plattform/apps/platform/tests/Support/TestLaneManifest.php`, or the directly affected artifact guard test +- [x] T024 [US4] If a `budget-or-trend-baseline-drift` row is proven, update only the documented budget/trend baseline owner in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/apps/platform/tests/Support/TestLaneBudget.php`, `/Users/ahmeddarrazi/Documents/projects/wt-plattform/apps/platform/tests/Support/TestLaneManifest.php`, or the directly affected guard test when the classification row explains why the evidence supports the change (not applicable: no budget/trend baseline rewrite was justified) +- [x] T025 [US4] Add or adjust Pest coverage only when a CI/lane contract defect was fixed, keeping tests under 
`/Users/ahmeddarrazi/Documents/projects/wt-plattform/apps/platform/tests/Feature/Guards/` or `/Users/ahmeddarrazi/Documents/projects/wt-plattform/apps/platform/tests/Unit/Support/` and avoiding new browser/heavy families by default +- [x] T026 [US4] Re-run the narrowest affected lane/report/artifact command after any CI/lane fix and update `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/failure-classification.md` with the final status + +--- + +## Phase 6: Final Readiness Decision and Validation + +**Purpose**: Publish one final CI readiness decision and prove no unclassified failure or hidden scope expansion remains. + +- [x] T027 Review `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/failure-classification.md` and confirm every row has category, seam, observed command, candidate owner, fix-in-295 decision, follow-up, and status +- [x] T028 Set the final readiness decision in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/specs/295-full-suite-ci-baseline/failure-classification.md` to exactly one of `restored-ci-signal`, `classified-follow-up-required`, or `blocked-by-environment` +- [x] T029 Re-run the final narrowest proof command set for the decision: raw full suite if classifiable, otherwise the exact affected lane/report commands from Phases 2 through 5 +- [x] T030 Run `export PATH="/bin:/usr/bin:/usr/local/bin:$PATH" && (cd apps/platform && ./vendor/bin/sail bin pint --dirty --format agent)` if any PHP or script-adjacent PHP files changed +- [x] T031 Confirm Filament remains v5 on Livewire v4, provider registration remains in `/Users/ahmeddarrazi/Documents/projects/wt-plattform/apps/platform/bootstrap/providers.php`, no globally searchable resource changed, no destructive action changed, no asset registration changed, no `/admin/t/...` route or TenantPanelProvider behavior was restored, and no Specs `293` or `294` artifact was rewritten + +## Dependencies & Execution Order + +- 
**Phase 1** must complete before any suite command runs. +- **Phase 2** must classify raw suite or fallback lane output before any fix work. +- **Phase 3** depends on Phase 2 because lane reports must be interpreted against observed lane outcomes. +- **Phase 4** depends on the failure group inventory from Phases 2 and 3. +- **Phase 5** depends on classified CI/lane contract defects; skip it entirely if no in-scope CI/lane defect is proven. +- **Phase 6** depends on completed classification from Phases 2 through 4 and on any bounded fixes applied in Phase 5. + +## Parallel Execution Examples + +- T003 and T004 can run in parallel after T001. +- T011 through T014 can run independently after their corresponding lane outputs exist. +- T018 through T020 can be split by failure group once T008 has created the grouped inventory. +- T022 through T024 must not run until a corresponding classification row proves the in-scope defect. + +## Implementation Strategy + +### Suggested MVP Scope + +MVP = Phases 1 through 4. That is enough to answer whether the suite is green or which follow-up owns each red group. Phase 5 runs only when classification proves a narrow CI/lane contract defect. + +### Incremental Delivery + +1. Lock scope and read prior stabilization artifacts. +2. Run raw full suite or fallback lane split. +3. Classify every red group. +4. Validate lane/report/artifact signal. +5. Split product/runtime failures to follow-up ownership. +6. Apply only proven CI/lane fixes. +7. Publish the final readiness decision. + +## Explicit Follow-Ups / Out of Scope + +- Product/runtime failing-test repair outside CI/lane contract defects +- Browser UI repair +- Package Execution +- Guided Operations +- Microsoft Starter Pack +- Virtual Consultant +- Tenant cutover rework +- Provider/verification runtime expansion beyond Spec `294` +- New permanent CI lane or framework +- Historical-spec cleanup