From e1cfb25de626c854186120cee4016ed4d8f5d916 Mon Sep 17 00:00:00 2001 From: Ahmed Darrazi Date: Fri, 15 May 2026 22:39:07 +0200 Subject: [PATCH] docs --- Agents.md | 5 + docs/AGENTS-draft.md | 50 ++++++++ docs/ai-coding-rules.md | 94 +++++++++++++++ docs/architecture-guidelines.md | 177 +++++++++++++++++++++++++++ docs/deployment-checklist.md | 102 ++++++++++++++++ docs/filament-guidelines.md | 153 ++++++++++++++++++++++++ docs/package-governance.md | 84 +++++++++++++ docs/performance-guidelines.md | 101 ++++++++++++++++ docs/security-guidelines.md | 137 +++++++++++++++++++++ docs/stack-overview.md | 206 ++++++++++++++++++++++++++++++++ docs/testing-guidelines.md | 113 ++++++++++++++++++ 11 files changed, 1222 insertions(+) create mode 100644 docs/AGENTS-draft.md create mode 100644 docs/ai-coding-rules.md create mode 100644 docs/architecture-guidelines.md create mode 100644 docs/deployment-checklist.md create mode 100644 docs/filament-guidelines.md create mode 100644 docs/package-governance.md create mode 100644 docs/performance-guidelines.md create mode 100644 docs/security-guidelines.md create mode 100644 docs/stack-overview.md create mode 100644 docs/testing-guidelines.md diff --git a/Agents.md b/Agents.md index e092c426..9e5b6a9a 100644 --- a/Agents.md +++ b/Agents.md @@ -378,9 +378,14 @@ ## AI Usage Note All AI agents must read: - `AGENTS.md` - `.specify/*` +- `docs/ai-coding-rules.md` +- the relevant guideline file under `docs/*-guidelines.md` before proposing or implementing changes. +For the current enterprise best-practice baseline and the proposed compact addendum +for this file, see `docs/stack-overview.md` and `docs/AGENTS-draft.md`. + ## Reference Materials - PowerShell scripts from IntuneManagement are stored under `/references/IntuneManagement-master` for implementation guidance only. 
diff --git a/docs/AGENTS-draft.md b/docs/AGENTS-draft.md new file mode 100644 index 00000000..6138a918 --- /dev/null +++ b/docs/AGENTS-draft.md @@ -0,0 +1,50 @@ +# AGENTS.md Draft Addendum + +This draft is intentionally short. It is meant to be merged into the root `AGENTS.md` after review, not to replace the existing Spec Kit workflow and multi-agent coordination rules. + +## Enterprise Rule Sources + +Agents must treat these files as project rules: + +- `docs/stack-overview.md` +- `docs/architecture-guidelines.md` +- `docs/filament-guidelines.md` +- `docs/security-guidelines.md` +- `docs/testing-guidelines.md` +- `docs/performance-guidelines.md` +- `docs/deployment-checklist.md` +- `docs/package-governance.md` +- `docs/ai-coding-rules.md` + +## Version Rules + +- Laravel 12, Filament 5, Livewire 4, Tailwind CSS 4, Pest 4, PHPUnit 12, PostgreSQL 16. +- Do not use Filament v3/v4 or Livewire v3 APIs. +- Laravel 12 panel providers are registered in `bootstrap/providers.php`. +- Search official version-specific docs before changing Laravel, Filament, Livewire, Pest, Tailwind, or PostgreSQL behavior. + +## Code Rules + +- Keep Filament resources/pages thin. Extract non-trivial business logic into actions/services/jobs. +- All Graph calls go through `GraphClientInterface`. +- No Graph calls during UI rendering. +- Every tenant-owned query is workspace- and tenant-scoped. +- Every new resource-backed model has a policy or a documented exception. +- Every destructive/high-impact Filament action has confirmation, authorization, audit logging, notification, and tests. +- New JSON payload storage defaults to PostgreSQL JSONB when it is retained or queryable. +- Jobs must be idempotent, observable, retry-aware, and safe to re-run. + +## Security and Testing Rules + +- Run `composer audit` and `corepack pnpm audit --audit-level moderate` for dependency work and before release. +- Use Pest 4 and Filament action testing helpers. 
+- Use the PostgreSQL test lane for migrations, JSONB, partial indexes, locks, and tenant-isolation constraints. +- Never log secrets, tokens, credential payloads, or raw sensitive Graph payloads. + +## Deployment Rules + +- Sail-first locally; Dokploy-first for staging/production. +- Production must set `APP_DEBUG=false`. +- Production workers use supervised `queue:work`, not `queue:listen`. +- Deployments that use Filament registered assets must run `php artisan filament:assets`. +- Staging validation is required before production promotion. diff --git a/docs/ai-coding-rules.md b/docs/ai-coding-rules.md new file mode 100644 index 00000000..abdaef3f --- /dev/null +++ b/docs/ai-coding-rules.md @@ -0,0 +1,94 @@ +# TenantPilot AI Coding Rules + +Status: 2026-05-15 +Use for: Codex, Claude, Cursor, Windsurf, and other AI coding agents. + +## Stack + +- PHP 8.4.15 runtime; Composer constraint currently `^8.2`. +- Laravel 12.52. +- Filament 5.2.1 with required upgrade target >=5.3.5 because of advisory. +- Livewire 4.1.4. Never propose Livewire v3 APIs. +- Tailwind CSS 4.2.2 with Vite 7.3.2. +- PostgreSQL 16 via Sail/Dokploy. +- Pest 4.3 and PHPUnit 12. +- Queue/cache default to database locally; Redis is available. + +## Mandatory First Reads + +- `AGENTS.md` +- `.specify/memory/constitution.md` +- Relevant `specs/<NNN>-<slug>/spec.md`, `plan.md`, `tasks.md` +- `docs/research/filament-v5-notes.md` for Filament uncertainty +- Relevant guideline file in `docs/*-guidelines.md` + +## Forbidden APIs and Patterns + +- No Filament v3/v4 APIs. +- No Livewire v3 references. +- No panel providers in `bootstrap/app.php`; Laravel 12 panel providers belong in `bootstrap/providers.php`. +- No Graph calls outside `GraphClientInterface`. +- No Graph calls during UI render. +- No destructive action without `->action(...)`, `->requiresConfirmation()`, authorization, audit, and test. +- No business-critical logic buried in Filament closures. 
+- No public/local uploaded filenames from `preserveFilenames()` without approved mitigation. +- No new persisted entity, enum/status family, abstraction, or taxonomy without constitution proportionality review. +- No new package without package-governance review. + +## Architecture Rules + +- Filament is UI composition. +- Services/actions own business behavior. +- Jobs own remote/long-running work. +- Policies/gates own authorization. +- Models own persistence relationships, casts, scopes, and small invariants. +- Migrations own integrity through constraints and indexes. +- Tenant-owned data must always be workspace- and tenant-scoped. + +## Filament Rules + +- Every globally searchable resource needs a View or Edit page and `$recordTitleAttribute`; otherwise disable global search. +- Tables need default sort, domain-specific empty state, and query-safe columns. +- Relationship columns require eager loading or query optimization. +- Use `UiEnforcement` or `WorkspaceUiEnforcement` for capability-aware action state. +- Prefer render hooks and CSS hook classes over publishing Filament internals. +- Add `php artisan filament:assets` to deployment when Filament assets are registered. + +## Testing Rules + +- Use Pest 4. +- Test pages/relation managers/widgets as Livewire components. +- Use Filament action helpers for action tests. +- Add policy tests for new policies. +- Add tenant isolation tests for tenant-owned models. +- Use PostgreSQL lane for JSONB, partial indexes, locks, composite FKs, and migrations. +- Browser tests are for critical workflows and JS/visual smoke, not every CRUD page. + +## Security Rules + +- Deny unauthorized workspace/tenant access as not found. +- Never log secrets, tokens, raw credential payloads, or raw sensitive Graph payloads. +- Provider credentials use encrypted storage. +- File uploads default to private disks, random names, MIME/size validation, and tamper prevention when applicable. +- Production requires `APP_DEBUG=false`. 
+- Dependency audits must be clean or explicitly accepted. + +## Performance Rules + +- Use JSONB for queryable snapshot, backup, restore, evidence, and audit payloads. +- Add indexes for proven query paths only. +- Move Graph, restore, backup, export, and report work to queues. +- Make jobs idempotent and observable through `OperationRun`. +- Honor Microsoft Graph `Retry-After`; use exponential backoff with jitter when missing. + +## Response Format for AI Code Changes + +Every implementation response must state: + +1. Livewire v4 compliance. +2. Panel provider location if Filament panel code changed. +3. Global search status for changed resources. +4. Destructive/high-impact actions and how confirmation + authorization + audit are handled. +5. Asset strategy and whether `filament:assets` is required. +6. Tests run and tests added/updated. +7. Deployment impact: env, migrations, queues, scheduler, storage. diff --git a/docs/architecture-guidelines.md b/docs/architecture-guidelines.md new file mode 100644 index 00000000..562bd7fc --- /dev/null +++ b/docs/architecture-guidelines.md @@ -0,0 +1,177 @@ +# TenantPilot Architecture Guidelines + +Status: 2026-05-15 +Applies to: Laravel 12.52, PHP 8.4, Filament 5.2+, Livewire 4, PostgreSQL 16. + +## Target Architecture + +TenantPilot should remain a Laravel monolith with explicit bounded modules, not a speculative framework. The architecture target is: + +- Filament owns admin UI composition only. +- Domain/application services own Intune, backup, restore, audit, evidence, and permission behavior. +- Jobs own long-running or remote Graph work. +- Policies and gates own authorization. +- Models own persistence relationships, casts, scopes, and small invariants only. +- Migrations own data integrity through foreign keys, unique constraints, partial indexes, and JSONB where queryable. 
+ +This aligns with the constitution: heavy architecture is allowed for tenant isolation, RBAC, auditability, immutable history, queue correctness, credential safety, and compliance evidence; speculative generic layers are not. + +## Current Architecture Signals + +Strong patterns already present: + +- `GraphClientInterface` is the required external Graph seam. +- `UiEnforcement` and `WorkspaceUiEnforcement` centralize UI authorization behavior. +- `OperationRun` provides observable queued operations. +- `ProviderCredential` uses encrypted casts for credential payloads. +- Workspace/tenant isolation migrations add non-null workspace ownership and composite constraints. +- Pest lanes and architecture/governance tests already exist. + +High-risk drift: + +- Large Filament classes concentrate UI, authorization, table configuration, modal logic, dispatching, notifications, and domain workflow glue in one place. +- Some resources use static `can*()` methods instead of dedicated policies, making authorization harder to audit globally. +- Historic JSON columns remain mixed with newer JSONB design. + +## Rules + +- Business logic must not live directly in Filament table/header actions except trivial UI orchestration. +- Every action that creates, mutates, deletes, restores, retries, syncs, dispatches, or exports must call a service/action class or queued job. +- Every new resource-backed model needs a policy, or a documented exception in the feature spec. +- Every tenant-owned query must scope by workspace and managed environment before rendering or mutation. +- Graph calls must never happen during UI render. They must happen in services/jobs through `GraphClientInterface`. +- New abstractions require the constitution proportionality check unless they are security, audit, queue, or isolation-critical. +- Do not add generic provider frameworks until at least two real providers require the variation. +- Prefer extracted builders only when they reduce real review burden. 
Do not extract one-off schema fragments into a new layer just for style. + +## Refactoring Backlog + +| Target | Problem | Recommendation | Priority | Effort | Risk if ignored | +|---|---|---|---|---:|---| +| `ManagedEnvironmentOnboardingWizard` | 5,748 LOC workflow page | Split into step schema builders, onboarding draft mutation service, and page-only orchestration. | P1 | L | High regression risk in onboarding and RBAC. | +| `ManagedEnvironmentResource` | 3,785 LOC resource | Extract table columns/filters/actions and tenant-scoped domain actions. | P1 | L | Difficult safe review of destructive environment actions. | +| `RestoreRunResource` | 2,779 LOC resource | Extract restore action builders and write-gate composition. | P1 | M | Restore safety logic becomes hard to audit. | +| `FindingResource` | 2,503 LOC resource | Extract bulk exception/assignment workflows. | P2 | M | Slower feature work and fragile tests. | +| `BackupScheduleResource` | repeated run/retry/bulk closures | Extract `StartBackupScheduleRunAction` service. | P1 | M | Duplicate authorization/audit behavior can drift. 
| + +## Preferred Code Patterns + +### Thin Filament Resource + +```php +use App\Filament\Resources\BackupScheduleResource\Actions\BackupScheduleActions; +use App\Filament\Resources\BackupScheduleResource\Schemas\BackupScheduleForm; +use App\Filament\Resources\BackupScheduleResource\Tables\BackupScheduleTable; +use App\Models\BackupSchedule; +use Filament\Resources\Resource; +use Filament\Schemas\Schema; +use Filament\Tables\Table; + +final class BackupScheduleResource extends Resource +{ + protected static ?string $model = BackupSchedule::class; + + protected static bool $isGloballySearchable = false; + + public static function form(Schema $schema): Schema + { + return BackupScheduleForm::configure($schema); + } + + public static function table(Table $table): Table + { + return BackupScheduleTable::configure($table); + } + + public static function makeRunNowAction(): \Filament\Actions\Action + { + return BackupScheduleActions::runNow(); + } +} +``` + +### Service Action for Business Logic + +```php +namespace App\Actions\BackupSchedules; + +use App\Jobs\RunBackupScheduleJob; +use App\Models\BackupSchedule; +use App\Models\User; +use App\Services\OperationRunService; +use Illuminate\Support\Facades\DB; +use Illuminate\Support\Facades\Gate; + +final class StartBackupScheduleRun +{ + public function __construct( + private readonly OperationRunService $operationRuns, + ) {} + + public function handle(User $actor, BackupSchedule $schedule): int + { + Gate::forUser($actor)->authorize('run', $schedule); + + return DB::transaction(function () use ($schedule, $actor): int { + $run = $this->operationRuns->startBackupScheduleRun($schedule, $actor); + + RunBackupScheduleJob::dispatch($schedule->getKey(), $run->getKey()) + ->onQueue('graph')->afterCommit(); // queue only after the OperationRun row is committed + + return (int) $run->getKey(); + }); + } +} +``` + +### Idempotent Job Skeleton + +```php +use App\Models\OperationRun; +use Illuminate\Contracts\Queue\ShouldQueue; +use Illuminate\Foundation\Queue\Queueable; +use Illuminate\Support\Facades\DB; + +final
class SyncManagedEnvironmentPoliciesJob implements ShouldQueue +{ + use Queueable; + + public int $tries = 3; + + public int $timeout = 300; + + public function __construct( + private readonly int $operationRunId, + ) {} + + public function handle(): void + { + $run = DB::transaction(function (): OperationRun { + $run = OperationRun::query()->lockForUpdate()->findOrFail($this->operationRunId); + + if ($run->isTerminal()) { + return $run; + } + + $run->markRunning(); + + return $run; + }); + + if ($run->isTerminal()) { + return; + } + + // Graph work happens here through GraphClientInterface-backed services. + } +} +``` + +## Acceptance Standard for New Features + +- Spec/plan/tasks exist when code changes runtime behavior. +- Resource/page logic remains UI-focused. +- Mutations have policy authorization, transaction boundaries where needed, audit logging, and tests. +- Remote work is queued and observable. +- Tenant/workspace isolation is proven by tests. +- PostgreSQL-specific behavior is covered in the PostgreSQL lane. diff --git a/docs/deployment-checklist.md b/docs/deployment-checklist.md new file mode 100644 index 00000000..3521a888 --- /dev/null +++ b/docs/deployment-checklist.md @@ -0,0 +1,102 @@ +# TenantPilot Deployment Checklist + +Status: 2026-05-15 +Target: Sail locally, Dokploy-first staging/production, PostgreSQL, container-based deployment. + +## Production Readiness Checklist + +- Staging environment exists and is the mandatory production gate. +- `APP_ENV=production` and `APP_DEBUG=false`. +- `APP_KEY` is stable, secret, and backed up securely. +- Database is PostgreSQL 16-compatible. +- Storage volumes/private object storage are persistent. +- Queue workers and scheduler are explicitly configured. +- Health check route `/up` is monitored. +- Logs are collected outside the container. +- Backups are encrypted and restore-tested. +- Dependency audits are clean or exceptions are approved. + +## Build and Release Checklist + +1. 
`cd apps/platform && composer install --no-dev --optimize-autoloader` +2. `cd apps/platform && corepack pnpm install --frozen-lockfile` +3. `cd apps/platform && corepack pnpm build` +4. `cd apps/platform && php artisan filament:assets` +5. `cd apps/platform && php artisan migrate --force` +6. `cd apps/platform && php artisan optimize` +7. Restart or reload long-running services with `php artisan reload` or `php artisan queue:restart` depending on runtime setup. +8. Verify `/up`. +9. Verify login, tenant selection, queue dispatch, and audit write on staging. + +## Queue Worker Checklist + +Do not use `queue:listen` for production workers. + +Recommended baseline: + +```bash +php artisan queue:work database --queue=high,default,graph,restore,reports,notifications --sleep=3 --tries=3 --timeout=300 +``` + +When Redis is enabled: + +```bash +php artisan queue:work redis --queue=high,default,graph,restore,reports,notifications --sleep=3 --tries=3 --timeout=300 +``` + +Rules: + +- Use process supervision so exited workers restart. +- Keep worker `--timeout` a few seconds lower than the connection's `retry_after` in `config/queue.php`; with `--timeout=300`, raise `retry_after` above Laravel's 90-second default (for example to 330), otherwise a still-running job is released and processed twice. +- Reload/restart workers on deploy. +- Track queue depth and failed jobs. +- Run destructive restore jobs and backup jobs on dedicated queues when volume grows. + +## Scheduler Checklist + +- One scheduler instance per environment. +- Use Laravel scheduler with `withoutOverlapping()` for recurring jobs. +- Monitor last successful scheduler tick and per-command failures. +- Long-running scheduled work dispatches jobs rather than doing Graph work inline. + +## Migration Checklist + +- Review locks and table size before staging. +- Backfill in chunks where needed. +- Avoid irreversible destructive schema changes after production unless a forward-fix recovery plan is documented. +- JSON to JSONB conversions need staging timing proof. +- Composite FK and partial index changes need PostgreSQL CI/staging validation. + +## Rollback Checklist + +- Keep previous image available. 
+- Know whether rollback is code-only or code+schema. +- For forward-only migrations, ship a forward fix instead of unsafe down migration. +- Pause workers before risky rollback if queued payload formats changed. +- Verify audit logs and operation runs remain readable. + +## Backup/Restore Checklist + +- Database backups encrypted. +- Storage backups encrypted. +- Provider credentials excluded from logs and exports. +- Restore tested on staging from a real backup. +- Backup retention and deletion documented. +- Restore runbook includes queue/scheduler coordination. + +## Monitoring Checklist + +- `/up` uptime check. +- Laravel logs and container logs centralized. +- Queue failures and long-running jobs alerted. +- Scheduler missed-run alert. +- Database connections, slow queries, disk, and backup freshness monitored. +- Graph 429/503 rates visible. +- Error tracking integrated before production. + +## Dokploy Notes + +- Treat Dokploy as the process/orchestration layer, not as application governance. +- Ensure web, queue, and scheduler processes are separate service definitions or entrypoints. +- Persist `storage/`, database volumes, and uploaded/private files. +- Do not bake `.env` into images. diff --git a/docs/filament-guidelines.md b/docs/filament-guidelines.md new file mode 100644 index 00000000..a60740a0 --- /dev/null +++ b/docs/filament-guidelines.md @@ -0,0 +1,153 @@ +# TenantPilot Filament Guidelines + +Status: 2026-05-15 +Applies to: Filament v5, Livewire v4.1, Laravel 12. + +## Version Contract + +- Livewire v4.0+ compliance: satisfied by Livewire 4.1.4. +- Panel provider location: `apps/platform/bootstrap/providers.php` registers `AdminPanelProvider` and `SystemPanelProvider`. +- Admin panel path: `/admin`. +- System panel path: `/system`. +- Filament asset deployment: any registered Filament assets require `cd apps/platform && php artisan filament:assets` in deployment or release build. 
+ +## Global Search Contract + +- A resource may use global search only when it has a View or Edit page and a `$recordTitleAttribute`. +- Relationship-backed global search details must eager-load relationships in `getGlobalSearchEloquentQuery()`. +- If a resource is tenant-sensitive or lacks safe View/Edit URLs, set `protected static bool $isGloballySearchable = false`. +- Current examples: `PolicyResource`, `ProviderConnectionResource`, and `ManagedEnvironmentResource` disable global search, which is correct for sensitive tenant-scoped surfaces. + +## Destructive and High-Impact Actions + +Every destructive or high-impact action must have: + +- `->action(...)`, not URL-only execution. +- `->requiresConfirmation()`. +- Policy or gate authorization inside the action handler. +- `UiEnforcement` or `WorkspaceUiEnforcement` on the visible/disabled UI state. +- Audit log entry. +- Success/error notification. +- Pest test for visible/disabled/denied/executed behavior. + +Destructive examples: delete, force delete, restore, archive, retry restore, run restore, disable provider connection, purge, revoke, credential rotation, backup/restore mutations. + +## Filament Do's + +- Use native Filament resources, pages, tables, forms, schemas, actions, relation managers, widgets, clusters, and notifications before custom Blade/JS. +- Use render hooks and CSS hook classes instead of publishing internal Filament views. +- Keep tables scan-first: default sort, explicit empty state, sensible pagination profile, hidden technical detail columns. +- Use `ActionSurfaceDeclaration` when the resource participates in the project action-surface contract. +- Keep RelationManagers lazy-loaded unless an operator workflow requires eager loading. +- Use policies for model authorization and `UiEnforcement` for UI affordance consistency. +- Use `rateLimit()` or Laravel rate limiting for actions that can trigger expensive remote or queued work repeatedly. 
+ +## Filament Don'ts + +- Do not put business workflows directly in long action closures when they mutate data or dispatch remote work. +- Do not assume confirmation modals on `->url(...)` actions. +- Do not expose user-controlled URLs to `url()` without scheme validation. +- Do not use `preserveFilenames()` for uploads on local/public disks. +- Do not enable global search on resources that cannot safely link to View/Edit pages. +- Do not hide unauthorized UI as the only security control. +- Do not add custom pages when a Resource, RelationManager, or action modal covers the workflow. + +## Project-Specific Patterns + +### Safe Table Action + +```php +use App\Actions\BackupSchedules\StartBackupScheduleRun; +use App\Models\BackupSchedule; +use App\Support\Auth\Capabilities; +use App\Support\Rbac\UiEnforcement; +use Filament\Actions\Action; +use Filament\Notifications\Notification; + +UiEnforcement::forTableAction( + Action::make('runNow') + ->label('Run now') + ->icon('heroicon-o-play') + ->requiresConfirmation() + ->modalHeading('Run backup schedule now?') + ->action(function (BackupSchedule $record, StartBackupScheduleRun $starter): void { + $runId = $starter->handle(auth()->user(), $record); + + Notification::make() + ->title('Backup run queued') + ->body("Operation run #{$runId} was created.") + ->success() + ->send(); + }), + fn (BackupSchedule $record): mixed => $record->managedEnvironment, +) + ->requireCapability(Capabilities::TENANT_BACKUP_SCHEDULES_RUN) + ->apply(); +``` + +### Extracted Schema + +```php +namespace App\Filament\Resources\BackupScheduleResource\Schemas; + +use Filament\Forms\Components\Select; +use Filament\Forms\Components\TextInput; +use Filament\Forms\Components\Toggle; +use Filament\Schemas\Schema; + +final class BackupScheduleForm +{ + public static function configure(Schema $schema): Schema + { + return $schema->schema([ + TextInput::make('name')->required()->maxLength(255), + Select::make('frequency')->required()->options([ + 
'daily' => 'Daily', + 'weekly' => 'Weekly', + ]), + Toggle::make('is_enabled')->label('Enabled'), + ]); + } +} +``` + +### Extracted Table + +```php +namespace App\Filament\Resources\BackupScheduleResource\Tables; + +use App\Support\Filament\TablePaginationProfiles; +use Filament\Tables\Columns\TextColumn; +use Filament\Tables\Table; + +final class BackupScheduleTable +{ + public static function configure(Table $table): Table + { + return $table + ->defaultSort('next_run_at') + ->paginationPageOptions(TablePaginationProfiles::resource()) + ->columns([ + TextColumn::make('name')->searchable()->sortable(), + TextColumn::make('status')->badge(), + TextColumn::make('next_run_at')->since()->sortable(), + ]) + ->emptyStateHeading('No backup schedules') + ->emptyStateDescription('Create a schedule after selecting a managed environment.'); + } +} +``` + +## Migration Plan for Bad Patterns + +1. Identify resource files above 1,000 LOC or actions above 60 LOC. +2. Extract repeated action orchestration into `app/Actions/<Domain>/...`. +3. Extract table columns/filters/actions into resource-local builder classes only when they reduce review risk. +4. Add policy tests before deleting resource-level authorization logic. +5. Keep one feature branch per refactor slice to avoid broad conflicts. + +## Testing Plan + +- Resource pages and relation managers are Livewire components and must be tested through Pest/Livewire. +- Mutating actions must use Filament action testing helpers such as `callAction`, `mountAction`, `callTableAction`, `assertActionDisabled`, and `assertTableActionVisible`. +- Browser tests are reserved for critical multi-step workflows, JS errors, accessibility regressions, and visual smoke checks. 
diff --git a/docs/package-governance.md b/docs/package-governance.md new file mode 100644 index 00000000..8d33005c --- /dev/null +++ b/docs/package-governance.md @@ -0,0 +1,84 @@ +# TenantPilot Package Governance + +Status: 2026-05-15 +Applies to: Composer, pnpm workspace, Filament plugins, Laravel packages, frontend tooling. + +## Policy + +New packages are allowed only when they solve a current release problem that existing Laravel/Filament/project patterns cannot reasonably solve. + +Every new package requires: + +- Maintenance check. +- License check. +- Security advisory check. +- Version compatibility check with PHP 8.4, Laravel 12, Filament 5, Livewire 4, Tailwind 4. +- Removal plan if it is experimental. +- Spec/plan update when it changes runtime behavior. + +## Current Risk Matrix + +| Package | Current | Risk | Recommendation | +|---|---:|---|---| +| `filament/filament` | 5.2.1 | High advisory via Filament Tables XSS range | Upgrade to >=5.3.5, preferably current 5.x, then run Filament/action/browser lanes. | +| `league/commonmark` | transitive | Medium advisories | Patch through Composer update. | +| `phpseclib/phpseclib` | transitive | High advisories | Patch through Composer update. | +| `phpunit/phpunit` | 12.5.4 | High dev advisory | Upgrade to >=12.5.8. | +| `psy/psysh` | transitive/dev | Medium advisory | Patch through Composer update. | +| `axios` | 1.14.0 | High/moderate SSRF/header/prototype pollution advisories | Upgrade to >=1.16.1. | +| `postcss` | 8.5.9 transitive | Moderate XSS | Upgrade transitive via package update. | +| `esbuild` | transitive via drizzle tooling | Moderate dev-server issue | Upgrade dependency chain to esbuild >=0.25.0. | +| `devalue` | workspace transitive | High DoS | Upgrade to >=5.8.1 through website/workspace dependency update. | +| `socialiteproviders/microsoft-azure` | 4.2.1 | Major version behind | Review 5.x migration separately with auth tests. 
| +| `barryvdh/laravel-debugbar` | 3.16.5 dev | Major behind | Keep dev-only; upgrade or remove if unused. | + +## Approved Packages + +- Laravel framework first-party packages already in use. +- Filament first-party v5 packages. +- Pest 4 and official Pest plugins used by the current test lanes. +- Tailwind CSS v4 and `@tailwindcss/vite`. +- Drizzle tooling for local PostgreSQL workflows when repo scripts require it. + +## Packages Under Review + +- `socialiteproviders/microsoft-azure` 4.x to 5.x. +- `torchlight/engine` 0.1 to 1.x. +- `barryvdh/laravel-debugbar` 3.x to 4.x or removal. +- Vite 7 to 8 and `laravel-vite-plugin` 2 to 3. + +## Do Not Use Without Approval + +- Unmaintained Filament plugins. +- Packages that require Filament v3/v4 APIs. +- Packages that bypass Laravel authorization, validation, storage, or queue systems. +- Packages that store secrets in plaintext. +- UI frameworks that duplicate Filament for admin workflows. +- SDKs that bypass `GraphClientInterface` for Microsoft Graph calls. + +## CI Gates + +Required before release: + +```bash +cd apps/platform +composer validate --strict +composer audit +corepack pnpm audit --audit-level moderate +``` + +Advisory exceptions require: + +- Advisory ID. +- Affected package/version. +- Reason not exploitable in TenantPilot. +- Expiry date. +- Owner. +- Compensating control. + +## Upgrade Rules + +- Patch security advisories before feature work when severity is high and package is runtime-exposed. +- Minor Laravel/Filament updates require Filament action tests and browser smoke on critical admin workflows. +- Major upgrades require a spec, upgrade guide review, staging validation, and rollback plan. +- Do not update lock files incidentally in feature PRs unless the feature is a dependency update. 
diff --git a/docs/performance-guidelines.md b/docs/performance-guidelines.md new file mode 100644 index 00000000..92180754 --- /dev/null +++ b/docs/performance-guidelines.md @@ -0,0 +1,101 @@ +# TenantPilot Performance Guidelines + +Status: 2026-05-15 +Applies to: Laravel 12, Filament 5, Livewire 4, PostgreSQL 16, Microsoft Graph. + +## Performance Target + +TenantPilot should keep interactive admin requests short and move remote, large, retryable, or long-running work into queued operations with visible `OperationRun` state. + +## Current Performance Risks + +| Risk | Evidence | Priority | Mitigation | +|---|---|---:|---| +| Queryable payloads still in `json` | policy versions, backup items, restore runs, audit logs | P1 | Convert to JSONB where queried; add targeted GIN/expression indexes. | +| Large Filament pages/resources | 1,000-5,700 LOC classes | P1 | Extract tables/actions and review N+1 risks per surface. | +| Database queue for all work | `.env.example` and queue config | P2 | Move high-volume Graph/restore work to Redis queue when load grows. | +| Dashboard/widget query cost | multiple KPI/list widgets | P2 | Cache or precompute expensive aggregate metrics. | +| Graph throttling | Microsoft Graph 429/503 behavior | P1 | Honor `Retry-After`, use exponential backoff with jitter, avoid polling. | + +## Synchronous vs Asynchronous + +Keep synchronous: + +- Rendering Filament pages. +- Validating form/action input. +- Creating operation intent records. +- Small DB-only state transitions. +- Showing preview summaries from already persisted data. + +Move asynchronous: + +- Microsoft Graph reads/writes. +- Backup set item capture. +- Restore execution. +- Bulk export/import. +- Compliance/evidence snapshots. +- Long report generation. +- Notification delivery retries. +- Any workflow likely to exceed 2-5 seconds. + +## Filament Table Rules + +- Always define a default sort. +- Eager-load relationships used by visible columns. 
+- Use `withCount()`/aggregate subqueries instead of per-row counts. +- Hide technical columns by default. +- Use session persistence only on investigative resources. +- Avoid computed columns that perform per-row service calls. +- Avoid Graph calls during table render. + +## Database Rules + +- Prefer `jsonb` for raw Graph snapshots, backup payloads, restore previews/results, evidence summaries, and audit metadata that must be queried. +- Add GIN indexes only when a query path exists; prefer expression indexes for common JSON paths. +- Add composite indexes for workspace/tenant/time/status list filters. +- Add partial unique indexes for active run/idempotency constraints. +- Keep migrations incremental and reversible where practical. + +## Queue Strategy + +MVP: + +- Database queue is acceptable for local and low-volume staging. +- Jobs must be idempotent and observable. +- Worker timeout must be lower than `retry_after`. + +Scale-up: + +- Move production queues to Redis. +- Split queues: `high`, `default`, `graph`, `restore`, `reports`, `notifications`. +- Run separate worker counts per queue. +- Use process supervision in Dokploy/container runtime. +- Restart/reload workers on every deploy. + +## Caching Strategy + +- Cache stable config-derived capability maps. +- Cache dashboard aggregates only when invalidation is clear. +- Do not cache tenant authorization decisions across membership changes unless invalidation is proven. +- Avoid caching raw Graph secrets or token payloads. +- Use Redis for locks and cache in production when queue/scheduler scale increases. + +## Monitoring Metrics + +- HTTP p50/p95/p99 response time by route/panel. +- Livewire request duration and error rate. +- DB query count and slow queries by page/action. +- Queue depth, job latency, failures, retries, max runtime. +- Scheduler last-success timestamp per scheduled command. +- Graph 429/503 count, retry-after seconds, retry exhaustion. 
+- OperationRun created/running/failed/partial counts. +- Audit log write failures. +- Backup/restore duration and item failure rate. + +## Load Test Recommendations + +- List 10k policies and 100k policy versions per workspace. +- Render backup and restore tables with 50k backup items. +- Simulate concurrent backup schedule runs for multiple tenants. +- Simulate Graph 429/503 responses and verify retry/backoff budgets. +- Exercise dashboard widgets with realistic operation/finding history. diff --git a/docs/security-guidelines.md b/docs/security-guidelines.md new file mode 100644 index 00000000..db39e1e5 --- /dev/null +++ b/docs/security-guidelines.md @@ -0,0 +1,137 @@ +# TenantPilot Security Guidelines + +Status: 2026-05-15 +Reference model: OWASP ASVS 5.0.0, OWASP Top 10, NIST SSDF, Laravel 12, Filament 5. + +## Security Target + +TenantPilot manages critical Intune configuration and restore workflows. Treat tenant data, backup payloads, provider credentials, policy snapshots, audit logs, and operation runs as sensitive enterprise data. + +## Current Strengths + +- Workspace and tenant isolation are constitutional non-negotiables. +- Many policies return `Response::denyAsNotFound()`. +- `UiEnforcement` centralizes disabled/hidden UI affordance behavior. +- `ProviderCredential` uses encrypted array casts and hides payloads. +- Graph access is routed through `GraphClientInterface`. +- Audit and operation-run models already provide traceability. + +## Top Security Findings + +| Risk | Evidence | Priority | Control | +|---|---|---:|---| +| Vulnerable dependencies | `composer audit`, `pnpm audit` | P0 | Patch, audit gates, approved exceptions only. | +| Inconsistent policy coverage | Some resource-backed models lack obvious policies | P1 | Resource-policy matrix and tests. | +| Production session/debug defaults need gating | `.env.example` has `APP_DEBUG=true`, `SESSION_ENCRYPT=false` for local | P1 | Deployment checklist enforces production env. 
| +| File upload future risk | Filament warns about file path tampering and filenames | P2 | Private disks, random names, MIME validation, path tamper prevention. | +| Graph beta default | `config/graph.php` defaults to `beta` | P2 | Endpoint-level version registry and contract tests. | + +## Release Security Checklist + +- `composer audit` clean or explicitly risk-accepted. +- `corepack pnpm audit --audit-level moderate` clean or explicitly risk-accepted. +- `APP_DEBUG=false` in staging/production. +- `APP_KEY` present and not rotated casually. +- Session cookies are secure, same-site, and domain-scoped for production. +- Provider credentials remain encrypted and never logged. +- No secrets in config, docs, tests, fixtures, screenshots, or audit metadata. +- Every write operation has policy authorization, explicit confirmation, and audit log. +- Backup and restore flows have dry-run/preview where applicable. +- Queue payloads contain identifiers, not secrets or raw credential payloads. +- Health endpoint and uptime monitor are active. + +## Checklist for New Filament Resources + +- Policy exists for the model or a spec documents why no policy is needed. +- `canViewAny`, `canCreate`, `canEdit`, `canDelete` call policies or capability resolver consistently. +- Tenant-owned resources scope queries by workspace and managed environment. +- Global search is disabled unless View/Edit pages are safe and scoped. +- Tables eager-load relationships shown in columns. +- Empty states do not leak tenant existence. +- Mutating actions are confirmation-gated and tested. +- Bulk actions intentionally choose `*Any` policy semantics or per-record authorization. + +## Checklist for File Uploads + +- Store on a private disk by default. +- Use random storage filenames. +- Store original filenames in a separate column if needed. +- Restrict `acceptedFileTypes()` and `maxSize()`. +- Use Laravel file validation rules for server-side validation. 
+- Use `preventFilePathTampering()` when the workflow does not intentionally allow choosing existing disk files. +- Do not render uploaded HTML/SVG inline unless sanitized and explicitly approved. +- Signed URLs must be short-lived and tenant-authorized. + +## Checklist for Admin Actions + +- Action name describes the business effect. +- UI state uses `UiEnforcement` or `WorkspaceUiEnforcement`. +- Server handler calls `Gate::authorize()` or a policy method. +- Destructive/high-impact action has `requiresConfirmation()`. +- Handler writes an audit event with actor, workspace, managed environment, target, outcome, and safe metadata. +- Long-running work dispatches a job and creates/updates an `OperationRun`. +- Duplicate clicks are idempotent or guarded by locks/unique run identity. +- Test covers allowed, disabled/denied, side effect, audit, and tenant isolation. + +## Checklist for Multi-Tenancy + +- Workspace context is established before tenant context. +- Non-members receive deny-as-not-found. +- Queries filter by `workspace_id` and tenant id before access. +- Cross-tenant surfaces are explicit and aggregation-based. +- IDs from request/query strings are resolved through scoped resolvers. +- Tests include tenant A cannot see or mutate tenant B. +- Audit logs include workspace and tenant context when applicable. + +## Security Code Pattern: Policy + +```php +namespace App\Policies; + +use App\Models\BackupSet; +use App\Models\User; +use Illuminate\Auth\Access\Response; + +final class BackupSetPolicy +{ + public function view(User $user, BackupSet $backupSet): Response + { + if (! $backupSet->workspace || ! $user->belongsToWorkspace($backupSet->workspace)) { + return Response::denyAsNotFound(); + } + + return $user->can('tenant.view', $backupSet->managedEnvironment) + ? 
Response::allow() + : Response::denyAsNotFound(); + } + + public function restore(User $user, BackupSet $backupSet): Response + { + if ($this->view($user, $backupSet)->denied()) { + return Response::denyAsNotFound(); + } + + return $user->can('tenant.restore.run', $backupSet->managedEnvironment) + ? Response::allow() + : Response::deny('Missing restore capability.'); + } +} +``` + +## Security Code Pattern: Audit Event + +```php +$audit->record( + action: 'backup_schedule.run_requested', + actor: $actor, + workspace: $schedule->workspace, + managedEnvironment: $schedule->managedEnvironment, + target: $schedule, + metadata: [ + 'operation_run_id' => $run->getKey(), + 'schedule_id' => $schedule->getKey(), + ], +); +``` + +Never include tokens, client secrets, raw credential payloads, or raw Graph error bodies in audit metadata. diff --git a/docs/stack-overview.md b/docs/stack-overview.md new file mode 100644 index 00000000..fa0bcf38 --- /dev/null +++ b/docs/stack-overview.md @@ -0,0 +1,206 @@ +# TenantPilot Stack Overview and Enterprise Assessment + +Status: 2026-05-15 +Scope: `apps/platform` Laravel/Filament application in `wt-plattform` +Project phase assumption: pre-production / MVP-to-scale-up, high criticality because Intune configuration, restore, audit, and tenant isolation are in scope. + +## Executive Summary + +TenantPilot already has a stronger-than-average governance foundation: Spec Kit is active, workspace and tenant isolation are explicit constitutional rules, Graph calls are centralized through `GraphClientInterface`, queued operations are observable via `OperationRun`, and many Filament actions use `UiEnforcement`, confirmation, audit logging, and capability checks. + +The main enterprise gaps are not conceptual; they are operational hardening gaps: + +1. P0: `composer audit` and `pnpm audit` currently report high/medium advisories affecting Filament Tables, phpseclib, PHPUnit, axios, devalue, esbuild, postcss, and related packages. +2. 
P1: Several critical historical JSON payload columns still use PostgreSQL `json` where the project data strategy requires `jsonb` for queryable snapshots and backup/restore payloads. +3. P1: Multiple Filament resources/pages exceed 1,000-5,700 lines, increasing change risk around admin workflows. +4. P1: Policy coverage is inconsistent: many resources use resource-level `can*()` plus gates, but not every resource-backed model has a dedicated policy. +5. P1: The local/Docker queue command uses `queue:listen`; production should use supervised `queue:work` or Laravel 12 `reload` semantics. +6. P2: Admin panel registration mixes explicit resources with discovery. This can be valid, but needs a documented rule to avoid accidental double mental models. +7. P2: Production configuration rules need to be encoded as deployment gates: `APP_DEBUG=false`, encrypted/secure sessions where needed, health checks, audit-safe logging, queue restart/reload, and backup restore testing. + +## Stack and Version Analysis + +| Bereich | Erkannte Version | Quelle/Datei | Status | Risiko | Empfehlung | +|---|---:|---|---|---|---| +| PHP runtime | 8.4.15 | Laravel Boost `application_info` | supported | low | Keep 8.4; track active support until 2026-12-31 and security support until 2028-12-31. | +| PHP constraint | `^8.2` | `apps/platform/composer.json` | broad | low | Keep if needed, but CI should test the actual runtime 8.4. | +| Laravel | 12.52.0 | Boost / `composer.lock` | current LTS-family app version | medium | Stay on 12.x for now; treat Laravel 13 as a planned major upgrade, not incidental. | +| Filament | 5.2.1 | Boost / `composer.lock` | vulnerable range | high | Upgrade to at least 5.3.5; preferably current 5.x after regression tests. | +| Livewire | 4.1.4 | Boost / `composer.lock` | compliant | low | Filament v5 + Livewire v4 compliance is satisfied. 
| +| Tailwind CSS | 4.2.2 | Boost / `pnpm-lock.yaml` | current minor behind | low | Tailwind v4 Vite integration is correct; update during frontend dependency patch window. | +| Alpine.js | unclear direct version | bundled transitively by Filament assets | unclear | low | Do not pin separately unless a project asset needs it. | +| PostgreSQL | 16 | `docker-compose.yml` | aligned | low | Use PostgreSQL-specific CI for JSONB, partial index, FK, and isolation assertions. | +| Redis | 7-alpine | `docker-compose.yml` | available | medium | Use for cache/queue when scale requires it; database queue is acceptable for MVP but not the long-term default. | +| Queue | database | `.env.example`, `config/queue.php` | MVP-grade | medium | Production should use supervised `queue:work`; split high/low/default queues for Graph/restore workloads. | +| Cache | database | `.env.example`, `config/cache.php` | MVP-grade | medium | Use Redis for production if queue restart signals, locks, and scheduler overlap become load-sensitive. | +| Session | database, encrypted false | `.env.example` | local default | medium | Production must set secure cookie/domain/same-site policy and consider `SESSION_ENCRYPT=true`. | +| Mail | log | `.env.example` | local default | low | Production needs SMTP/SES/Postmark decision and alert delivery tests. | +| Storage | local | `.env.example` | local default | medium | Production backup/report artifacts should use private object storage and tested restore paths. | +| Auth | Socialite + Microsoft Azure provider | `composer.json`, providers | aligned | medium | Review SocialiteProviders Microsoft-Azure 4.x to 5.x upgrade separately. | +| Testing | Pest 4.3.1, PHPUnit 12.5.4 | Boost / `composer.lock` | strong but vulnerable dev dep | high | Upgrade PHPUnit to a patched 12.5.x and keep Pest 4 lanes. | +| Frontend build | pnpm 10.33, Vite 7.3.2 | root/app package files | aligned | medium | Patch axios/postcss/esbuild/devalue advisories before production. 
| + +## Enterprise Maturity Score + +| Bereich | Score | Begründung | Zielzustand | +|---|---:|---|---| +| Architektur | 3.0 | Strong service/job/support layers, but some Filament surfaces are very large. | Thin UI classes, explicit services/actions for business workflows, no speculative frameworking. | +| Filament | 3.0 | Correct v5/Livewire 4 basis, panel providers in `bootstrap/providers.php`, central RBAC helper. | Standardized resource patterns, policy per resource, extracted schema/table/action builders where size justifies it. | +| Security | 3.0 | Tenant isolation, encrypted credential payloads, audit logs are strong. Supply-chain and policy coverage need work. | Audit gates in CI, patched dependencies, policy/resource coverage matrix, production security config gate. | +| Testing | 4.0 | Rich Pest/Filament/browser/governance lanes exist. | PostgreSQL lane required for schema/isolation changes; dependency audit gates mandatory. | +| Performance | 3.0 | Eager loading and queues exist; JSONB strategy is partially implemented. | Query budgets, JSONB indexes for queried payloads, worker separation, dashboard metrics. | +| DevOps | 3.0 | Sail-first local and Gitea CI exist. | Dokploy runbook, health checks, supervised workers, staging gate, rollback drills. | +| Observability | 3.0 | `OperationRun` and `AuditLog` create useful internal observability. | External error tracking/APM, queue/scheduler alerts, SLO dashboards. | +| Compliance | 2.5 | Audit/isolation foundations exist; GDPR/retention docs are incomplete. | Data inventory, retention matrix, DPA/vendor review, backup encryption proof. | +| Maintainability | 3.0 | Spec Kit and constitution reduce drift; large UI files raise regression risk. | Enforced file-size/refactor triggers and feature-local extraction patterns. 
| + +## Findings Register + +| ID | Kategorie | Finding | Evidenz | Risiko | Priorität | Aufwand | Empfehlung | Akzeptanzkriterium | +|---|---|---|---|---|---|---|---|---| +| F-001 | Supply Chain | Composer audit reports 8 advisories affecting 5 packages, including high severity Filament Tables XSS and phpseclib/PHPUnit advisories. | `composer audit --format=plain`; `filament/filament` 5.2.1, `phpunit/phpunit` 12.5.4 | XSS, crypto/DoS, unsafe dev tooling | P0 | M | Upgrade Filament to >=5.3.5, patch transitive packages, rerun full Filament/Pest lanes. | `composer audit` reports no high/medium advisories, or only explicitly approved exceptions. | +| F-002 | Supply Chain | pnpm audit reports high/moderate advisories for axios, devalue, esbuild, postcss and workspace packages. | `corepack pnpm audit --audit-level moderate --json` | SSRF, header injection, XSS, DoS | P0 | M | Update axios >=1.16.1, postcss >=8.5.10, devalue >=5.8.1, esbuild chain via dependency upgrade. | `pnpm audit --audit-level moderate` is clean or has approved exceptions. | +| F-003 | Datenbank | Core snapshot/backup/restore payload columns still use `json`, not `jsonb`. | `policy_versions.snapshot`, `backup_items.payload`, `restore_runs.preview/results/requested_items`, `audit_logs.metadata` migrations | Slow query paths, weaker indexing, inconsistency with product rule | P1 | M | Convert queryable payloads to JSONB with reversible migrations where feasible; add GIN/expression indexes only for proven queries. | Schema uses JSONB for policy snapshots, backup payloads, restore previews/results, and audit metadata query paths. | +| F-004 | Filament | Large workflow classes create high change risk.
| `ManagedEnvironmentOnboardingWizard.php` 5748 LOC, `ManagedEnvironmentResource.php` 3785 LOC, `RestoreRunResource.php` 2779 LOC, `FindingResource.php` 2503 LOC | Regression risk, difficult review, slow onboarding | P1 | L | Extract schema/table/action factories and service actions at natural workflow boundaries; keep extra layers narrow. | Largest admin workflows have test-covered extracted builders/services and no single file exceeds agreed threshold without exception. | +| F-005 | Security | Resource/model policy coverage is inconsistent. | Policies exist for many models, but resources like `Policy`, `PolicyVersion`, `BackupSet`, `RestoreRun`, `ManagedEnvironment`, `BaselineProfile`, `InventoryItem`, `StoredReport` lack obvious dedicated policies. | Authorization drift between UI and server | P1 | M | Add policy classes or documented exceptions; make Filament resources call policies for CRUD and domain actions. | Resource-policy matrix is complete and tested. | +| F-006 | DevOps | Queue container uses `php artisan queue:listen`. | `docker-compose.yml:65` | Inefficient workers, production reload ambiguity | P1 | S | Production/Dokploy should run `php artisan queue:work --sleep=3 --tries=3 --timeout=300` under process supervision and reload/restart on deploy. | Deployment checklist has worker command, process monitor, restart/reload, queue metrics. | +| F-007 | Filament | Admin panel registers explicit resources and also discovers resources. | `AdminPanelProvider.php:198`, `:211` | Accidental resource exposure or inconsistent registration ownership | P2 | S | Choose explicit allowlist for enterprise panels or document discovery boundaries. | Panel registration rule is documented and covered by navigation/surface tests. | +| F-008 | Graph/Integration | Default Graph version is `beta`. 
| `apps/platform/config/graph.php:12` | API drift and production contract instability | P2 | M | Keep beta only where endpoint requires it; document endpoint version in `config/graph_contracts.php` and specs. | Each Graph contract records v1.0/beta, permission, retry behavior, and production risk. | +| F-009 | Testing/CI | PostgreSQL test lane exists but should be mandatory for schema/isolation changes. | `composer.json` has `test:pgsql`; default PHPUnit uses SQLite in memory. | SQLite misses JSONB, partial index, FK, and lock behavior | P2 | M | Add CI rule: migrations, tenant isolation, JSONB, operation locks require `sail:test:pgsql` or CI PostgreSQL lane. | PR checks show PostgreSQL lane on relevant path changes. | +| F-010 | Compliance | GDPR retention and backup security are not yet consolidated in one operational matrix. | Product docs exist; no single retention/backup/privacy matrix found in requested scope. | Incomplete audit readiness | P2 | M | Create retention matrix for audit logs, backups, reports, credentials, run payloads; include deletion/export procedure. | Retention matrix exists and is referenced from deployment/security docs. | + +## Recommended Next 10 Actions + +1. Patch Composer and pnpm advisories, starting with Filament >=5.3.5 and axios/postcss/devalue. +2. Add `composer audit` and `pnpm audit --audit-level moderate` to the confidence or release lane. +3. Create a resource-policy matrix and add missing policy classes or documented exceptions. +4. Convert core queryable JSON payloads to JSONB with targeted indexes. +5. Change production queue guidance from `queue:listen` to supervised `queue:work` plus Laravel 12 `reload`/`queue:restart`. +6. Extract repeated Filament action closures from `BackupScheduleResource`, `RestoreRunResource`, and `ManagedEnvironmentResource` into focused action/service classes. +7. Require PostgreSQL CI for migrations, tenant isolation, operation locks, and JSONB behavior. +8.
Document Graph endpoint version and permission truth in `config/graph_contracts.php` for every new integration. +9. Add production security config checklist: debug false, secure cookies, private storage, no secret logging, encrypted credentials. +10. Create retention/backup restore drill checklist before staging promotion. + +## Best-Practice Target State + +- Architecture: Laravel monolith with clear UI/application/domain/infrastructure boundaries; no speculative platform framework. +- Filament: v5-native resources/pages/widgets/actions with thin UI orchestration, policies, `UiEnforcement`, empty states, table standards, and tested actions. +- Backend: services/actions/jobs own business behavior; controllers and resources stay thin; remote work is queued and idempotent. +- Database: PostgreSQL integrity first: workspace/tenant constraints, partial unique indexes for active operations, JSONB for retained/queryable payloads. +- Security: OWASP ASVS-informed controls, deny-as-not-found isolation, least privilege, encrypted credentials, clean dependency audits. +- Testing: Pest 4 lanes protect business truth, Filament actions, policy semantics, PostgreSQL constraints, and browser-critical workflows. +- Deployment: Dokploy runbook with staging gate, health checks, migrations, asset build, supervised workers, rollback, backup restore proof. +- Observability: audit logs, operation runs, queue/scheduler metrics, Graph throttle metrics, error tracking, and production dashboards. +- Documentation: compact project rules in `docs/*-guidelines.md`, current AGENTS rules, and spec-linked decisions. +- AI coding: agents must follow `docs/ai-coding-rules.md`, Spec Kit, and version-specific official docs. 
+ +## Anti-Pattern Catalog + +| Anti-Pattern | Warum problematisch | Besseres Pattern | Priorität | +|---|---|---|---| +| Fat Filament Resources/Pages | Hard to review, test, and safely change | Extract focused schema/table/action builders and domain actions | P1 | +| Business logic in closures | Authorization/audit/transaction behavior drifts | Service/action class called from UI action | P1 | +| Missing policies | UI checks become the only guard | Policy per resource-backed model or documented exception | P1 | +| N+1 table/global-search queries | Slow admin surfaces | Eager loading, `withCount`, aggregate subqueries | P2 | +| Unsafe uploads | RCE/path tampering/data leakage risk | Private disk, random filenames, MIME/size validation, tamper prevention | P2 | +| Missing transactions | Partial writes in critical workflows | Transaction around intent/run/audit state changes | P1 | +| JSON where JSONB is queried | Weak indexing and repeated parsing | JSONB plus targeted GIN/expression indexes | P1 | +| Fragile broad tests | Slow suite, unclear signal | Lane-scoped tests that prove business truth | P2 | +| Plugin/package drift | Security and maintenance risk | Package governance and audit gates | P0 | +| Admin actions without audit | No accountability for critical changes | Audit event for every sensitive mutation | P1 | +| Unclear roles/rights | Tenant data exposure risk | Capability matrix, policies, deny-as-not-found | P1 | +| No queue retry strategy | Duplicate/failed operations | Idempotent jobs, locks, run identity, backoff | P1 | + +## Roadmap + +### Phase 1: Stabilisieren + +Goal: remove immediate production blockers. + +Tasks: + +- Patch Composer and pnpm advisories. +- Add audit gates to CI/release. +- Replace production queue guidance with supervised `queue:work`. +- Add production env/security checklist enforcement. +- Add or confirm tests for destructive restore/backup/provider actions. + +Effort: M. 
+Risk: dependency upgrades can reveal Filament regressions. +Acceptance: audits clean, critical tests pass, staging deploy runbook works. + +### Phase 2: Standardisieren + +Goal: reduce admin workflow change risk. + +Tasks: + +- Build resource-policy matrix. +- Extract repeated action closure logic from largest resources. +- Standardize explicit vs discovered Filament resource registration. +- Document Graph contract version and permission rules per endpoint. +- Require PostgreSQL lane for schema/isolation changes. + +Effort: L. +Risk: refactors can conflict with active feature work. +Acceptance: resource-policy matrix complete, top three largest surfaces have bounded extraction plan/tests. + +### Phase 3: Skalieren + +Goal: prepare for higher tenant/data volume. + +Tasks: + +- Convert queryable JSON payloads to JSONB. +- Add targeted indexes for policy/backup/restore/audit query paths. +- Split queues and consider Redis for production. +- Add dashboard/queue/scheduler/Graph metrics. +- Cache stable aggregates where invalidation is clear. + +Effort: L. +Risk: migrations need staging timing proof. +Acceptance: query plans and queue metrics are documented; staging migration time is acceptable. + +### Phase 4: Enterprise Readiness + +Goal: compliance and operating model. + +Tasks: + +- Data inventory and retention matrix. +- Backup restore drill and incident response runbook. +- External error tracking/APM integration. +- Vendor/DPA/security review for mail/storage/hosting providers. +- Scheduled package/security review cadence. + +Effort: XL. +Risk: organizational dependencies outside code. +Acceptance: production readiness checklist is signed off and rehearsable. + +## Open Questions + +- Which production mail provider will be used: SMTP, SES, Mailgun, Postmark, or another service? +- Which production storage backend will hold backup/report artifacts: local volume, S3, R2, Spaces, or another private object store?
+- Should production queue/cache move to Redis before first customer data, or after volume signals? +- What are the required retention periods for audit logs, backups, restore results, operation runs, reports, and support access logs? +- Is Microsoft Graph `beta` acceptable for each production endpoint, or must some flows be v1.0-only? +- What compliance bar is expected: internal GDPR readiness, enterprise customer security review, regulated procurement, or formal certification? + +## Sources + +- Official Laravel 12 docs: [deployment](https://laravel.com/docs/12.x/deployment), [authorization](https://laravel.com/docs/12.x/authorization), [queues](https://laravel.com/docs/12.x/queues), [validation](https://laravel.com/docs/12.x/validation). +- Official Filament 5 docs: [global search](https://filamentphp.com/docs/5.x/resources/global-search), [actions](https://filamentphp.com/docs/5.x/actions/overview), [security](https://filamentphp.com/docs/5.x/advanced/security), [testing actions](https://filamentphp.com/docs/5.x/testing/testing-actions). +- Official Pest 4 docs: [browser testing release notes](https://pestphp.com/docs/pest-v4-is-here-now-with-browser-testing). +- Official PostgreSQL 16 docs: [JSON/JSONB and GIN indexing](https://www.postgresql.org/docs/16/datatype-json.html). +- OWASP: [ASVS](https://owasp.org/www-project-application-security-verification-standard/). +- NIST: [SP 800-218 SSDF](https://csrc.nist.gov/pubs/sp/800/218/final). +- Microsoft Learn: [Graph throttling guidance](https://learn.microsoft.com/en-us/graph/throttling). +- PHP: [supported versions](https://www.php.net/supported-versions.php). + +## Assumptions and Uncertainties + +- Production is assumed to be Dokploy on VPS with container-based deployment, as stated in `AGENTS.md`. +- Exact production mail/storage/cache providers are not set in `.env.example`; recommendations are provider-neutral. 
+- Alpine.js exact version is not declared directly; it is treated as Filament-managed unless the project adds custom Alpine assets. +- The current branch has unrelated modified app/test files; this assessment did not alter them. diff --git a/docs/testing-guidelines.md b/docs/testing-guidelines.md new file mode 100644 index 00000000..9ba36075 --- /dev/null +++ b/docs/testing-guidelines.md @@ -0,0 +1,113 @@ +# TenantPilot Testing Guidelines + +Status: 2026-05-15 +Applies to: Pest 4.3+, PHPUnit 12, Laravel 12, Filament 5, Livewire 4. + +## Test Philosophy + +Tests protect business truth: workspace isolation, tenant isolation, RBAC, auditability, immutable snapshots, restore safety, queued operation correctness, and Graph contract safety. + +Do not create broad tests for thin presentation helpers unless the helper encodes operator-critical behavior. + +## Test Pyramid + +| Layer | Use for | Default lane | +|---|---|---| +| Unit | Pure services, value objects, mappers, policy helpers | fast-feedback | +| Feature | HTTP, DB, policies, queued jobs, audit side effects | fast-feedback/confidence | +| Filament/Livewire | Pages, widgets, relation managers, actions | confidence | +| PostgreSQL | migrations, JSONB, partial indexes, locks, FK isolation | pgsql | +| Browser | critical multi-step UI, JS smoke, visual/user workflow checks | browser | +| Heavy governance | broad surface discovery and drift checks | heavy-governance | + +## Minimum Standard for New Features + +- Every new policy gets allowed and denied tests. +- Every new destructive/high-impact Filament action gets action tests. +- Every new tenant-owned model gets cross-tenant isolation tests. +- Every new migration touching constraints/indexes gets PostgreSQL lane coverage when SQLite cannot prove the behavior. +- Every job that calls Graph is tested for idempotency, terminal-state handling, retry/throttle classification, and safe logging. +- Every feature spec states test impact and lane classification. 
+ +## Critical User Journeys to Keep Covered + +- Workspace selection and tenant selection. +- Provider connection create/verify/disable/health-check. +- Policy sync, snapshot capture, version history, diff navigation. +- Backup set creation, add policies, schedule run/retry. +- Restore preview, confirmation, execution, partial failure handling. +- Finding triage, assignment, exception, evidence review. +- Audit log visibility and tenant-scope enforcement. +- System panel login/session isolation and platform capability checks. + +## Filament Action Test Pattern + +```php +use App\Filament\Resources\BackupScheduleResource\Pages\ListBackupSchedules; +use App\Jobs\RunBackupScheduleJob; +use Illuminate\Support\Facades\Bus; +use function Pest\Livewire\livewire; + +it('queues a backup schedule run for an authorized tenant member', function () { + Bus::fake(); + + [$user, $tenant, $schedule] = tenantUserWithBackupScheduleRunCapability(); + + actingAs($user); + Filament::setTenant($tenant); + + livewire(ListBackupSchedules::class) + ->assertTableActionVisible('runNow', $schedule) + ->callTableAction('runNow', $schedule); + + Bus::assertDispatched(RunBackupScheduleJob::class); + + expectAuditLogged('backup_schedule.run_requested', $schedule); +}); +``` + +## Policy Test Pattern + +```php +it('hides another workspace backup set as not found', function () { + [$actor, $ownWorkspace] = workspaceMember(); + $foreignBackupSet = BackupSet::factory()->forWorkspace()->create(); + + $response = Gate::forUser($actor)->inspect('view', $foreignBackupSet); + + expect($response->denied())->toBeTrue() + ->and($response->status())->toBe(404); +}); +``` + +## PostgreSQL Lane Rule + +Use `cd apps/platform && ./vendor/bin/sail php vendor/bin/pest -c phpunit.pgsql.xml` or the equivalent CI PostgreSQL lane for: + +- JSONB migrations and GIN indexes. +- Partial unique indexes. +- Composite foreign keys. +- `lockForUpdate()` behavior. +- Tenant/workspace constraint migrations. 
+- Query plans where performance depends on PostgreSQL-specific operators. + +## CI Recommendation + +Release/confidence CI should run: + +1. `composer validate --strict` +2. `composer audit` +3. `corepack pnpm audit --audit-level moderate` +4. `./vendor/bin/pint --test` +5. `composer run test` +6. `composer run test:pgsql` when database paths changed +7. `composer run test:browser` for UI workflow changes +8. `corepack pnpm build:platform` + +## Fragility Controls + +- Prefer factories with explicit state over global seed assumptions. +- Keep full workspace/tenant/member setup opt-in. +- Assert outcomes, audit events, and authorization behavior, not implementation details. +- Use fake Graph clients that fail hard if UI rendering accidentally calls Graph. +- Avoid snapshot tests for volatile admin markup unless visual regression is the real goal.