TenantAtlas/app/Jobs/ExecuteRestoreRunJob.php
ahmido 845d21db6d feat: harden operation lifecycle monitoring (#190)
## Summary
- harden operation-run lifecycle handling with explicit reconciliation policy, stale-run healing, failed-job bridging, and monitoring visibility
- refactor audit log event inspection into a Filament slide-over and remove the stale inline detail/header-action coupling
- align panel theme asset resolution and supporting Filament UI updates, including the rounded 2xl theme token regression fix

## Testing
- ran focused Pest coverage for the affected audit-log inspection flow and related visibility tests
- ran formatting with `vendor/bin/sail bin pint --dirty --format agent`
- manually verified the updated audit-log slide-over flow in the integrated browser

## Notes
- branch includes the Spec 160 artifacts under `specs/160-operation-lifecycle-guarantees/`
- the full test suite was not rerun as part of this final commit/PR step

Co-authored-by: Ahmed Darrazi <ahmed.darrazi@live.de>
Reviewed-on: #190
2026-03-23 21:53:19 +00:00

211 lines
7.1 KiB
PHP

<?php
namespace App\Jobs;
use App\Contracts\Hardening\WriteGateInterface;
use App\Exceptions\Hardening\ProviderAccessHardeningRequired;
use App\Jobs\Middleware\EnsureQueuedExecutionLegitimate;
use App\Jobs\Middleware\TrackOperationRun;
use App\Listeners\SyncRestoreRunToOperationRun;
use App\Models\OperationRun;
use App\Models\RestoreRun;
use App\Services\Intune\AuditLogger;
use App\Services\Intune\RestoreService;
use App\Support\OpsUx\RunFailureSanitizer;
use App\Support\RestoreRunStatus;
use Carbon\CarbonImmutable;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Throwable;
/**
 * Queued job that executes a single restore run.
 *
 * The job loads the RestoreRun identified by {@see $restoreRunId}, links it to the
 * OperationRun it was dispatched with, and — only while the restore run is still in the
 * Queued status — evaluates the write gate and delegates the restore itself to
 * RestoreService::executeForRun(). Every terminal path persists the restore-run status;
 * the unavailable-backup-set and execution-failure paths additionally write an audit entry.
 */
class ExecuteRestoreRunJob implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /** Queue worker timeout for this job (Laravel interprets this value in seconds). */
    public int $timeout = 420;

    /** Laravel queue flag: mark the job as failed when the timeout is hit. */
    public bool $failOnTimeout = true;

    /** OperationRun this execution reports into; handle() fails the job when it is missing. */
    public ?OperationRun $operationRun = null;

    /**
     * @param  int  $restoreRunId  Primary key of the RestoreRun to execute.
     * @param  string|null  $actorEmail  E-mail of the initiating actor, forwarded to audit entries and the restore service.
     * @param  string|null  $actorName  Display name of the initiating actor, forwarded to audit entries and the restore service.
     * @param  OperationRun|null  $operationRun  OperationRun context; required at execution time.
     */
    public function __construct(
        public int $restoreRunId,
        public ?string $actorEmail = null,
        public ?string $actorName = null,
        ?OperationRun $operationRun = null,
    ) {
        $this->operationRun = $operationRun;
    }

    /**
     * Job middleware applied before handle() runs.
     *
     * @return array<int, object>
     */
    public function middleware(): array
    {
        return [new EnsureQueuedExecutionLegitimate, new TrackOperationRun];
    }

    /**
     * Execute the restore run.
     *
     * Returns without doing any restore work when the restore run no longer exists or is
     * not in the Queued status. Marks the restore run as failed when the tenant/backup set
     * is unavailable or when the write gate blocks the operation.
     *
     * @throws Throwable Any error raised while executing the restore (or while syncing right
     *                   after it) is rethrown once the failure has been recorded.
     */
    public function handle(RestoreService $restoreService, AuditLogger $auditLogger): void
    {
        // Narrow the nullable property once so the rest of the method works on a non-null local.
        $operationRun = $this->operationRun;

        if ($operationRun === null) {
            $this->fail(new \RuntimeException('OperationRun context is required for ExecuteRestoreRunJob.'));

            return;
        }

        $restoreRun = RestoreRun::with(['tenant', 'backupSet'])->find($this->restoreRunId);

        if (! $restoreRun) {
            return;
        }

        // The link is written before the status guard, exactly as before: a restore run that
        // is no longer queued still ends up pointing at this job's OperationRun.
        $this->linkOperationRun($restoreRun, $operationRun);

        if ($restoreRun->status !== RestoreRunStatus::Queued->value) {
            return;
        }

        $this->syncOperationRun($restoreRun);

        $tenant = $restoreRun->tenant;
        $backupSet = $restoreRun->backupSet;

        if (! $tenant || ! $backupSet || $backupSet->trashed()) {
            $this->handleUnavailableBackupSet($restoreRun, $tenant, $auditLogger);

            return;
        }

        try {
            app(WriteGateInterface::class)->evaluate($tenant, 'restore.execute');
        } catch (ProviderAccessHardeningRequired $e) {
            $this->handleWriteBlocked($restoreRun, $operationRun, $e);

            return;
        }

        $restoreRun->update([
            'status' => RestoreRunStatus::Running->value,
            'started_at' => CarbonImmutable::now(),
            'failure_reason' => null,
        ]);

        // Keep the canonical Monitoring/Operations adapter row in sync even if downstream
        // code performs restore-run updates without firing model events.
        $this->syncOperationRun($restoreRun->refresh());

        $this->recordAudit($auditLogger, $tenant, $restoreRun, 'restore.started', 'success', [
            'restore_run_id' => $restoreRun->id,
            'backup_set_id' => $backupSet->id,
        ]);

        try {
            $restoreService->executeForRun(
                restoreRun: $restoreRun,
                tenant: $tenant,
                backupSet: $backupSet,
                actorEmail: $this->actorEmail,
                actorName: $this->actorName,
            );

            $this->syncOperationRun($restoreRun->refresh());
        } catch (Throwable $throwable) {
            $this->handleExecutionFailure($restoreRun, $tenant, $backupSet, $auditLogger, $throwable);

            // Rethrow so the queue still sees the job as failed.
            throw $throwable;
        }
    }

    /**
     * Point the restore run at the given OperationRun when it is not linked to it yet.
     * The write is done with model events disabled.
     */
    private function linkOperationRun(RestoreRun $restoreRun, OperationRun $operationRun): void
    {
        $operationRunKey = $operationRun->getKey();

        if ((int) ($restoreRun->operation_run_id ?? 0) === (int) $operationRunKey) {
            return;
        }

        RestoreRun::withoutEvents(static function () use ($restoreRun, $operationRunKey): void {
            $restoreRun->forceFill(['operation_run_id' => $operationRunKey])->save();
        });
    }

    /**
     * Pass the restore run to the SyncRestoreRunToOperationRun listener.
     */
    private function syncOperationRun(RestoreRun $restoreRun): void
    {
        app(SyncRestoreRunToOperationRun::class)->handle($restoreRun);
    }

    /**
     * Fail the restore run because its tenant or backup set is missing or the backup set is trashed.
     *
     * @param  object|null  $tenant  Tenant relation of the restore run; the audit entry is skipped when it is null.
     */
    private function handleUnavailableBackupSet(RestoreRun $restoreRun, ?object $tenant, AuditLogger $auditLogger): void
    {
        $restoreRun->update([
            'status' => RestoreRunStatus::Failed->value,
            'failure_reason' => 'Backup set is archived or unavailable.',
            'completed_at' => CarbonImmutable::now(),
        ]);

        $this->syncOperationRun($restoreRun->refresh());

        if (! $tenant) {
            return;
        }

        $this->recordAudit($auditLogger, $tenant, $restoreRun, 'restore.failed', 'failed', [
            'restore_run_id' => $restoreRun->id,
            'backup_set_id' => $restoreRun->backup_set_id,
            'reason' => 'Backup set is archived or unavailable.',
        ]);
    }

    /**
     * Fail the restore run and complete the OperationRun with a `hardening.write_blocked`
     * failure after the write gate rejected the operation.
     */
    private function handleWriteBlocked(
        RestoreRun $restoreRun,
        OperationRun $operationRun,
        ProviderAccessHardeningRequired $e,
    ): void {
        $restoreRun->update([
            'status' => RestoreRunStatus::Failed->value,
            'failure_reason' => $e->reasonMessage,
            'completed_at' => CarbonImmutable::now(),
        ]);

        app(\App\Services\OperationRunService::class)->updateRun(
            $operationRun,
            status: \App\Support\OperationRunStatus::Completed->value,
            outcome: \App\Support\OperationRunOutcome::Failed->value,
            failures: [[
                'code' => 'hardening.write_blocked',
                'reason_code' => $e->reasonCode,
                'message' => $e->reasonMessage,
            ]],
        );
    }

    /**
     * Record a failure raised while executing the restore: mark the run failed (only when it
     * is still Running after a refresh), sync the OperationRun and write the audit entry.
     * The caller is responsible for rethrowing the throwable.
     *
     * @param  object  $tenant  Tenant relation of the restore run (non-null at this point).
     * @param  object  $backupSet  Backup-set relation of the restore run (non-null at this point).
     */
    private function handleExecutionFailure(
        RestoreRun $restoreRun,
        object $tenant,
        object $backupSet,
        AuditLogger $auditLogger,
        Throwable $throwable,
    ): void {
        $restoreRun->refresh();

        // Only the sanitized message is persisted and audited, never the raw exception text.
        $safeReason = RunFailureSanitizer::sanitizeMessage($throwable->getMessage());

        // Leave the status alone when it is no longer Running after the refresh.
        if ($restoreRun->status === RestoreRunStatus::Running->value) {
            $restoreRun->update([
                'status' => RestoreRunStatus::Failed->value,
                'failure_reason' => $safeReason,
                'completed_at' => CarbonImmutable::now(),
            ]);
        }

        $this->syncOperationRun($restoreRun->refresh());

        $this->recordAudit($auditLogger, $tenant, $restoreRun, 'restore.failed', 'failed', [
            'restore_run_id' => $restoreRun->id,
            'backup_set_id' => $backupSet->id,
            'reason' => $safeReason,
        ]);
    }

    /**
     * Write one audit entry for the restore run, attributed to the job's actor.
     *
     * @param  object  $tenant  Tenant the entry is recorded for.
     * @param  string  $action  Audit action name, e.g. `restore.started` or `restore.failed`.
     * @param  string  $status  Audit status, `success` or `failed`.
     * @param  array<string, mixed>  $metadata  Values stored under the `metadata` context key.
     */
    private function recordAudit(
        AuditLogger $auditLogger,
        object $tenant,
        RestoreRun $restoreRun,
        string $action,
        string $status,
        array $metadata,
    ): void {
        $auditLogger->log(
            tenant: $tenant,
            action: $action,
            context: [
                'metadata' => $metadata,
            ],
            actorEmail: $this->actorEmail,
            actorName: $this->actorName,
            resourceType: 'restore_run',
            resourceId: (string) $restoreRun->id,
            status: $status,
        );
    }
}