TenantAtlas/app/Jobs/ExecuteRestoreRunJob.php
ahmido bcf4996a1e feat/049-backup-restore-job-orchestration (#56)
Summary

This PR implements Spec 049 – Backup/Restore Job Orchestration: all critical Backup/Restore execution paths are job-only, idempotent, tenant-scoped, and observable via run records + DB notifications (Phase 1). The UI no longer performs heavy Graph work inside request/Filament actions for these flows.

Why

We want predictable UX and operations at MSP scale:
	•	no timeouts / long-running requests
	•	reproducible run state + per-item results
	•	safe error persistence (no secrets / no token leakage)
	•	strict tenant isolation + auditability for write paths

What changed

Foundational (Runs + Idempotency + Observability)
	•	Added a shared RunIdempotency helper (dedupe while queued/running).
	•	Added a read-only BulkOperationRuns surface (list + view) for status/progress.
	•	Added DB notifications for run status changes (with “View run” link).

US1 – Policy “Capture snapshot” is job-only
	•	Policy detail “Capture snapshot” now:
	•	creates/reuses a run (dedupe key: tenant + policy.capture_snapshot + policy DB id)
	•	dispatches a queued job
	•	returns immediately with notification + link to run detail
	•	Graph capture work moved fully into the job; request path stays Graph-free.

US3 – Restore runs orchestration is job-only + safe
	•	Live restore execution is queued and updates RestoreRun status/progress.
	•	Per-item outcomes are persisted deterministically (per internal DB record).
	•	Audit logging is written for live restore.
	•	Preview/dry-run is enforced as read-only (no writes).

Tenant isolation / authorization (non-negotiable)
	•	Run list/view/start are tenant-scoped and policy-guarded (cross-tenant access => 403, not 404).
	•	Explicit Pest tests cover cross-tenant denial and start authorization.

Tests / Verification
	•	./vendor/bin/pint --dirty
	•	Targeted suite (examples):
	•	policy capture snapshot queued + idempotency tests
	•	restore orchestration + audit logging + preview read-only tests
	•	run authorization / tenant isolation tests

Notes / Scope boundaries
	•	Phase 1 UX = DB notifications + run detail page. A global “progress widget” is tracked as Phase 2 and not required for merge.
	•	Resilience/backoff is tracked in tasks but can be iterated further after merge.

Review focus
	•	Dedupe behavior for queued/running runs (reuse vs create-new)
	•	Tenant scoping & policy gates for all run surfaces
	•	Restore safety: audit event + preview no-writes

Co-authored-by: Ahmed Darrazi <ahmeddarrazi@adsmac.local>
Reviewed-on: #56
2026-01-11 15:59:06 +00:00

191 lines
6.1 KiB
PHP

<?php
namespace App\Jobs;
use App\Models\RestoreRun;
use App\Models\User;
use App\Notifications\RunStatusChangedNotification;
use App\Services\BulkOperationService;
use App\Services\Intune\AuditLogger;
use App\Services\Intune\RestoreService;
use App\Support\RestoreRunStatus;
use Carbon\CarbonImmutable;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Throwable;
/**
 * Queued job that executes one restore run and records its lifecycle.
 *
 * The job moves a RestoreRun from "queued" to "running", delegates the actual
 * work to RestoreService::executeForRun(), writes audit entries for the start
 * and for failures, and sends RunStatusChangedNotification updates to the user
 * who triggered the run.
 *
 * Notifications are best-effort: a failure while notifying is reported but
 * never changes the recorded outcome of the run.
 */
class ExecuteRestoreRunJob implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /**
     * @param  int  $restoreRunId  Primary key of the RestoreRun to execute.
     * @param  string|null  $actorEmail  Email recorded in audit entries; also the preferred notification recipient.
     * @param  string|null  $actorName  Display name recorded in audit entries.
     */
    public function __construct(
        public int $restoreRunId,
        public ?string $actorEmail = null,
        public ?string $actorName = null,
    ) {}

    /**
     * Execute the restore run if it is still queued.
     *
     * Does nothing when the run no longer exists or is not in the queued state.
     * When the tenant or backup set is missing (or the backup set is trashed)
     * the run is marked failed without invoking the restore service.
     *
     * @throws Throwable Rethrown after the run has been marked failed and the failure audited.
     */
    public function handle(
        RestoreService $restoreService,
        AuditLogger $auditLogger,
        BulkOperationService $bulkOperationService,
    ): void {
        $restoreRun = RestoreRun::with(['tenant', 'backupSet'])->find($this->restoreRunId);

        // Only runs that are still queued are executed; anything else is left untouched.
        if (! $restoreRun || $restoreRun->status !== RestoreRunStatus::Queued->value) {
            return;
        }

        $this->notifyStatus($restoreRun, 'queued');

        $tenant = $restoreRun->tenant;
        $backupSet = $restoreRun->backupSet;

        if (! $tenant || ! $backupSet || $backupSet->trashed()) {
            $reason = 'Backup set is archived or unavailable.';

            $restoreRun->update([
                'status' => RestoreRunStatus::Failed->value,
                'failure_reason' => $reason,
                'completed_at' => CarbonImmutable::now(),
            ]);

            $this->notifyStatus($restoreRun->refresh(), 'failed');

            // Audit entries are tenant-scoped, so one can only be written when the tenant exists.
            if ($tenant) {
                $auditLogger->log(
                    tenant: $tenant,
                    action: 'restore.failed',
                    context: [
                        'metadata' => [
                            'restore_run_id' => $restoreRun->id,
                            'backup_set_id' => $restoreRun->backup_set_id,
                            'reason' => $reason,
                        ],
                    ],
                    actorEmail: $this->actorEmail,
                    actorName: $this->actorName,
                    resourceType: 'restore_run',
                    resourceId: (string) $restoreRun->id,
                    status: 'failed',
                );
            }

            return;
        }

        $restoreRun->update([
            'status' => RestoreRunStatus::Running->value,
            'started_at' => CarbonImmutable::now(),
            'failure_reason' => null,
        ]);

        $this->notifyStatus($restoreRun->refresh(), 'running');

        // From here on the run is "running". Everything that can throw is kept
        // inside the try block so the run is always moved to a terminal state
        // instead of being left stuck in "running" (a retry would skip it,
        // because it is no longer queued).
        try {
            $auditLogger->log(
                tenant: $tenant,
                action: 'restore.started',
                context: [
                    'metadata' => [
                        'restore_run_id' => $restoreRun->id,
                        'backup_set_id' => $backupSet->id,
                    ],
                ],
                actorEmail: $this->actorEmail,
                actorName: $this->actorName,
                resourceType: 'restore_run',
                resourceId: (string) $restoreRun->id,
                status: 'success',
            );

            $restoreService->executeForRun(
                restoreRun: $restoreRun,
                tenant: $tenant,
                backupSet: $backupSet,
                actorEmail: $this->actorEmail,
                actorName: $this->actorName,
            );
        } catch (Throwable $throwable) {
            $restoreRun->refresh();

            // Never persist the raw exception message; it is sanitized first.
            $safeReason = $bulkOperationService->sanitizeFailureReason($throwable->getMessage());

            // Only overwrite the status when the run was not already moved to
            // another state before the exception surfaced.
            if ($restoreRun->status === RestoreRunStatus::Running->value) {
                $restoreRun->update([
                    'status' => RestoreRunStatus::Failed->value,
                    'failure_reason' => $safeReason,
                    'completed_at' => CarbonImmutable::now(),
                ]);
            }

            $restoreRun->refresh();
            $this->notifyStatus($restoreRun, (string) $restoreRun->status);

            $auditLogger->log(
                tenant: $tenant,
                action: 'restore.failed',
                context: [
                    'metadata' => [
                        'restore_run_id' => $restoreRun->id,
                        'backup_set_id' => $backupSet->id,
                        'reason' => $safeReason,
                    ],
                ],
                actorEmail: $this->actorEmail,
                actorName: $this->actorName,
                resourceType: 'restore_run',
                resourceId: (string) $restoreRun->id,
                status: 'failed',
            );

            throw $throwable;
        }

        // Sent outside the try block: a problem while notifying must not be
        // recorded as a failed restore once the service call has returned.
        $restoreRun->refresh();
        $this->notifyStatus($restoreRun, (string) $restoreRun->status);
    }

    /**
     * Send a RunStatusChangedNotification for the given run, if a recipient can be resolved.
     *
     * Best-effort: any error raised while resolving the recipient or sending
     * the notification is reported and swallowed so it cannot affect the run.
     *
     * @param  RestoreRun  $restoreRun  Run whose tenant id, key and metadata counts go into the payload.
     * @param  string  $status  Status value to announce.
     */
    private function notifyStatus(RestoreRun $restoreRun, string $status): void
    {
        try {
            $recipient = $this->resolveRecipient($restoreRun);

            if (! $recipient) {
                return;
            }

            $payload = [
                'tenant_id' => (int) $restoreRun->tenant_id,
                'run_type' => 'restore',
                'run_id' => (int) $restoreRun->getKey(),
                'status' => $status,
            ];

            $counts = $this->extractCounts($restoreRun);

            if ($counts !== []) {
                $payload['counts'] = $counts;
            }

            $recipient->notify(new RunStatusChangedNotification($payload));
        } catch (Throwable $exception) {
            report($exception);
        }
    }

    /**
     * Find the user to notify: the actor email if set, otherwise the run's requested_by value.
     */
    private function resolveRecipient(RestoreRun $restoreRun): ?User
    {
        $email = $this->actorEmail;

        if ($email === null || $email === '') {
            $email = $restoreRun->requested_by;
        }

        if (! is_string($email) || $email === '') {
            return null;
        }

        return User::query()->where('email', $email)->first();
    }

    /**
     * Collect the numeric progress counters present in the run's metadata.
     *
     * @return array<string, int>
     */
    private function extractCounts(RestoreRun $restoreRun): array
    {
        $metadata = is_array($restoreRun->metadata) ? $restoreRun->metadata : [];
        $counts = [];

        foreach (['total', 'succeeded', 'failed', 'skipped'] as $key) {
            $value = $metadata[$key] ?? null;

            if (is_numeric($value)) {
                $counts[$key] = (int) $value;
            }
        }

        return $counts;
    }
}