Summary

This PR implements Spec 049 – Backup/Restore Job Orchestration: all critical Backup/Restore execution paths are job-only, idempotent, tenant-scoped, and observable via run records + DB notifications (Phase 1). The UI no longer performs heavy Graph work inside request/Filament actions for these flows.

Why

We want predictable UX and operations at MSP scale:
• no timeouts / long-running requests
• reproducible run state + per-item results
• safe error persistence (no secrets / no token leakage)
• strict tenant isolation + auditability for write paths

What changed

Foundational (Runs + Idempotency + Observability)
• Added a shared RunIdempotency helper (dedupe while queued/running).
• Added a read-only BulkOperationRuns surface (list + view) for status/progress.
• Added DB notifications for run status changes (with “View run” link).

US1 – Policy “Capture snapshot” is job-only
• Policy detail “Capture snapshot” now:
  • creates/reuses a run (dedupe key: tenant + policy.capture_snapshot + policy DB id)
  • dispatches a queued job
  • returns immediately with notification + link to run detail
• Graph capture work moved fully into the job; request path stays Graph-free.

US3 – Restore runs orchestration is job-only + safe
• Live restore execution is queued and updates RestoreRun status/progress.
• Per-item outcomes are persisted deterministically (per internal DB record).
• Audit logging is written for live restore.
• Preview/dry-run is enforced as read-only (no writes).

Tenant isolation / authorization (non-negotiable)
• Run list/view/start are tenant-scoped and policy-guarded (cross-tenant access => 403, not 404).
• Explicit Pest tests cover cross-tenant denial and start authorization.

Tests / Verification
• ./vendor/bin/pint --dirty
• Targeted suite (examples):
  • policy capture snapshot queued + idempotency tests
  • restore orchestration + audit logging + preview read-only tests
  • run authorization / tenant isolation tests

Notes / Scope boundaries
• Phase 1 UX = DB notifications + run detail page. A global “progress widget” is tracked as Phase 2 and not required for merge.
• Resilience/backoff is tracked in tasks but can be iterated further after merge.

Review focus
• Dedupe behavior for queued/running runs (reuse vs create-new)
• Tenant scoping & policy gates for all run surfaces
• Restore safety: audit event + preview no-writes

Co-authored-by: Ahmed Darrazi <ahmeddarrazi@adsmac.local>
Reviewed-on: #56
191 lines
6.1 KiB
PHP
191 lines
6.1 KiB
PHP
<?php
|
|
|
|
namespace App\Jobs;
|
|
|
|
use App\Models\RestoreRun;
|
|
use App\Models\User;
|
|
use App\Notifications\RunStatusChangedNotification;
|
|
use App\Services\BulkOperationService;
|
|
use App\Services\Intune\AuditLogger;
|
|
use App\Services\Intune\RestoreService;
|
|
use App\Support\RestoreRunStatus;
|
|
use Carbon\CarbonImmutable;
|
|
use Illuminate\Bus\Queueable;
|
|
use Illuminate\Contracts\Queue\ShouldQueue;
|
|
use Illuminate\Foundation\Bus\Dispatchable;
|
|
use Illuminate\Queue\InteractsWithQueue;
|
|
use Illuminate\Queue\SerializesModels;
|
|
use Throwable;
|
|
|
|
/**
 * Queued job that executes a single restore run.
 *
 * The job moves the run from Queued to Running, delegates the actual restore to
 * RestoreService::executeForRun(), notifies the requesting user on every status
 * change and writes `restore.started` / `restore.failed` audit entries.
 */
class ExecuteRestoreRunJob implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /**
     * @param  int  $restoreRunId  Primary key of the RestoreRun to execute.
     * @param  string|null  $actorEmail  Email recorded in audit entries; also the first choice when
     *                                   resolving the user to notify.
     * @param  string|null  $actorName  Display name recorded in audit entries.
     */
    public function __construct(
        public int $restoreRunId,
        public ?string $actorEmail = null,
        public ?string $actorName = null,
    ) {}

    /**
     * Execute the restore run identified by $restoreRunId.
     *
     * Returns without doing anything when the run does not exist or is not in the
     * Queued state. Marks the run as Failed (and audits it when a tenant is known)
     * when the tenant or backup set is missing or the backup set is trashed.
     *
     * @throws Throwable Re-throws whatever RestoreService::executeForRun() throws, after the
     *                   failure has been persisted, notified and audited.
     */
    public function handle(RestoreService $restoreService, AuditLogger $auditLogger, BulkOperationService $bulkOperationService): void
    {
        $restoreRun = RestoreRun::with(['tenant', 'backupSet'])->find($this->restoreRunId);

        // Only runs that still exist and are still Queued are executed; a job delivered
        // again after the status has moved on exits here without side effects.
        if (! $restoreRun || $restoreRun->status !== RestoreRunStatus::Queued->value) {
            return;
        }

        $this->notifyStatus($restoreRun, 'queued');

        $tenant = $restoreRun->tenant;
        $backupSet = $restoreRun->backupSet;

        if (! $tenant || ! $backupSet || $backupSet->trashed()) {
            $reason = 'Backup set is archived or unavailable.';

            $restoreRun->update([
                'status' => RestoreRunStatus::Failed->value,
                'failure_reason' => $reason,
                'completed_at' => CarbonImmutable::now(),
            ]);

            $this->notifyStatus($restoreRun->refresh(), 'failed');

            // The audit logger needs a tenant; without one the failure is only
            // visible on the run record and in the notification.
            if ($tenant) {
                $auditLogger->log(
                    tenant: $tenant,
                    action: 'restore.failed',
                    context: [
                        'metadata' => [
                            'restore_run_id' => $restoreRun->id,
                            'backup_set_id' => $restoreRun->backup_set_id,
                            'reason' => $reason,
                        ],
                    ],
                    actorEmail: $this->actorEmail,
                    actorName: $this->actorName,
                    resourceType: 'restore_run',
                    resourceId: (string) $restoreRun->id,
                    status: 'failed',
                );
            }

            return;
        }

        $restoreRun->update([
            'status' => RestoreRunStatus::Running->value,
            'started_at' => CarbonImmutable::now(),
            'failure_reason' => null,
        ]);

        $this->notifyStatus($restoreRun->refresh(), 'running');

        $auditLogger->log(
            tenant: $tenant,
            action: 'restore.started',
            context: [
                'metadata' => [
                    'restore_run_id' => $restoreRun->id,
                    'backup_set_id' => $backupSet->id,
                ],
            ],
            actorEmail: $this->actorEmail,
            actorName: $this->actorName,
            resourceType: 'restore_run',
            resourceId: (string) $restoreRun->id,
            status: 'success',
        );

        // The try block covers the restore call only. The final notification is sent
        // after it, so a notification failure following a successful restore is never
        // recorded as a `restore.failed` audit entry.
        try {
            $restoreService->executeForRun(
                restoreRun: $restoreRun,
                tenant: $tenant,
                backupSet: $backupSet,
                actorEmail: $this->actorEmail,
                actorName: $this->actorName,
            );
        } catch (Throwable $throwable) {
            $restoreRun->refresh();

            // Never persist or audit the raw exception message.
            $safeReason = $bulkOperationService->sanitizeFailureReason($throwable->getMessage());

            // Only overwrite the status when the run is still Running.
            // NOTE(review): presumably executeForRun() may already have persisted its own
            // final status before throwing — confirm against RestoreService.
            if ($restoreRun->status === RestoreRunStatus::Running->value) {
                $restoreRun->update([
                    'status' => RestoreRunStatus::Failed->value,
                    'failure_reason' => $safeReason,
                    'completed_at' => CarbonImmutable::now(),
                ]);
            }

            $restoreRun->refresh();
            $this->notifyStatus($restoreRun, (string) $restoreRun->status);

            // $tenant is guaranteed to be set here by the guard above.
            $auditLogger->log(
                tenant: $tenant,
                action: 'restore.failed',
                context: [
                    'metadata' => [
                        'restore_run_id' => $restoreRun->id,
                        'backup_set_id' => $backupSet->id,
                        'reason' => $safeReason,
                    ],
                ],
                actorEmail: $this->actorEmail,
                actorName: $this->actorName,
                resourceType: 'restore_run',
                resourceId: (string) $restoreRun->id,
                status: 'failed',
            );

            // Propagate so the queue worker sees the job as failed.
            throw $throwable;
        }

        // Reload first so the notification carries the status written during execution.
        $restoreRun->refresh();
        $this->notifyStatus($restoreRun, (string) $restoreRun->status);
    }

    /**
     * Send a RunStatusChangedNotification for the given run to the user who requested it.
     *
     * The recipient is looked up by email: the job's actor email first, then the run's
     * `requested_by` value. Nothing is sent when no email is available or no user matches.
     *
     * @param  RestoreRun  $restoreRun  Run whose tenant id, key and metadata counts go into the payload.
     * @param  string  $status  Status label to report in the notification.
     */
    private function notifyStatus(RestoreRun $restoreRun, string $status): void
    {
        $email = $this->actorEmail;

        if (! is_string($email) || $email === '') {
            $email = is_string($restoreRun->requested_by) ? $restoreRun->requested_by : null;
        }

        if (! is_string($email) || $email === '') {
            return;
        }

        $user = User::query()->where('email', $email)->first();

        if (! $user) {
            return;
        }

        $metadata = is_array($restoreRun->metadata) ? $restoreRun->metadata : [];
        $counts = [];

        // Copy only the numeric progress counters that are present in the run metadata.
        foreach (['total', 'succeeded', 'failed', 'skipped'] as $key) {
            if (array_key_exists($key, $metadata) && is_numeric($metadata[$key])) {
                $counts[$key] = (int) $metadata[$key];
            }
        }

        $payload = [
            'tenant_id' => (int) $restoreRun->tenant_id,
            'run_type' => 'restore',
            'run_id' => (int) $restoreRun->getKey(),
            'status' => $status,
        ];

        // Omit the counts key entirely when no counters are available.
        if ($counts !== []) {
            $payload['counts'] = $counts;
        }

        $user->notify(new RunStatusChangedNotification($payload));
    }
}
|