TenantAtlas/app/Jobs/CaptureBaselineSnapshotJob.php

576 lines
21 KiB
PHP

<?php
namespace App\Jobs;
use App\Jobs\Middleware\TrackOperationRun;
use App\Models\BaselineProfile;
use App\Models\BaselineSnapshot;
use App\Models\BaselineSnapshotItem;
use App\Models\InventoryItem;
use App\Models\OperationRun;
use App\Models\Tenant;
use App\Models\User;
use App\Services\Baselines\BaselineContentCapturePhase;
use App\Services\Baselines\BaselineSnapshotIdentity;
use App\Services\Baselines\CurrentStateHashResolver;
use App\Services\Baselines\Evidence\ResolvedEvidence;
use App\Services\Baselines\InventoryMetaContract;
use App\Services\Intune\AuditLogger;
use App\Services\OperationRunService;
use App\Support\Baselines\BaselineCaptureMode;
use App\Support\Baselines\BaselineFullContentRolloutGate;
use App\Support\Baselines\BaselineProfileStatus;
use App\Support\Baselines\BaselineScope;
use App\Support\Baselines\BaselineSubjectKey;
use App\Support\Baselines\PolicyVersionCapturePurpose;
use App\Support\OperationRunOutcome;
use App\Support\OperationRunStatus;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use RuntimeException;
class CaptureBaselineSnapshotJob implements ShouldQueue
{
use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;
/**
 * Operation run this job works against. The constructor sets it to the same
 * instance as the promoted $run property; it is declared nullable, which is
 * why handle() re-checks it before use.
 * NOTE(review): presumably the TrackOperationRun middleware also reads this
 * property - confirm against the middleware.
 */
public ?OperationRun $operationRun = null;
/**
 * @param OperationRun $run Run record whose context is read by handle()
 *                          (baseline_profile_id, source_tenant_id, effective_scope, baseline_capture).
 */
public function __construct(
public OperationRun $run,
) {
// Expose the run under $operationRun as well; handle() and the audit helpers read it from there.
$this->operationRun = $run;
}
/**
 * Queue middleware stack for this job: a single TrackOperationRun instance.
 *
 * @return array<int, object>
 */
public function middleware(): array
{
    $stack = [];
    $stack[] = new TrackOperationRun();

    return $stack;
}
/**
 * Capture a baseline snapshot for the profile and source tenant referenced by the run context.
 *
 * Flow: resolve profile, tenant and initiator from the run context; collect the in-scope
 * inventory subjects; in full-content mode run the content capture phase; resolve evidence
 * hashes; build the snapshot items and persist (or reuse) the snapshot; finally update the
 * operation run (status, outcome, counts, context) and write the completion audit entry.
 *
 * $metaContract is injected but not referenced in this method. The three nullable
 * collaborators are resolved from the container when they are not supplied.
 *
 * @throws RuntimeException When the baseline profile or the source tenant cannot be found.
 */
public function handle(
    BaselineSnapshotIdentity $identity,
    InventoryMetaContract $metaContract,
    AuditLogger $auditLogger,
    OperationRunService $operationRunService,
    ?CurrentStateHashResolver $hashResolver = null,
    ?BaselineContentCapturePhase $contentCapturePhase = null,
    ?BaselineFullContentRolloutGate $rolloutGate = null,
): void {
    if ($hashResolver === null) {
        $hashResolver = app(CurrentStateHashResolver::class);
    }

    if ($contentCapturePhase === null) {
        $contentCapturePhase = app(BaselineContentCapturePhase::class);
    }

    if ($rolloutGate === null) {
        $rolloutGate = app(BaselineFullContentRolloutGate::class);
    }

    // The property is nullable by declaration, so guard before dereferencing it.
    if (! ($this->operationRun instanceof OperationRun)) {
        $this->fail(new RuntimeException('OperationRun context is required for CaptureBaselineSnapshotJob.'));

        return;
    }

    $runContext = $this->operationRun->context;
    if (! is_array($runContext)) {
        $runContext = [];
    }

    $profileId = (int) ($runContext['baseline_profile_id'] ?? 0);
    $sourceTenantId = (int) ($runContext['source_tenant_id'] ?? 0);

    $profile = BaselineProfile::query()->find($profileId);
    if (! ($profile instanceof BaselineProfile)) {
        throw new RuntimeException("BaselineProfile #{$profileId} not found.");
    }

    $sourceTenant = Tenant::query()->find($sourceTenantId);
    if (! ($sourceTenant instanceof Tenant)) {
        throw new RuntimeException("Source Tenant #{$sourceTenantId} not found.");
    }

    // The initiator is optional: runs without a user_id are audited without an actor.
    $initiator = null;
    if ($this->operationRun->user_id) {
        $initiator = User::query()->find($this->operationRun->user_id);
    }

    $effectiveScope = BaselineScope::fromJsonb($runContext['effective_scope'] ?? null);

    // Anything other than a BaselineCaptureMode value on the profile falls back to Opportunistic.
    $captureMode = BaselineCaptureMode::Opportunistic;
    if ($profile->capture_mode instanceof BaselineCaptureMode) {
        $captureMode = $profile->capture_mode;
    }

    $isFullContent = $captureMode === BaselineCaptureMode::FullContent;
    if ($isFullContent) {
        $rolloutGate->assertEnabled();
    }

    [
        'subjects' => $subjects,
        'inventory_by_key' => $inventoryByKey,
        'subjects_total' => $subjectsTotal,
        'gaps' => $captureGaps,
    ] = $this->collectInventorySubjects($sourceTenant, $effectiveScope);

    $this->auditStarted(
        auditLogger: $auditLogger,
        tenant: $sourceTenant,
        profile: $profile,
        initiator: $initiator,
        captureMode: $captureMode,
        subjectsTotal: $subjectsTotal,
        effectiveScope: $effectiveScope,
    );

    // Defaults reported when no content capture phase runs (or it returns no usable stats).
    $phaseStats = [
        'requested' => 0,
        'succeeded' => 0,
        'skipped' => 0,
        'failed' => 0,
        'throttled' => 0,
    ];
    $phaseGaps = [];
    $resumeToken = null;

    if ($isFullContent) {
        $budgets = [
            'max_items_per_run' => (int) config('tenantpilot.baselines.full_content_capture.max_items_per_run', 200),
            'max_concurrency' => (int) config('tenantpilot.baselines.full_content_capture.max_concurrency', 5),
            'max_retries' => (int) config('tenantpilot.baselines.full_content_capture.max_retries', 3),
        ];

        // A resume token stored in the run context is only forwarded when it is a string.
        $previousCapture = $runContext['baseline_capture'] ?? null;
        $resumeTokenIn = is_array($previousCapture) ? ($previousCapture['resume_token'] ?? null) : null;
        if (! is_string($resumeTokenIn)) {
            $resumeTokenIn = null;
        }

        $phaseResult = $contentCapturePhase->capture(
            tenant: $sourceTenant,
            subjects: $subjects,
            purpose: PolicyVersionCapturePurpose::BaselineCapture,
            budgets: $budgets,
            resumeToken: $resumeTokenIn,
            operationRunId: (int) $this->operationRun->getKey(),
            baselineProfileId: (int) $profile->getKey(),
            createdBy: $initiator?->email,
        );

        if (is_array($phaseResult['stats'] ?? null)) {
            $phaseStats = $phaseResult['stats'];
        }

        if (is_array($phaseResult['gaps'] ?? null)) {
            $phaseGaps = $phaseResult['gaps'];
        }

        if (is_string($phaseResult['resume_token'] ?? null)) {
            $resumeToken = $phaseResult['resume_token'];
        }
    }

    $resolvedEvidence = $hashResolver->resolveForSubjects(
        tenant: $sourceTenant,
        subjects: $subjects,
        since: null,
        latestInventorySyncRunId: null,
    );

    // $captureGaps is passed by reference and gains evidence-related gap counts here.
    $snapshotItems = $this->buildSnapshotItems(
        inventoryByKey: $inventoryByKey,
        resolvedEvidence: $resolvedEvidence,
        captureMode: $captureMode,
        gaps: $captureGaps,
    );

    $items = $snapshotItems['items'] ?? [];
    $itemsCount = $snapshotItems['items_count'];
    $fidelityCounts = $snapshotItems['fidelity_counts'] ?? ['content' => 0, 'meta' => 0];

    $identityHash = $identity->computeIdentity($items);

    $gapsByReason = $this->mergeGapCounts($captureGaps, $phaseGaps);
    $gapsCount = array_sum($gapsByReason);
    $gapsSummary = [
        'count' => $gapsCount,
        'by_reason' => $gapsByReason,
    ];

    $snapshotSummary = [
        'total_items' => count($items),
        'policy_type_counts' => $this->countByPolicyType($items),
        'fidelity_counts' => $fidelityCounts,
        'gaps' => $gapsSummary,
    ];

    $snapshot = $this->findOrCreateSnapshot(
        $profile,
        $identityHash,
        $items,
        $snapshotSummary,
    );
    $wasNewSnapshot = $snapshot->wasRecentlyCreated;

    // Only active profiles are repointed at the captured snapshot.
    if ($profile->status === BaselineProfileStatus::Active) {
        $profile->update(['active_snapshot_id' => $snapshot->getKey()]);
    }

    // Any gap, a pending resume token, or meta-only items in full-content mode downgrade the outcome.
    $hasMetaFallback = $isFullContent && ($fidelityCounts['meta'] ?? 0) > 0;
    $warningsRecorded = $gapsByReason !== [] || $resumeToken !== null || $hasMetaFallback;

    $outcome = OperationRunOutcome::Succeeded->value;
    if ($warningsRecorded) {
        $outcome = OperationRunOutcome::PartiallySucceeded->value;
    }

    $summaryCounts = [
        'total' => $subjectsTotal,
        'processed' => $subjectsTotal,
        'succeeded' => $itemsCount,
        'failed' => max(0, $subjectsTotal - $itemsCount),
    ];

    $operationRunService->updateRun(
        $this->operationRun,
        status: OperationRunStatus::Completed->value,
        outcome: $outcome,
        summaryCounts: $summaryCounts,
    );

    // Re-read the context after updateRun() and merge the capture details into it.
    $updatedContext = $this->operationRun->context;
    if (! is_array($updatedContext)) {
        $updatedContext = [];
    }

    $existingCapture = $updatedContext['baseline_capture'] ?? null;
    $updatedContext['baseline_capture'] = array_merge(
        is_array($existingCapture) ? $existingCapture : [],
        [
            'subjects_total' => $subjectsTotal,
            'evidence_capture' => $phaseStats,
            'gaps' => $gapsSummary,
            'resume_token' => $resumeToken,
        ],
    );
    $updatedContext['result'] = [
        'snapshot_id' => (int) $snapshot->getKey(),
        'snapshot_identity_hash' => $identityHash,
        'was_new_snapshot' => $wasNewSnapshot,
        'items_captured' => $itemsCount,
    ];

    $this->operationRun->update(['context' => $updatedContext]);

    $this->auditCompleted(
        auditLogger: $auditLogger,
        tenant: $sourceTenant,
        profile: $profile,
        snapshot: $snapshot,
        initiator: $initiator,
        captureMode: $captureMode,
        subjectsTotal: $subjectsTotal,
        wasNewSnapshot: $wasNewSnapshot,
        evidenceCaptureStats: $phaseStats,
        gaps: $gapsSummary,
    );
}
/**
 * Collect the source tenant's inventory items whose policy type is in scope.
 *
 * Rows are read in chunks of 500, ordered by policy type and external id. Every row
 * counts towards subjects_total; rows for which no subject key can be derived from the
 * display name are tallied under the "missing_subject_key" gap and left out of both
 * the subject list and the lookup map. The map is keyed by "<policy_type>|<external_id>".
 *
 * @return array{
 *     subjects_total: int,
 *     subjects: list<array{policy_type: string, subject_external_id: string}>,
 *     inventory_by_key: array<string, array{
 *         tenant_subject_external_id: string,
 *         workspace_subject_external_id: string,
 *         subject_key: string,
 *         policy_type: string,
 *         display_name: ?string,
 *         category: ?string,
 *         platform: ?string
 *     }>,
 *     gaps: array<string, int>
 * }
 */
private function collectInventorySubjects(
    Tenant $sourceTenant,
    BaselineScope $scope,
): array {
    /** @var array<string, array{tenant_subject_external_id: string, workspace_subject_external_id: string, subject_key: string, policy_type: string, display_name: ?string, category: ?string, platform: ?string}> $inventoryByKey */
    $inventoryByKey = [];
    $subjectsTotal = 0;
    /** @var array<string, int> $gaps */
    $gaps = [];

    InventoryItem::query()
        ->where('tenant_id', $sourceTenant->getKey())
        ->whereIn('policy_type', $scope->allTypes())
        ->orderBy('policy_type')
        ->orderBy('external_id')
        ->chunk(500, static function ($inventoryItems) use (&$inventoryByKey, &$subjectsTotal, &$gaps): void {
            foreach ($inventoryItems as $inventoryItem) {
                // Counted before any filtering so the total reflects every in-scope row.
                $subjectsTotal++;

                $displayName = is_string($inventoryItem->display_name) ? $inventoryItem->display_name : null;
                $subjectKey = BaselineSubjectKey::fromDisplayName($displayName);

                if ($subjectKey === null) {
                    $gaps['missing_subject_key'] = ($gaps['missing_subject_key'] ?? 0) + 1;

                    continue;
                }

                $policyType = (string) $inventoryItem->policy_type;
                $externalId = (string) $inventoryItem->external_id;

                $inventoryByKey[$policyType.'|'.$externalId] = [
                    'tenant_subject_external_id' => $externalId,
                    'workspace_subject_external_id' => BaselineSubjectKey::workspaceSafeSubjectExternalId(
                        policyType: $policyType,
                        subjectKey: $subjectKey,
                    ),
                    'subject_key' => $subjectKey,
                    'policy_type' => $policyType,
                    'display_name' => $displayName,
                    'category' => is_string($inventoryItem->category) ? $inventoryItem->category : null,
                    'platform' => is_string($inventoryItem->platform) ? $inventoryItem->platform : null,
                ];
            }
        });

    ksort($gaps);

    // Subjects carry the tenant-side external id, in the same order as the lookup map.
    $subjects = array_values(array_map(
        static fn (array $item): array => [
            'policy_type' => (string) $item['policy_type'],
            'subject_external_id' => (string) $item['tenant_subject_external_id'],
        ],
        $inventoryByKey,
    ));

    return [
        'subjects_total' => $subjectsTotal,
        'subjects' => $subjects,
        'inventory_by_key' => $inventoryByKey,
        'gaps' => $gaps,
    ];
}
/**
 * Turn inventory entries plus their resolved evidence into snapshot item rows.
 *
 * Entries without a ResolvedEvidence instance are skipped and counted under the
 * "missing_evidence" gap. In full-content mode, every item whose evidence fidelity is
 * not "content" is additionally counted under "meta_fallback". The
 * "observed_operation_run_id" provenance field is dropped before the provenance is
 * stored on the item.
 *
 * @param array<string, array{
 *     tenant_subject_external_id: string,
 *     workspace_subject_external_id: string,
 *     subject_key: string,
 *     policy_type: string,
 *     display_name: ?string,
 *     category: ?string,
 *     platform: ?string,
 * }> $inventoryByKey
 * @param array<string, ResolvedEvidence|null> $resolvedEvidence
 * @param array<string, int> $gaps Gap counters, updated in place (passed by reference).
 * @return array{
 *     items: array<int, array{
 *         subject_type: string,
 *         subject_external_id: string,
 *         subject_key: string,
 *         policy_type: string,
 *         baseline_hash: string,
 *         meta_jsonb: array<string, mixed>
 *     }>,
 *     items_count: int,
 *     fidelity_counts: array{content: int, meta: int}
 * }
 */
private function buildSnapshotItems(
    array $inventoryByKey,
    array $resolvedEvidence,
    BaselineCaptureMode $captureMode,
    array &$gaps,
): array {
    $rows = [];
    $contentCount = 0;
    $metaCount = 0;
    $requiresContent = $captureMode === BaselineCaptureMode::FullContent;

    foreach ($inventoryByKey as $lookupKey => $entry) {
        $evidence = $resolvedEvidence[$lookupKey] ?? null;

        if (! ($evidence instanceof ResolvedEvidence)) {
            $gaps['missing_evidence'] = ($gaps['missing_evidence'] ?? 0) + 1;

            continue;
        }

        $provenance = $evidence->provenance();
        unset($provenance['observed_operation_run_id']);

        // A missing fidelity value is treated as "meta"; anything but "content" lands in the meta bucket.
        $isContent = (string) ($provenance['fidelity'] ?? 'meta') === 'content';

        if ($isContent) {
            $contentCount++;
        } else {
            $metaCount++;

            if ($requiresContent) {
                $gaps['meta_fallback'] = ($gaps['meta_fallback'] ?? 0) + 1;
            }
        }

        $metaJsonb = [
            'display_name' => $entry['display_name'],
            'category' => $entry['category'],
            'platform' => $entry['platform'],
            'evidence' => $provenance,
        ];

        $rows[] = [
            'subject_type' => 'policy',
            'subject_external_id' => (string) $entry['workspace_subject_external_id'],
            'subject_key' => (string) $entry['subject_key'],
            'policy_type' => (string) $entry['policy_type'],
            'baseline_hash' => $evidence->hash,
            'meta_jsonb' => $metaJsonb,
        ];
    }

    return [
        'items' => $rows,
        'items_count' => count($rows),
        'fidelity_counts' => [
            'content' => $contentCount,
            'meta' => $metaCount,
        ],
    ];
}
/**
 * Return the snapshot matching the identity hash for this profile, creating it if needed.
 *
 * An existing snapshot (same workspace, profile and identity hash) is returned untouched.
 * Otherwise the snapshot row and all of its item rows are written inside one database
 * transaction, so a failure while inserting items cannot leave behind a partial snapshot
 * that later runs would find by hash and reuse as if it were complete.
 *
 * @param array<int, array{
 *     subject_type: string,
 *     subject_external_id: string,
 *     subject_key: string,
 *     policy_type: string,
 *     baseline_hash: string,
 *     meta_jsonb: array<string, mixed>
 * }> $snapshotItems
 * @param array<string, mixed> $summaryJsonb
 *
 * @throws \JsonException When an item's meta_jsonb cannot be encoded (the transaction is rolled back).
 */
private function findOrCreateSnapshot(
    BaselineProfile $profile,
    string $identityHash,
    array $snapshotItems,
    array $summaryJsonb,
): BaselineSnapshot {
    $existing = BaselineSnapshot::query()
        ->where('workspace_id', $profile->workspace_id)
        ->where('baseline_profile_id', $profile->getKey())
        ->where('snapshot_identity_hash', $identityHash)
        ->first();

    if ($existing instanceof BaselineSnapshot) {
        return $existing;
    }

    // Use the snapshot model's own connection so the transaction covers the rows written below.
    $connection = (new BaselineSnapshot())->getConnection();

    return $connection->transaction(
        static function () use ($profile, $identityHash, $snapshotItems, $summaryJsonb): BaselineSnapshot {
            $snapshot = BaselineSnapshot::create([
                'workspace_id' => (int) $profile->workspace_id,
                'baseline_profile_id' => (int) $profile->getKey(),
                'snapshot_identity_hash' => $identityHash,
                'captured_at' => now(),
                'summary_jsonb' => $summaryJsonb,
            ]);

            $snapshotId = (int) $snapshot->getKey();
            // One timestamp for every row of this snapshot instead of one per item.
            $timestamp = now();

            // Bulk insert in chunks of 100 rows; insert() bypasses model casts, hence the manual JSON encoding.
            foreach (array_chunk($snapshotItems, 100) as $chunk) {
                $rows = array_map(
                    static fn (array $item): array => [
                        'baseline_snapshot_id' => $snapshotId,
                        'subject_type' => $item['subject_type'],
                        'subject_external_id' => $item['subject_external_id'],
                        'subject_key' => $item['subject_key'],
                        'policy_type' => $item['policy_type'],
                        'baseline_hash' => $item['baseline_hash'],
                        // Fail loudly instead of storing `false` when encoding fails.
                        'meta_jsonb' => json_encode($item['meta_jsonb'], JSON_THROW_ON_ERROR),
                        'created_at' => $timestamp,
                        'updated_at' => $timestamp,
                    ],
                    $chunk,
                );

                BaselineSnapshotItem::insert($rows);
            }

            return $snapshot;
        },
    );
}
/**
 * Tally snapshot items per policy type, with the result sorted by policy type.
 *
 * @param array<int, array{policy_type: string}> $items
 * @return array<string, int>
 */
private function countByPolicyType(array $items): array
{
    $policyTypes = array_map(
        static fn ($item): string => (string) $item['policy_type'],
        $items,
    );

    $tally = array_count_values($policyTypes);
    ksort($tally);

    return $tally;
}
/**
 * Write the "baseline.capture.started" audit entry, attached to the baseline profile.
 *
 * The initiator is optional; actor fields are null when no user is known.
 */
private function auditStarted(
    AuditLogger $auditLogger,
    Tenant $tenant,
    BaselineProfile $profile,
    ?User $initiator,
    BaselineCaptureMode $captureMode,
    int $subjectsTotal,
    BaselineScope $effectiveScope,
): void {
    $profileKey = $profile->getKey();

    $metadata = [
        'operation_run_id' => (int) $this->operationRun->getKey(),
        'baseline_profile_id' => (int) $profileKey,
        'baseline_profile_name' => (string) $profile->name,
        'purpose' => PolicyVersionCapturePurpose::BaselineCapture->value,
        'capture_mode' => $captureMode->value,
        'scope_types_total' => count($effectiveScope->allTypes()),
        'subjects_total' => $subjectsTotal,
    ];

    $auditLogger->log(
        tenant: $tenant,
        action: 'baseline.capture.started',
        context: ['metadata' => $metadata],
        actorId: $initiator?->id,
        actorEmail: $initiator?->email,
        actorName: $initiator?->name,
        resourceType: 'baseline_profile',
        resourceId: (string) $profileKey,
    );
}
/**
 * Write the "baseline.capture.completed" audit entry, attached to the operation run.
 *
 * @param array<string, mixed> $evidenceCaptureStats Stats recorded under "evidence_capture".
 * @param array<string, mixed> $gaps                 Gap summary recorded under "gaps".
 */
private function auditCompleted(
    AuditLogger $auditLogger,
    Tenant $tenant,
    BaselineProfile $profile,
    BaselineSnapshot $snapshot,
    ?User $initiator,
    BaselineCaptureMode $captureMode,
    int $subjectsTotal,
    bool $wasNewSnapshot,
    array $evidenceCaptureStats,
    array $gaps,
): void {
    $runKey = $this->operationRun->getKey();

    $metadata = [
        'operation_run_id' => (int) $runKey,
        'baseline_profile_id' => (int) $profile->getKey(),
        'baseline_profile_name' => (string) $profile->name,
        'purpose' => PolicyVersionCapturePurpose::BaselineCapture->value,
        'capture_mode' => $captureMode->value,
        'subjects_total' => $subjectsTotal,
        'snapshot_id' => (int) $snapshot->getKey(),
        'snapshot_identity_hash' => (string) $snapshot->snapshot_identity_hash,
        'was_new_snapshot' => $wasNewSnapshot,
        'evidence_capture' => $evidenceCaptureStats,
        'gaps' => $gaps,
    ];

    $auditLogger->log(
        tenant: $tenant,
        action: 'baseline.capture.completed',
        context: ['metadata' => $metadata],
        actorId: $initiator?->id,
        actorEmail: $initiator?->email,
        actorName: $initiator?->name,
        resourceType: 'operation_run',
        resourceId: (string) $runKey,
    );
}
/**
 * Sum gap counters from any number of reason => count maps.
 *
 * Entries whose key is not a non-empty string are ignored; the result is sorted by reason.
 *
 * @param array<string, int> ...$gaps
 * @return array<string, int>
 */
private function mergeGapCounts(array ...$gaps): array
{
    $totals = [];

    foreach ($gaps as $countsByReason) {
        // Keep only entries keyed by a non-empty string reason.
        $named = array_filter(
            $countsByReason,
            static fn ($reason): bool => is_string($reason) && $reason !== '',
            ARRAY_FILTER_USE_KEY,
        );

        foreach ($named as $reason => $count) {
            $totals[$reason] = ($totals[$reason] ?? 0) + (int) $count;
        }
    }

    ksort($totals);

    return $totals;
}
}