Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 28 additions & 8 deletions app/Domain/Activity/Queries/RecentActivityForUserQuery.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use App\Domain\Activity\ActivityEventPresenter;
use App\Models\ActivityEvent;
use App\Models\User;
use App\Models\Website;

/**
* Read-side query for the activity feed shown in the AppLayout right rail
Expand Down Expand Up @@ -45,16 +46,35 @@ class RecentActivityForUserQuery
*/
public function handle(User $user, int $limit = self::RAIL_LIMIT): array
{
// TODO(future): broaden the predicate when system-emitted events
// (deployments, websites, hosts) start landing without a repository.
// Today every spec-017 webhook event carries a repository_id, so
// the EXISTS subquery against repositories→projects is watertight
// and rows with repository_id IS NULL are filtered out for every
// user — they don't leak across users, but they also don't show.
// Two scoping paths land in the same feed:
// 1. Repository-scoped events (spec 017's webhook handlers
// and deployments — `repository_id` resolves through the
// project's owner).
// 2. Monitoring-scoped events (spec 024 — `source: monitoring`,
// `metadata.website_id` resolves through the website's
// project's owner). These rows have `repository_id` null.
//
// The user's website ids are pre-resolved into a list once so
// the JSON predicate stays cheap (no JSON join per row); cross-
// DB JSON-extract syntax is the same shape on MySQL and SQLite.
$userWebsiteIds = Website::query()
->whereHas('project', fn ($q) => $q->where('owner_user_id', $user->id))
->pluck('id')
->all();

return ActivityEvent::query()
->with('repository:id,full_name')
->whereHas('repository.project', function ($q) use ($user) {
$q->where('owner_user_id', $user->id);
->where(function ($q) use ($user, $userWebsiteIds) {
$q->whereHas('repository.project', function ($inner) use ($user) {
$inner->where('owner_user_id', $user->id);
});

if (! empty($userWebsiteIds)) {
$q->orWhere(function ($inner) use ($userWebsiteIds) {
$inner->where('source', 'monitoring')
->whereIn('metadata->website_id', $userWebsiteIds);
});
}
})
->orderByDesc('occurred_at')
->orderByDesc('id')
Expand Down
149 changes: 143 additions & 6 deletions app/Domain/Monitoring/Actions/RecordWebsiteCheckAction.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,14 @@

namespace App\Domain\Monitoring\Actions;

use App\Domain\Activity\Actions\CreateActivityEventAction;
use App\Domain\Monitoring\Probes\WebsiteProbeResult;
use App\Enums\ActivitySeverity;
use App\Enums\WebsiteCheckStatus;
use App\Enums\WebsiteStatus;
use App\Models\Website;
use App\Models\WebsiteCheck;
use Illuminate\Support\Carbon;

/**
* Persistence half of the probe pipeline. Given a `Website` + a
Expand All @@ -19,18 +22,31 @@
* bad" without scanning the checks table.
*
* Returns the persisted `WebsiteCheck` so callers (manual probe
* controller, future scheduler job) can flash it back to the user
* without an extra round-trip.
* controller, scheduler-driven `RunWebsiteCheckJob`) can flash it
* back to the user without an extra round-trip.
*
* Activity-event creation on status transitions deliberately lives
* in spec 024 — that's where status transitions are interesting,
* since manual probes are user-triggered and don't need a separate
* notification surface.
* Spec 024: extended to emit `ActivityEvent`s on healthy↔failed
* **category transitions only** — steady-state runs (Up→Up, Down→Down)
* stay silent so the activity feed isn't flooded.
*
* `CreateActivityEventAction` (spec 017) dispatches
* `ActivityEventCreated` (spec 019); spec 024 extended that broadcaster
* to resolve a recipient channel for monitoring-source events via
* `metadata.website_id → website → project → owner_user_id` (since
* monitoring rows have `repository_id = null` and would otherwise
* silently fail to broadcast). Realtime fan-out reaches the right rail.
*/
class RecordWebsiteCheckAction
{
public function __construct(
private readonly CreateActivityEventAction $createActivity,
) {}

public function execute(Website $website, WebsiteProbeResult $result): WebsiteCheck
{
// Capture BEFORE the update so we can detect category swings.
$previousStatus = $website->status;

$checkedAt = now();

$check = WebsiteCheck::query()->create([
Expand All @@ -55,9 +71,130 @@ public function execute(Website $website, WebsiteProbeResult $result): WebsiteCh

$website->forceFill($updates)->save();

$this->maybeEmitTransitionActivity($website, $previousStatus, $result, $checkedAt);

return $check;
}

/**
* Category transition detector. Three buckets:
* - Healthy = Up | Slow
* - Failed = Down | Error
* - Pending = first-ever probe state (initial seed)
*
* Emits an activity event on:
* - Healthy → Failed (incident — `website.down`, danger)
* - Pending → Failed (incident on first probe — same shape)
* - Failed → Healthy (recovery — `website.up`, success)
*
* Steady-state (Healthy → Healthy, Failed → Failed) and the silent
* Pending → Healthy first-probe-success path emit nothing — keeps
* the activity feed signal-dense.
*/
private function maybeEmitTransitionActivity(
    Website $website,
    ?WebsiteStatus $previousStatus,
    WebsiteProbeResult $result,
    Carbon $checkedAt,
): void {
    $from = $this->categoryFor($previousStatus);
    $to = $this->categoryForCheckStatus($result->status);

    // Nothing to report when the bucket did not change, or when the
    // very first probe of a pending site simply came back healthy.
    $isSilent = $from === $to
        || ($from === 'pending' && $to === 'healthy');

    if ($isSilent) {
        return;
    }

    if ($to === 'failed') {
        // Incident: reached from either the healthy or the pending bucket.
        $payload = [
            'event_type' => 'website.down',
            'severity' => ActivitySeverity::Danger,
            'title' => "{$website->name} went down",
            'description' => $this->failureDescription($result),
            'occurred_at' => $checkedAt,
            'source' => 'monitoring',
            'metadata' => [
                'website_id' => $website->id,
                'url' => $website->url,
                'http_status_code' => $result->httpStatusCode,
                'error_message' => $result->errorMessage,
            ],
        ];
    } else {
        // Recovery: the only transition left is failed → healthy.
        $recoveryNote = $result->responseTimeMs === null
            ? 'Up'
            : "Up in {$result->responseTimeMs}ms";

        $payload = [
            'event_type' => 'website.up',
            'severity' => ActivitySeverity::Success,
            'title' => "{$website->name} recovered",
            'description' => $recoveryNote,
            'occurred_at' => $checkedAt,
            'source' => 'monitoring',
            'metadata' => [
                'website_id' => $website->id,
                'url' => $website->url,
                'http_status_code' => $result->httpStatusCode,
                'response_time_ms' => $result->responseTimeMs,
            ],
        ];
    }

    $this->createActivity->execute($payload);
}

/**
* Bucket a parent `WebsiteStatus` into healthy / failed / pending.
* Null is treated as `pending` — defensive against a brand-new
* row that bypassed the factory's default.
*
* @return 'healthy'|'failed'|'pending'
*/
private function categoryFor(?WebsiteStatus $status): string
{
    // A missing status and the explicit Pending case share one bucket.
    if ($status === null || $status === WebsiteStatus::Pending) {
        return 'pending';
    }

    // Kept as an exhaustive match so an unmapped case still fails loudly.
    return match ($status) {
        WebsiteStatus::Down, WebsiteStatus::Error => 'failed',
        WebsiteStatus::Up, WebsiteStatus::Slow => 'healthy',
    };
}

/**
* Bucket a `WebsiteCheckStatus` (the freshly-probed result) into
* the same healthy / failed buckets. There's no `pending` here —
* a recorded check always reflects an actual probe.
*
* @return 'healthy'|'failed'
*/
private function categoryForCheckStatus(WebsiteCheckStatus $status): string
{
    // Exhaustive on purpose: an unmapped case raises UnhandledMatchError
    // instead of silently falling into a bucket.
    $bucket = match ($status) {
        WebsiteCheckStatus::Down,
        WebsiteCheckStatus::Error => 'failed',
        WebsiteCheckStatus::Up,
        WebsiteCheckStatus::Slow => 'healthy',
    };

    return $bucket;
}

/**
* Human-readable failure context for the activity event description.
* Prefer the captured error message (HTTP-layer body preview or
* transport error) over a bare HTTP status.
*/
private function failureDescription(WebsiteProbeResult $result): string
{
    $message = $result->errorMessage;
    $statusCode = $result->httpStatusCode;

    // First matching arm wins: captured message, then bare HTTP status,
    // then a generic fallback when the probe gave us neither.
    return match (true) {
        $message !== null && $message !== '' => $message,
        $statusCode !== null => "HTTP {$statusCode}",
        default => 'Probe failed',
    };
}

/**
* `WebsiteCheckStatus` and `WebsiteStatus` differ only by the
* `pending` value (parent only). Map 1:1 by name.
Expand Down
68 changes: 68 additions & 0 deletions app/Domain/Monitoring/Jobs/DispatchDueWebsiteChecksJob.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
<?php

namespace App\Domain\Monitoring\Jobs;

use App\Models\Website;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;

/**
 * Scheduler-bound dispatcher (spec 024). Bound in `routes/console.php`
 * via `Schedule::job(...)->everyMinute()->withoutOverlapping()`.
 *
 * Streams `Website` rows oldest-checked first (never-checked rows at
 * the head), filters to "due now" in PHP (the predicate
 * `last_checked_at + check_interval_seconds <= now()` is cross-DB
 * awkward to express in raw SQL), and dispatches a per-website
 * `RunWebsiteCheckJob` for each due row. The probe HTTP request happens
 * in the per-website job — the dispatcher itself stays fast so a slow
 * site doesn't block the every-minute tick.
 *
 * Soft cap of 500 dispatched jobs per run keeps a runaway configuration
 * from amplifying into thousands of queued jobs in a single tick. The
 * cap is applied AFTER the due filter: capping the raw query first
 * would let long-interval rows that are old but not yet due fill the
 * window and starve due rows with shorter intervals, because ordering
 * by `last_checked_at` alone does not account for each row's interval.
 * Ordering oldest-first still guarantees the stalest due rows are
 * dispatched first when more than the cap are due.
 */
class DispatchDueWebsiteChecksJob implements ShouldQueue
{
    use Dispatchable;
    use InteractsWithQueue;
    use Queueable;
    use SerializesModels;

    /** Single attempt; the next every-minute tick is the retry path. */
    public int $tries = 1;

    /** Maximum number of per-website jobs dispatched in one tick. */
    private const SOFT_CAP = 500;

    /**
     * Dispatch a `RunWebsiteCheckJob` for up to SOFT_CAP due websites.
     */
    public function handle(): void
    {
        $now = now();

        Website::query()
            // Only the columns the due check and the dispatch need.
            ->select(['id', 'last_checked_at', 'check_interval_seconds'])
            // Never-checked rows always due → land at the head of the
            // queue. After that, oldest-stale first.
            ->orderByRaw('last_checked_at IS NULL DESC')
            ->orderBy('last_checked_at')
            // Lazy stream: rows are hydrated one at a time and iteration
            // stops as soon as the cap below is reached.
            ->cursor()
            ->filter(static fn (Website $website): bool => self::isDue($website, $now))
            ->take(self::SOFT_CAP)
            ->each(static fn (Website $website) => RunWebsiteCheckJob::dispatch($website->id));
    }

    /**
     * Whether the website's next check time has been reached.
     *
     * A row that has never been checked is always due; otherwise it is
     * due once `last_checked_at + check_interval_seconds` is at or
     * before `$now`.
     */
    private static function isDue(Website $website, \DateTimeInterface $now): bool
    {
        if ($website->last_checked_at === null) {
            return true;
        }

        return $website->last_checked_at
            ->copy()
            ->addSeconds($website->check_interval_seconds)
            ->lessThanOrEqualTo($now);
    }
}
53 changes: 53 additions & 0 deletions app/Domain/Monitoring/Jobs/RunWebsiteCheckJob.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
<?php

namespace App\Domain\Monitoring\Jobs;

use App\Domain\Monitoring\Actions\RecordWebsiteCheckAction;
use App\Domain\Monitoring\Actions\RunWebsiteProbeAction;
use App\Models\Website;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;

/**
* Per-website async probe wrapper (spec 024). Reuses the spec-023
* actions so the manual "Probe now" button and the scheduled run
* land on the same persistence path — no behavioural drift.
*
* Loads the row inside `handle()` rather than carrying the model
* through `SerializesModels`: the dispatcher could pick up a website
* that's then deleted before the worker dequeues, and a no-op return
* is cleaner than a `ModelNotFoundException` in the failure log.
*
* `tries = 1`. A failed probe (transport error / timeout) is *itself*
* a recorded outcome — `RecordWebsiteCheckAction` writes the Error
* row. Job-level retries would either double-record the same probe or
* mask transient failures the user wants to see.
*/
class RunWebsiteCheckJob implements ShouldQueue
{
    use Dispatchable;
    use InteractsWithQueue;
    use Queueable;
    use SerializesModels;

    /** One attempt only; a failed probe is recorded, not retried. */
    public int $tries = 1;

    /**
     * @param int $websiteId Primary key of the website to probe; the row
     *                       is looked up again when the job runs.
     */
    public function __construct(public readonly int $websiteId) {}

    /**
     * Probe the website and persist the outcome. Does nothing when the
     * row can no longer be found by id.
     */
    public function handle(
        RunWebsiteProbeAction $probe,
        RecordWebsiteCheckAction $record,
    ): void {
        $target = Website::query()->find($this->websiteId);

        if ($target !== null) {
            $record->execute($target, $probe->execute($target));
        }
    }
}
Loading
Loading