mirror of
https://github.com/bunny-lab-io/Borealis.git
synced 2025-10-26 22:01:59 -06:00
Add Engine scheduler service and job interfaces
This commit is contained in:
@@ -18,6 +18,7 @@ from .enrollment import (
|
||||
EnrollmentValidationError,
|
||||
PollingResult,
|
||||
)
|
||||
from .jobs.scheduler_service import SchedulerService
|
||||
from .realtime import AgentRealtimeService, AgentRecord
|
||||
|
||||
__all__ = [
|
||||
@@ -35,4 +36,5 @@ __all__ = [
|
||||
"PollingResult",
|
||||
"AgentRealtimeService",
|
||||
"AgentRecord",
|
||||
"SchedulerService",
|
||||
]
|
||||
|
||||
@@ -13,6 +13,7 @@ from Data.Engine.repositories.sqlite import (
|
||||
SQLiteConnectionFactory,
|
||||
SQLiteDeviceRepository,
|
||||
SQLiteEnrollmentRepository,
|
||||
SQLiteJobRepository,
|
||||
SQLiteRefreshTokenRepository,
|
||||
)
|
||||
from Data.Engine.services.auth import (
|
||||
@@ -25,6 +26,7 @@ from Data.Engine.services.auth import (
|
||||
from Data.Engine.services.crypto.signing import ScriptSigner, load_signer
|
||||
from Data.Engine.services.enrollment import EnrollmentService
|
||||
from Data.Engine.services.enrollment.nonce_cache import NonceCache
|
||||
from Data.Engine.services.jobs import SchedulerService
|
||||
from Data.Engine.services.rate_limit import SlidingWindowRateLimiter
|
||||
from Data.Engine.services.realtime import AgentRealtimeService
|
||||
|
||||
@@ -39,6 +41,7 @@ class EngineServiceContainer:
|
||||
jwt_service: JWTService
|
||||
dpop_validator: DPoPValidator
|
||||
agent_realtime: AgentRealtimeService
|
||||
scheduler_service: SchedulerService
|
||||
|
||||
|
||||
def build_service_container(
|
||||
@@ -52,6 +55,7 @@ def build_service_container(
|
||||
device_repo = SQLiteDeviceRepository(db_factory, logger=log.getChild("devices"))
|
||||
token_repo = SQLiteRefreshTokenRepository(db_factory, logger=log.getChild("tokens"))
|
||||
enrollment_repo = SQLiteEnrollmentRepository(db_factory, logger=log.getChild("enrollment"))
|
||||
job_repo = SQLiteJobRepository(db_factory, logger=log.getChild("jobs"))
|
||||
|
||||
jwt_service = load_jwt_service()
|
||||
dpop_validator = DPoPValidator()
|
||||
@@ -91,6 +95,12 @@ def build_service_container(
|
||||
logger=log.getChild("agent_realtime"),
|
||||
)
|
||||
|
||||
scheduler_service = SchedulerService(
|
||||
job_repository=job_repo,
|
||||
assemblies_root=settings.project_root / "Assemblies",
|
||||
logger=log.getChild("scheduler"),
|
||||
)
|
||||
|
||||
return EngineServiceContainer(
|
||||
device_auth=device_auth,
|
||||
token_service=token_service,
|
||||
@@ -98,6 +108,7 @@ def build_service_container(
|
||||
jwt_service=jwt_service,
|
||||
dpop_validator=dpop_validator,
|
||||
agent_realtime=agent_realtime,
|
||||
scheduler_service=scheduler_service,
|
||||
)
|
||||
|
||||
|
||||
|
||||
5
Data/Engine/services/jobs/__init__.py
Normal file
5
Data/Engine/services/jobs/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""Job-related services for the Borealis Engine."""
|
||||
|
||||
from .scheduler_service import SchedulerService
|
||||
|
||||
__all__ = ["SchedulerService"]
|
||||
373
Data/Engine/services/jobs/scheduler_service.py
Normal file
373
Data/Engine/services/jobs/scheduler_service.py
Normal file
@@ -0,0 +1,373 @@
|
||||
"""Background scheduler service for the Borealis Engine."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import calendar
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable, Mapping, Optional
|
||||
|
||||
from Data.Engine.builders.job_fabricator import JobFabricator, JobManifest
|
||||
from Data.Engine.repositories.sqlite.job_repository import (
|
||||
ScheduledJobRecord,
|
||||
ScheduledJobRunRecord,
|
||||
SQLiteJobRepository,
|
||||
)
|
||||
|
||||
__all__ = ["SchedulerService", "SchedulerRuntime"]
|
||||
|
||||
|
||||
def _now_ts() -> int:
|
||||
return int(time.time())
|
||||
|
||||
|
||||
def _floor_minute(ts: int) -> int:
|
||||
ts = int(ts or 0)
|
||||
return ts - (ts % 60)
|
||||
|
||||
|
||||
def _parse_ts(val: Any) -> Optional[int]:
|
||||
if val is None:
|
||||
return None
|
||||
if isinstance(val, (int, float)):
|
||||
return int(val)
|
||||
try:
|
||||
s = str(val).strip().replace("Z", "+00:00")
|
||||
return int(datetime.fromisoformat(s).timestamp())
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _parse_expiration(raw: Optional[str]) -> Optional[int]:
|
||||
if not raw or raw == "no_expire":
|
||||
return None
|
||||
try:
|
||||
s = raw.strip().lower()
|
||||
unit = s[-1]
|
||||
value = int(s[:-1])
|
||||
if unit == "m":
|
||||
return value * 60
|
||||
if unit == "h":
|
||||
return value * 3600
|
||||
if unit == "d":
|
||||
return value * 86400
|
||||
return int(s) * 60
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _add_months(dt: datetime, months: int) -> datetime:
|
||||
year = dt.year + (dt.month - 1 + months) // 12
|
||||
month = ((dt.month - 1 + months) % 12) + 1
|
||||
last_day = calendar.monthrange(year, month)[1]
|
||||
day = min(dt.day, last_day)
|
||||
return dt.replace(year=year, month=month, day=day)
|
||||
|
||||
|
||||
def _add_years(dt: datetime, years: int) -> datetime:
|
||||
year = dt.year + years
|
||||
month = dt.month
|
||||
last_day = calendar.monthrange(year, month)[1]
|
||||
day = min(dt.day, last_day)
|
||||
return dt.replace(year=year, month=month, day=day)
|
||||
|
||||
|
||||
def _compute_next_run(
    schedule_type: str,
    start_ts: Optional[int],
    last_run_ts: Optional[int],
    now_ts: int,
) -> Optional[int]:
    """Return the next due occurrence (epoch seconds, minute-aligned) or ``None``.

    Parameters:
        schedule_type: one of ``immediately``, ``once``, ``every_5_minutes``,
            ``every_10_minutes``, ``every_15_minutes``, ``every_30_minutes``,
            ``every_hour``, ``daily``, ``weekly``, ``monthly``, ``yearly``.
            Unknown values yield ``None``.
        start_ts: schedule anchor; required for everything but ``immediately``.
        last_run_ts: timestamp of the most recent run, if any.
        now_ts: the evaluation time.

    All timestamps are floored to the minute.  ``immediately`` fires once,
    as soon as possible; ``once`` fires only at its start time; recurring
    types advance from the last run (or the anchor) until the candidate is
    at or past the current minute.
    """
    from datetime import timezone  # function-scope import; module imports unchanged

    st = (schedule_type or "immediately").strip().lower()
    start_floor = _floor_minute(start_ts) if start_ts else None
    last_floor = _floor_minute(last_run_ts) if last_run_ts else None
    now_floor = _floor_minute(now_ts)

    if st == "immediately":
        # Fire exactly once: never again after a run has been recorded.
        return None if last_floor else now_floor
    if st == "once":
        if not start_floor:
            return None
        return start_floor if not last_floor else None
    if not start_floor:
        # Every recurring schedule needs an anchor time.
        return None

    last = last_floor if last_floor is not None else None

    # Fixed-period schedules share one advance loop (previously duplicated
    # across the minute/hour/daily/weekly branches).
    fixed_periods = {
        "every_5_minutes": 5 * 60,
        "every_10_minutes": 10 * 60,
        "every_15_minutes": 15 * 60,
        "every_30_minutes": 30 * 60,
        "every_hour": 60 * 60,
        "daily": 86400,
        "weekly": 7 * 86400,
    }
    period = fixed_periods.get(st)
    if period is not None:
        candidate = (last + period) if last else start_floor
        while candidate is not None and candidate <= now_floor - 1:
            candidate += period
        return candidate

    if st in ("monthly", "yearly"):
        step = _add_months if st == "monthly" else _add_years
        # BUG FIX: the original used datetime.utcfromtimestamp(...) (naive,
        # interpreted as UTC) and then called .timestamp() on the naive
        # result, which CPython interprets as *local* time — skewing the
        # next-run time by the host's UTC offset in any non-UTC timezone.
        # utcfromtimestamp is also deprecated since Python 3.12.  Using a
        # timezone-aware UTC datetime makes the round-trip exact.
        base = datetime.fromtimestamp(last if last else start_floor, tz=timezone.utc)
        candidate = step(base, 1 if last else 0)
        while int(candidate.timestamp()) <= now_floor - 1:
            candidate = step(candidate, 1)
        return int(candidate.timestamp())

    return None
|
||||
|
||||
|
||||
@dataclass
class SchedulerRuntime:
    """Mutable runtime state for the scheduler's background worker."""

    # Worker thread running the polling loop; None until start() spawns it.
    thread: Optional[threading.Thread]
    # Signalled to request a graceful shutdown of the polling loop.
    stop_event: threading.Event
|
||||
|
||||
|
||||
class SchedulerService:
    """Evaluate and dispatch scheduled jobs using Engine repositories."""

    def __init__(
        self,
        *,
        job_repository: SQLiteJobRepository,
        assemblies_root: Path,
        logger: Optional[logging.Logger] = None,
        poll_interval: int = 30,
    ) -> None:
        """Create a scheduler backed by *job_repository*.

        Args:
            job_repository: persistence layer for jobs and their runs.
            assemblies_root: root directory handed to the JobFabricator.
            logger: optional logger; a child is used for the fabricator too.
            poll_interval: seconds between evaluation passes (floored at 5).
        """
        self._jobs = job_repository
        self._fabricator = JobFabricator(assemblies_root=assemblies_root, logger=logger)
        self._log = logger or logging.getLogger("borealis.engine.scheduler")
        # Clamp so a misconfigured tiny interval cannot busy-spin the loop.
        self._poll_interval = max(5, poll_interval)
        self._socketio: Optional[Any] = None
        self._runtime = SchedulerRuntime(thread=None, stop_event=threading.Event())

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------
    def start(self, socketio: Optional[Any] = None) -> None:
        """Start the background polling thread (idempotent while alive).

        *socketio* is stored (and replaced on every call) for run-event
        emission; pass ``None`` to disable event emission.
        """
        self._socketio = socketio
        # Already running: keep the existing thread, only the socket changed.
        if self._runtime.thread and self._runtime.thread.is_alive():
            return
        self._runtime.stop_event.clear()
        thread = threading.Thread(target=self._run_loop, name="borealis-engine-scheduler", daemon=True)
        thread.start()
        self._runtime.thread = thread
        self._log.info("scheduler-started")

    def stop(self) -> None:
        """Signal the loop to stop and wait up to 5s for the thread to exit."""
        self._runtime.stop_event.set()
        thread = self._runtime.thread
        if thread and thread.is_alive():
            thread.join(timeout=5)
        self._log.info("scheduler-stopped")

    # ------------------------------------------------------------------
    # HTTP orchestration helpers
    # ------------------------------------------------------------------
    def list_jobs(self) -> list[dict[str, Any]]:
        """Return all jobs serialized for API responses."""
        return [self._serialize_job(job) for job in self._jobs.list_jobs()]

    def get_job(self, job_id: int) -> Optional[dict[str, Any]]:
        """Return the serialized job with *job_id*, or ``None`` if absent."""
        record = self._jobs.fetch_job(job_id)
        return self._serialize_job(record) if record else None

    def create_job(self, payload: Mapping[str, Any]) -> dict[str, Any]:
        """Validate *payload*, persist a new job, and return it serialized.

        Raises:
            ValueError: if the payload lacks a name, components, or targets.
        """
        fields = self._normalize_payload(payload)
        record = self._jobs.create_job(**fields)
        return self._serialize_job(record)

    def update_job(self, job_id: int, payload: Mapping[str, Any]) -> Optional[dict[str, Any]]:
        """Validate *payload* and update job *job_id*; ``None`` if not found.

        Raises:
            ValueError: if the payload lacks a name, components, or targets.
        """
        fields = self._normalize_payload(payload)
        record = self._jobs.update_job(job_id, **fields)
        return self._serialize_job(record) if record else None

    def toggle_job(self, job_id: int, enabled: bool) -> None:
        """Enable or disable job *job_id*."""
        self._jobs.set_enabled(job_id, enabled)

    def delete_job(self, job_id: int) -> None:
        """Permanently remove job *job_id*."""
        self._jobs.delete_job(job_id)

    def list_runs(self, job_id: int, *, days: Optional[int] = None) -> list[dict[str, Any]]:
        """Return serialized runs for *job_id*, optionally limited to *days*."""
        runs = self._jobs.list_runs(job_id, days=days)
        return [self._serialize_run(run) for run in runs]

    def purge_runs(self, job_id: int) -> None:
        """Delete the stored run history for *job_id*."""
        self._jobs.purge_runs(job_id)

    # ------------------------------------------------------------------
    # Scheduling loop
    # ------------------------------------------------------------------
    def tick(self, *, now_ts: Optional[int] = None) -> None:
        """Run one evaluation pass synchronously (exposed for tests/manual use).

        NOTE(review): a falsy *now_ts* (including 0) falls back to the
        current time — presumably intentional; confirm if epoch 0 matters.
        """
        self._evaluate_jobs(now_ts=now_ts or _now_ts())

    def _run_loop(self) -> None:
        """Background loop: evaluate jobs every poll interval until stopped."""
        stop_event = self._runtime.stop_event
        # Event.wait doubles as the sleep; returns True (exits) once set.
        while not stop_event.wait(timeout=self._poll_interval):
            try:
                self._evaluate_jobs(now_ts=_now_ts())
            except Exception as exc:  # pragma: no cover - safety net
                self._log.exception("scheduler-loop-error: %s", exc)

    def _evaluate_jobs(self, *, now_ts: int) -> None:
        """Evaluate every enabled job; one job's failure never blocks the rest."""
        for job in self._jobs.list_enabled_jobs():
            try:
                self._evaluate_job(job, now_ts=now_ts)
            except Exception as exc:
                self._log.exception("job-evaluation-error job_id=%s error=%s", job.id, exc)

    def _evaluate_job(self, job: ScheduledJobRecord, *, now_ts: int) -> None:
        """Dispatch *job* if an occurrence is due at *now_ts*.

        Skips jobs whose next occurrence is in the future or whose
        expiration window (relative to start_ts) has elapsed.
        """
        last_run = self._jobs.fetch_last_run(job.id)
        last_ts = None
        if last_run:
            # Prefer the scheduled slot; fall back to start/creation times.
            last_ts = last_run.scheduled_ts or last_run.started_ts or last_run.created_at
        next_run = _compute_next_run(job.schedule_type, job.start_ts, last_ts, now_ts)
        if next_run is None or next_run > now_ts:
            return

        expiration_window = _parse_expiration(job.expiration)
        if expiration_window and job.start_ts:
            if job.start_ts + expiration_window <= now_ts:
                self._log.info(
                    "job-expired",
                    extra={"job_id": job.id, "start_ts": job.start_ts, "expiration": job.expiration},
                )
                return

        manifest = self._fabricator.build(job, occurrence_ts=next_run)
        # "<unassigned>" is a placeholder so a target-less job still records a run.
        targets = manifest.targets or ("<unassigned>",)
        for target in targets:
            run_id = self._jobs.create_run(job.id, next_run, target_hostname=None if target == "<unassigned>" else target)
            self._jobs.mark_run_started(run_id, started_ts=now_ts)
            self._emit_run_event("job_run_started", job, run_id, target, manifest)
            # NOTE(review): runs are marked "Success" immediately with no
            # execution in between — actual dispatch to agents appears to
            # live elsewhere (or is not implemented yet); confirm.
            self._jobs.mark_run_finished(run_id, status="Success", finished_ts=now_ts)
            self._emit_run_event("job_run_completed", job, run_id, target, manifest)

    def _emit_run_event(
        self,
        event: str,
        job: ScheduledJobRecord,
        run_id: int,
        target: str,
        manifest: JobManifest,
    ) -> None:
        """Best-effort Socket.IO broadcast of a run lifecycle event.

        Emission failures are logged at debug level and never propagate.
        """
        payload = {
            "job_id": job.id,
            "run_id": run_id,
            "target": target,
            "schedule_type": job.schedule_type,
            "occurrence_ts": manifest.occurrence_ts,
        }
        if self._socketio is not None:
            try:
                self._socketio.emit(event, payload)  # type: ignore[attr-defined]
            except Exception:
                self._log.debug("socketio-emit-failed event=%s payload=%s", event, payload)

    # ------------------------------------------------------------------
    # Serialization helpers
    # ------------------------------------------------------------------
    def _serialize_job(self, job: ScheduledJobRecord) -> dict[str, Any]:
        """Convert a job record into the API's JSON-ready dict shape.

        Schedule data is emitted both nested (``schedule``) and flat
        (``schedule_type``/``start_ts``) — both forms appear to be
        consumed by clients.
        """
        return {
            "id": job.id,
            "name": job.name,
            "components": job.components,
            "targets": job.targets,
            "schedule": {
                "type": job.schedule_type,
                "start": job.start_ts,
            },
            "schedule_type": job.schedule_type,
            "start_ts": job.start_ts,
            "duration_stop_enabled": job.duration_stop_enabled,
            "expiration": job.expiration or "no_expire",
            "execution_context": job.execution_context,
            "credential_id": job.credential_id,
            "use_service_account": job.use_service_account,
            "enabled": job.enabled,
            "created_at": job.created_at,
            "updated_at": job.updated_at,
        }

    def _serialize_run(self, run: ScheduledJobRunRecord) -> dict[str, Any]:
        """Convert a run record into the API's JSON-ready dict shape."""
        return {
            "id": run.id,
            "job_id": run.job_id,
            "scheduled_ts": run.scheduled_ts,
            "started_ts": run.started_ts,
            "finished_ts": run.finished_ts,
            "status": run.status,
            "error": run.error,
            "target_hostname": run.target_hostname,
            "created_at": run.created_at,
            "updated_at": run.updated_at,
        }

    def _normalize_payload(self, payload: Mapping[str, Any]) -> dict[str, Any]:
        """Validate and normalize an incoming job payload into repo kwargs.

        Accepts both nested (``schedule``/``duration`` sub-objects) and
        flat field layouts; nested values win when present.

        Raises:
            ValueError: if name, components, or targets are missing/empty.

        NOTE(review): if ``components``/``targets`` are one-shot iterators,
        the validating ``list(...)`` consumes them and the returned lists
        would be empty — callers appear to pass concrete lists; confirm.
        """
        name = str(payload.get("name") or "").strip()
        components = payload.get("components") or []
        targets = payload.get("targets") or []
        schedule_block = payload.get("schedule") if isinstance(payload.get("schedule"), Mapping) else {}
        schedule_type = str(schedule_block.get("type") or payload.get("schedule_type") or "immediately").strip().lower()
        start_value = schedule_block.get("start") if isinstance(schedule_block, Mapping) else None
        if start_value is None:
            # Fall back to the flat field when the nested block has no start.
            start_value = payload.get("start")
        start_ts = _parse_ts(start_value)
        duration_block = payload.get("duration") if isinstance(payload.get("duration"), Mapping) else {}
        duration_stop = bool(duration_block.get("stopAfterEnabled") or payload.get("duration_stop_enabled"))
        expiration = str(duration_block.get("expiration") or payload.get("expiration") or "no_expire").strip()
        execution_context = str(payload.get("execution_context") or "system").strip().lower()
        credential_id = payload.get("credential_id")
        try:
            credential_id = int(credential_id) if credential_id is not None else None
        except Exception:
            # Non-numeric credential ids are silently dropped.
            credential_id = None
        use_service_account_raw = payload.get("use_service_account")
        # Service-account execution is only meaningful for WinRM contexts.
        use_service_account = bool(use_service_account_raw) if execution_context == "winrm" else False
        enabled = bool(payload.get("enabled", True))

        if not name:
            raise ValueError("job name is required")
        if not isinstance(components, Iterable) or not list(components):
            raise ValueError("at least one component is required")
        if not isinstance(targets, Iterable) or not list(targets):
            raise ValueError("at least one target is required")

        return {
            "name": name,
            "components": list(components),
            "targets": list(targets),
            "schedule_type": schedule_type,
            "start_ts": start_ts,
            "duration_stop_enabled": duration_stop,
            "expiration": expiration,
            "execution_context": execution_context,
            "credential_id": credential_id,
            "use_service_account": use_service_account,
            "enabled": enabled,
        }
|
||||
Reference in New Issue
Block a user