From fcaf072d446785746d26cd0b7855a7be82c4dbb0 Mon Sep 17 00:00:00 2001 From: Nicole Rappe Date: Wed, 22 Oct 2025 14:11:00 -0600 Subject: [PATCH] Add GitHub integration service and endpoints --- Data/Engine/CURRENT_STAGE.md | 2 +- Data/Engine/README.md | 15 + Data/Engine/config/__init__.py | 2 + Data/Engine/config/environment.py | 42 +++ Data/Engine/domain/__init__.py | 10 + Data/Engine/domain/github.py | 103 +++++++ Data/Engine/integrations/__init__.py | 4 +- Data/Engine/integrations/github/__init__.py | 8 + .../integrations/github/artifact_provider.py | 275 ++++++++++++++++++ Data/Engine/interfaces/http/__init__.py | 3 +- Data/Engine/interfaces/http/github.py | 60 ++++ Data/Engine/repositories/sqlite/__init__.py | 2 + .../repositories/sqlite/github_repository.py | 53 ++++ Data/Engine/repositories/sqlite/migrations.py | 12 + Data/Engine/services/__init__.py | 3 + Data/Engine/services/container.py | 20 ++ Data/Engine/services/github/__init__.py | 8 + Data/Engine/services/github/github_service.py | 106 +++++++ 18 files changed, 725 insertions(+), 3 deletions(-) create mode 100644 Data/Engine/domain/github.py create mode 100644 Data/Engine/integrations/github/__init__.py create mode 100644 Data/Engine/integrations/github/artifact_provider.py create mode 100644 Data/Engine/interfaces/http/github.py create mode 100644 Data/Engine/repositories/sqlite/github_repository.py create mode 100644 Data/Engine/services/github/__init__.py create mode 100644 Data/Engine/services/github/github_service.py diff --git a/Data/Engine/CURRENT_STAGE.md b/Data/Engine/CURRENT_STAGE.md index 8d43581..c9f45c2 100644 --- a/Data/Engine/CURRENT_STAGE.md +++ b/Data/Engine/CURRENT_STAGE.md @@ -54,7 +54,7 @@ - 10.3 Expose HTTP orchestration via `interfaces/http/job_management.py` and WS notifications via dedicated modules. - 10.4 Commit after scheduler can run a no-op job loop independently. -11. GitHub integration +[COMPLETED] 11. 
GitHub integration - 11.1 Copy GitHub helper logic into `integrations/github/artifact_provider.py` with proper configuration injection. - 11.2 Provide repository/service hooks for fetching artifacts or repo heads; add resilience logging. - 11.3 Commit after integration tests (or mocked unit tests) confirm API workflows. diff --git a/Data/Engine/README.md b/Data/Engine/README.md index 902af0d..4db9b28 100644 --- a/Data/Engine/README.md +++ b/Data/Engine/README.md @@ -17,6 +17,10 @@ The Engine mirrors the legacy defaults so it can boot without additional configu | `BOREALIS_DEBUG` | Enables debug logging, disables secure-cookie requirements, and allows Werkzeug debug mode. | `false` | | `BOREALIS_HOST` | Bind address for the HTTP/Socket.IO server. | `127.0.0.1` | | `BOREALIS_PORT` | Bind port for the HTTP/Socket.IO server. | `5000` | +| `BOREALIS_REPO` | Default GitHub repository (`owner/name`) for artifact lookups. | `bunny-lab-io/Borealis` | +| `BOREALIS_REPO_BRANCH` | Default branch tracked by the Engine GitHub integration. | `main` | +| `BOREALIS_REPO_HASH_REFRESH` | Seconds between default repository head refresh attempts (clamped 30-3600). | `60` | +| `BOREALIS_CACHE_DIR` | Directory used to persist Engine cache files (GitHub repo head cache). | `Data/Engine/cache` under the project root | ## Logging expectations @@ -80,3 +84,14 @@ Step 10 migrates the foundational job scheduler into the Engine: - `Data/Engine/interfaces/http/job_management.py` mirrors the legacy REST surface for creating, updating, toggling, and inspecting scheduled jobs and their run history. The scheduler service starts automatically from `Data/Engine/bootstrapper.py` once the Engine runtime builds the service container, ensuring a no-op scheduling loop executes independently of the legacy server. 
+ +## GitHub integration + +Step 11 migrates the GitHub artifact provider into the Engine: + +- `Data/Engine/integrations/github/artifact_provider.py` caches branch head lookups, verifies API tokens, and optionally refreshes the default repository in the background. +- `Data/Engine/repositories/sqlite/github_repository.py` persists the GitHub API token so HTTP handlers do not speak to SQLite directly. +- `Data/Engine/services/github/github_service.py` coordinates token caching, verification, and repo head lookups for both HTTP and background refresh flows. +- `Data/Engine/interfaces/http/github.py` exposes `/api/repo/current_hash` and `/api/github/token` through the Engine stack while keeping business logic in the service layer. + +The service container now wires `github_service`, giving other interfaces and background jobs a clean entry point for GitHub functionality. diff --git a/Data/Engine/config/__init__.py b/Data/Engine/config/__init__.py index f0cb508..8074ffa 100644 --- a/Data/Engine/config/__init__.py +++ b/Data/Engine/config/__init__.py @@ -6,6 +6,7 @@ from .environment import ( DatabaseSettings, EngineSettings, FlaskSettings, + GitHubSettings, ServerSettings, SocketIOSettings, load_environment, @@ -16,6 +17,7 @@ __all__ = [ "DatabaseSettings", "EngineSettings", "FlaskSettings", + "GitHubSettings", "load_environment", "ServerSettings", "SocketIOSettings", diff --git a/Data/Engine/config/environment.py b/Data/Engine/config/environment.py index bbf3b41..c294d8f 100644 --- a/Data/Engine/config/environment.py +++ b/Data/Engine/config/environment.py @@ -40,6 +40,22 @@ class ServerSettings: port: int +@dataclass(frozen=True, slots=True) +class GitHubSettings: + """Configuration surface for GitHub repository interactions.""" + + default_repo: str + default_branch: str + refresh_interval_seconds: int + cache_root: Path + + @property + def cache_file(self) -> Path: + """Location of the persisted repository-head cache.""" + + return self.cache_root / 
"repo_hash_cache.json" + + @dataclass(frozen=True, slots=True) class EngineSettings: """Immutable container describing the Engine runtime configuration.""" @@ -50,6 +66,7 @@ class EngineSettings: flask: FlaskSettings socketio: SocketIOSettings server: ServerSettings + github: GitHubSettings @property def logs_root(self) -> Path: @@ -110,6 +127,23 @@ def _resolve_static_root(project_root: Path) -> Path: return candidates[0].resolve() +def _resolve_github_cache_root(project_root: Path) -> Path: + candidate = os.getenv("BOREALIS_CACHE_DIR") + if candidate: + return Path(candidate).expanduser().resolve() + return (project_root / "Data" / "Engine" / "cache").resolve() + + +def _parse_refresh_interval(raw: str | None) -> int: + if not raw: + return 60 + try: + value = int(raw) + except ValueError: + value = 60 + return max(30, min(value, 3600)) + + def _parse_origins(raw: str | None) -> Tuple[str, ...]: if not raw: return ("*",) @@ -140,6 +174,12 @@ def load_environment() -> EngineSettings: except ValueError: port = 5000 server_settings = ServerSettings(host=host, port=port) + github_settings = GitHubSettings( + default_repo=os.getenv("BOREALIS_REPO", "bunny-lab-io/Borealis"), + default_branch=os.getenv("BOREALIS_REPO_BRANCH", "main"), + refresh_interval_seconds=_parse_refresh_interval(os.getenv("BOREALIS_REPO_HASH_REFRESH")), + cache_root=_resolve_github_cache_root(project_root), + ) return EngineSettings( project_root=project_root, @@ -148,6 +188,7 @@ def load_environment() -> EngineSettings: flask=flask_settings, socketio=socket_settings, server=server_settings, + github=github_settings, ) @@ -155,6 +196,7 @@ __all__ = [ "DatabaseSettings", "EngineSettings", "FlaskSettings", + "GitHubSettings", "SocketIOSettings", "ServerSettings", "load_environment", diff --git a/Data/Engine/domain/__init__.py b/Data/Engine/domain/__init__.py index 79d9c87..077ce2f 100644 --- a/Data/Engine/domain/__init__.py +++ b/Data/Engine/domain/__init__.py @@ -20,6 +20,12 @@ from 
.device_enrollment import ( # noqa: F401 EnrollmentRequest, ProofChallenge, ) +from .github import ( # noqa: F401 + GitHubRateLimit, + GitHubRepoRef, + GitHubTokenStatus, + RepoHeadSnapshot, +) __all__ = [ "AccessTokenClaims", @@ -35,5 +41,9 @@ __all__ = [ "EnrollmentCode", "EnrollmentRequest", "ProofChallenge", + "GitHubRateLimit", + "GitHubRepoRef", + "GitHubTokenStatus", + "RepoHeadSnapshot", "sanitize_service_context", ] diff --git a/Data/Engine/domain/github.py b/Data/Engine/domain/github.py new file mode 100644 index 0000000..f2e60b1 --- /dev/null +++ b/Data/Engine/domain/github.py @@ -0,0 +1,103 @@ +"""Domain types for GitHub integrations.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Dict, Optional + + +@dataclass(frozen=True, slots=True) +class GitHubRepoRef: + """Identify a GitHub repository and branch.""" + + owner: str + name: str + branch: str + + @property + def full_name(self) -> str: + return f"{self.owner}/{self.name}".strip("/") + + @classmethod + def parse(cls, owner_repo: str, branch: str) -> "GitHubRepoRef": + owner_repo = (owner_repo or "").strip() + if "/" not in owner_repo: + raise ValueError("repo must be in the form owner/name") + owner, repo = owner_repo.split("/", 1) + return cls(owner=owner.strip(), name=repo.strip(), branch=(branch or "main").strip()) + + +@dataclass(frozen=True, slots=True) +class RepoHeadSnapshot: + """Snapshot describing the current head of a repository branch.""" + + repository: GitHubRepoRef + sha: Optional[str] + cached: bool + age_seconds: Optional[float] + source: str + error: Optional[str] + + def to_dict(self) -> Dict[str, object]: + return { + "repo": self.repository.full_name, + "branch": self.repository.branch, + "sha": self.sha, + "cached": self.cached, + "age_seconds": self.age_seconds, + "source": self.source, + "error": self.error, + } + + +@dataclass(frozen=True, slots=True) +class GitHubRateLimit: + """Subset of rate limit details returned by the 
GitHub API.""" + + limit: Optional[int] + remaining: Optional[int] + reset_epoch: Optional[int] + used: Optional[int] + + def to_dict(self) -> Dict[str, Optional[int]]: + return { + "limit": self.limit, + "remaining": self.remaining, + "reset": self.reset_epoch, + "used": self.used, + } + + +@dataclass(frozen=True, slots=True) +class GitHubTokenStatus: + """Describe the verification result for a GitHub access token.""" + + has_token: bool + valid: bool + status: str + message: str + rate_limit: Optional[GitHubRateLimit] + error: Optional[str] = None + + def to_dict(self) -> Dict[str, object]: + payload: Dict[str, object] = { + "has_token": self.has_token, + "valid": self.valid, + "status": self.status, + "message": self.message, + "error": self.error, + } + if self.rate_limit is not None: + payload["rate_limit"] = self.rate_limit.to_dict() + else: + payload["rate_limit"] = None + return payload + + +__all__ = [ + "GitHubRateLimit", + "GitHubRepoRef", + "GitHubTokenStatus", + "RepoHeadSnapshot", +] + diff --git a/Data/Engine/integrations/__init__.py b/Data/Engine/integrations/__init__.py index f2037f9..d2fc960 100644 --- a/Data/Engine/integrations/__init__.py +++ b/Data/Engine/integrations/__init__.py @@ -2,4 +2,6 @@ from __future__ import annotations -__all__: list[str] = [] +from .github.artifact_provider import GitHubArtifactProvider + +__all__ = ["GitHubArtifactProvider"] diff --git a/Data/Engine/integrations/github/__init__.py b/Data/Engine/integrations/github/__init__.py new file mode 100644 index 0000000..00facf8 --- /dev/null +++ b/Data/Engine/integrations/github/__init__.py @@ -0,0 +1,8 @@ +"""GitHub integration surface for the Borealis Engine.""" + +from __future__ import annotations + +from .artifact_provider import GitHubArtifactProvider + +__all__ = ["GitHubArtifactProvider"] + diff --git a/Data/Engine/integrations/github/artifact_provider.py b/Data/Engine/integrations/github/artifact_provider.py new file mode 100644 index 0000000..91d1a3b --- /dev/null 
+++ b/Data/Engine/integrations/github/artifact_provider.py @@ -0,0 +1,275 @@ +"""GitHub REST API integration with caching support.""" + +from __future__ import annotations + +import json +import logging +import threading +import time +from pathlib import Path +from typing import Dict, Optional + +from Data.Engine.domain.github import GitHubRepoRef, GitHubTokenStatus, RepoHeadSnapshot, GitHubRateLimit + +try: # pragma: no cover - optional dependency guard + import requests + from requests import Response +except Exception: # pragma: no cover - fallback when requests is unavailable + requests = None # type: ignore[assignment] + Response = object # type: ignore[misc,assignment] + +__all__ = ["GitHubArtifactProvider"] + + +class GitHubArtifactProvider: + """Resolve repository heads and token metadata from the GitHub API.""" + + def __init__( + self, + *, + cache_file: Path, + default_repo: str, + default_branch: str, + refresh_interval: int, + logger: Optional[logging.Logger] = None, + ) -> None: + self._cache_file = cache_file + self._default_repo = default_repo + self._default_branch = default_branch + self._refresh_interval = max(30, min(refresh_interval, 3600)) + self._log = logger or logging.getLogger("borealis.engine.integrations.github") + self._token: Optional[str] = None + self._cache_lock = threading.Lock() + self._cache: Dict[str, Dict[str, float | str]] = {} + self._worker: Optional[threading.Thread] = None + self._hydrate_cache_from_disk() + + def set_token(self, token: Optional[str]) -> None: + self._token = (token or "").strip() or None + + @property + def default_repo(self) -> str: + return self._default_repo + + @property + def default_branch(self) -> str: + return self._default_branch + + @property + def refresh_interval(self) -> int: + return self._refresh_interval + + def fetch_repo_head( + self, + repo: GitHubRepoRef, + *, + ttl_seconds: int, + force_refresh: bool = False, + ) -> RepoHeadSnapshot: + key = f"{repo.full_name}:{repo.branch}" + now = 
time.time() + + cached_entry = None + with self._cache_lock: + cached_entry = self._cache.get(key, {}).copy() + + cached_sha = (cached_entry.get("sha") if cached_entry else None) # type: ignore[assignment] + cached_ts = cached_entry.get("timestamp") if cached_entry else None # type: ignore[assignment] + cached_age = None + if isinstance(cached_ts, (int, float)): + cached_age = max(0.0, now - float(cached_ts)) + + ttl = max(30, min(ttl_seconds, 3600)) + if cached_sha and not force_refresh and cached_age is not None and cached_age < ttl: + return RepoHeadSnapshot( + repository=repo, + sha=str(cached_sha), + cached=True, + age_seconds=cached_age, + source="cache", + error=None, + ) + + if requests is None: + return RepoHeadSnapshot( + repository=repo, + sha=str(cached_sha) if cached_sha else None, + cached=bool(cached_sha), + age_seconds=cached_age, + source="unavailable", + error="requests library not available", + ) + + headers = { + "Accept": "application/vnd.github+json", + "User-Agent": "Borealis-Engine", + } + if self._token: + headers["Authorization"] = f"Bearer {self._token}" + + url = f"https://api.github.com/repos/{repo.full_name}/branches/{repo.branch}" + error: Optional[str] = None + sha: Optional[str] = None + + try: + response: Response = requests.get(url, headers=headers, timeout=20) + if response.status_code == 200: + data = response.json() + sha = (data.get("commit", {}).get("sha") or "").strip() # type: ignore[assignment] + else: + error = f"GitHub REST API repo head lookup failed: HTTP {response.status_code} {response.text[:200]}" + except Exception as exc: # pragma: no cover - defensive logging + error = f"GitHub REST API repo head lookup raised: {exc}" + + if sha: + payload = {"sha": sha, "timestamp": now} + with self._cache_lock: + self._cache[key] = payload + self._persist_cache() + return RepoHeadSnapshot( + repository=repo, + sha=sha, + cached=False, + age_seconds=0.0, + source="github", + error=None, + ) + + if error: + 
self._log.warning("repo-head-lookup failure repo=%s branch=%s error=%s", repo.full_name, repo.branch, error) + + return RepoHeadSnapshot( + repository=repo, + sha=str(cached_sha) if cached_sha else None, + cached=bool(cached_sha), + age_seconds=cached_age, + source="cache-stale" if cached_sha else "github", + error=error or ("using cached value" if cached_sha else "unable to resolve repository head"), + ) + + def refresh_default_repo_head(self, *, force: bool = False) -> RepoHeadSnapshot: + repo = GitHubRepoRef.parse(self._default_repo, self._default_branch) + return self.fetch_repo_head(repo, ttl_seconds=self._refresh_interval, force_refresh=force) + + def verify_token(self, token: Optional[str]) -> GitHubTokenStatus: + token = (token or "").strip() + if not token: + return GitHubTokenStatus( + has_token=False, + valid=False, + status="missing", + message="API Token Not Configured", + rate_limit=None, + error=None, + ) + + if requests is None: + return GitHubTokenStatus( + has_token=True, + valid=False, + status="unknown", + message="requests library not available", + rate_limit=None, + error="requests library not available", + ) + + headers = { + "Accept": "application/vnd.github+json", + "Authorization": f"Bearer {token}", + "User-Agent": "Borealis-Engine", + } + try: + response: Response = requests.get("https://api.github.com/rate_limit", headers=headers, timeout=10) + except Exception as exc: # pragma: no cover - defensive logging + message = f"GitHub token verification raised: {exc}" + self._log.warning("github-token-verify error=%s", message) + return GitHubTokenStatus( + has_token=True, + valid=False, + status="error", + message="API Token Invalid", + rate_limit=None, + error=message, + ) + + if response.status_code != 200: + message = f"GitHub API error (HTTP {response.status_code})" + self._log.warning("github-token-verify http_status=%s", response.status_code) + return GitHubTokenStatus( + has_token=True, + valid=False, + status="error", + message="API 
Token Invalid", + rate_limit=None, + error=message, + ) + + data = response.json() + core = (data.get("resources", {}).get("core", {}) if isinstance(data, dict) else {}) + rate_limit = GitHubRateLimit( + limit=_safe_int(core.get("limit")), + remaining=_safe_int(core.get("remaining")), + reset_epoch=_safe_int(core.get("reset")), + used=_safe_int(core.get("used")), + ) + + message = "API Token Valid" if rate_limit.remaining is not None else "API Token Verified" + return GitHubTokenStatus( + has_token=True, + valid=True, + status="valid", + message=message, + rate_limit=rate_limit, + error=None, + ) + + def start_background_refresh(self) -> None: + if self._worker and self._worker.is_alive(): # pragma: no cover - guard + return + + def _loop() -> None: + interval = max(30, self._refresh_interval) + while True: + try: + self.refresh_default_repo_head(force=True) + except Exception as exc: # pragma: no cover - defensive logging + self._log.warning("default-repo-refresh failure: %s", exc) + time.sleep(interval) + + self._worker = threading.Thread(target=_loop, name="github-repo-refresh", daemon=True) + self._worker.start() + + def _hydrate_cache_from_disk(self) -> None: + path = self._cache_file + try: + if not path.exists(): + return + data = json.loads(path.read_text(encoding="utf-8")) + if isinstance(data, dict): + with self._cache_lock: + self._cache = { + key: value + for key, value in data.items() + if isinstance(value, dict) and "sha" in value and "timestamp" in value + } + except Exception as exc: # pragma: no cover - defensive logging + self._log.warning("failed to load repo cache: %s", exc) + + def _persist_cache(self) -> None: + path = self._cache_file + try: + path.parent.mkdir(parents=True, exist_ok=True) + payload = json.dumps(self._cache, ensure_ascii=False) + tmp = path.with_suffix(".tmp") + tmp.write_text(payload, encoding="utf-8") + tmp.replace(path) + except Exception as exc: # pragma: no cover - defensive logging + self._log.warning("failed to persist 
repo cache: %s", exc) + + +def _safe_int(value: object) -> Optional[int]: + try: + return int(value) + except Exception: + return None + diff --git a/Data/Engine/interfaces/http/__init__.py b/Data/Engine/interfaces/http/__init__.py index 97ef6b0..ce80a82 100644 --- a/Data/Engine/interfaces/http/__init__.py +++ b/Data/Engine/interfaces/http/__init__.py @@ -6,7 +6,7 @@ from flask import Flask from Data.Engine.services.container import EngineServiceContainer -from . import admin, agents, enrollment, health, job_management, tokens +from . import admin, agents, enrollment, github, health, job_management, tokens _REGISTRARS = ( health.register, @@ -14,6 +14,7 @@ _REGISTRARS = ( enrollment.register, tokens.register, job_management.register, + github.register, admin.register, ) diff --git a/Data/Engine/interfaces/http/github.py b/Data/Engine/interfaces/http/github.py new file mode 100644 index 0000000..93a1095 --- /dev/null +++ b/Data/Engine/interfaces/http/github.py @@ -0,0 +1,60 @@ +"""GitHub-related HTTP endpoints.""" + +from __future__ import annotations + +from flask import Blueprint, Flask, current_app, jsonify, request + +from Data.Engine.services.container import EngineServiceContainer + +blueprint = Blueprint("engine_github", __name__) + + +def register(app: Flask, _services: EngineServiceContainer) -> None: + if "engine_github" not in app.blueprints: + app.register_blueprint(blueprint) + + +@blueprint.route("/api/repo/current_hash", methods=["GET"]) +def repo_current_hash() -> object: + services: EngineServiceContainer = current_app.extensions["engine_services"] + github = services.github_service + + repo = (request.args.get("repo") or "").strip() or None + branch = (request.args.get("branch") or "").strip() or None + refresh_flag = (request.args.get("refresh") or "").strip().lower() + ttl_raw = request.args.get("ttl") + try: + ttl = int(ttl_raw) if ttl_raw else github.default_refresh_interval + except ValueError: + ttl = github.default_refresh_interval + 
force_refresh = refresh_flag in {"1", "true", "yes", "force", "refresh"} + + snapshot = github.get_repo_head(repo, branch, ttl_seconds=ttl, force_refresh=force_refresh) + payload = snapshot.to_dict() + if not snapshot.sha: + return jsonify(payload), 503 + return jsonify(payload) + + +@blueprint.route("/api/github/token", methods=["GET", "POST"]) +def github_token() -> object: + services: EngineServiceContainer = current_app.extensions["engine_services"] + github = services.github_service + + if request.method == "GET": + payload = github.get_token_status(force_refresh=True).to_dict() + return jsonify(payload) + + data = request.get_json(silent=True) or {} + token = data.get("token") + normalized = str(token).strip() if token is not None else "" + try: + payload = github.update_token(normalized).to_dict() + except Exception as exc: # pragma: no cover - defensive logging + current_app.logger.exception("failed to store GitHub token: %s", exc) + return jsonify({"error": f"Failed to store token: {exc}"}), 500 + return jsonify(payload) + + +__all__ = ["register", "blueprint", "repo_current_hash", "github_token"] + diff --git a/Data/Engine/repositories/sqlite/__init__.py b/Data/Engine/repositories/sqlite/__init__.py index 30f17c4..754902a 100644 --- a/Data/Engine/repositories/sqlite/__init__.py +++ b/Data/Engine/repositories/sqlite/__init__.py @@ -11,6 +11,7 @@ from .connection import ( ) from .device_repository import SQLiteDeviceRepository from .enrollment_repository import SQLiteEnrollmentRepository +from .github_repository import SQLiteGitHubRepository from .job_repository import SQLiteJobRepository from .migrations import apply_all from .token_repository import SQLiteRefreshTokenRepository @@ -25,5 +26,6 @@ __all__ = [ "SQLiteRefreshTokenRepository", "SQLiteJobRepository", "SQLiteEnrollmentRepository", + "SQLiteGitHubRepository", "apply_all", ] diff --git a/Data/Engine/repositories/sqlite/github_repository.py b/Data/Engine/repositories/sqlite/github_repository.py new 
file mode 100644 index 0000000..f8e6912 --- /dev/null +++ b/Data/Engine/repositories/sqlite/github_repository.py @@ -0,0 +1,53 @@ +"""SQLite-backed GitHub token persistence.""" + +from __future__ import annotations + +import logging +from contextlib import closing +from typing import Optional + +from .connection import SQLiteConnectionFactory + +__all__ = ["SQLiteGitHubRepository"] + + +class SQLiteGitHubRepository: + """Store and retrieve GitHub API tokens for the Engine.""" + + def __init__( + self, + connection_factory: SQLiteConnectionFactory, + *, + logger: Optional[logging.Logger] = None, + ) -> None: + self._connections = connection_factory + self._log = logger or logging.getLogger("borealis.engine.repositories.github") + + def load_token(self) -> Optional[str]: + """Return the stored GitHub token if one exists.""" + + with closing(self._connections()) as conn: + cur = conn.cursor() + cur.execute("SELECT token FROM github_token LIMIT 1") + row = cur.fetchone() + + if not row: + return None + + token = (row[0] or "").strip() + return token or None + + def store_token(self, token: Optional[str]) -> None: + """Persist *token*, replacing any prior value.""" + + normalized = (token or "").strip() + + with closing(self._connections()) as conn: + cur = conn.cursor() + cur.execute("DELETE FROM github_token") + if normalized: + cur.execute("INSERT INTO github_token (token) VALUES (?)", (normalized,)) + conn.commit() + + self._log.info("stored-token has_token=%s", bool(normalized)) + diff --git a/Data/Engine/repositories/sqlite/migrations.py b/Data/Engine/repositories/sqlite/migrations.py index 2aa97ca..4dddca0 100644 --- a/Data/Engine/repositories/sqlite/migrations.py +++ b/Data/Engine/repositories/sqlite/migrations.py @@ -27,6 +27,7 @@ def apply_all(conn: sqlite3.Connection) -> None: _ensure_refresh_token_table(conn) _ensure_install_code_table(conn) _ensure_device_approval_table(conn) + _ensure_github_token_table(conn) _ensure_scheduled_jobs_table(conn) 
_ensure_scheduled_job_run_tables(conn) @@ -226,6 +227,17 @@ def _ensure_device_approval_table(conn: sqlite3.Connection) -> None: ) +def _ensure_github_token_table(conn: sqlite3.Connection) -> None: + cur = conn.cursor() + cur.execute( + """ + CREATE TABLE IF NOT EXISTS github_token ( + token TEXT + ) + """ + ) + + def _ensure_scheduled_jobs_table(conn: sqlite3.Connection) -> None: cur = conn.cursor() cur.execute( diff --git a/Data/Engine/services/__init__.py b/Data/Engine/services/__init__.py index 4a9980c..ec0d241 100644 --- a/Data/Engine/services/__init__.py +++ b/Data/Engine/services/__init__.py @@ -19,6 +19,7 @@ from .enrollment import ( PollingResult, ) from .jobs.scheduler_service import SchedulerService +from .github import GitHubService, GitHubTokenPayload from .realtime import AgentRealtimeService, AgentRecord __all__ = [ @@ -37,4 +38,6 @@ __all__ = [ "AgentRealtimeService", "AgentRecord", "SchedulerService", + "GitHubService", + "GitHubTokenPayload", ] diff --git a/Data/Engine/services/container.py b/Data/Engine/services/container.py index 49a0eaa..756c44f 100644 --- a/Data/Engine/services/container.py +++ b/Data/Engine/services/container.py @@ -9,10 +9,12 @@ from pathlib import Path from typing import Callable, Optional from Data.Engine.config import EngineSettings +from Data.Engine.integrations.github import GitHubArtifactProvider from Data.Engine.repositories.sqlite import ( SQLiteConnectionFactory, SQLiteDeviceRepository, SQLiteEnrollmentRepository, + SQLiteGitHubRepository, SQLiteJobRepository, SQLiteRefreshTokenRepository, ) @@ -26,6 +28,7 @@ from Data.Engine.services.auth import ( from Data.Engine.services.crypto.signing import ScriptSigner, load_signer from Data.Engine.services.enrollment import EnrollmentService from Data.Engine.services.enrollment.nonce_cache import NonceCache +from Data.Engine.services.github import GitHubService from Data.Engine.services.jobs import SchedulerService from Data.Engine.services.rate_limit import 
SlidingWindowRateLimiter from Data.Engine.services.realtime import AgentRealtimeService @@ -42,6 +45,7 @@ class EngineServiceContainer: dpop_validator: DPoPValidator agent_realtime: AgentRealtimeService scheduler_service: SchedulerService + github_service: GitHubService def build_service_container( @@ -56,6 +60,7 @@ def build_service_container( token_repo = SQLiteRefreshTokenRepository(db_factory, logger=log.getChild("tokens")) enrollment_repo = SQLiteEnrollmentRepository(db_factory, logger=log.getChild("enrollment")) job_repo = SQLiteJobRepository(db_factory, logger=log.getChild("jobs")) + github_repo = SQLiteGitHubRepository(db_factory, logger=log.getChild("github_repo")) jwt_service = load_jwt_service() dpop_validator = DPoPValidator() @@ -101,6 +106,20 @@ def build_service_container( logger=log.getChild("scheduler"), ) + github_provider = GitHubArtifactProvider( + cache_file=settings.github.cache_file, + default_repo=settings.github.default_repo, + default_branch=settings.github.default_branch, + refresh_interval=settings.github.refresh_interval_seconds, + logger=log.getChild("github.provider"), + ) + github_service = GitHubService( + repository=github_repo, + provider=github_provider, + logger=log.getChild("github"), + ) + github_service.start_background_refresh() + return EngineServiceContainer( device_auth=device_auth, token_service=token_service, @@ -109,6 +128,7 @@ def build_service_container( dpop_validator=dpop_validator, agent_realtime=agent_realtime, scheduler_service=scheduler_service, + github_service=github_service, ) diff --git a/Data/Engine/services/github/__init__.py b/Data/Engine/services/github/__init__.py new file mode 100644 index 0000000..f2c48ba --- /dev/null +++ b/Data/Engine/services/github/__init__.py @@ -0,0 +1,8 @@ +"""GitHub-oriented services for the Borealis Engine.""" + +from __future__ import annotations + +from .github_service import GitHubService, GitHubTokenPayload + +__all__ = ["GitHubService", "GitHubTokenPayload"] + diff 
--git a/Data/Engine/services/github/github_service.py b/Data/Engine/services/github/github_service.py new file mode 100644 index 0000000..157208e --- /dev/null +++ b/Data/Engine/services/github/github_service.py @@ -0,0 +1,106 @@ +"""GitHub service layer bridging repositories and integrations.""" + +from __future__ import annotations + +import logging +import time +from dataclasses import dataclass +from typing import Callable, Optional + +from Data.Engine.domain.github import GitHubRepoRef, GitHubTokenStatus, RepoHeadSnapshot +from Data.Engine.integrations.github import GitHubArtifactProvider +from Data.Engine.repositories.sqlite.github_repository import SQLiteGitHubRepository + +__all__ = ["GitHubService", "GitHubTokenPayload"] + + +@dataclass(frozen=True, slots=True) +class GitHubTokenPayload: + token: Optional[str] + status: GitHubTokenStatus + checked_at: int + + def to_dict(self) -> dict: + payload = self.status.to_dict() + payload.update( + { + "token": self.token or "", + "checked_at": self.checked_at, + } + ) + return payload + + +class GitHubService: + """Coordinate GitHub caching, verification, and persistence.""" + + def __init__( + self, + *, + repository: SQLiteGitHubRepository, + provider: GitHubArtifactProvider, + logger: Optional[logging.Logger] = None, + clock: Optional[Callable[[], float]] = None, + ) -> None: + self._repository = repository + self._provider = provider + self._log = logger or logging.getLogger("borealis.engine.services.github") + self._clock = clock or time.time + self._token_cache: Optional[str] = None + self._token_loaded_at: float = 0.0 + + initial_token = self._repository.load_token() + self._apply_token(initial_token) + + def get_repo_head( + self, + owner_repo: Optional[str], + branch: Optional[str], + *, + ttl_seconds: int, + force_refresh: bool = False, + ) -> RepoHeadSnapshot: + repo_str = (owner_repo or self._provider.default_repo).strip() + branch_name = (branch or self._provider.default_branch).strip() + repo = 
GitHubRepoRef.parse(repo_str, branch_name) + ttl = max(30, min(ttl_seconds, 3600)) + return self._provider.fetch_repo_head(repo, ttl_seconds=ttl, force_refresh=force_refresh) + + def refresh_default_repo(self, *, force: bool = False) -> RepoHeadSnapshot: + return self._provider.refresh_default_repo_head(force=force) + + def get_token_status(self, *, force_refresh: bool = False) -> GitHubTokenPayload: + token = self._load_token(force_refresh=force_refresh) + status = self._provider.verify_token(token) + return GitHubTokenPayload(token=token, status=status, checked_at=int(self._clock())) + + def update_token(self, token: Optional[str]) -> GitHubTokenPayload: + normalized = (token or "").strip() + self._repository.store_token(normalized) + self._apply_token(normalized) + status = self._provider.verify_token(normalized) + self._provider.start_background_refresh() + self._log.info("github-token updated valid=%s", status.valid) + return GitHubTokenPayload(token=normalized or None, status=status, checked_at=int(self._clock())) + + def start_background_refresh(self) -> None: + self._provider.start_background_refresh() + + @property + def default_refresh_interval(self) -> int: + return self._provider.refresh_interval + + def _load_token(self, *, force_refresh: bool = False) -> Optional[str]: + now = self._clock() + if not force_refresh and self._token_cache is not None and (now - self._token_loaded_at) < 15.0: + return self._token_cache + + token = self._repository.load_token() + self._apply_token(token) + return token + + def _apply_token(self, token: Optional[str]) -> None: + self._token_cache = (token or "").strip() or None + self._token_loaded_at = self._clock() + self._provider.set_token(self._token_cache) +