# ====================================================== # Data\Engine\integrations\github.py # Description: GitHub REST integration providing cached repository head lookups for Engine services. # # API Endpoints (if applicable): None # ====================================================== """GitHub integration helpers for the Borealis Engine runtime.""" from __future__ import annotations import base64 import json import logging import os import sqlite3 import subprocess import sys import threading import time from pathlib import Path from typing import Any, Callable, Dict, Optional, Tuple from flask import has_request_context, request try: # pragma: no cover - import guard mirrors legacy runtime behaviour import requests # type: ignore except ImportError: # pragma: no cover - graceful fallback for minimal environments class _RequestsStub: class RequestException(RuntimeError): """Raised when the ``requests`` library is unavailable.""" def get(self, *args: Any, **kwargs: Any) -> Any: raise self.RequestException("The 'requests' library is required for GitHub integrations.") requests = _RequestsStub() # type: ignore try: # pragma: no cover - optional dependency for green thread integration from eventlet import tpool as _eventlet_tpool # type: ignore except Exception: # pragma: no cover - optional dependency _eventlet_tpool = None # type: ignore try: # pragma: no cover - optional dependency for retrieving original modules from eventlet import patcher as _eventlet_patcher # type: ignore except Exception: # pragma: no cover - optional dependency _eventlet_patcher = None # type: ignore __all__ = ["GitHubIntegration"] class GitHubIntegration: """Lightweight cache for GitHub repository head lookups.""" MIN_TTL_SECONDS = 30 MAX_TTL_SECONDS = 3600 DEFAULT_TTL_SECONDS = 60 DEFAULT_REPO = "bunny-lab-io/Borealis" DEFAULT_BRANCH = "main" def __init__( self, *, cache_file: Path, db_conn_factory: Callable[[], sqlite3.Connection], service_log: Callable[[str, str, Optional[str]], None], logger: Optional[logging.Logger] = None, default_repo: Optional[str] = None, default_branch: Optional[str] = None, default_ttl_seconds: Optional[int] = None, ) -> None: self._cache_file = cache_file self._cache_file.parent.mkdir(parents=True, exist_ok=True) self._db_conn_factory = db_conn_factory self._service_log = service_log self._logger = logger or logging.getLogger(__name__) self._lock = threading.Lock() self._token_lock = threading.Lock() self._cache: Dict[Tuple[str, str], Tuple[str, float]] = {} self._token_cache: Dict[str, Any] = {"value": None, "loaded_at": 0.0, "known": False} self._default_repo = self._determine_default_repo(default_repo) self._default_branch = self._determine_default_branch(default_branch) self._default_ttl = self._determine_default_ttl(default_ttl_seconds) self._load_cache() @property def default_repo(self) -> str: return self._default_repo @property def default_branch(self) -> str: return self._default_branch def current_repo_hash( self, repo: Optional[str], branch: Optional[str], *, ttl: Optional[Any] = None, force_refresh: bool = False, ) -> Tuple[Dict[str, Any], int]: owner_repo = (repo or self._default_repo).strip() target_branch = (branch or self._default_branch).strip() if "/" not in owner_repo: return {"error": "repo must be in the form owner/name"}, 400 ttl_seconds = self._normalise_ttl(ttl) return self._resolve(owner_repo, target_branch, ttl_seconds=ttl_seconds, force_refresh=force_refresh) def _determine_default_repo(self, override: Optional[str]) -> str: candidate = (override or os.environ.get("BOREALIS_REPO") or self.DEFAULT_REPO).strip() if "/" not in candidate: return self.DEFAULT_REPO return candidate def _determine_default_branch(self, override: Optional[str]) -> str: candidate = (override or os.environ.get("BOREALIS_REPO_BRANCH") or self.DEFAULT_BRANCH).strip() return candidate or self.DEFAULT_BRANCH def _determine_default_ttl(self, override: Optional[int]) -> int: env_value = os.environ.get("BOREALIS_REPO_HASH_REFRESH") candidate: Optional[int] = None if override is not None: candidate = override else: try: candidate = int(env_value) if env_value else None except (TypeError, ValueError): candidate = None if candidate is None: candidate = self.DEFAULT_TTL_SECONDS return self._normalise_ttl(candidate) def _normalise_ttl(self, ttl: Optional[Any]) -> int: value: Optional[int] = None if isinstance(ttl, str): ttl = ttl.strip() if not ttl: ttl = None if ttl is None: value = self._default_ttl else: try: value = int(ttl) except (TypeError, ValueError): value = self._default_ttl value = value if value is not None else self._default_ttl return max(self.MIN_TTL_SECONDS, min(value, self.MAX_TTL_SECONDS)) def _load_cache(self) -> None: try: if not self._cache_file.is_file(): return payload = json.loads(self._cache_file.read_text(encoding="utf-8")) entries = payload.get("entries") if not isinstance(entries, dict): return now = time.time() with self._lock: for key, data in entries.items(): if not isinstance(data, dict): continue sha = (data.get("sha") or "").strip() if not sha: continue ts_raw = data.get("ts") try: ts = float(ts_raw) except (TypeError, ValueError): ts = now repo, _, branch = key.partition(":") if repo and branch: self._cache[(repo, branch)] = (sha, ts) except Exception: # pragma: no cover - defensive logging self._logger.debug("Failed to hydrate GitHub repo hash cache", exc_info=True) def _persist_cache(self) -> None: with self._lock: snapshot = { f"{repo}:{branch}": {"sha": sha, "ts": ts} for (repo, branch), (sha, ts) in self._cache.items() if sha } try: if not snapshot: try: if self._cache_file.exists(): self._cache_file.unlink() except FileNotFoundError: return except Exception: self._logger.debug("Failed to remove GitHub repo hash cache file", exc_info=True) return payload = {"version": 1, "entries": snapshot} tmp_path = self._cache_file.with_suffix(".tmp") self._cache_file.parent.mkdir(parents=True, exist_ok=True) tmp_path.write_text(json.dumps(payload), encoding="utf-8") tmp_path.replace(self._cache_file) except Exception: # pragma: no cover - defensive logging self._logger.debug("Failed to persist GitHub repo hash cache", exc_info=True) def _resolve( self, repo: str, branch: str, *, ttl_seconds: int, force_refresh: bool, ) -> Tuple[Dict[str, Any], int]: key = (repo, branch) now = time.time() with self._lock: cached = self._cache.get(key) cached_sha: Optional[str] = None cached_ts: Optional[float] = None cached_age: Optional[float] = None if cached: cached_sha, cached_ts = cached cached_age = max(0.0, now - cached_ts) if cached_sha and not force_refresh and cached_age is not None and cached_age < ttl_seconds: return self._build_payload(repo, branch, cached_sha, True, cached_age, "cache", None), 200 sha, error = self._fetch_repo_head(repo, branch, force_refresh=force_refresh) if sha: with self._lock: self._cache[key] = (sha, now) self._persist_cache() return self._build_payload(repo, branch, sha, False, 0.0, "github", None), 200 if error: self._service_log("server", f"/api/repo/current_hash error: {error}") if cached_sha is not None: payload = self._build_payload( repo, branch, cached_sha or None, True, cached_age, "cache-stale", error or "using cached value", ) return payload, (200 if cached_sha else 503) payload = self._build_payload( repo, branch, None, False, None, "github", error or "unable to resolve repository head", ) return payload, 503 def _build_payload( self, repo: str, branch: str, sha: Optional[str], cached: bool, age_seconds: Optional[float], source: str, error: Optional[str], ) -> Dict[str, Any]: payload: Dict[str, Any] = { "repo": repo, "branch": branch, "sha": (sha.strip() if isinstance(sha, str) else None) or None, "cached": cached, "age_seconds": age_seconds, "source": source, } if error: payload["error"] = error return payload def _fetch_repo_head(self, repo: str, branch: str, *, force_refresh: bool) -> Tuple[Optional[str], Optional[str]]: headers = { "Accept": "application/vnd.github+json", "User-Agent": "Borealis-Engine", } token = self._github_token(force_refresh=force_refresh) if token: headers["Authorization"] = f"Bearer {token}" try: response = self._http_get( f"https://api.github.com/repos/{repo}/branches/{branch}", headers=headers, timeout=20, ) status = getattr(response, "status_code", None) if status == 200: try: data = response.json() except Exception as exc: return None, f"GitHub REST API repo head decode error: {exc}" sha = ((data.get("commit") or {}).get("sha") or "").strip() if sha: return sha, None return None, "GitHub REST API repo head missing commit SHA" snippet = "" try: text = getattr(response, "text", "") snippet = text[:200] if isinstance(text, str) else "" except Exception: snippet = "" error = f"GitHub REST API repo head lookup failed: HTTP {status}" if snippet: error = f"{error} {snippet}" return None, error except requests.RequestException as exc: # type: ignore[attr-defined] return None, f"GitHub REST API repo head lookup raised: {exc}" except RecursionError as exc: # pragma: no cover - defensive guard return None, f"GitHub REST API repo head lookup recursion error: {exc}" except Exception as exc: # pragma: no cover - defensive guard return None, f"GitHub REST API repo head lookup unexpected error: {exc}" def _github_token(self, *, force_refresh: bool) -> Optional[str]: if has_request_context(): header_token = (request.headers.get("X-GitHub-Token") or "").strip() if header_token: return header_token if not force_refresh: auth_header = request.headers.get("Authorization") or "" if auth_header.lower().startswith("bearer "): candidate = auth_header.split(" ", 1)[1].strip() if candidate: return candidate now = time.time() with self._token_lock: if ( not force_refresh and self._token_cache.get("known") and now - (self._token_cache.get("loaded_at") or 0.0) < 15.0 ): cached_token = self._token_cache.get("value") return cached_token if cached_token else None token = self._load_token_from_db(force_refresh=force_refresh) self._set_cached_token(token) if token: return token fallback = os.environ.get("BOREALIS_GITHUB_TOKEN") or os.environ.get("GITHUB_TOKEN") fallback = (fallback or "").strip() return fallback or None def _set_cached_token(self, token: Optional[str]) -> None: with self._token_lock: self._token_cache["value"] = token if token else None self._token_cache["loaded_at"] = time.time() self._token_cache["known"] = True def load_token(self, *, force_refresh: bool = False) -> Optional[str]: token = self._load_token_from_db(force_refresh=force_refresh) self._set_cached_token(token) return token def store_token(self, token: Optional[str]) -> None: conn: Optional[sqlite3.Connection] = None try: conn = self._db_conn_factory() cur = conn.cursor() cur.execute("DELETE FROM github_token") if token: cur.execute("INSERT INTO github_token (token) VALUES (?)", (token,)) conn.commit() except Exception as exc: if conn is not None: try: conn.rollback() except Exception: pass raise RuntimeError(f"Failed to store token: {exc}") from exc finally: if conn is not None: try: conn.close() except Exception: pass self._set_cached_token(token if token else None) def verify_token(self, token: Optional[str]) -> Dict[str, Any]: if not token: return { "valid": False, "message": "API Token Not Configured", "status": "missing", "rate_limit": None, } headers = { "Accept": "application/vnd.github+json", "User-Agent": "Borealis-Engine", "Authorization": f"Bearer {token}", } try: response = self._http_get( f"https://api.github.com/repos/{self._default_repo}/branches/{self._default_branch}", headers=headers, timeout=20, ) limit_header = response.headers.get("X-RateLimit-Limit") try: limit_value = int(limit_header) if limit_header is not None else None except (TypeError, ValueError): limit_value = None if response.status_code == 200: if limit_value is not None and limit_value >= 5000: return { "valid": True, "message": "API Authentication Successful", "status": "ok", "rate_limit": limit_value, } return { "valid": False, "message": "API Token Invalid", "status": "insufficient", "rate_limit": limit_value, "error": "Authenticated request did not elevate GitHub rate limits", } if response.status_code == 401: return { "valid": False, "message": "API Token Invalid", "status": "invalid", "rate_limit": limit_value, "error": getattr(response, "text", "")[:200], } return { "valid": False, "message": f"GitHub API error (HTTP {response.status_code})", "status": "error", "rate_limit": limit_value, "error": getattr(response, "text", "")[:200], } except Exception as exc: return { "valid": False, "message": f"API Token validation error: {exc}", "status": "error", "rate_limit": None, "error": str(exc), } def refresh_default_repo_hash(self, *, force: bool = False) -> Tuple[Dict[str, Any], int]: return self._resolve( self._default_repo, self._default_branch, ttl_seconds=self._default_ttl, force_refresh=force, ) def _http_get(self, url: str, *, headers: Dict[str, str], timeout: int) -> Any: try: if _eventlet_tpool is not None: try: return _eventlet_tpool.execute(requests.get, url, headers=headers, timeout=timeout) except Exception: pass return requests.get(url, headers=headers, timeout=timeout) except Exception: return self._http_get_subprocess(url, headers=headers, timeout=timeout) def _http_get_subprocess(self, url: str, *, headers: Dict[str, str], timeout: int) -> Any: script = """ import base64 import json import sys import urllib.request url = sys.argv[1] headers = json.loads(sys.argv[2]) timeout = float(sys.argv[3]) req = urllib.request.Request(url, headers=headers, method="GET") try: with urllib.request.urlopen(req, timeout=timeout) as resp: body = resp.read() payload = { "status": resp.status, "headers": dict(resp.getheaders()), "body": base64.b64encode(body).decode("ascii"), "encoding": "base64", } sys.stdout.write(json.dumps(payload)) except Exception as exc: error_payload = {"error": str(exc)} sys.stdout.write(json.dumps(error_payload)) sys.exit(1) """ proc = subprocess.run( [sys.executable, "-c", script, url, json.dumps(headers), str(float(timeout))], capture_output=True, text=True, ) output = proc.stdout.strip() or proc.stderr.strip() try: data = json.loads(output or "{}") except json.JSONDecodeError as exc: raise RuntimeError(f"GitHub subprocess returned invalid JSON: {output!r}") from exc if proc.returncode != 0: error_msg = data.get("error") if isinstance(data, dict) else output raise RuntimeError(f"GitHub subprocess request failed: {error_msg}") status_code = data.get("status") raw_headers = data.get("headers") or {} body_encoded = data.get("body") or "" encoding = data.get("encoding") if encoding == "base64": body_bytes = base64.b64decode(body_encoded.encode("ascii")) else: body_bytes = (body_encoded or "").encode("utf-8") class _SubprocessResponse: def __init__(self, status: int, headers: Dict[str, str], body: bytes): self.status_code = status self.headers = headers self._body = body self.text = body.decode("utf-8", errors="replace") def json(self) -> Any: if not self._body: return {} return json.loads(self.text) if status_code is None: raise RuntimeError(f"GitHub subprocess returned no status code: {data}") return _SubprocessResponse(int(status_code), {str(k): str(v) for k, v in raw_headers.items()}, body_bytes) def _resolve_original_ssl_module(self): if _eventlet_patcher is not None: try: original = _eventlet_patcher.original("ssl") if original is not None: return original except Exception: pass return ssl def _resolve_original_socket_module(self): if _eventlet_patcher is not None: try: original = _eventlet_patcher.original("socket") if original is not None: return original except Exception: pass import socket as socket_module # Local import for fallback return socket_module def _resolve_original_raw_socket_module(self): if _eventlet_patcher is not None: try: original = _eventlet_patcher.original("_socket") if original is not None: return original except Exception: pass try: import _socket as raw_socket_module # type: ignore return raw_socket_module except Exception: return self._resolve_original_socket_module() def _load_token_from_db(self, *, force_refresh: bool = False) -> Optional[str]: if force_refresh: with self._token_lock: self._token_cache["known"] = False conn: Optional[sqlite3.Connection] = None try: conn = self._db_conn_factory() cursor = conn.cursor() cursor.execute("SELECT token FROM github_token LIMIT 1") row = cursor.fetchone() if row and row[0]: candidate = str(row[0]).strip() return candidate or None return None except sqlite3.OperationalError: return None except Exception as exc: self._service_log("server", f"github token lookup failed: {exc}") return None finally: if conn is not None: try: conn.close() except Exception: pass