# ======================================================
# Data\Engine\integrations\github.py
# Description: GitHub REST integration providing cached repository head lookups for Engine services.
#
# API Endpoints (if applicable): None
# ======================================================

"""GitHub integration helpers for the Borealis Engine runtime."""

from __future__ import annotations

import base64
import json
import logging
import os
import sqlite3
import ssl
import subprocess
import sys
import threading
import time
from pathlib import Path
from typing import Any, Callable, Dict, Optional, Tuple

from flask import has_request_context, request

try:  # pragma: no cover - import guard mirrors legacy runtime behaviour
    import requests  # type: ignore
except ImportError:  # pragma: no cover - graceful fallback for minimal environments
    class _RequestsStub:
        class RequestException(RuntimeError):
            """Raised when the ``requests`` library is unavailable."""

        def get(self, *args: Any, **kwargs: Any) -> Any:
            raise self.RequestException("The 'requests' library is required for GitHub integrations.")

    requests = _RequestsStub()  # type: ignore

try:  # pragma: no cover - optional dependency for green thread integration
    from eventlet import tpool as _eventlet_tpool  # type: ignore
except Exception:  # pragma: no cover - optional dependency
    _eventlet_tpool = None  # type: ignore

try:  # pragma: no cover - optional dependency for retrieving original modules
    from eventlet import patcher as _eventlet_patcher  # type: ignore
except Exception:  # pragma: no cover - optional dependency
    _eventlet_patcher = None  # type: ignore


__all__ = ["GitHubIntegration"]


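# A minimal construction sketch (the paths, database file, and logger shown
# here are illustrative, not the Engine's actual wiring):
#
#     integration = GitHubIntegration(
#         cache_file=Path("/opt/borealis/cache/github_repo_hash.json"),
#         db_conn_factory=lambda: sqlite3.connect("/opt/borealis/engine.db"),
#         service_log=lambda service, message: print(f"[{service}] {message}"),
#     )
#     payload, status = integration.current_repo_hash(None, None)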
class GitHubIntegration:
    """Lightweight cache for GitHub repository head lookups."""

    MIN_TTL_SECONDS = 30
    MAX_TTL_SECONDS = 3600
    DEFAULT_TTL_SECONDS = 60
    DEFAULT_REPO = "bunny-lab-io/Borealis"
    DEFAULT_BRANCH = "main"

    def __init__(
        self,
        *,
        cache_file: Path,
        db_conn_factory: Callable[[], sqlite3.Connection],
        service_log: Callable[[str, str], None],
        logger: Optional[logging.Logger] = None,
        default_repo: Optional[str] = None,
        default_branch: Optional[str] = None,
        default_ttl_seconds: Optional[int] = None,
    ) -> None:
        self._cache_file = cache_file
        self._cache_file.parent.mkdir(parents=True, exist_ok=True)
        self._db_conn_factory = db_conn_factory
        self._service_log = service_log
        self._logger = logger or logging.getLogger(__name__)

        self._lock = threading.Lock()
        self._token_lock = threading.Lock()
        self._cache: Dict[Tuple[str, str], Tuple[str, float]] = {}
        self._token_cache: Dict[str, Any] = {"value": None, "loaded_at": 0.0, "known": False}

        self._default_repo = self._determine_default_repo(default_repo)
        self._default_branch = self._determine_default_branch(default_branch)
        self._default_ttl = self._determine_default_ttl(default_ttl_seconds)

        self._load_cache()

    @property
    def default_repo(self) -> str:
        return self._default_repo

    @property
    def default_branch(self) -> str:
        return self._default_branch

    def current_repo_hash(
        self,
        repo: Optional[str],
        branch: Optional[str],
        *,
        ttl: Optional[Any] = None,
        force_refresh: bool = False,
    ) -> Tuple[Dict[str, Any], int]:
        owner_repo = (repo or self._default_repo).strip()
        target_branch = (branch or self._default_branch).strip()

        if "/" not in owner_repo:
            return {"error": "repo must be in the form owner/name"}, 400

        ttl_seconds = self._normalise_ttl(ttl)
        return self._resolve(owner_repo, target_branch, ttl_seconds=ttl_seconds, force_refresh=force_refresh)

    def _determine_default_repo(self, override: Optional[str]) -> str:
        candidate = (override or os.environ.get("BOREALIS_REPO") or self.DEFAULT_REPO).strip()
        if "/" not in candidate:
            return self.DEFAULT_REPO
        return candidate

    def _determine_default_branch(self, override: Optional[str]) -> str:
        candidate = (override or os.environ.get("BOREALIS_REPO_BRANCH") or self.DEFAULT_BRANCH).strip()
        return candidate or self.DEFAULT_BRANCH

    def _determine_default_ttl(self, override: Optional[int]) -> int:
        env_value = os.environ.get("BOREALIS_REPO_HASH_REFRESH")
        candidate: Optional[int] = None
        if override is not None:
            candidate = override
        else:
            try:
                candidate = int(env_value) if env_value else None
            except (TypeError, ValueError):
                candidate = None
        if candidate is None:
            candidate = self.DEFAULT_TTL_SECONDS
        return self._normalise_ttl(candidate)

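    # Clamping sketch: _normalise_ttl coerces ints and numeric strings, then
    # clamps to [MIN_TTL_SECONDS, MAX_TTL_SECONDS]; unparseable input falls
    # back to the instance default. For example, _normalise_ttl("5") yields 30
    # and _normalise_ttl(999999) yields 3600.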
    def _normalise_ttl(self, ttl: Optional[Any]) -> int:
        value: Optional[int] = None
        if isinstance(ttl, str):
            ttl = ttl.strip()
            if not ttl:
                ttl = None
        if ttl is None:
            value = self._default_ttl
        else:
            try:
                value = int(ttl)
            except (TypeError, ValueError):
                value = self._default_ttl
        value = value if value is not None else self._default_ttl
        return max(self.MIN_TTL_SECONDS, min(value, self.MAX_TTL_SECONDS))

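    # On-disk cache layout (as read and written by the two methods below): a
    # version-1 JSON document keyed by "owner/name:branch", e.g.
    #   {"version": 1, "entries": {"bunny-lab-io/Borealis:main":
    #       {"sha": "abc123...", "ts": 1700000000.0}}}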
    def _load_cache(self) -> None:
        try:
            if not self._cache_file.is_file():
                return
            payload = json.loads(self._cache_file.read_text(encoding="utf-8"))
            entries = payload.get("entries")
            if not isinstance(entries, dict):
                return
            now = time.time()
            with self._lock:
                for key, data in entries.items():
                    if not isinstance(data, dict):
                        continue
                    sha = (data.get("sha") or "").strip()
                    if not sha:
                        continue
                    ts_raw = data.get("ts")
                    try:
                        ts = float(ts_raw)
                    except (TypeError, ValueError):
                        ts = now
                    repo, _, branch = key.partition(":")
                    if repo and branch:
                        self._cache[(repo, branch)] = (sha, ts)
        except Exception:  # pragma: no cover - defensive logging
            self._logger.debug("Failed to hydrate GitHub repo hash cache", exc_info=True)

    def _persist_cache(self) -> None:
        with self._lock:
            snapshot = {
                f"{repo}:{branch}": {"sha": sha, "ts": ts}
                for (repo, branch), (sha, ts) in self._cache.items()
                if sha
            }
        try:
            if not snapshot:
                try:
                    if self._cache_file.exists():
                        self._cache_file.unlink()
                except FileNotFoundError:
                    return
                except Exception:
                    self._logger.debug("Failed to remove GitHub repo hash cache file", exc_info=True)
                return
            payload = {"version": 1, "entries": snapshot}
            tmp_path = self._cache_file.with_suffix(".tmp")
            self._cache_file.parent.mkdir(parents=True, exist_ok=True)
            tmp_path.write_text(json.dumps(payload), encoding="utf-8")
            tmp_path.replace(self._cache_file)
        except Exception:  # pragma: no cover - defensive logging
            self._logger.debug("Failed to persist GitHub repo hash cache", exc_info=True)

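    # Resolution order: a fresh cache entry short-circuits; otherwise GitHub is
    # queried. On failure a stale cache entry is still served (source
    # "cache-stale"), and only when no SHA is known at all does the caller get
    # a 503.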
    def _resolve(
        self,
        repo: str,
        branch: str,
        *,
        ttl_seconds: int,
        force_refresh: bool,
    ) -> Tuple[Dict[str, Any], int]:
        key = (repo, branch)
        now = time.time()

        with self._lock:
            cached = self._cache.get(key)

        cached_sha: Optional[str] = None
        cached_ts: Optional[float] = None
        cached_age: Optional[float] = None
        if cached:
            cached_sha, cached_ts = cached
            cached_age = max(0.0, now - cached_ts)

        if cached_sha and not force_refresh and cached_age is not None and cached_age < ttl_seconds:
            return self._build_payload(repo, branch, cached_sha, True, cached_age, "cache", None), 200

        sha, error = self._fetch_repo_head(repo, branch, force_refresh=force_refresh)
        if sha:
            with self._lock:
                self._cache[key] = (sha, now)
            self._persist_cache()
            return self._build_payload(repo, branch, sha, False, 0.0, "github", None), 200

        if error:
            self._service_log("server", f"/api/repo/current_hash error: {error}")

        if cached_sha is not None:
            payload = self._build_payload(
                repo,
                branch,
                cached_sha or None,
                True,
                cached_age,
                "cache-stale",
                error or "using cached value",
            )
            return payload, (200 if cached_sha else 503)

        payload = self._build_payload(
            repo,
            branch,
            None,
            False,
            None,
            "github",
            error or "unable to resolve repository head",
        )
        return payload, 503

    def _build_payload(
        self,
        repo: str,
        branch: str,
        sha: Optional[str],
        cached: bool,
        age_seconds: Optional[float],
        source: str,
        error: Optional[str],
    ) -> Dict[str, Any]:
        payload: Dict[str, Any] = {
            "repo": repo,
            "branch": branch,
            "sha": (sha.strip() if isinstance(sha, str) else None) or None,
            "cached": cached,
            "age_seconds": age_seconds,
            "source": source,
        }
        if error:
            payload["error"] = error
        return payload

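    # Example success payload (field values illustrative):
    #   {"repo": "bunny-lab-io/Borealis", "branch": "main", "sha": "0a1b2c...",
    #    "cached": True, "age_seconds": 12.5, "source": "cache"}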
    def _fetch_repo_head(self, repo: str, branch: str, *, force_refresh: bool) -> Tuple[Optional[str], Optional[str]]:
        headers = {
            "Accept": "application/vnd.github+json",
            "User-Agent": "Borealis-Engine",
        }
        token = self._github_token(force_refresh=force_refresh)
        if token:
            headers["Authorization"] = f"Bearer {token}"

        try:
            response = self._http_get(
                f"https://api.github.com/repos/{repo}/branches/{branch}",
                headers=headers,
                timeout=20,
            )
            status = getattr(response, "status_code", None)
            if status == 200:
                try:
                    data = response.json()
                except Exception as exc:
                    return None, f"GitHub REST API repo head decode error: {exc}"
                sha = ((data.get("commit") or {}).get("sha") or "").strip()
                if sha:
                    return sha, None
                return None, "GitHub REST API repo head missing commit SHA"
            snippet = ""
            try:
                text = getattr(response, "text", "")
                snippet = text[:200] if isinstance(text, str) else ""
            except Exception:
                snippet = ""
            error = f"GitHub REST API repo head lookup failed: HTTP {status}"
            if snippet:
                error = f"{error} {snippet}"
            return None, error
        except requests.RequestException as exc:  # type: ignore[attr-defined]
            return None, f"GitHub REST API repo head lookup raised: {exc}"
        except RecursionError as exc:  # pragma: no cover - defensive guard
            return None, f"GitHub REST API repo head lookup recursion error: {exc}"
        except Exception as exc:  # pragma: no cover - defensive guard
            return None, f"GitHub REST API repo head lookup unexpected error: {exc}"

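    # Token precedence: an explicit X-GitHub-Token request header wins, then a
    # bearer Authorization header, then a 15-second in-memory cache, then the
    # database, and finally the BOREALIS_GITHUB_TOKEN / GITHUB_TOKEN
    # environment variables.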
    def _github_token(self, *, force_refresh: bool) -> Optional[str]:
        if has_request_context():
            header_token = (request.headers.get("X-GitHub-Token") or "").strip()
            if header_token:
                return header_token
            if not force_refresh:
                auth_header = request.headers.get("Authorization") or ""
                if auth_header.lower().startswith("bearer "):
                    candidate = auth_header.split(" ", 1)[1].strip()
                    if candidate:
                        return candidate

        now = time.time()
        with self._token_lock:
            if (
                not force_refresh
                and self._token_cache.get("known")
                and now - (self._token_cache.get("loaded_at") or 0.0) < 15.0
            ):
                cached_token = self._token_cache.get("value")
                return cached_token if cached_token else None

        token = self._load_token_from_db(force_refresh=force_refresh)
        self._set_cached_token(token)
        if token:
            return token

        fallback = os.environ.get("BOREALIS_GITHUB_TOKEN") or os.environ.get("GITHUB_TOKEN")
        fallback = (fallback or "").strip()
        return fallback or None

    def _set_cached_token(self, token: Optional[str]) -> None:
        with self._token_lock:
            self._token_cache["value"] = token if token else None
            self._token_cache["loaded_at"] = time.time()
            self._token_cache["known"] = True

    def load_token(self, *, force_refresh: bool = False) -> Optional[str]:
        token = self._load_token_from_db(force_refresh=force_refresh)
        self._set_cached_token(token)
        return token

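    # The github_token table is assumed to hold at most one row with a single
    # "token" column; store_token below enforces that by clearing the table
    # before inserting.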
    def store_token(self, token: Optional[str]) -> None:
        conn: Optional[sqlite3.Connection] = None
        try:
            conn = self._db_conn_factory()
            cur = conn.cursor()
            cur.execute("DELETE FROM github_token")
            if token:
                cur.execute("INSERT INTO github_token (token) VALUES (?)", (token,))
            conn.commit()
        except Exception as exc:
            if conn is not None:
                try:
                    conn.rollback()
                except Exception:
                    pass
            raise RuntimeError(f"Failed to store token: {exc}") from exc
        finally:
            if conn is not None:
                try:
                    conn.close()
                except Exception:
                    pass
        self._set_cached_token(token if token else None)

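    # Validation heuristic: GitHub grants authenticated requests a core rate
    # limit of at least 5,000/hour (anonymous requests get 60), so an
    # X-RateLimit-Limit below 5000 on a 200 response suggests the token was
    # ignored rather than honoured.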
    def verify_token(self, token: Optional[str]) -> Dict[str, Any]:
        if not token:
            return {
                "valid": False,
                "message": "API Token Not Configured",
                "status": "missing",
                "rate_limit": None,
            }

        headers = {
            "Accept": "application/vnd.github+json",
            "User-Agent": "Borealis-Engine",
            "Authorization": f"Bearer {token}",
        }
        try:
            response = self._http_get(
                f"https://api.github.com/repos/{self._default_repo}/branches/{self._default_branch}",
                headers=headers,
                timeout=20,
            )
            limit_header = response.headers.get("X-RateLimit-Limit")
            try:
                limit_value = int(limit_header) if limit_header is not None else None
            except (TypeError, ValueError):
                limit_value = None

            if response.status_code == 200:
                if limit_value is not None and limit_value >= 5000:
                    return {
                        "valid": True,
                        "message": "API Authentication Successful",
                        "status": "ok",
                        "rate_limit": limit_value,
                    }
                return {
                    "valid": False,
                    "message": "API Token Invalid",
                    "status": "insufficient",
                    "rate_limit": limit_value,
                    "error": "Authenticated request did not elevate GitHub rate limits",
                }

            if response.status_code == 401:
                return {
                    "valid": False,
                    "message": "API Token Invalid",
                    "status": "invalid",
                    "rate_limit": limit_value,
                    "error": getattr(response, "text", "")[:200],
                }

            return {
                "valid": False,
                "message": f"GitHub API error (HTTP {response.status_code})",
                "status": "error",
                "rate_limit": limit_value,
                "error": getattr(response, "text", "")[:200],
            }
        except Exception as exc:
            return {
                "valid": False,
                "message": f"API Token validation error: {exc}",
                "status": "error",
                "rate_limit": None,
                "error": str(exc),
            }

    def refresh_default_repo_hash(self, *, force: bool = False) -> Tuple[Dict[str, Any], int]:
        return self._resolve(
            self._default_repo,
            self._default_branch,
            ttl_seconds=self._default_ttl,
            force_refresh=force,
        )

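    # Transport fallback chain: offload requests.get to an eventlet OS-thread
    # pool when available, fall back to a direct (potentially blocking) call,
    # and as a last resort shell out to a fresh interpreter, presumably so the
    # request runs free of eventlet's monkey-patched ssl/socket modules.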
    def _http_get(self, url: str, *, headers: Dict[str, str], timeout: int) -> Any:
        try:
            if _eventlet_tpool is not None:
                try:
                    return _eventlet_tpool.execute(requests.get, url, headers=headers, timeout=timeout)
                except Exception:
                    pass
            return requests.get(url, headers=headers, timeout=timeout)
        except Exception:
            return self._http_get_subprocess(url, headers=headers, timeout=timeout)

    def _http_get_subprocess(self, url: str, *, headers: Dict[str, str], timeout: int) -> Any:
        script = """
import base64
import json
import sys
import urllib.request

url = sys.argv[1]
headers = json.loads(sys.argv[2])
timeout = float(sys.argv[3])
req = urllib.request.Request(url, headers=headers, method="GET")
try:
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        body = resp.read()
        payload = {
            "status": resp.status,
            "headers": dict(resp.getheaders()),
            "body": base64.b64encode(body).decode("ascii"),
            "encoding": "base64",
        }
        sys.stdout.write(json.dumps(payload))
except Exception as exc:
    error_payload = {"error": str(exc)}
    sys.stdout.write(json.dumps(error_payload))
    sys.exit(1)
"""
        proc = subprocess.run(
            [sys.executable, "-c", script, url, json.dumps(headers), str(float(timeout))],
            capture_output=True,
            text=True,
        )
        output = proc.stdout.strip() or proc.stderr.strip()
        try:
            data = json.loads(output or "{}")
        except json.JSONDecodeError as exc:
            raise RuntimeError(f"GitHub subprocess returned invalid JSON: {output!r}") from exc

        if proc.returncode != 0:
            error_msg = data.get("error") if isinstance(data, dict) else output
            raise RuntimeError(f"GitHub subprocess request failed: {error_msg}")

        status_code = data.get("status")
        raw_headers = data.get("headers") or {}
        body_encoded = data.get("body") or ""
        encoding = data.get("encoding")
        if encoding == "base64":
            body_bytes = base64.b64decode(body_encoded.encode("ascii"))
        else:
            body_bytes = (body_encoded or "").encode("utf-8")

        class _SubprocessResponse:
            def __init__(self, status: int, headers: Dict[str, str], body: bytes):
                self.status_code = status
                self.headers = headers
                self._body = body
                self.text = body.decode("utf-8", errors="replace")

            def json(self) -> Any:
                if not self._body:
                    return {}
                return json.loads(self.text)

        if status_code is None:
            raise RuntimeError(f"GitHub subprocess returned no status code: {data}")

        return _SubprocessResponse(int(status_code), {str(k): str(v) for k, v in raw_headers.items()}, body_bytes)

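    # eventlet.patcher.original() returns the pre-monkey-patch version of a
    # stdlib module; the three helpers below fall back to whatever is
    # importable when eventlet is absent.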
    def _resolve_original_ssl_module(self):
        if _eventlet_patcher is not None:
            try:
                original = _eventlet_patcher.original("ssl")
                if original is not None:
                    return original
            except Exception:
                pass
        return ssl

    def _resolve_original_socket_module(self):
        if _eventlet_patcher is not None:
            try:
                original = _eventlet_patcher.original("socket")
                if original is not None:
                    return original
            except Exception:
                pass
        import socket as socket_module  # Local import for fallback

        return socket_module

    def _resolve_original_raw_socket_module(self):
        if _eventlet_patcher is not None:
            try:
                original = _eventlet_patcher.original("_socket")
                if original is not None:
                    return original
            except Exception:
                pass
        try:
            import _socket as raw_socket_module  # type: ignore

            return raw_socket_module
        except Exception:
            return self._resolve_original_socket_module()

    def _load_token_from_db(self, *, force_refresh: bool = False) -> Optional[str]:
        if force_refresh:
            with self._token_lock:
                self._token_cache["known"] = False
        conn: Optional[sqlite3.Connection] = None
        try:
            conn = self._db_conn_factory()
            cursor = conn.cursor()
            cursor.execute("SELECT token FROM github_token LIMIT 1")
            row = cursor.fetchone()
            if row and row[0]:
                candidate = str(row[0]).strip()
                return candidate or None
            return None
        except sqlite3.OperationalError:
            return None
        except Exception as exc:
            self._service_log("server", f"github token lookup failed: {exc}")
            return None
        finally:
            if conn is not None:
                try:
                    conn.close()
                except Exception:
                    pass