mirror of
https://github.com/bunny-lab-io/Borealis.git
synced 2025-10-26 17:41:58 -06:00
276 lines
9.8 KiB
Python
276 lines
9.8 KiB
Python
"""GitHub REST API integration with caching support."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import threading
|
|
import time
|
|
from pathlib import Path
|
|
from typing import Dict, Optional
|
|
|
|
from Data.Engine.domain.github import GitHubRepoRef, GitHubTokenStatus, RepoHeadSnapshot, GitHubRateLimit
|
|
|
|
try: # pragma: no cover - optional dependency guard
|
|
import requests
|
|
from requests import Response
|
|
except Exception: # pragma: no cover - fallback when requests is unavailable
|
|
requests = None # type: ignore[assignment]
|
|
Response = object # type: ignore[misc,assignment]
|
|
|
|
__all__ = ["GitHubArtifactProvider"]
|
|
|
|
|
|
class GitHubArtifactProvider:
    """Resolve repository heads and token metadata from the GitHub API.

    Branch-head lookups are cached in memory (guarded by a lock) and mirrored
    to ``cache_file`` as JSON, so a previously resolved SHA can still be
    returned when a fresh lookup fails.  The on-disk cache is loaded once at
    construction time.
    """

    # Bounds (in seconds) applied to the refresh interval and per-call TTLs.
    _MIN_INTERVAL_SECONDS = 30
    _MAX_INTERVAL_SECONDS = 3600

    _API_ROOT = "https://api.github.com"
    _USER_AGENT = "Borealis-Engine"

    def __init__(
        self,
        *,
        cache_file: Path,
        default_repo: str,
        default_branch: str,
        refresh_interval: int,
        logger: Optional[logging.Logger] = None,
    ) -> None:
        """Create a provider and load any cache already present on disk.

        Args:
            cache_file: JSON file used to persist resolved branch heads.
            default_repo: Repository used by ``refresh_default_repo_head``.
            default_branch: Branch used by ``refresh_default_repo_head``.
            refresh_interval: Desired refresh period in seconds; clamped to
                the range 30..3600.
            logger: Optional logger; a module-specific default is used when
                omitted.
        """
        self._cache_file = cache_file
        self._default_repo = default_repo
        self._default_branch = default_branch
        self._refresh_interval = self._clamp_interval(refresh_interval)
        self._log = logger or logging.getLogger("borealis.engine.integrations.github")
        self._token: Optional[str] = None
        # Guards every read/write of ``self._cache``.
        self._cache_lock = threading.Lock()
        # Serialises writers of the on-disk cache so that two threads never
        # interleave writes to the shared temporary file.
        self._persist_lock = threading.Lock()
        self._cache: Dict[str, Dict[str, float | str]] = {}
        self._worker: Optional[threading.Thread] = None
        self._hydrate_cache_from_disk()

    @classmethod
    def _clamp_interval(cls, seconds: int) -> int:
        """Clamp *seconds* into the supported interval/TTL range."""
        return max(cls._MIN_INTERVAL_SECONDS, min(seconds, cls._MAX_INTERVAL_SECONDS))

    def _request_headers(self, token: Optional[str]) -> Dict[str, str]:
        """Build the HTTP headers for a GitHub call, adding auth when *token* is set."""
        headers = {
            "Accept": "application/vnd.github+json",
            "User-Agent": self._USER_AGENT,
        }
        if token:
            headers["Authorization"] = f"Bearer {token}"
        return headers

    def set_token(self, token: Optional[str]) -> None:
        """Store the API token used for lookups; blank values clear it."""
        self._token = (token or "").strip() or None

    @property
    def default_repo(self) -> str:
        """Repository name supplied at construction time."""
        return self._default_repo

    @property
    def default_branch(self) -> str:
        """Branch name supplied at construction time."""
        return self._default_branch

    @property
    def refresh_interval(self) -> int:
        """Refresh interval in seconds after clamping."""
        return self._refresh_interval

    def fetch_repo_head(
        self,
        repo: GitHubRepoRef,
        *,
        ttl_seconds: int,
        force_refresh: bool = False,
    ) -> RepoHeadSnapshot:
        """Return the head commit SHA of ``repo.branch``.

        A cached SHA younger than the (clamped) TTL is returned directly unless
        ``force_refresh`` is set.  Otherwise the GitHub branches endpoint is
        queried; on success the cache is updated and persisted.  When the
        lookup fails, the cached SHA (if any) is returned with an ``error``
        describing the failure.

        Args:
            repo: Repository/branch reference; ``full_name`` and ``branch``
                are read from it.
            ttl_seconds: Maximum acceptable cache age, clamped to 30..3600.
            force_refresh: Skip the cache-freshness shortcut when true.

        Returns:
            A ``RepoHeadSnapshot`` whose ``source`` is ``"cache"``,
            ``"github"``, ``"cache-stale"`` or ``"unavailable"``.
        """
        key = f"{repo.full_name}:{repo.branch}"
        now = time.time()

        with self._cache_lock:
            cached_entry = self._cache.get(key, {}).copy()

        cached_sha = cached_entry.get("sha") if cached_entry else None  # type: ignore[assignment]
        cached_ts = cached_entry.get("timestamp") if cached_entry else None  # type: ignore[assignment]
        cached_age = None
        # The timestamp comes from a JSON file, so only trust numeric values.
        if isinstance(cached_ts, (int, float)):
            cached_age = max(0.0, now - float(cached_ts))

        ttl = self._clamp_interval(ttl_seconds)
        if cached_sha and not force_refresh and cached_age is not None and cached_age < ttl:
            return RepoHeadSnapshot(
                repository=repo,
                sha=str(cached_sha),
                cached=True,
                age_seconds=cached_age,
                source="cache",
                error=None,
            )

        if requests is None:
            return RepoHeadSnapshot(
                repository=repo,
                sha=str(cached_sha) if cached_sha else None,
                cached=bool(cached_sha),
                age_seconds=cached_age,
                source="unavailable",
                error="requests library not available",
            )

        headers = self._request_headers(self._token)
        url = f"{self._API_ROOT}/repos/{repo.full_name}/branches/{repo.branch}"
        error: Optional[str] = None
        sha: Optional[str] = None

        try:
            response: Response = requests.get(url, headers=headers, timeout=20)
            if response.status_code == 200:
                data = response.json()
                sha = (data.get("commit", {}).get("sha") or "").strip()  # type: ignore[assignment]
            else:
                error = f"GitHub REST API repo head lookup failed: HTTP {response.status_code} {response.text[:200]}"
        except Exception as exc:  # pragma: no cover - defensive logging
            # Network failures and malformed payloads both land here; the
            # caller still gets the cached value below.
            error = f"GitHub REST API repo head lookup raised: {exc}"

        if sha:
            payload = {"sha": sha, "timestamp": now}
            with self._cache_lock:
                self._cache[key] = payload
            # Persist outside the cache lock: _persist_cache takes it itself.
            self._persist_cache()
            return RepoHeadSnapshot(
                repository=repo,
                sha=sha,
                cached=False,
                age_seconds=0.0,
                source="github",
                error=None,
            )

        if error:
            self._log.warning("repo-head-lookup failure repo=%s branch=%s error=%s", repo.full_name, repo.branch, error)

        return RepoHeadSnapshot(
            repository=repo,
            sha=str(cached_sha) if cached_sha else None,
            cached=bool(cached_sha),
            age_seconds=cached_age,
            source="cache-stale" if cached_sha else "github",
            error=error or ("using cached value" if cached_sha else "unable to resolve repository head"),
        )

    def refresh_default_repo_head(self, *, force: bool = False) -> RepoHeadSnapshot:
        """Resolve the head of the default repo/branch using the refresh interval as TTL."""
        repo = GitHubRepoRef.parse(self._default_repo, self._default_branch)
        return self.fetch_repo_head(repo, ttl_seconds=self._refresh_interval, force_refresh=force)

    def verify_token(self, token: Optional[str]) -> GitHubTokenStatus:
        """Check *token* against the GitHub ``rate_limit`` endpoint.

        Returns a ``GitHubTokenStatus`` with status ``"missing"`` for a blank
        token, ``"unknown"`` when ``requests`` is unavailable, ``"error"``
        when the request raises or answers with a non-200 status, and
        ``"valid"`` on HTTP 200.  A 200 response whose body cannot be parsed
        is still reported as valid, with empty rate-limit figures.
        """
        token = (token or "").strip()
        if not token:
            return GitHubTokenStatus(
                has_token=False,
                valid=False,
                status="missing",
                message="API Token Not Configured",
                rate_limit=None,
                error=None,
            )

        if requests is None:
            return GitHubTokenStatus(
                has_token=True,
                valid=False,
                status="unknown",
                message="requests library not available",
                rate_limit=None,
                error="requests library not available",
            )

        headers = self._request_headers(token)
        try:
            response: Response = requests.get(f"{self._API_ROOT}/rate_limit", headers=headers, timeout=10)
        except Exception as exc:  # pragma: no cover - defensive logging
            message = f"GitHub token verification raised: {exc}"
            self._log.warning("github-token-verify error=%s", message)
            return GitHubTokenStatus(
                has_token=True,
                valid=False,
                status="error",
                message="API Token Invalid",
                rate_limit=None,
                error=message,
            )

        if response.status_code != 200:
            message = f"GitHub API error (HTTP {response.status_code})"
            self._log.warning("github-token-verify http_status=%s", response.status_code)
            return GitHubTokenStatus(
                has_token=True,
                valid=False,
                status="error",
                message="API Token Invalid",
                rate_limit=None,
                error=message,
            )

        # HTTP 200 means the token was accepted; a malformed body must not
        # turn that into an unhandled exception, so fall back to empty data.
        try:
            data = response.json()
        except ValueError as exc:
            self._log.warning("github-token-verify invalid-json error=%s", exc)
            data = {}

        # Validate each nesting level: the payload comes from the network.
        resources = data.get("resources") if isinstance(data, dict) else None
        core = resources.get("core") if isinstance(resources, dict) else None
        if not isinstance(core, dict):
            core = {}

        rate_limit = GitHubRateLimit(
            limit=_safe_int(core.get("limit")),
            remaining=_safe_int(core.get("remaining")),
            reset_epoch=_safe_int(core.get("reset")),
            used=_safe_int(core.get("used")),
        )

        message = "API Token Valid" if rate_limit.remaining is not None else "API Token Verified"
        return GitHubTokenStatus(
            has_token=True,
            valid=True,
            status="valid",
            message=message,
            rate_limit=rate_limit,
            error=None,
        )

    def start_background_refresh(self) -> None:
        """Start a daemon thread that force-refreshes the default repo head.

        Does nothing when a previously started worker is still alive.
        """
        if self._worker and self._worker.is_alive():  # pragma: no cover - guard
            return

        def _loop() -> None:
            interval = max(self._MIN_INTERVAL_SECONDS, self._refresh_interval)
            while True:
                try:
                    self.refresh_default_repo_head(force=True)
                except Exception as exc:  # pragma: no cover - defensive logging
                    # Keep the worker alive: one failed refresh must not end the loop.
                    self._log.warning("default-repo-refresh failure: %s", exc)
                time.sleep(interval)

        self._worker = threading.Thread(target=_loop, name="github-repo-refresh", daemon=True)
        self._worker.start()

    def _hydrate_cache_from_disk(self) -> None:
        """Load the JSON cache file, keeping only entries with ``sha`` and ``timestamp``.

        A missing file is not an error; any other failure is logged and the
        in-memory cache is left as it was.
        """
        path = self._cache_file
        try:
            if not path.exists():
                return
            data = json.loads(path.read_text(encoding="utf-8"))
            if isinstance(data, dict):
                with self._cache_lock:
                    self._cache = {
                        key: value
                        for key, value in data.items()
                        if isinstance(value, dict) and "sha" in value and "timestamp" in value
                    }
        except Exception as exc:  # pragma: no cover - defensive logging
            self._log.warning("failed to load repo cache: %s", exc)

    def _persist_cache(self) -> None:
        """Write the in-memory cache to ``cache_file`` via a temp file and rename.

        Must be called without ``_cache_lock`` held.  Failures are logged and
        swallowed so that persistence problems never break a lookup.
        """
        path = self._cache_file
        try:
            with self._persist_lock:
                # Snapshot under the cache lock so serialisation never iterates
                # a dict that another thread is mutating.  A shallow copy is
                # enough because entries are replaced wholesale, not edited.
                with self._cache_lock:
                    snapshot = dict(self._cache)
                payload = json.dumps(snapshot, ensure_ascii=False)
                path.parent.mkdir(parents=True, exist_ok=True)
                tmp = path.with_suffix(".tmp")
                tmp.write_text(payload, encoding="utf-8")
                tmp.replace(path)
        except Exception as exc:  # pragma: no cover - defensive logging
            self._log.warning("failed to persist repo cache: %s", exc)
|
|
|
|
|
|
def _safe_int(value: object) -> Optional[int]:
|
|
try:
|
|
return int(value)
|
|
except Exception:
|
|
return None
|
|
|