Borealis-Github-Replica/Data/Engine/integrations/github/artifact_provider.py

"""GitHub REST API integration with caching support."""
from __future__ import annotations
import json
import logging
import threading
import time
from pathlib import Path
from typing import Dict, Optional
from Data.Engine.domain.github import GitHubRepoRef, GitHubTokenStatus, RepoHeadSnapshot, GitHubRateLimit
try: # pragma: no cover - optional dependency guard
import requests
from requests import Response
except Exception: # pragma: no cover - fallback when requests is unavailable
requests = None # type: ignore[assignment]
Response = object # type: ignore[misc,assignment]
__all__ = ["GitHubArtifactProvider"]
class GitHubArtifactProvider:
    """Resolve repository heads and token metadata from the GitHub API."""

    def __init__(
        self,
        *,
        cache_file: Path,
        default_repo: str,
        default_branch: str,
        refresh_interval: int,
        logger: Optional[logging.Logger] = None,
    ) -> None:
        self._cache_file = cache_file
        self._default_repo = default_repo
        self._default_branch = default_branch
        # Clamp the refresh cadence to a 30 second - 1 hour window.
        self._refresh_interval = max(30, min(refresh_interval, 3600))
        self._log = logger or logging.getLogger("borealis.engine.integrations.github")
        self._token: Optional[str] = None
        self._cache_lock = threading.Lock()
        self._cache: Dict[str, Dict[str, float | str]] = {}
        self._worker: Optional[threading.Thread] = None
        self._hydrate_cache_from_disk()

    def set_token(self, token: Optional[str]) -> None:
        """Store the GitHub API token, treating blank strings as "no token"."""
        self._token = (token or "").strip() or None

    @property
    def default_repo(self) -> str:
        return self._default_repo

    @property
    def default_branch(self) -> str:
        return self._default_branch

    @property
    def refresh_interval(self) -> int:
        return self._refresh_interval

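    # fetch_repo_head below resolves the branch head via
    # GET https://api.github.com/repos/{owner}/{repo}/branches/{branch} and reads
    # the nested commit SHA. An abridged response looks roughly like this
    # (values illustrative):
    #
    #   {"name": "main", "commit": {"sha": "0123abcd...", ...}, ...}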
    def fetch_repo_head(
        self,
        repo: GitHubRepoRef,
        *,
        ttl_seconds: int,
        force_refresh: bool = False,
    ) -> RepoHeadSnapshot:
        key = f"{repo.full_name}:{repo.branch}"
        now = time.time()
        cached_entry = None
        with self._cache_lock:
            cached_entry = self._cache.get(key, {}).copy()
        cached_sha = (cached_entry.get("sha") if cached_entry else None)  # type: ignore[assignment]
        cached_ts = cached_entry.get("timestamp") if cached_entry else None  # type: ignore[assignment]
        cached_age = None
        if isinstance(cached_ts, (int, float)):
            cached_age = max(0.0, now - float(cached_ts))
        ttl = max(30, min(ttl_seconds, 3600))
        # Serve from the cache while the entry is younger than the clamped TTL.
        if cached_sha and not force_refresh and cached_age is not None and cached_age < ttl:
            return RepoHeadSnapshot(
                repository=repo,
                sha=str(cached_sha),
                cached=True,
                age_seconds=cached_age,
                source="cache",
                error=None,
            )
        # Without the requests dependency we can only report what is cached.
        if requests is None:
            return RepoHeadSnapshot(
                repository=repo,
                sha=str(cached_sha) if cached_sha else None,
                cached=bool(cached_sha),
                age_seconds=cached_age,
                source="unavailable",
                error="requests library not available",
            )
        headers = {
            "Accept": "application/vnd.github+json",
            "User-Agent": "Borealis-Engine",
        }
        if self._token:
            headers["Authorization"] = f"Bearer {self._token}"
        url = f"https://api.github.com/repos/{repo.full_name}/branches/{repo.branch}"
        error: Optional[str] = None
        sha: Optional[str] = None
        try:
            response: Response = requests.get(url, headers=headers, timeout=20)
            if response.status_code == 200:
                data = response.json()
                sha = (data.get("commit", {}).get("sha") or "").strip()  # type: ignore[assignment]
            else:
                error = (
                    "GitHub REST API repo head lookup failed: "
                    f"HTTP {response.status_code} {response.text[:200]}"
                )
        except Exception as exc:  # pragma: no cover - defensive logging
            error = f"GitHub REST API repo head lookup raised: {exc}"
        if sha:
            payload = {"sha": sha, "timestamp": now}
            with self._cache_lock:
                self._cache[key] = payload
                self._persist_cache()
            return RepoHeadSnapshot(
                repository=repo,
                sha=sha,
                cached=False,
                age_seconds=0.0,
                source="github",
                error=None,
            )
        if error:
            self._log.warning(
                "repo-head-lookup failure repo=%s branch=%s error=%s",
                repo.full_name,
                repo.branch,
                error,
            )
        # Fall back to any stale cached SHA when the lookup failed.
        return RepoHeadSnapshot(
            repository=repo,
            sha=str(cached_sha) if cached_sha else None,
            cached=bool(cached_sha),
            age_seconds=cached_age,
            source="cache-stale" if cached_sha else "github",
            error=error or ("using cached value" if cached_sha else "unable to resolve repository head"),
        )

    def refresh_default_repo_head(self, *, force: bool = False) -> RepoHeadSnapshot:
        repo = GitHubRepoRef.parse(self._default_repo, self._default_branch)
        return self.fetch_repo_head(repo, ttl_seconds=self._refresh_interval, force_refresh=force)

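    # verify_token below calls GET https://api.github.com/rate_limit and reads
    # the "core" resource. An abridged response looks roughly like this
    # (field values are illustrative, not taken from a real account):
    #
    #   {"resources": {"core": {"limit": 5000, "used": 12,
    #                           "remaining": 4988, "reset": 1735689600}}}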
    def verify_token(self, token: Optional[str]) -> GitHubTokenStatus:
        token = (token or "").strip()
        if not token:
            return GitHubTokenStatus(
                has_token=False,
                valid=False,
                status="missing",
                message="API Token Not Configured",
                rate_limit=None,
                error=None,
            )
        if requests is None:
            return GitHubTokenStatus(
                has_token=True,
                valid=False,
                status="unknown",
                message="requests library not available",
                rate_limit=None,
                error="requests library not available",
            )
        headers = {
            "Accept": "application/vnd.github+json",
            "Authorization": f"Bearer {token}",
            "User-Agent": "Borealis-Engine",
        }
        try:
            response: Response = requests.get(
                "https://api.github.com/rate_limit", headers=headers, timeout=10
            )
        except Exception as exc:  # pragma: no cover - defensive logging
            message = f"GitHub token verification raised: {exc}"
            self._log.warning("github-token-verify error=%s", message)
            return GitHubTokenStatus(
                has_token=True,
                valid=False,
                status="error",
                message="API Token Invalid",
                rate_limit=None,
                error=message,
            )
        if response.status_code != 200:
            message = f"GitHub API error (HTTP {response.status_code})"
            self._log.warning("github-token-verify http_status=%s", response.status_code)
            return GitHubTokenStatus(
                has_token=True,
                valid=False,
                status="error",
                message="API Token Invalid",
                rate_limit=None,
                error=message,
            )
        data = response.json()
        core = data.get("resources", {}).get("core", {}) if isinstance(data, dict) else {}
        rate_limit = GitHubRateLimit(
            limit=_safe_int(core.get("limit")),
            remaining=_safe_int(core.get("remaining")),
            reset_epoch=_safe_int(core.get("reset")),
            used=_safe_int(core.get("used")),
        )
        message = "API Token Valid" if rate_limit.remaining is not None else "API Token Verified"
        return GitHubTokenStatus(
            has_token=True,
            valid=True,
            status="valid",
            message=message,
            rate_limit=rate_limit,
            error=None,
        )

    def start_background_refresh(self) -> None:
        if self._worker and self._worker.is_alive():  # pragma: no cover - guard
            return

        def _loop() -> None:
            interval = max(30, self._refresh_interval)
            while True:
                try:
                    self.refresh_default_repo_head(force=True)
                except Exception as exc:  # pragma: no cover - defensive logging
                    self._log.warning("default-repo-refresh failure: %s", exc)
                time.sleep(interval)

        self._worker = threading.Thread(target=_loop, name="github-repo-refresh", daemon=True)
        self._worker.start()

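    # The on-disk cache handled by the two methods below mirrors self._cache:
    # a JSON object keyed by "<owner>/<repo>:<branch>" holding the resolved head
    # SHA and the Unix time it was fetched, e.g. (values illustrative):
    #
    #   {"example-org/example-repo:main": {"sha": "0123abcd...", "timestamp": 1735689600.0}}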
    def _hydrate_cache_from_disk(self) -> None:
        path = self._cache_file
        try:
            if not path.exists():
                return
            data = json.loads(path.read_text(encoding="utf-8"))
            if isinstance(data, dict):
                with self._cache_lock:
                    # Keep only well-formed entries that carry both a SHA and a timestamp.
                    self._cache = {
                        key: value
                        for key, value in data.items()
                        if isinstance(value, dict) and "sha" in value and "timestamp" in value
                    }
        except Exception as exc:  # pragma: no cover - defensive logging
            self._log.warning("failed to load repo cache: %s", exc)

    def _persist_cache(self) -> None:
        path = self._cache_file
        try:
            path.parent.mkdir(parents=True, exist_ok=True)
            payload = json.dumps(self._cache, ensure_ascii=False)
            # Write to a temporary file first so the cache file is replaced atomically.
            tmp = path.with_suffix(".tmp")
            tmp.write_text(payload, encoding="utf-8")
            tmp.replace(path)
        except Exception as exc:  # pragma: no cover - defensive logging
            self._log.warning("failed to persist repo cache: %s", exc)


def _safe_int(value: object) -> Optional[int]:
    """Best-effort conversion to int, returning None when the value is unusable."""
    try:
        return int(value)
    except Exception:
        return None
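

# Illustrative usage sketch (not part of the original module). The cache path,
# repository, branch, and refresh interval below are placeholder values chosen
# for the example; substitute whatever your deployment actually uses.
if __name__ == "__main__":  # pragma: no cover - manual smoke test
    provider = GitHubArtifactProvider(
        cache_file=Path("/tmp/borealis-github-cache.json"),  # hypothetical location
        default_repo="example-org/example-repo",             # hypothetical repository
        default_branch="main",
        refresh_interval=300,
    )
    provider.set_token(None)  # or a real personal access token string
    print(provider.verify_token(None))
    print(provider.refresh_default_repo_head(force=True))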