Addressed Agent Context GUID mismatches

This commit is contained in:
2025-10-19 19:00:22 -06:00
parent 4f4929e5cc
commit 7700865bf7

View File

@@ -360,6 +360,21 @@ class _CrossProcessFileLock:
pass pass
_GUID_FILE_LOCK: Optional[_CrossProcessFileLock] = None
@contextlib.contextmanager
def _acquire_guid_lock(*, timeout: float = 60.0):
    """Hold the shared agent-GUID file lock for the duration of a ``with`` block.

    The module-level ``_GUID_FILE_LOCK`` is created on first use, backed by
    ``agent_guid.lock`` inside the directory returned by ``_settings_dir()``,
    and reused by every later call.

    Args:
        timeout: Forwarded to ``_CrossProcessFileLock.acquire``. What happens
            when it elapses is decided by that method, which is not shown here.

    Yields:
        None. The lock is released when the ``with`` block exits, including
        when it exits through an exception.
    """
    global _GUID_FILE_LOCK
    if _GUID_FILE_LOCK is None:
        lock_path = os.path.join(_settings_dir(), 'agent_guid.lock')
        _GUID_FILE_LOCK = _CrossProcessFileLock(lock_path)
    # Acquire before entering the try block so that an acquire() that raises
    # never reaches release().
    _GUID_FILE_LOCK.acquire(timeout=timeout)
    try:
        yield
    finally:
        _GUID_FILE_LOCK.release()
_ENROLLMENT_FILE_LOCK: Optional[_CrossProcessFileLock] = None _ENROLLMENT_FILE_LOCK: Optional[_CrossProcessFileLock] = None
@@ -387,11 +402,17 @@ def _key_store() -> AgentKeyStore:
def _persist_agent_guid_local(guid: str): def _persist_agent_guid_local(guid: str):
guid = _normalize_agent_guid(guid) _persist_agent_guid_local_internal(guid, assume_locked=False)
if not guid:
def _persist_agent_guid_local_internal(guid: str, *, assume_locked: bool) -> None:
normalized = _normalize_agent_guid(guid)
if not normalized:
return return
def _write():
try: try:
_key_store().save_guid(guid) _key_store().save_guid(normalized)
except Exception as exc: except Exception as exc:
_log_agent(f'Unable to persist guid via key store: {exc}', fname='agent.error.log') _log_agent(f'Unable to persist guid via key store: {exc}', fname='agent.error.log')
path = _agent_guid_path() path = _agent_guid_path()
@@ -406,12 +427,36 @@ def _persist_agent_guid_local(guid: str):
existing = fh.read().strip() existing = fh.read().strip()
except Exception: except Exception:
existing = '' existing = ''
if existing != guid: if existing != normalized:
with open(path, 'w', encoding='utf-8') as fh: with open(path, 'w', encoding='utf-8') as fh:
fh.write(guid) fh.write(normalized)
except Exception as exc: except Exception as exc:
_log_agent(f'Failed to persist agent GUID locally: {exc}', fname='agent.error.log') _log_agent(f'Failed to persist agent GUID locally: {exc}', fname='agent.error.log')
legacy_paths: List[str] = []
try:
root = _find_project_root()
legacy_paths.extend(
[
os.path.join(root, 'Agent', 'Borealis', 'agent_GUID'),
os.path.join(root, 'Agent', 'Settings', 'agent_GUID'),
]
)
except Exception:
pass
for legacy in legacy_paths:
try:
if legacy and os.path.isfile(legacy) and os.path.abspath(legacy) != os.path.abspath(path):
os.remove(legacy)
except Exception:
pass
if assume_locked:
_write()
else:
with _acquire_guid_lock():
_write()
if not SYSTEM_SERVICE_MODE: if not SYSTEM_SERVICE_MODE:
# Reduce noisy Qt output and attempt to avoid Windows OleInitialize warnings # Reduce noisy Qt output and attempt to avoid Windows OleInitialize warnings
os.environ.setdefault("QT_LOGGING_RULES", "qt.qpa.*=false;*.debug=false") os.environ.setdefault("QT_LOGGING_RULES", "qt.qpa.*=false;*.debug=false")
@@ -834,6 +879,7 @@ class AgentHttpClient:
self._cached_ssl_context: Optional[ssl.SSLContext] = None self._cached_ssl_context: Optional[ssl.SSLContext] = None
self._socketio_http_session = None self._socketio_http_session = None
self._socketio_session_mode: Optional[Tuple[str, Optional[str]]] = None self._socketio_session_mode: Optional[Tuple[str, Optional[str]]] = None
self._last_reload_state: Optional[Tuple[Optional[str], bool, bool, Optional[int]]] = None
self.refresh_base_url() self.refresh_base_url()
self._configure_verify() self._configure_verify()
self._reload_tokens_from_disk() self._reload_tokens_from_disk()
@@ -878,6 +924,7 @@ class AgentHttpClient:
self.key_store.save_guid(normalized_guid) self.key_store.save_guid(normalized_guid)
except Exception: except Exception:
pass pass
prev_state = self._last_reload_state
self.guid = normalized_guid or None self.guid = normalized_guid or None
self.access_token = access_token if access_token else None self.access_token = access_token if access_token else None
self.refresh_token = refresh_token if refresh_token else None self.refresh_token = refresh_token if refresh_token else None
@@ -886,6 +933,16 @@ class AgentHttpClient:
self.session.headers.update({"Authorization": f"Bearer {self.access_token}"}) self.session.headers.update({"Authorization": f"Bearer {self.access_token}"})
else: else:
self.session.headers.pop("Authorization", None) self.session.headers.pop("Authorization", None)
if self.guid:
desired = _compose_agent_id(socket.gethostname(), self.guid, _get_context_label())
existing = (CONFIG.data.get('agent_id') or '').strip()
if desired and existing != desired:
try:
_update_agent_id_for_guid(self.guid)
except Exception:
pass
state = (self.guid, bool(self.access_token), bool(self.refresh_token), self.access_expires_at)
if state != prev_state:
try: try:
_log_agent( _log_agent(
"Reloaded tokens from disk " "Reloaded tokens from disk "
@@ -897,6 +954,7 @@ class AgentHttpClient:
) )
except Exception: except Exception:
pass pass
self._last_reload_state = state
def auth_headers(self) -> Dict[str, str]: def auth_headers(self) -> Dict[str, str]:
headers: Dict[str, str] = {} headers: Dict[str, str] = {}
@@ -1181,6 +1239,7 @@ class AgentHttpClient:
self._ensure_authenticated_locked() self._ensure_authenticated_locked()
def _ensure_authenticated_locked(self) -> None: def _ensure_authenticated_locked(self) -> None:
self._reload_tokens_from_disk()
self.refresh_base_url() self.refresh_base_url()
if not self.guid or not self.refresh_token: if not self.guid or not self.refresh_token:
self._perform_enrollment_locked() self._perform_enrollment_locked()
@@ -1447,6 +1506,18 @@ class AgentHttpClient:
) )
if resp.status_code in (401, 403): if resp.status_code in (401, 403):
error_code, snippet = self._error_details(resp) error_code, snippet = self._error_details(resp)
if resp.status_code == 403 and error_code == 'guid_mismatch':
try:
_log_agent(
"Refresh token request saw guid mismatch; reloading credentials from disk",
fname="agent.log",
)
except Exception:
pass
self._reload_tokens_from_disk()
if self.access_token:
self.session.headers.update({"Authorization": f"Bearer {self.access_token}"})
return
if resp.status_code == 401 and self._should_retry_auth(resp.status_code, error_code): if resp.status_code == 401 and self._should_retry_auth(resp.status_code, error_code):
_log_agent( _log_agent(
"Refresh token rejected; attempting re-enrollment" "Refresh token rejected; attempting re-enrollment"
@@ -1585,19 +1656,29 @@ class AgentHttpClient:
# HTTP helpers # HTTP helpers
# ------------------------------------------------------------------ # ------------------------------------------------------------------
def post_json(self, path: str, payload: Optional[Dict[str, Any]] = None, *, require_auth: bool = True) -> Any: def post_json(self, path: str, payload: Optional[Dict[str, Any]] = None, *, require_auth: bool = True) -> Any:
attempt = 0
max_attempts = 3
while True:
if require_auth: if require_auth:
self.ensure_authenticated() self.ensure_authenticated()
url = f"{self.base_url}{path}" url = f"{self.base_url}{path}"
headers = self.auth_headers() headers = self.auth_headers()
response = self.session.post(url, json=payload, headers=headers, timeout=30) response = self.session.post(url, json=payload, headers=headers, timeout=30)
if response.status_code in (401, 403) and require_auth: if require_auth and response.status_code in (401, 403):
error_code, snippet = self._error_details(response) error_code, snippet = self._error_details(response)
if self._should_retry_auth(response.status_code, error_code): if response.status_code == 403 and error_code == 'guid_mismatch' and attempt < max_attempts:
attempt += 1
self._reload_tokens_from_disk()
if self.guid:
try:
_update_agent_id_for_guid(self.guid)
except Exception:
pass
continue
if self._should_retry_auth(response.status_code, error_code) and attempt < max_attempts:
self.clear_tokens() self.clear_tokens()
self.ensure_authenticated() attempt += 1
headers = self.auth_headers() continue
response = self.session.post(url, json=payload, headers=headers, timeout=30)
else:
_log_agent( _log_agent(
"Authenticated request rejected " "Authenticated request rejected "
f"path={path} status={response.status_code} error={error_code or '<unknown>'}" f"path={path} status={response.status_code} error={error_code or '<unknown>'}"
@@ -1675,26 +1756,51 @@ def _normalize_agent_guid(guid: str) -> str:
def _read_agent_guid_from_disk() -> str: def _read_agent_guid_from_disk() -> str:
try: try:
ks_guid = _key_store().load_guid()
if ks_guid:
return _normalize_agent_guid(ks_guid)
path = _agent_guid_path() path = _agent_guid_path()
if os.path.isfile(path): candidates = [path]
with open(path, 'r', encoding='utf-8') as fh: try:
root = _find_project_root()
legacy_candidates = [
os.path.join(root, 'Agent', 'Borealis', 'agent_GUID'),
os.path.join(root, 'Agent', 'Settings', 'agent_GUID'),
]
for candidate in legacy_candidates:
if candidate not in candidates:
candidates.append(candidate)
except Exception:
pass
for candidate in candidates:
if os.path.isfile(candidate):
try:
with open(candidate, 'r', encoding='utf-8') as fh:
value = fh.read() value = fh.read()
return _normalize_agent_guid(value) except Exception:
value = ''
guid = _normalize_agent_guid(value)
if guid:
return guid
except Exception: except Exception:
pass pass
return '' return ''
def _ensure_agent_guid() -> str: def _ensure_agent_guid() -> str:
with _acquire_guid_lock():
guid = _read_agent_guid_from_disk() guid = _read_agent_guid_from_disk()
if guid: if not guid:
try:
ks_guid = _key_store().load_guid()
except Exception:
ks_guid = None
guid = _normalize_agent_guid(ks_guid or '')
if not guid:
guid = str(uuid.uuid4()).upper()
_persist_agent_guid_local_internal(guid, assume_locked=True)
try:
_update_agent_id_for_guid(guid)
except Exception:
pass
return guid return guid
new_guid = str(uuid.uuid4()).upper()
_persist_agent_guid_local(new_guid)
return new_guid
def _compose_agent_id(hostname: str, guid: str, context: str) -> str: def _compose_agent_id(hostname: str, guid: str, context: str) -> str: