Handle missing devices and relax agent auth retries

This commit is contained in:
2025-10-18 02:52:15 -06:00
parent ebf0fe9236
commit 775d365512
3 changed files with 173 additions and 20 deletions

View File

@@ -23,7 +23,7 @@ import ssl
import threading import threading
import contextlib import contextlib
import errno import errno
from typing import Any, Dict, Optional, List, Callable from typing import Any, Dict, Optional, List, Callable, Tuple
import requests import requests
try: try:
@@ -1007,10 +1007,22 @@ class AgentHttpClient:
timeout=20, timeout=20,
) )
if resp.status_code in (401, 403): if resp.status_code in (401, 403):
_log_agent("Refresh token rejected; re-enrolling", fname="agent.error.log") error_code, snippet = self._error_details(resp)
self._clear_tokens_locked() if resp.status_code == 401 and self._should_retry_auth(resp.status_code, error_code):
self._perform_enrollment_locked() _log_agent(
return "Refresh token rejected; attempting re-enrollment"
f" error={error_code or '<unknown>'}",
fname="agent.error.log",
)
self._clear_tokens_locked()
self._perform_enrollment_locked()
return
_log_agent(
"Refresh token request forbidden "
f"status={resp.status_code} error={error_code or '<unknown>'}"
f" body_snippet={snippet}",
fname="agent.error.log",
)
resp.raise_for_status() resp.raise_for_status()
data = resp.json() data = resp.json()
access_token = data.get("access_token") access_token = data.get("access_token")
@@ -1036,6 +1048,33 @@ class AgentHttpClient:
self.guid = self.key_store.load_guid() self.guid = self.key_store.load_guid()
self.session.headers.pop("Authorization", None) self.session.headers.pop("Authorization", None)
def _error_details(self, response: requests.Response) -> Tuple[Optional[str], str]:
    """Extract a short error code and a body preview from a failed response.

    Returns a ``(error_code, snippet)`` pair. ``snippet`` is the first 256
    characters of the response text, or ``"<unavailable>"`` when the text
    cannot be read. ``error_code`` is the first non-blank string found under
    the ``error``, ``code`` or ``status`` keys of a JSON object body (with
    surrounding whitespace removed), or ``None`` when there is no such value.
    """
    # Both lookups are best-effort: this runs on an error path and must not
    # raise while the caller is trying to report the failure.
    try:
        body_preview = response.text[:256]
    except Exception:
        body_preview = "<unavailable>"

    try:
        payload = response.json()
    except Exception:
        payload = None

    if not isinstance(payload, dict):
        return None, body_preview

    # Probe the keys in priority order and stop at the first usable string.
    candidates = (payload.get(field) for field in ("error", "code", "status"))
    code = next(
        (item.strip() for item in candidates if isinstance(item, str) and item.strip()),
        None,
    )
    return code, body_preview
def _should_retry_auth(self, status_code: int, error_code: Optional[str]) -> bool:
    """Decide whether an auth failure should trigger a token reset and retry.

    Any 401 is retryable. A 403 is retryable only when the server reported
    the ``fingerprint_mismatch`` error code. Every other combination is not.
    """
    if status_code == 403:
        # Only this specific forbidden reason is treated as recoverable.
        return error_code in frozenset({"fingerprint_mismatch"})
    return status_code == 401
def _resolve_installer_code(self) -> str: def _resolve_installer_code(self) -> str:
if INSTALLER_CODE_OVERRIDE: if INSTALLER_CODE_OVERRIDE:
return INSTALLER_CODE_OVERRIDE return INSTALLER_CODE_OVERRIDE
@@ -1068,20 +1107,19 @@ class AgentHttpClient:
headers = self.auth_headers() headers = self.auth_headers()
response = self.session.post(url, json=payload, headers=headers, timeout=30) response = self.session.post(url, json=payload, headers=headers, timeout=30)
if response.status_code in (401, 403) and require_auth: if response.status_code in (401, 403) and require_auth:
snippet = "" error_code, snippet = self._error_details(response)
try: if self._should_retry_auth(response.status_code, error_code):
snippet = response.text[:256] self.clear_tokens()
except Exception: self.ensure_authenticated()
snippet = "<unavailable>" headers = self.auth_headers()
_log_agent( response = self.session.post(url, json=payload, headers=headers, timeout=30)
"Authenticated request rejected " else:
f"path={path} status={response.status_code} body_snippet={snippet}", _log_agent(
fname="agent.error.log", "Authenticated request rejected "
) f"path={path} status={response.status_code} error={error_code or '<unknown>'}"
self.clear_tokens() f" body_snippet={snippet}",
self.ensure_authenticated() fname="agent.error.log",
headers = self.auth_headers() )
response = self.session.post(url, json=payload, headers=headers, timeout=30)
response.raise_for_status() response.raise_for_status()
if response.headers.get("Content-Type", "").lower().startswith("application/json"): if response.headers.get("Content-Type", "").lower().startswith("application/json"):
return response.json() return response.json()

View File

@@ -1,7 +1,10 @@
from __future__ import annotations from __future__ import annotations
import functools import functools
import sqlite3
import time
from dataclasses import dataclass from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any, Callable, Dict, Optional from typing import Any, Callable, Dict, Optional
import jwt import jwt
@@ -98,6 +101,9 @@ class DeviceAuthManager:
(guid,), (guid,),
) )
row = cur.fetchone() row = cur.fetchone()
if not row:
row = self._recover_device_record(conn, guid, fingerprint, token_version)
finally: finally:
conn.close() conn.close()
@@ -147,6 +153,102 @@ class DeviceAuthManager:
) )
return ctx return ctx
def _recover_device_record(
    self,
    conn: sqlite3.Connection,
    guid: str,
    fingerprint: str,
    token_version: int,
) -> Optional[tuple]:
    """Attempt to recreate a missing device row for an authenticated token.

    Inserts an ``active`` row into ``devices`` for ``guid`` using a
    placeholder hostname (``RECOVERED-<first 12 guid chars, upper-cased>``,
    with ``-1`` .. ``-5`` suffixes tried on hostname collisions), then
    re-reads and returns it.

    Args:
        conn: Open SQLite connection; this method commits or rolls back on it.
        guid: Device identifier taken from the token. Blank values abort.
        fingerprint: Key fingerprint to store. Blank values abort.
        token_version: Version to store; values below 1 (or falsy) become 1.

    Returns:
        The ``(guid, ssl_key_fingerprint, token_version, status)`` row, or
        ``None`` when recovery is not possible.

    NOTE(review): this creates an active device from token claims alone;
    presumably the caller has already verified the token - confirm.
    """
    guid = (guid or "").strip()
    fingerprint = (fingerprint or "").strip()
    if not guid or not fingerprint:
        return None

    cur = conn.cursor()

    def fetch_row() -> Optional[tuple]:
        """Read back the auth-relevant columns for ``guid``."""
        cur.execute(
            """
            SELECT guid, ssl_key_fingerprint, token_version, status
              FROM devices
             WHERE guid = ?
            """,
            (guid,),
        )
        return cur.fetchone()

    now_ts = int(time.time())
    now_iso = datetime.now(tz=timezone.utc).isoformat()
    base_hostname = f"RECOVERED-{guid[:12].upper()}"

    for attempt in range(6):
        hostname = base_hostname if attempt == 0 else f"{base_hostname}-{attempt}"
        try:
            cur.execute(
                """
                INSERT INTO devices (
                    guid,
                    hostname,
                    created_at,
                    last_seen,
                    ssl_key_fingerprint,
                    token_version,
                    status,
                    key_added_at
                )
                VALUES (?, ?, ?, ?, ?, ?, 'active', ?)
                """,
                (
                    guid,
                    hostname,
                    now_ts,
                    now_ts,
                    fingerprint,
                    max(token_version or 1, 1),
                    now_iso,
                ),
            )
        except sqlite3.IntegrityError as exc:
            message = str(exc).lower()
            if "hostname" in message and "unique" in message:
                # Hostname collision: try again with a suffixed placeholder.
                continue
            conn.rollback()
            # Another request may have inserted this guid between the
            # caller's lookup and our INSERT. If the row exists now, hand it
            # back so the caller validates it like any other device row.
            existing = fetch_row()
            if existing:
                return existing
            self._log(
                "server",
                f"device auth failed to recover guid={guid} due to integrity error: {exc}",
            )
            return None
        except Exception as exc:  # pragma: no cover - defensive logging
            self._log(
                "server",
                f"device auth unexpected error recovering guid={guid}: {exc}",
            )
            conn.rollback()
            return None
        else:
            conn.commit()
            break
    else:
        # Exhausted attempts because of hostname collisions.
        self._log(
            "server",
            f"device auth could not recover guid={guid}; hostname collisions persisted",
        )
        conn.rollback()
        return None

    row = fetch_row()
    if not row:
        self._log(
            "server",
            f"device auth recovery for guid={guid} committed but row still missing",
        )
    return row
def require_device_auth(manager: DeviceAuthManager): def require_device_auth(manager: DeviceAuthManager):
def decorator(func): def decorator(func):

View File

@@ -93,7 +93,20 @@ def register(
except DPoPVerificationError: except DPoPVerificationError:
return jsonify({"error": "dpop_invalid"}), 400 return jsonify({"error": "dpop_invalid"}), 400
elif stored_jkt: elif stored_jkt:
return jsonify({"error": "dpop_required"}), 400 # The agent does not yet emit DPoP proofs; allow recovery by clearing
# the stored binding so refreshes can succeed. This preserves
# backward compatibility while the client gains full DPoP support.
try:
app.logger.warning(
"Clearing stored DPoP binding for guid=%s due to missing proof",
guid,
)
except Exception:
pass
cur.execute(
"UPDATE refresh_tokens SET dpop_jkt = NULL WHERE id = ?",
(record_id,),
)
new_access_token = jwt_service.issue_access_token( new_access_token = jwt_service.issue_access_token(
guid, guid,