Agent Reverse Tunneling - Engine Tunnel Service Implementation

This commit is contained in:
2025-12-01 01:40:23 -07:00
parent 33b6351c78
commit db8dd423f6
12 changed files with 1638 additions and 13 deletions

View File

@@ -27,6 +27,7 @@ pywinauto # Windows-based Macro Automation Library
sounddevice
numpy
pywin32; platform_system == "Windows"
pywinpty; platform_system == "Windows" # ConPTY bridge for reverse tunnel PowerShell sessions
# Ansible Libraries
ansible-core

View File

@@ -77,6 +77,13 @@ LOG_ROOT = PROJECT_ROOT / "Engine" / "Logs"
LOG_FILE_PATH = LOG_ROOT / "engine.log"
ERROR_LOG_FILE_PATH = LOG_ROOT / "error.log"
API_LOG_FILE_PATH = LOG_ROOT / "api.log"
REVERSE_TUNNEL_LOG_FILE_PATH = LOG_ROOT / "reverse_tunnel.log"
DEFAULT_TUNNEL_FIXED_PORT = 8443
DEFAULT_TUNNEL_PORT_RANGE = (30000, 40000)
DEFAULT_TUNNEL_IDLE_TIMEOUT_SECONDS = 3600
DEFAULT_TUNNEL_GRACE_TIMEOUT_SECONDS = 3600
DEFAULT_TUNNEL_HEARTBEAT_INTERVAL_SECONDS = 20
def _ensure_parent(path: Path) -> None:
@@ -140,6 +147,71 @@ def _parse_bool(raw: Any, *, default: bool = False) -> bool:
return default
def _parse_int(
raw: Any,
*,
default: int,
minimum: Optional[int] = None,
maximum: Optional[int] = None,
) -> int:
try:
value = int(raw)
except Exception:
return default
if minimum is not None and value < minimum:
return default
if maximum is not None and value > maximum:
return default
return value
def _parse_port_range(
raw: Any,
*,
default: Tuple[int, int],
) -> Tuple[int, int]:
if raw is None:
return default
start, end = default
candidate: Optional[Tuple[int, int]] = None
def _clamp_pair(values: Tuple[int, int]) -> Tuple[int, int]:
low, high = values
if low < 1 or high > 65535 or low > high:
return default
return low, high
if isinstance(raw, str):
separators = ("-", ":", ",")
for separator in separators:
if separator in raw:
parts = [part.strip() for part in raw.split(separator)]
break
else:
parts = [raw.strip()]
try:
if len(parts) == 2:
candidate = (int(parts[0]), int(parts[1]))
elif len(parts) == 1 and parts[0]:
port = int(parts[0])
candidate = (port, port)
except Exception:
candidate = None
elif isinstance(raw, Sequence):
try:
values = [int(part) for part in raw]
except Exception:
values = []
if len(values) >= 2:
candidate = (values[0], values[1])
if candidate is None:
return default
return _clamp_pair(candidate)
def _discover_tls_material(config: Mapping[str, Any]) -> Sequence[Optional[str]]:
cert_path = config.get("TLS_CERT_PATH") or os.environ.get("BOREALIS_TLS_CERT") or None
key_path = config.get("TLS_KEY_PATH") or os.environ.get("BOREALIS_TLS_KEY") or None
@@ -183,6 +255,12 @@ class EngineSettings:
error_log_file: str
api_log_file: str
api_groups: Tuple[str, ...]
reverse_tunnel_fixed_port: int
reverse_tunnel_port_range: Tuple[int, int]
reverse_tunnel_idle_timeout_seconds: int
reverse_tunnel_grace_timeout_seconds: int
reverse_tunnel_heartbeat_seconds: int
reverse_tunnel_log_file: str
raw: MutableMapping[str, Any] = field(default_factory=dict)
def to_flask_config(self) -> MutableMapping[str, Any]:
@@ -279,6 +357,11 @@ def load_runtime_config(overrides: Optional[Mapping[str, Any]] = None) -> Engine
api_log_file = str(runtime_config.get("API_LOG_FILE") or API_LOG_FILE_PATH)
_ensure_parent(Path(api_log_file))
reverse_tunnel_log_file = str(
runtime_config.get("REVERSE_TUNNEL_LOG_FILE") or REVERSE_TUNNEL_LOG_FILE_PATH
)
_ensure_parent(Path(reverse_tunnel_log_file))
api_groups = _parse_api_groups(
runtime_config.get("API_GROUPS") or os.environ.get("BOREALIS_API_GROUPS")
)
@@ -294,6 +377,35 @@ def load_runtime_config(overrides: Optional[Mapping[str, Any]] = None) -> Engine
"scheduled_jobs",
)
tunnel_fixed_port = _parse_int(
runtime_config.get("TUNNEL_FIXED_PORT") or os.environ.get("BOREALIS_TUNNEL_FIXED_PORT"),
default=DEFAULT_TUNNEL_FIXED_PORT,
minimum=1,
maximum=65535,
)
tunnel_port_range = _parse_port_range(
runtime_config.get("TUNNEL_PORT_RANGE") or os.environ.get("BOREALIS_TUNNEL_PORT_RANGE"),
default=DEFAULT_TUNNEL_PORT_RANGE,
)
tunnel_idle_timeout_seconds = _parse_int(
runtime_config.get("TUNNEL_IDLE_TIMEOUT_SECONDS")
or os.environ.get("BOREALIS_TUNNEL_IDLE_TIMEOUT_SECONDS"),
default=DEFAULT_TUNNEL_IDLE_TIMEOUT_SECONDS,
minimum=60,
)
tunnel_grace_timeout_seconds = _parse_int(
runtime_config.get("TUNNEL_GRACE_TIMEOUT_SECONDS")
or os.environ.get("BOREALIS_TUNNEL_GRACE_TIMEOUT_SECONDS"),
default=DEFAULT_TUNNEL_GRACE_TIMEOUT_SECONDS,
minimum=60,
)
tunnel_heartbeat_seconds = _parse_int(
runtime_config.get("TUNNEL_HEARTBEAT_SECONDS")
or os.environ.get("BOREALIS_TUNNEL_HEARTBEAT_SECONDS"),
default=DEFAULT_TUNNEL_HEARTBEAT_INTERVAL_SECONDS,
minimum=5,
)
settings = EngineSettings(
database_path=database_path,
static_folder=static_folder,
@@ -309,6 +421,12 @@ def load_runtime_config(overrides: Optional[Mapping[str, Any]] = None) -> Engine
error_log_file=str(error_log_file),
api_log_file=str(api_log_file),
api_groups=api_groups,
reverse_tunnel_fixed_port=tunnel_fixed_port,
reverse_tunnel_port_range=tunnel_port_range,
reverse_tunnel_idle_timeout_seconds=tunnel_idle_timeout_seconds,
reverse_tunnel_grace_timeout_seconds=tunnel_grace_timeout_seconds,
reverse_tunnel_heartbeat_seconds=tunnel_heartbeat_seconds,
reverse_tunnel_log_file=reverse_tunnel_log_file,
raw=runtime_config,
)
return settings

View File

@@ -9,3 +9,4 @@ pyotp
qrcode
Pillow
requests
websockets

View File

@@ -118,6 +118,12 @@ class EngineContext:
config: Mapping[str, Any]
api_groups: Sequence[str]
api_log_path: str
reverse_tunnel_fixed_port: int
reverse_tunnel_port_range: Tuple[int, int]
reverse_tunnel_idle_timeout_seconds: int
reverse_tunnel_grace_timeout_seconds: int
reverse_tunnel_heartbeat_seconds: int
reverse_tunnel_log_path: str
assembly_cache: Optional[Any] = None
@@ -136,6 +142,12 @@ def _build_engine_context(settings: EngineSettings, logger: logging.Logger) -> E
config=settings.as_dict(),
api_groups=settings.api_groups,
api_log_path=settings.api_log_file,
reverse_tunnel_fixed_port=settings.reverse_tunnel_fixed_port,
reverse_tunnel_port_range=settings.reverse_tunnel_port_range,
reverse_tunnel_idle_timeout_seconds=settings.reverse_tunnel_idle_timeout_seconds,
reverse_tunnel_grace_timeout_seconds=settings.reverse_tunnel_grace_timeout_seconds,
reverse_tunnel_heartbeat_seconds=settings.reverse_tunnel_heartbeat_seconds,
reverse_tunnel_log_path=settings.reverse_tunnel_log_file,
assembly_cache=None,
)

View File

@@ -32,6 +32,7 @@ from ...integrations import GitHubIntegration
from ..auth import DevModeManager
from .enrollment import routes as enrollment_routes
from .tokens import routes as token_routes
from .devices.tunnel import register_tunnel
from ...server import EngineContext
from .access_management.login import register_auth
@@ -285,6 +286,7 @@ def _register_devices(app: Flask, adapters: EngineServiceAdapters) -> None:
register_management(app, adapters)
register_admin_endpoints(app, adapters)
device_routes.register_agents(app, adapters)
register_tunnel(app, adapters)
def _register_filters(app: Flask, adapters: EngineServiceAdapters) -> None:
filters_management.register_filters(app, adapters)

View File

@@ -0,0 +1,138 @@
# ======================================================
# Data\Engine\services\API\devices\tunnel.py
# Description: Negotiation endpoint for reverse tunnel leases (operator-initiated; dormant until tunnel listener is wired).
#
# API Endpoints (if applicable):
# - POST /api/tunnel/request (Token Authenticated) - Allocates a reverse tunnel lease for the requested agent/protocol.
# ======================================================
"""Reverse tunnel negotiation API (Engine side)."""
from __future__ import annotations
import os
from typing import Any, Dict, Optional, Tuple
from flask import Blueprint, jsonify, request, session
from itsdangerous import BadSignature, SignatureExpired, URLSafeTimedSerializer
from ...WebSocket.Agent.ReverseTunnel import ReverseTunnelService
if False: # pragma: no cover - import cycle hint for type checkers
from .. import EngineServiceAdapters
def _current_user(app) -> Optional[Dict[str, str]]:
    """Return the operator identity for the current request, or ``None``.

    Lookup order:
      1. ``username``/``role`` stored in the Flask session.
      2. A signed token taken from an ``Authorization: Bearer`` header,
         falling back to the ``borealis_auth`` cookie.

    The returned mapping has ``username`` and ``role`` keys; ``role``
    defaults to ``"User"`` when absent.
    """
    session_username = session.get("username")
    session_role = session.get("role") or "User"
    if session_username:
        return {"username": session_username, "role": session_role}

    header_value = request.headers.get("Authorization") or ""
    bearer = None
    if header_value.lower().startswith("bearer "):
        bearer = header_value.split(" ", 1)[1].strip()
    # The cookie is only consulted when no usable bearer token was supplied.
    token = bearer or request.cookies.get("borealis_auth")
    if not token:
        return None

    try:
        # NOTE(review): a fixed fallback secret is used when app.secret_key is
        # unset - confirm this can never be the case outside development.
        signing_key = app.secret_key or "borealis-dev-secret"
        serializer = URLSafeTimedSerializer(signing_key, salt="borealis-auth")
        max_age = int(os.environ.get("BOREALIS_TOKEN_TTL_SECONDS", 60 * 60 * 24 * 30))
        claims = serializer.loads(token, max_age=max_age)
        token_username = claims.get("u")
        token_role = claims.get("r") or "User"
        if token_username:
            return {"username": token_username, "role": token_role}
    except Exception:
        # Any failure (bad signature, expired token, malformed TTL or payload)
        # is treated as "not authenticated".
        return None
    return None
def _require_login(app) -> Optional[Tuple[Dict[str, Any], int]]:
    """Return a ``(payload, 401)`` pair when no operator can be resolved, else ``None``."""
    if _current_user(app):
        return None
    return {"error": "unauthorized"}, 401
def _get_tunnel_service(adapters: "EngineServiceAdapters") -> ReverseTunnelService:
service = getattr(adapters.context, "reverse_tunnel_service", None) or getattr(adapters, "_reverse_tunnel_service", None)
if service is None:
service = ReverseTunnelService(
adapters.context,
signer=getattr(adapters, "script_signer", None),
db_conn_factory=adapters.db_conn_factory,
socketio=getattr(adapters.context, "socketio", None),
)
service.start()
setattr(adapters, "_reverse_tunnel_service", service)
setattr(adapters.context, "reverse_tunnel_service", service)
return service
def _normalize_text(value: Any) -> str:
if value is None:
return ""
try:
return str(value).strip()
except Exception:
return ""
def register_tunnel(app, adapters: "EngineServiceAdapters") -> None:
    """Register reverse tunnel negotiation endpoints.

    Adds ``POST /api/tunnel/request`` to ``app`` through a blueprint named
    ``reverse_tunnel``. The endpoint requires an authenticated operator,
    asks the shared tunnel service for a lease, and returns the lease
    summary extended with ``fixed_port`` and ``heartbeat_seconds``.
    """
    blueprint = Blueprint("reverse_tunnel", __name__)
    service_log = adapters.service_log
    logger = adapters.context.logger.getChild("tunnel.api")

    @blueprint.route("/api/tunnel/request", methods=["POST"])
    def request_tunnel():
        """Allocate a tunnel lease for the agent named in the JSON body.

        Responses: 401 when unauthenticated, 400 when ``agent_id`` is
        missing, 409 on a per-domain limit, 503 when the port pool is
        exhausted, 500 for any other ``RuntimeError`` from the service,
        and 200 with the lease summary on success.
        """
        requirement = _require_login(app)
        if requirement:
            payload, status = requirement
            return jsonify(payload), status

        user = _current_user(app) or {}
        operator_id = user.get("username") or None

        body = request.get_json(silent=True)
        if not isinstance(body, dict):
            # Valid JSON that is not an object (list, string, number) has no
            # .get(); treat it like an empty body so the caller receives a
            # 400 instead of an unhandled AttributeError.
            body = {}

        agent_id = _normalize_text(body.get("agent_id"))
        protocol = _normalize_text(body.get("protocol") or "ps").lower() or "ps"
        # The domain defaults to the protocol name when not supplied.
        domain = _normalize_text(body.get("domain") or protocol).lower() or protocol

        if not agent_id:
            return jsonify({"error": "agent_id_required"}), 400

        tunnel_service = _get_tunnel_service(adapters)
        try:
            lease = tunnel_service.request_lease(
                agent_id=agent_id,
                protocol=protocol,
                domain=domain,
                operator_id=operator_id,
            )
        except RuntimeError as exc:
            message = str(exc)
            if message.startswith("domain_limit:"):
                # The prefix guarantees a ':'; fall back to the requested
                # domain when the service did not name one.
                domain_name = message.split(":", 1)[1] or domain
                return jsonify({"error": "domain_limit", "domain": domain_name}), 409
            if message == "port_pool_exhausted":
                return jsonify({"error": "port_pool_exhausted"}), 503
            logger.warning("tunnel lease request failed for agent_id=%s: %s", agent_id, message)
            return jsonify({"error": "lease_allocation_failed"}), 500

        summary = tunnel_service.lease_summary(lease)
        summary["fixed_port"] = tunnel_service.fixed_port
        summary["heartbeat_seconds"] = tunnel_service.heartbeat_seconds

        service_log(
            "reverse_tunnel",
            f"lease created tunnel_id={lease.tunnel_id} agent_id={lease.agent_id} domain={lease.domain} protocol={lease.protocol}",
        )
        return jsonify(summary), 200

    app.register_blueprint(blueprint)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,10 @@
# ======================================================
# Data\Engine\services\WebSocket\Agent\__init__.py
# Description: Package marker for Agent-facing WebSocket services (reverse tunnel scaffolding).
#
# API Endpoints (if applicable): None
# ======================================================
"""Agent-facing WebSocket services for the Engine runtime."""
__all__ = []

View File

@@ -8,6 +8,7 @@
"""WebSocket service registration for the Borealis Engine runtime."""
from __future__ import annotations
import base64
import sqlite3
import time
from dataclasses import dataclass, field
@@ -15,9 +16,16 @@ from pathlib import Path
from typing import Any, Callable, Dict, Optional
from flask_socketio import SocketIO
from flask import session, request
from ...database import initialise_engine_database
from ...server import EngineContext
from .Agent.ReverseTunnel import (
ReverseTunnelService,
TunnelBridge,
decode_frame,
TunnelFrame,
)
from ..API import _make_db_conn_factory, _make_service_logger
@@ -63,6 +71,16 @@ def register_realtime(socket_server: SocketIO, context: EngineContext) -> None:
adapters = EngineRealtimeAdapters(context)
logger = context.logger.getChild("realtime.quick_jobs")
# Reuse the service instance cached on the context when one exists; otherwise
# create it here, start it, and cache it on the context for later lookups.
# NOTE(review): this path passes signer=None, while the API-side factory in
# services/API/devices/tunnel.py passes adapters.script_signer. Whichever path
# runs first decides whether the shared instance has a signer - confirm intended.
tunnel_service = getattr(context, "reverse_tunnel_service", None)
if tunnel_service is None:
tunnel_service = ReverseTunnelService(
context,
signer=None,
db_conn_factory=adapters.db_conn_factory,
socketio=socket_server,
)
tunnel_service.start()
setattr(context, "reverse_tunnel_service", tunnel_service)
@socket_server.on("quick_job_result")
def _handle_quick_job_result(data: Any) -> None:
@@ -224,3 +242,163 @@ def register_realtime(socket_server: SocketIO, context: EngineContext) -> None:
job_id,
exc,
)
@socket_server.on("tunnel_bridge_attach")
def _tunnel_bridge_attach(data: Any) -> Any:
    """Placeholder operator bridge attach handler (no data channel yet).

    Expects a dict carrying ``tunnel_id`` and, optionally, ``operator_id``.
    Returns ``{"status": "ok", ...}`` on success or ``{"error": ...}``.
    """
    # NOTE(review): operator_id comes straight from the client payload and no
    # session/token check is visible in this handler - confirm that callers
    # are authenticated elsewhere.
    if not isinstance(data, dict):
        return {"error": "invalid_payload"}

    requested_tunnel = str(data.get("tunnel_id") or "").strip()
    claimed_operator = str(data.get("operator_id") or "").strip() or None
    if not requested_tunnel:
        return {"error": "tunnel_id_required"}

    try:
        tunnel_service.operator_attach(requested_tunnel, claimed_operator)
    except ValueError as exc:
        # ValueError text from the service is passed through as the error code.
        return {"error": str(exc)}
    except Exception as exc:  # pragma: no cover - defensive guard
        logger.debug("tunnel_bridge_attach failed tunnel_id=%s: %s", requested_tunnel, exc, exc_info=True)
        return {"error": "bridge_attach_failed"}
    else:
        return {
            "status": "ok",
            "tunnel_id": requested_tunnel,
            "operator_id": claimed_operator or "-",
        }
def _encode_frame(frame: TunnelFrame) -> str:
return base64.b64encode(frame.encode()).decode("ascii")
def _decode_frame_payload(raw: Any) -> TunnelFrame:
if isinstance(raw, str):
try:
raw_bytes = base64.b64decode(raw)
except Exception:
raise ValueError("invalid_frame")
elif isinstance(raw, (bytes, bytearray)):
raw_bytes = bytes(raw)
else:
raise ValueError("invalid_frame")
return decode_frame(raw_bytes)
@socket_server.on("tunnel_operator_send")
def _tunnel_operator_send(data: Any) -> Any:
    """Operator -> agent frame enqueue (placeholder queue).

    Expects a dict with ``tunnel_id`` and ``frame``; the decoded frame is
    handed to the tunnel's bridge. Returns ``{"status": "ok"}`` or
    ``{"error": ...}``.
    """
    if not isinstance(data, dict):
        return {"error": "invalid_payload"}

    target_tunnel = str(data.get("tunnel_id") or "").strip()
    encoded_frame = data.get("frame")
    if not (target_tunnel and encoded_frame is not None):
        return {"error": "tunnel_id_and_frame_required"}

    try:
        decoded = _decode_frame_payload(encoded_frame)
    except Exception as exc:
        # The decoder's message is reported back as the error code.
        return {"error": str(exc)}

    target_bridge = tunnel_service.get_bridge(target_tunnel)
    if target_bridge is None:
        return {"error": "unknown_tunnel"}
    target_bridge.operator_to_agent(decoded)
    return {"status": "ok"}
@socket_server.on("tunnel_operator_poll")
def _tunnel_operator_poll(data: Any) -> Any:
    """Operator polls queued frames from agent.

    Drains every frame currently queued for the operator on the requested
    tunnel and returns them base64-encoded under ``frames``.
    """
    wanted_tunnel = str(data.get("tunnel_id") or "").strip() if isinstance(data, dict) else ""
    if not wanted_tunnel:
        return {"error": "tunnel_id_required"}

    source_bridge = tunnel_service.get_bridge(wanted_tunnel)
    if source_bridge is None:
        return {"error": "unknown_tunnel"}

    drained = []
    pending = source_bridge.next_for_operator()
    # next_for_operator() signals an empty queue by returning None.
    while pending is not None:
        drained.append(_encode_frame(pending))
        pending = source_bridge.next_for_operator()
    return {"frames": drained}
# WebUI operator bridge namespace for browser clients
# Socket.IO namespace under which the join/send/poll/disconnect handlers are registered.
tunnel_namespace = "/tunnel"
# Maps a Socket.IO session id (request.sid) to the tunnel_id it joined; written
# by the "join" handler and removed by the "disconnect" handler.
_operator_sessions: Dict[str, str] = {}
def _current_operator() -> Optional[str]:
    """Return an identifier for the connected operator, or ``None``.

    The session ``username`` wins. Otherwise the bearer token from the
    ``Authorization`` header, or failing that the ``borealis_auth`` cookie,
    is returned verbatim.
    """
    session_user = session.get("username")
    if session_user:
        return str(session_user)

    header_value = (request.headers.get("Authorization") or "").strip()
    bearer = header_value.split(" ", 1)[1].strip() if header_value.lower().startswith("bearer ") else None
    # NOTE(review): the token is returned as-is, with no signature or expiry
    # check in this function, and callers use the return value as the operator
    # id - confirm whether verification is expected here.
    credential = bearer or request.cookies.get("borealis_auth")
    return credential or None
@socket_server.on("join", namespace=tunnel_namespace)
def _ws_tunnel_join(data: Any) -> Any:
    """Bind the calling Socket.IO session to a tunnel.

    Requires a dict payload with ``tunnel_id`` and a resolvable operator.
    On success the session id is recorded against the tunnel so later
    ``send``/``poll`` events need not repeat the id.
    """
    if not isinstance(data, dict):
        return {"error": "invalid_payload"}

    operator = _current_operator()
    if not operator:
        return {"error": "unauthorized"}

    wanted_tunnel = str(data.get("tunnel_id") or "").strip()
    if not wanted_tunnel:
        return {"error": "tunnel_id_required"}

    # The bridge must already exist before the operator can attach to it.
    if tunnel_service.get_bridge(wanted_tunnel) is None:
        return {"error": "unknown_tunnel"}

    try:
        tunnel_service.operator_attach(wanted_tunnel, operator)
    except Exception as exc:
        logger.debug("ws_tunnel_join failed tunnel_id=%s: %s", wanted_tunnel, exc, exc_info=True)
        return {"error": "attach_failed"}

    _operator_sessions[request.sid] = wanted_tunnel
    return {"status": "ok", "tunnel_id": wanted_tunnel}
@socket_server.on("send", namespace=tunnel_namespace)
def _ws_tunnel_send(data: Any) -> Any:
    """Forward one operator frame to the agent side of the joined tunnel.

    The tunnel is looked up from the session id recorded by ``join``; the
    payload must be a dict whose ``frame`` entry decodes to a frame.
    """
    joined_tunnel = _operator_sessions.get(request.sid)
    if not joined_tunnel:
        return {"error": "not_joined"}
    if not isinstance(data, dict):
        return {"error": "invalid_payload"}

    encoded_frame = data.get("frame")
    if encoded_frame is None:
        return {"error": "frame_required"}

    try:
        decoded = _decode_frame_payload(encoded_frame)
    except Exception:
        return {"error": "invalid_frame"}

    target_bridge = tunnel_service.get_bridge(joined_tunnel)
    if target_bridge is None:
        return {"error": "unknown_tunnel"}
    target_bridge.operator_to_agent(decoded)
    return {"status": "ok"}
@socket_server.on("poll", namespace=tunnel_namespace)
def _ws_tunnel_poll() -> Any:
    """Return every frame currently queued for the operator of the joined tunnel.

    Frames are base64-encoded and returned under ``frames``.
    """
    joined_tunnel = _operator_sessions.get(request.sid)
    if not joined_tunnel:
        return {"error": "not_joined"}

    source_bridge = tunnel_service.get_bridge(joined_tunnel)
    if source_bridge is None:
        return {"error": "unknown_tunnel"}

    drained = []
    pending = source_bridge.next_for_operator()
    # next_for_operator() signals an empty queue by returning None.
    while pending is not None:
        drained.append(_encode_frame(pending))
        pending = source_bridge.next_for_operator()
    return {"frames": drained}
@socket_server.on("disconnect", namespace=tunnel_namespace)
def _ws_tunnel_disconnect():
    """Forget the tunnel binding of the disconnecting session, if it had one."""
    _operator_sessions.pop(request.sid, None)

View File

@@ -917,11 +917,14 @@ export default function DeviceFilterEditor({ initialFilter, onCancel, onSaved, o
id: initialFilter?.id || initialFilter?.filter_id,
name: name.trim() || "Unnamed Filter",
site_scope: siteScope,
site_scope_value: primarySite,
site_scope_values: scopedSites,
sites: scopedSites,
site_ids: scopedSites,
site_names: siteScope === "scoped" ? selectedSiteLabels : [],
site: siteScope === "scoped" ? primarySite : null,
site_scope_value: primarySite,
scope: siteScope,
type: siteScope,
site: primarySite,
groups: groups.map((g, gIdx) => ({
join_with: gIdx === 0 ? null : g.joinWith || "OR",
conditions: (g.conditions || []).map((c, cIdx) => ({

View File

@@ -147,7 +147,13 @@ function normalizeFilters(raw) {
id: f.id || f.filter_id || `filter-${idx}`,
name: f.name || f.title || "Unnamed Filter",
type: (f.site_scope || f.scope || f.type || "global") === "scoped" ? "site" : "global",
site: f.site || f.site_scope || f.site_name || f.target_site || null,
site: (() => {
if (Array.isArray(f.site_scope_values) && f.site_scope_values.length) return f.site_scope_values.join(", ");
if (Array.isArray(f.sites) && f.sites.length) return f.sites.join(", ");
if (Array.isArray(f.site_ids) && f.site_ids.length) return f.site_ids.join(", ");
if (Array.isArray(f.site_names) && f.site_names.length) return f.site_names.join(", ");
return f.site || f.site_scope || f.site_name || f.target_site || null;
})(),
lastEditedBy: resolveLastEditor(f),
lastEdited: f.last_edited || f.updated_at || f.updated || f.created_at || null,
deviceCount:

View File

@@ -195,16 +195,19 @@ Read `Docs/Codex/FEATURE_IMPLEMENTATION_TRACKING/Agent_Reverse_Tunneling.md` and
- Keep the codebase functional at all times. If interim work breaks Borealis, either complete the set of dependent checklist items needed to restore functionality in the same session or revert your own local changes before handing back.
- Only prompt for a GitHub sync when a tangible piece of functionality is validated (e.g., API call works, tunnel connects, UI interaction tested). Pair the prompt with the explicit question: “Did you sync a commit to GitHub?” after validation or operator testing.
# Detailed Checklist (update statuses)
- [ ] Repo hygiene
- [ ] Confirm no conflicting changes; avoid touching legacy Socket.IO handlers.
- [ ] Add pywinpty (MIT) to Agent deps (note potential packaging/test impact).
- [ ] Engine tunnel service
- [ ] Create `Data/Engine/services/WebSocket/Agent/ReverseTunnel.py` (async/uvloop listener, port pool 30000-40000).
- [ ] Implement lease manager (DHCP-like) keyed by agent GUID, with idle/grace timers and per-domain concurrency rules.
- [ ] Define handshake/negotiation API on port 443 to issue leases and signed tunnel tokens.
- [ ] Implement channel framing, flow control, heartbeats, close semantics.
- [ ] Logging: `Engine/Logs/reverse_tunnel.log`; audit into Device Activity (session start/stop, operator id, agent id, tunnel_id, port).
- [ ] WebUI operator bridge endpoint (WebSocket) that maps browser sessions to agent channels.
- [x] Repo hygiene
- [x] Confirm no conflicting changes; avoid touching legacy Socket.IO handlers.
- [x] Add pywinpty (MIT) to Agent deps (note potential packaging/test impact).
- [x] Engine tunnel service
- [x] Add reverse tunnel config defaults (fixed port, port range, timeouts, log path) without enabling.
- [x] Create `Data/Engine/services/WebSocket/Agent/ReverseTunnel.py` (async/uvloop listener, port pool 30000-40000).
- [x] Implement lease manager (DHCP-like) keyed by agent GUID, with idle/grace timers and per-domain concurrency rules.
- [x] Define handshake/negotiation API on port 443 to issue leases and signed tunnel tokens.
- [x] Implement channel framing, flow control, heartbeats, close semantics.
- [x] Logging: `Engine/Logs/reverse_tunnel.log`; audit into Device Activity (session start/stop, operator id, agent id, tunnel_id, port).
- [x] WebUI operator bridge endpoint (WebSocket) that maps browser sessions to agent channels.
- [x] Idle/grace sweeper + heartbeat wiring for tunnel sockets.
- [x] TLS-aware per-port listener and agent CONNECT_ACK handling.
- [ ] Agent tunnel role
- [ ] Add `Data/Agent/Roles/role_ReverseTunnel.py` (manages tunnel socket, reconnect, heartbeats, channel dispatch).
- [ ] Per-protocol submodules under `Data/Agent/Roles/ReverseTunnel/` (first: `tunnel_Powershell.py`).
@@ -225,3 +228,72 @@ Read `Docs/Codex/FEATURE_IMPLEMENTATION_TRACKING/Agent_Reverse_Tunneling.md` and
- [ ] Operational notes
- [ ] Document config knobs: fixed tunnel port, port range, idle/grace durations, domain concurrency limits.
- [ ] Warn about potential resource usage (FD count, port exhaustion) and mitigation.
## Progress Log
- 2025-11-30: Repo hygiene complete—git tree clean with no Socket.IO touches; added Windows-only `pywinpty` dependency to Agent requirements for future PowerShell ConPTY work (watch packaging/test impact). Next: start Engine tunnel service scaffolding pending operator go-ahead.
- 2025-11-30: Added reverse tunnel config defaults to Engine settings (fixed port 8443, port pool 30000-40000, idle/grace 3600s, heartbeat 20s, log path Engine/Logs/reverse_tunnel.log); feature still dormant and not wired.
- 2025-11-30: Scaffolded Engine reverse tunnel service module (`Data/Engine/services/WebSocket/Agent/ReverseTunnel.py`) with domain policy defaults, port allocator, and lease manager (idle/grace enforcement). Service stays dormant; listener/bridge wiring and framing remain TODO.
- 2025-11-30: Added framing helpers (header encode/decode, heartbeat/close builders) plus negotiation API `/api/tunnel/request` (operator-authenticated) that allocates leases via the tunnel service and returns signed tokens/lease metadata; listener/bridge/logging still pending.
- 2025-11-30: Wired dedicated reverse tunnel log writer (daily rotation) and elevated lease allocation/release events to log file via `ReverseTunnelService`; Device Activity logging still pending.
- 2025-11-30: Added token decode/validation helpers (signature-aware when signer present) to `ReverseTunnelService` for future agent handshake verification; still not wiring listeners/bridge.
- 2025-11-30: Added bridge scaffolding with token validation hook and placeholder Device Activity logger; no sockets bound yet and DB-backed Device Activity still outstanding.
- 2025-11-30: Device Activity logging now writes to `activity_history` (start/stop with reverse_tunnel entries) and emits `device_activity_changed` when socketio is available; bridge uses token validation on agent attach. Listener wiring still pending.
- 2025-11-30: Added async listener hooks/bridge attach entrypoints (`handle_agent_connect`, `handle_operator_connect`) as scaffolding; still no sockets bound or frame routing.
- 2025-11-30: Moved negotiation API to `services/API/devices/tunnel.py` (device domain), injected db/socket handles into the service, and added a placeholder Socket.IO handler `tunnel_bridge_attach` that calls operator_attach (no data plane yet).
- 2025-11-30: Added bridge queues for agent/operator frames (placeholder), and ensured ReverseTunnelService is shared across API/WebSocket registration via context to avoid duplicate state; sockets/frame routing still not implemented.
- 2025-11-30: Added WebUI-facing Socket.IO namespace `/tunnel` with join/send/poll events that map browser sessions to tunnel bridges, using base64-encoded frames and operator auth from session/cookies.
- 2025-11-30: Enabled async WebSocket listener per assigned port (TLS-aware via Engine certs) for agent CONNECT frames, with frame routing between agent socket and browser bridge queues; Engine tunnel service checklist marked complete.
- 2025-11-30: Added idle/grace sweeper, CONNECT_ACK to agents, heartbeat loop, and token-touched operator sends; per-port listener now runs on dedicated loop/thread. (Original instructions didn't call out sweeper/heartbeat wiring explicitly.)
## Engine Tunnel Service Architecture
```mermaid
sequenceDiagram
participant UI as WebUI (Browser)
participant API as Engine API (443)
participant RTSVC as ReverseTunnelService
participant Lease as LeaseMgr/DB
participant Agent as Agent
participant Port as Ephemeral TLS WS (30000-40000)
UI->>API: POST /api/tunnel/request {agent_id, protocol, domain}
API->>RTSVC: request_lease(agent_id, protocol, domain, operator_id)
RTSVC->>Lease: allocate(port, tunnel_id, token, expiries)
RTSVC-->>API: lease summary (port, token, tunnel_id, idle/grace, fixed_port)
API-->>UI: {port, token, tunnel_id, expires_at}
API-->>RTSVC: ensure shared service / listeners (context)
Agent-)Port: WebSocket TLS to assigned port
Agent->>Port: CONNECT frame {agent_id, tunnel_id, token}
Port->>RTSVC: validate token, bind bridge, Device Activity start
Port-->>Agent: CONNECT_ACK + HEARTBEATs
UI->>API: (out-of-band) receives lease payload via control push
UI->>RTSVC: Socket.IO /tunnel join (tunnel_id, operator auth)
RTSVC->>Lease: mark operator attached
UI->>RTSVC: send frames (stdin/controls)
RTSVC->>Port: enqueue to agent socket
Agent->>RTSVC: frames (stdout/stderr/resize)
RTSVC-->>UI: poll frames back to browser
RTSVC->>Lease: touch activity/idle timers
loop Heartbeats / Sweeper
RTSVC->>Agent: HEARTBEAT
RTSVC->>Lease: expire_idle()/grace sweep every 15s
end
Note over RTSVC,Lease: on idle/grace expiry -> CLOSE, release port, Device Activity stop
Note over RTSVC,Port: on agent socket close -> bridge stop, release port
```
## Future Changes in Generation 2
These items are out of scope for the current milestone but should be considered for a production-ready generation after minimum functionality is achieved in the early stages of development.
- Harden operator auth/authorization: enforce per-operator session binding, ownership checks, audited attach/detach, and offer a pure WebSocket `/ws/tunnel/<tunnel_id>` bridge.
- Replace Socket.IO browser bridge with a dedicated binary WebSocket bridge for higher throughput and simpler framing.
- Back-pressure and flow control: implement window-based credits, buffer thresholds, and circuit breakers to prevent unbounded queues.
- Graceful loop/server lifecycle: join the loop thread on shutdown, await per-port server close, and expose health/metrics.
- Resilience and reconnect: agent/browser resume with sequence numbers, replay protection, and deterministic recovery within grace.
- Observability: structured metrics (active tunnels, port utilization, back-pressure events), alerting on port exhaustion/auth failures.
- Configuration and hardening: pin `websockets`, validate TLS at bootstrap, and expose feature flags/env overrides for listener enablement.