More VPN Tunnel Changes

This commit is contained in:
2026-01-11 20:53:09 -07:00
parent df14a1e26a
commit 3809fd25fb
13 changed files with 593 additions and 51 deletions

View File

@@ -24,9 +24,9 @@ ROLE_CONTEXTS = ["system"]
def _log_path() -> Path:
root = Path(__file__).resolve().parents[2] / "Logs"
root = Path(__file__).resolve().parents[2] / "Logs" / "VPN_Tunnel"
root.mkdir(parents=True, exist_ok=True)
return root / "reverse_tunnel.log"
return root / "remote_shell.log"
def _write_log(message: str) -> None:
@@ -61,8 +61,13 @@ class ShellSession:
self.address = address
self.proc: Optional[subprocess.Popen] = None
self._stop = threading.Event()
self.input_messages = 0
self.input_bytes = 0
self.output_lines = 0
self.output_bytes = 0
def start(self) -> None:
_write_log(f"Shell session starting for {self.address[0]}:{self.address[1]}")
self.proc = subprocess.Popen(
["powershell.exe", "-NoLogo", "-NoProfile", "-NoExit", "-Command", "-"],
stdin=subprocess.PIPE,
@@ -82,16 +87,27 @@ class ShellSession:
chunk = self.proc.stdout.readline()
if not chunk:
break
self.output_lines += 1
self.output_bytes += len(chunk)
payload = json.dumps({"type": "stdout", "data": _b64encode(chunk)})
self.conn.sendall(payload.encode("utf-8") + b"\n")
except Exception:
pass
try:
self.conn.sendall(payload.encode("utf-8") + b"\n")
except Exception as exc:
_write_log(f"Shell stdout send failed: {exc}")
break
_write_log(f"Shell stdout forwarded bytes={len(chunk)}")
except Exception as exc:
_write_log(f"Shell stdout error: {exc}")
def _writer_loop(self) -> None:
buffer = b""
try:
while not self._stop.is_set():
data = self.conn.recv(4096)
try:
data = self.conn.recv(4096)
except Exception as exc:
_write_log(f"Shell stdin recv error: {exc}")
break
if not data:
break
buffer += data
@@ -107,12 +123,17 @@ class ShellSession:
payload = msg.get("data") or ""
if self.proc and self.proc.stdin:
try:
self.proc.stdin.write(_b64decode(str(payload)))
decoded = _b64decode(str(payload))
self.proc.stdin.write(decoded)
self.proc.stdin.flush()
self.input_messages += 1
self.input_bytes += len(decoded)
_write_log(f"Shell stdin received bytes={len(decoded)}")
except Exception:
pass
_write_log("Shell stdin write failed.")
if msg.get("type") == "close":
self._stop.set()
_write_log("Shell close requested by engine.")
break
finally:
self.close()
@@ -128,6 +149,14 @@ class ShellSession:
self.proc.terminate()
except Exception:
pass
_write_log(
"Shell session closed inputs={0} input_bytes={1} output_lines={2} output_bytes={3}".format(
self.input_messages,
self.input_bytes,
self.output_lines,
self.output_bytes,
)
)
class ShellServer:

View File

@@ -9,7 +9,7 @@
This role prepares the WireGuard client config, manages a single active
session, enforces idle teardown, and logs lifecycle events to
Agent/Logs/reverse_tunnel.log. It binds to Engine Socket.IO events
Agent/Logs/VPN_Tunnel/tunnel.log. It binds to Engine Socket.IO events
(`vpn_tunnel_start`, `vpn_tunnel_stop`, `vpn_tunnel_activity`) to start/stop
the client session with the issued config/token.
"""
@@ -44,9 +44,9 @@ ROLE_CONTEXTS = ["system"]
def _log_path() -> Path:
root = Path(__file__).resolve().parents[2] / "Logs"
root = Path(__file__).resolve().parents[2] / "Logs" / "VPN_Tunnel"
root.mkdir(parents=True, exist_ok=True)
return root / "reverse_tunnel.log"
return root / "tunnel.log"
def _write_log(message: str) -> None:
@@ -303,11 +303,15 @@ class Role:
hooks = getattr(ctx, "hooks", {}) or {}
self._log_hook = hooks.get("log_agent")
self._http_client_factory = hooks.get("http_client")
try:
self.client.stop_session(reason="agent_startup", ignore_missing=True)
except Exception:
self._log("Failed to preflight WireGuard session cleanup.", error=True)
def _log(self, message: str, *, error: bool = False) -> None:
if callable(self._log_hook):
try:
self._log_hook(message, fname="reverse_tunnel.log")
self._log_hook(message, fname="VPN_Tunnel/tunnel.log")
if error:
self._log_hook(message, fname="agent.error.log")
except Exception:

View File

@@ -540,6 +540,9 @@ def _log_agent(message: str, fname: str = 'agent.log', *, scope: Optional[str] =
os.makedirs(log_dir, exist_ok=True)
ts = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
path = os.path.join(log_dir, fname)
parent = os.path.dirname(path)
if parent:
os.makedirs(parent, exist_ok=True)
_rotate_daily(path)
line = _format_agent_log_message(message, fname, scope)
with open(path, 'a', encoding='utf-8') as fh:

View File

@@ -77,7 +77,7 @@ LOG_ROOT = PROJECT_ROOT / "Engine" / "Logs"
LOG_FILE_PATH = LOG_ROOT / "engine.log"
ERROR_LOG_FILE_PATH = LOG_ROOT / "error.log"
API_LOG_FILE_PATH = LOG_ROOT / "api.log"
VPN_TUNNEL_LOG_FILE_PATH = LOG_ROOT / "reverse_tunnel.log"
VPN_TUNNEL_LOG_FILE_PATH = LOG_ROOT / "VPN_Tunnel" / "tunnel.log"
DEFAULT_WIREGUARD_PORT = 30000
DEFAULT_WIREGUARD_ENGINE_VIRTUAL_IP = "10.255.0.1/32"
DEFAULT_WIREGUARD_PEER_NETWORK = "10.255.0.0/24"

View File

@@ -129,6 +129,10 @@ def _make_service_logger(base: Path, logger: logging.Logger) -> Callable[[str, s
try:
base.mkdir(parents=True, exist_ok=True)
path = base / f"{service}.log"
try:
path.parent.mkdir(parents=True, exist_ok=True)
except Exception:
pass
_rotate_daily(path)
timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
resolved_scope = _infer_server_scope(msg, scope)

View File

@@ -5,6 +5,7 @@
# API Endpoints (if applicable):
# - POST /api/tunnel/connect (Token Authenticated) - Issues VPN session material for an agent.
# - GET /api/tunnel/status (Token Authenticated) - Returns VPN status for an agent.
# - GET /api/tunnel/active (Token Authenticated) - Lists active VPN tunnel sessions.
# - DELETE /api/tunnel/disconnect (Token Authenticated) - Tears down VPN session for an agent.
# ======================================================
@@ -120,6 +121,21 @@ def _infer_endpoint_host(req) -> str:
def register_tunnel(app, adapters: "EngineServiceAdapters") -> None:
blueprint = Blueprint("vpn_tunnel", __name__)
logger = adapters.context.logger.getChild("vpn_tunnel.api")
service_log = adapters.service_log
def _service_log_event(message: str, *, level: str = "INFO") -> None:
    """Write *message* to the ``VPN_Tunnel/tunnel`` service log, best-effort.

    Does nothing when ``service_log`` is not callable. A failing write is
    reported on the debug logger and never propagates to the request handler.
    """
    if callable(service_log):
        try:
            service_log("VPN_Tunnel/tunnel", message, level=level)
        except Exception:
            # Logging must not break the API call that triggered it.
            logger.debug("vpn_tunnel service log write failed", exc_info=True)
def _request_remote() -> str:
    """Return the caller address used in log lines.

    The first entry of ``X-Forwarded-For`` wins when that header is non-blank;
    otherwise ``request.remote_addr`` is used. The result is stripped and may
    be an empty string.
    """
    # NOTE(review): X-Forwarded-For is supplied by the client unless a trusted
    # proxy rewrites it -- confirm this value is only used for logging.
    header_value = (request.headers.get("X-Forwarded-For") or "").strip()
    if not header_value:
        return (request.remote_addr or "").strip()
    first_hop, _, _ = header_value.partition(",")
    return first_hop.strip()
@blueprint.route("/api/tunnel/connect", methods=["POST"])
def connect_tunnel():
@@ -139,15 +155,37 @@ def register_tunnel(app, adapters: "EngineServiceAdapters") -> None:
try:
tunnel_service = _get_tunnel_service(adapters)
endpoint_host = _infer_endpoint_host(request)
_service_log_event(
"vpn_api_connect_request agent_id={0} operator={1} endpoint_host={2} remote={3}".format(
agent_id,
operator_id or "-",
endpoint_host or "-",
_request_remote() or "-",
)
)
payload = tunnel_service.connect(
agent_id=agent_id,
operator_id=operator_id,
endpoint_host=endpoint_host,
)
except Exception as exc:
_service_log_event(
"vpn_api_connect_failed agent_id={0} operator={1} error={2}".format(
agent_id,
operator_id or "-",
str(exc),
),
level="ERROR",
)
logger.warning("vpn connect failed for agent_id=%s: %s", agent_id, exc)
return jsonify({"error": "connect_failed", "detail": str(exc)}), 500
_service_log_event(
"vpn_api_connect_response agent_id={0} tunnel_id={1} status=ok".format(
payload.get("agent_id", agent_id),
payload.get("tunnel_id", "-"),
)
)
return jsonify(payload), 200
@blueprint.route("/api/tunnel/status", methods=["GET"])
@@ -163,18 +201,51 @@ def register_tunnel(app, adapters: "EngineServiceAdapters") -> None:
tunnel_service = _get_tunnel_service(adapters)
payload = tunnel_service.status(agent_id)
bump = _normalize_text(request.args.get("bump") or "")
_service_log_event(
"vpn_api_status_request agent_id={0} bump={1} remote={2}".format(
agent_id,
"true" if bump else "false",
_request_remote() or "-",
)
)
if not payload:
_service_log_event(
"vpn_api_status_response agent_id={0} status=down".format(agent_id)
)
return jsonify({"status": "down", "agent_id": agent_id}), 200
payload["status"] = "up"
bump = _normalize_text(request.args.get("bump") or "")
if bump:
tunnel_service.bump_activity(agent_id)
_service_log_event(
"vpn_api_status_response agent_id={0} status=up tunnel_id={1}".format(
agent_id,
payload.get("tunnel_id", "-"),
)
)
return jsonify(payload), 200
@blueprint.route("/api/tunnel/connect/status", methods=["GET"])
def tunnel_connect_status():
return tunnel_status()
@blueprint.route("/api/tunnel/active", methods=["GET"])
def tunnel_active():
    """Return the active VPN tunnel sessions as JSON.

    When ``_require_login`` yields a (payload, status) pair, that pair is
    returned as the response. Otherwise the sessions reported by the tunnel
    service are listed, the count is written to the service log, and a 200
    response with ``count`` and ``tunnels`` is returned.
    """
    denied = _require_login(app)
    if denied:
        body, status_code = denied
        return jsonify(body), status_code

    service = _get_tunnel_service(adapters)
    active = list(service.list_sessions())
    total = len(active)
    _service_log_event(
        "vpn_api_active_response count={0} remote={1}".format(
            total,
            _request_remote() or "-",
        )
    )
    return jsonify({"count": total, "tunnels": active}), 200
@blueprint.route("/api/tunnel/disconnect", methods=["DELETE"])
def disconnect_tunnel():
requirement = _require_login(app)
@@ -188,6 +259,15 @@ def register_tunnel(app, adapters: "EngineServiceAdapters") -> None:
reason = _normalize_text(body.get("reason") or "operator_stop")
tunnel_service = _get_tunnel_service(adapters)
_service_log_event(
"vpn_api_disconnect_request agent_id={0} tunnel_id={1} reason={2} operator={3} remote={4}".format(
agent_id or "-",
tunnel_id or "-",
reason or "-",
(_current_user(app) or {}).get("username") or "-",
_request_remote() or "-",
)
)
stopped = False
if tunnel_id:
stopped = tunnel_service.disconnect_by_tunnel(tunnel_id, reason=reason)
@@ -197,8 +277,21 @@ def register_tunnel(app, adapters: "EngineServiceAdapters") -> None:
return jsonify({"error": "agent_id_required"}), 400
if not stopped:
_service_log_event(
"vpn_api_disconnect_not_found agent_id={0} tunnel_id={1}".format(
agent_id or "-",
tunnel_id or "-",
),
level="WARNING",
)
return jsonify({"error": "not_found"}), 404
_service_log_event(
"vpn_api_disconnect_response agent_id={0} tunnel_id={1} status=stopped".format(
agent_id or "-",
tunnel_id or "-",
)
)
return jsonify({"status": "stopped", "reason": reason}), 200
app.register_blueprint(blueprint)

View File

@@ -15,6 +15,7 @@ import json
import threading
import time
import uuid
from datetime import datetime, timezone
from dataclasses import dataclass, field
from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Tuple
@@ -66,6 +67,7 @@ class VpnTunnelService:
self._sessions_by_tunnel: Dict[str, VpnSession] = {}
self._engine_ip = ipaddress.ip_interface(context.wireguard_engine_virtual_ip)
self._peer_network = ipaddress.ip_network(context.wireguard_peer_network, strict=False)
self._cleanup_listener()
self._idle_thread = threading.Thread(target=self._idle_loop, daemon=True)
self._idle_thread.start()
@@ -79,6 +81,15 @@ class VpnTunnelService:
if session.last_activity + self.idle_seconds <= now:
expired.append(session)
for session in expired:
self._service_log_event(
"vpn_tunnel_idle_timeout agent_id={0} tunnel_id={1} last_activity={2} last_activity_iso={3} idle_seconds={4}".format(
session.agent_id,
session.tunnel_id,
int(session.last_activity),
self._ts_to_iso(session.last_activity),
self.idle_seconds,
)
)
self.disconnect(session.agent_id, reason="idle_timeout")
def _allocate_virtual_ip(self, agent_id: str) -> str:
@@ -200,13 +211,27 @@ class VpnTunnelService:
return f"[{host}]"
return host
def _ts_to_iso(self, ts: float) -> str:
try:
return datetime.fromtimestamp(ts, timezone.utc).isoformat()
except Exception:
return ""
def _service_log_event(self, message: str, *, level: str = "INFO") -> None:
if not callable(self.service_log):
return
try:
self.service_log("reverse_tunnel", message, level=level)
self.service_log("VPN_Tunnel/tunnel", message, level=level)
except Exception:
self.logger.debug("Failed to write reverse_tunnel service log entry", exc_info=True)
self.logger.debug("Failed to write vpn_tunnel service log entry", exc_info=True)
def _cleanup_listener(self) -> None:
try:
self.wg.stop_listener(ignore_missing=True)
self._service_log_event("vpn_listener_cleanup reason=startup")
except Exception:
self.logger.debug("Failed to clean up WireGuard listener on startup.", exc_info=True)
self._service_log_event("vpn_listener_cleanup_failed reason=startup", level="WARNING")
def _refresh_listener(self) -> None:
peers: List[Mapping[str, object]] = []
@@ -220,8 +245,11 @@ class VpnTunnelService:
peer["public_key"] = session.client_public_key
peers.append(peer)
if not peers:
self._service_log_event("vpn_listener_stop reason=no_peers")
self.wg.stop_listener()
return
agent_list = ",".join(str(peer.get("agent_id", "")) for peer in peers if peer.get("agent_id"))
self._service_log_event("vpn_listener_start peers={0} agents={1}".format(len(peers), agent_list))
self.wg.start_listener(peers)
def connect(
@@ -233,6 +261,14 @@ class VpnTunnelService:
) -> Mapping[str, Any]:
now = time.time()
normalized_host = self._normalize_endpoint_host(endpoint_host)
operator_text = operator_id or "-"
self._service_log_event(
"vpn_tunnel_connect_request agent_id={0} operator={1} endpoint_host={2}".format(
agent_id or "-",
operator_text,
normalized_host or "-",
)
)
with self._lock:
existing = self._sessions_by_agent.get(agent_id)
if existing:
@@ -241,7 +277,18 @@ class VpnTunnelService:
if normalized_host and not existing.endpoint_host:
existing.endpoint_host = normalized_host
existing.last_activity = now
previous_expiry = existing.expires_at
self._ensure_token(existing, now=now)
refreshed = existing.expires_at != previous_expiry
operator_list = ",".join(sorted(filter(None, existing.operator_ids))) or "-"
self._service_log_event(
"vpn_tunnel_session_reuse agent_id={0} tunnel_id={1} operators={2} token_refreshed={3}".format(
existing.agent_id,
existing.tunnel_id,
operator_list,
str(refreshed).lower(),
)
)
return self._session_payload(existing)
tunnel_id = uuid.uuid4().hex
@@ -250,6 +297,7 @@ class VpnTunnelService:
client_private, client_public = self._generate_client_keys()
token = self._issue_token(agent_id, tunnel_id, now + 300)
self.wg.require_orchestration_token(token)
token_signed = "signature" in token
session = VpnSession(
tunnel_id=tunnel_id,
@@ -270,6 +318,16 @@ class VpnTunnelService:
self._sessions_by_tunnel[tunnel_id] = session
try:
self._service_log_event(
"vpn_tunnel_session_create agent_id={0} tunnel_id={1} virtual_ip={2} allowed_ports={3} token_signed={4} token_expires={5}".format(
session.agent_id,
session.tunnel_id,
session.virtual_ip,
",".join(str(p) for p in allowed_ports),
str(bool(token_signed)).lower(),
int(session.expires_at),
)
)
self._refresh_listener()
peer = self.wg.build_peer_profile(
@@ -279,7 +337,18 @@ class VpnTunnelService:
)
rule_names = self.wg.apply_firewall_rules(peer)
session.firewall_rules = rule_names
self._service_log_event(
"vpn_tunnel_firewall_applied agent_id={0} tunnel_id={1} rules={2}".format(
session.agent_id,
session.tunnel_id,
len(rule_names),
)
)
except Exception:
self._service_log_event(
"vpn_tunnel_connect_failed agent_id={0} tunnel_id={1}".format(agent_id, tunnel_id),
level="ERROR",
)
with self._lock:
self._sessions_by_agent.pop(agent_id, None)
self._sessions_by_tunnel.pop(tunnel_id, None)
@@ -312,6 +381,11 @@ class VpnTunnelService:
return None
return self._session_payload(session, include_token=False)
def list_sessions(self) -> List[Mapping[str, Any]]:
    """Return one summary mapping per tracked session, ordered by agent id."""
    with self._lock:
        # Snapshot and order the sessions while holding the lock.
        ordered = list(self._sessions_by_agent.values())
        ordered.sort(key=lambda item: item.agent_id)
    summaries = []
    for item in ordered:
        summaries.append(self._session_summary(item))
    return summaries
def session_payload(self, agent_id: str, *, include_token: bool = True) -> Optional[Mapping[str, Any]]:
with self._lock:
session = self._sessions_by_agent.get(agent_id)
@@ -324,7 +398,14 @@ class VpnTunnelService:
def request_agent_start(self, agent_id: str) -> Optional[Mapping[str, Any]]:
payload = self.session_payload(agent_id, include_token=True)
if not payload:
self._service_log_event("vpn_tunnel_agent_start_missing agent_id={0}".format(agent_id or "-"))
return None
self._service_log_event(
"vpn_tunnel_agent_start_emit agent_id={0} tunnel_id={1}".format(
payload.get("agent_id", "-"),
payload.get("tunnel_id", "-"),
)
)
self._emit_start(payload)
return payload
@@ -333,7 +414,18 @@ class VpnTunnelService:
session = self._sessions_by_agent.get(agent_id)
if not session:
return
session.last_activity = time.time()
now = time.time()
previous = session.last_activity
session.last_activity = now
idle_for = now - previous
if idle_for >= 60:
self._service_log_event(
"vpn_tunnel_activity_bump agent_id={0} tunnel_id={1} idle_for={2}".format(
session.agent_id,
session.tunnel_id,
int(idle_for),
)
)
try:
if self.socketio:
self.socketio.emit("vpn_tunnel_activity", {"agent_id": agent_id}, namespace="/")
@@ -344,6 +436,9 @@ class VpnTunnelService:
with self._lock:
session = self._sessions_by_agent.pop(agent_id, None)
if not session:
self._service_log_event(
"vpn_tunnel_disconnect_missing agent_id={0} reason={1}".format(agent_id or "-", reason or "-")
)
return False
self._sessions_by_tunnel.pop(session.tunnel_id, None)
@@ -370,6 +465,9 @@ class VpnTunnelService:
with self._lock:
session = self._sessions_by_tunnel.get(tunnel_id)
if not session:
self._service_log_event(
"vpn_tunnel_disconnect_missing tunnel_id={0} reason={1}".format(tunnel_id or "-", reason or "-")
)
return False
return self.disconnect(session.agent_id, reason=reason)
@@ -383,13 +481,27 @@ class VpnTunnelService:
if agent_id and callable(emit_agent):
try:
if emit_agent(agent_id, "vpn_tunnel_start", payload):
self._service_log_event(
"vpn_tunnel_start_emit agent_id={0} transport=direct".format(agent_id or "-")
)
return
except Exception:
self.logger.debug("emit_agent_event failed for vpn_tunnel_start", exc_info=True)
self._service_log_event(
"vpn_tunnel_start_emit_failed agent_id={0} transport=direct".format(agent_id or "-"),
level="WARNING",
)
try:
self._service_log_event(
"vpn_tunnel_start_emit agent_id={0} transport=broadcast".format(agent_id or "-")
)
self.socketio.emit("vpn_tunnel_start", payload, namespace="/")
except Exception:
self.logger.debug("vpn_tunnel_start emit failed", exc_info=True)
self._service_log_event(
"vpn_tunnel_start_emit_failed agent_id={0} transport=broadcast".format(agent_id or "-"),
level="WARNING",
)
def _emit_stop(self, session: VpnSession, reason: str) -> None:
if not self.socketio:
@@ -402,10 +514,29 @@ class VpnTunnelService:
"vpn_tunnel_stop",
{"agent_id": session.agent_id, "tunnel_id": session.tunnel_id, "reason": reason},
):
self._service_log_event(
"vpn_tunnel_stop_emit agent_id={0} tunnel_id={1} transport=direct".format(
session.agent_id,
session.tunnel_id,
)
)
return
except Exception:
self.logger.debug("emit_agent_event failed for vpn_tunnel_stop", exc_info=True)
self._service_log_event(
"vpn_tunnel_stop_emit_failed agent_id={0} tunnel_id={1} transport=direct".format(
session.agent_id,
session.tunnel_id,
),
level="WARNING",
)
try:
self._service_log_event(
"vpn_tunnel_stop_emit agent_id={0} tunnel_id={1} transport=broadcast".format(
session.agent_id,
session.tunnel_id,
)
)
self.socketio.emit(
"vpn_tunnel_stop",
{"agent_id": session.agent_id, "tunnel_id": session.tunnel_id, "reason": reason},
@@ -413,6 +544,13 @@ class VpnTunnelService:
)
except Exception:
self.logger.debug("vpn_tunnel_stop emit failed", exc_info=True)
self._service_log_event(
"vpn_tunnel_stop_emit_failed agent_id={0} tunnel_id={1} transport=broadcast".format(
session.agent_id,
session.tunnel_id,
),
level="WARNING",
)
def _log_device_activity(self, session: VpnSession, *, event: str, reason: Optional[str] = None) -> None:
if self.db_conn_factory is None:
@@ -573,3 +711,24 @@ class VpnTunnelService:
if include_token:
payload["token"] = session.token
return payload
def _session_summary(self, session: VpnSession) -> Mapping[str, Any]:
endpoint_host = session.endpoint_host or str(self._engine_ip.ip)
endpoint_host = self._format_endpoint_host(endpoint_host)
return {
"tunnel_id": session.tunnel_id,
"agent_id": session.agent_id,
"virtual_ip": session.virtual_ip,
"engine_virtual_ip": str(self._engine_ip.ip),
"endpoint": f"{endpoint_host}:{self.context.wireguard_port}",
"allowed_ports": list(session.allowed_ports),
"connected_operators": len([o for o in session.operator_ids if o]),
"created_at": int(session.created_at),
"created_at_iso": self._ts_to_iso(session.created_at),
"last_activity": int(session.last_activity),
"last_activity_iso": self._ts_to_iso(session.last_activity),
"expires_at": int(session.expires_at),
"expires_at_iso": self._ts_to_iso(session.expires_at),
"idle_seconds": self.idle_seconds,
"status": "up",
}

View File

@@ -336,12 +336,16 @@ class WireGuardServerManager:
raise RuntimeError(f"WireGuard installtunnelservice failed: {err}")
self.logger.info("WireGuard listener installed (service=%s)", config_path.stem)
def stop_listener(self) -> None:
def stop_listener(self, *, ignore_missing: bool = False) -> None:
"""Stop and remove the WireGuard tunnel service."""
args = [self._wireguard_exe, "/uninstalltunnelservice", self._service_name]
code, out, err = self._run_command(args)
if code != 0:
err_text = " ".join([out or "", err or ""]).strip().lower()
if ignore_missing and ("does not exist" in err_text or "not exist" in err_text):
self.logger.info("WireGuard tunnel service already absent")
return
self.logger.warning("Failed to uninstall WireGuard tunnel service code=%s err=%s", code, err)
else:
self.logger.info("WireGuard tunnel service removed")

View File

@@ -103,7 +103,7 @@ def register_realtime(socket_server: SocketIO, context: EngineContext) -> None:
adapters = EngineRealtimeAdapters(context)
logger = context.logger.getChild("realtime.quick_jobs")
agent_logger = context.logger.getChild("realtime.agents")
shell_bridge = VpnShellBridge(socket_server, context)
shell_bridge = VpnShellBridge(socket_server, context, adapters.service_log)
agent_registry = AgentSocketRegistry(socket_server, agent_logger)
def _emit_agent_event(agent_id: str, event: str, payload: Any) -> bool:
@@ -148,6 +148,24 @@ def register_realtime(socket_server: SocketIO, context: EngineContext) -> None:
setattr(context, "vpn_tunnel_service", service)
return service
def _tunnel_log(message: str, *, level: str = "INFO") -> None:
    """Write *message* to the ``VPN_Tunnel/tunnel`` service log.

    Any error raised by the write is swallowed and noted on the agent debug
    logger, so socket event handlers never fail because of logging.
    """
    log_name = "VPN_Tunnel/tunnel"
    try:
        adapters.service_log(log_name, message, level=level)
    except Exception:
        agent_logger.debug("vpn_tunnel service log write failed", exc_info=True)
def _shell_log(message: str, *, level: str = "INFO") -> None:
    """Write *message* to the ``VPN_Tunnel/remote_shell`` service log.

    Any error raised by the write is swallowed and noted on the agent debug
    logger, so socket event handlers never fail because of logging.
    """
    log_name = "VPN_Tunnel/remote_shell"
    try:
        adapters.service_log(log_name, message, level=level)
    except Exception:
        agent_logger.debug("vpn_shell service log write failed", exc_info=True)
def _remote_addr() -> str:
    """Return the peer address of the current Socket.IO request for logging.

    Prefers the first comma-separated entry of ``X-Forwarded-For`` and falls
    back to ``request.remote_addr``; the result is stripped and may be empty.
    """
    forwarded = (request.headers.get("X-Forwarded-For") or "").strip()
    candidate = forwarded.split(",")[0] if forwarded else (request.remote_addr or "")
    return candidate.strip()
@socket_server.on("quick_job_result")
def _handle_quick_job_result(data: Any) -> None:
if not isinstance(data, dict):
@@ -317,18 +335,59 @@ def register_realtime(socket_server: SocketIO, context: EngineContext) -> None:
elif isinstance(data, str):
agent_id = data.strip()
if not agent_id:
_shell_log(
"vpn_shell_open_missing sid={0} remote={1}".format(
request.sid,
_remote_addr() or "-",
),
level="WARNING",
)
return {"error": "agent_id_required"}
_shell_log(
"vpn_shell_open_request agent_id={0} sid={1} remote={2}".format(
agent_id,
request.sid,
_remote_addr() or "-",
)
)
service = _get_tunnel_service()
if service is None:
_shell_log(
"vpn_shell_open_failed agent_id={0} sid={1} reason=vpn_service_unavailable".format(
agent_id,
request.sid,
),
level="WARNING",
)
return {"error": "vpn_service_unavailable"}
if not service.status(agent_id):
_shell_log(
"vpn_shell_open_failed agent_id={0} sid={1} reason=tunnel_down".format(
agent_id,
request.sid,
),
level="WARNING",
)
return {"error": "tunnel_down"}
session = shell_bridge.open_session(request.sid, agent_id)
if session is None:
_shell_log(
"vpn_shell_open_failed agent_id={0} sid={1} reason=shell_connect_failed".format(
agent_id,
request.sid,
),
level="WARNING",
)
return {"error": "shell_connect_failed"}
service.bump_activity(agent_id)
_shell_log(
"vpn_shell_open_success agent_id={0} sid={1}".format(
agent_id,
request.sid,
)
)
return {"status": "ok"}
@socket_server.on("connect_agent")
@@ -341,16 +400,38 @@ def register_realtime(socket_server: SocketIO, context: EngineContext) -> None:
elif isinstance(data, str):
agent_id = data.strip()
if not agent_id:
_tunnel_log(
"vpn_agent_socket_missing sid={0} remote={1}".format(
request.sid,
_remote_addr() or "-",
),
level="WARNING",
)
return {"error": "agent_id_required"}
agent_registry.register(agent_id, request.sid)
agent_logger.info("Agent socket registered agent_id=%s service_mode=%s sid=%s", agent_id, service_mode, request.sid)
_tunnel_log(
"vpn_agent_socket_register agent_id={0} service_mode={1} sid={2} remote={3}".format(
agent_id,
service_mode or "-",
request.sid,
_remote_addr() or "-",
)
)
service = _get_tunnel_service()
if service:
payload = service.session_payload(agent_id, include_token=True)
if payload:
agent_registry.emit(agent_id, "vpn_tunnel_start", payload)
if agent_registry.emit(agent_id, "vpn_tunnel_start", payload):
_tunnel_log(
"vpn_agent_socket_emit_start agent_id={0} tunnel_id={1} sid={2}".format(
agent_id,
payload.get("tunnel_id", "-"),
request.sid,
)
)
return {"status": "ok"}
@@ -363,11 +444,28 @@ def register_realtime(socket_server: SocketIO, context: EngineContext) -> None:
payload = data
if payload is None:
return {"error": "payload_required"}
try:
payload_len = len(str(payload))
except Exception:
payload_len = 0
_shell_log(
"vpn_shell_send_request sid={0} bytes={1} remote={2}".format(
request.sid,
payload_len,
_remote_addr() or "-",
)
)
shell_bridge.send(request.sid, str(payload))
return {"status": "ok"}
@socket_server.on("vpn_shell_close")
def _vpn_shell_close(data: Any = None) -> Dict[str, Any]:
_shell_log(
"vpn_shell_close_request sid={0} remote={1}".format(
request.sid,
_remote_addr() or "-",
)
)
shell_bridge.close(request.sid)
return {"status": "ok"}
@@ -376,4 +474,18 @@ def register_realtime(socket_server: SocketIO, context: EngineContext) -> None:
agent_id = agent_registry.unregister(request.sid)
if agent_id:
agent_logger.info("Agent socket disconnected agent_id=%s sid=%s", agent_id, request.sid)
_tunnel_log(
"vpn_agent_socket_disconnect agent_id={0} sid={1}".format(
agent_id,
request.sid,
)
)
else:
_shell_log(
"vpn_shell_client_disconnect sid={0} remote={1}".format(
request.sid,
_remote_addr() or "-",
),
level="WARNING",
)
shell_bridge.close(request.sid)

View File

@@ -15,7 +15,7 @@ import socket
import threading
import time
from dataclasses import dataclass
from typing import Any, Dict, Optional
from typing import Any, Callable, Dict, Optional
def _b64encode(data: bytes) -> str:
@@ -32,6 +32,11 @@ class ShellSession:
agent_id: str
socketio: Any
tcp: socket.socket
service_log: Optional[Callable[[str, str, Optional[str]], None]] = None
output_lines: int = 0
output_bytes: int = 0
input_messages: int = 0
input_bytes: int = 0
_reader: Optional[threading.Thread] = None
def start_reader(self) -> None:
@@ -39,15 +44,31 @@ class ShellSession:
t.start()
self._reader = t
def _service_log_event(self, message: str, *, level: str = "INFO") -> None:
if not callable(self.service_log):
return
try:
self.service_log("VPN_Tunnel/remote_shell", message, level=level)
except Exception:
pass
def _read_loop(self) -> None:
buffer = b""
reason = "remote_closed"
error_detail = ""
try:
while True:
try:
data = self.tcp.recv(4096)
except (socket.timeout, TimeoutError):
# No data ready; keep the session alive.
continue
except Exception as exc:
reason = "read_error"
error_detail = f"{type(exc).__name__}:{exc}"
break
if not data:
reason = "remote_closed"
break
buffer += data
while b"\n" in buffer:
@@ -64,8 +85,37 @@ class ShellSession:
decoded = _b64decode(str(payload)).decode("utf-8", errors="replace")
except Exception:
decoded = ""
self.output_lines += 1
self.output_bytes += len(line)
self.socketio.emit("vpn_shell_output", {"data": decoded}, to=self.sid)
finally:
if reason == "read_error":
self._service_log_event(
"vpn_shell_read_error agent_id={0} sid={1} reason={2} error={3}".format(
self.agent_id,
self.sid,
reason,
error_detail or "-",
),
level="WARNING",
)
self._service_log_event(
"vpn_shell_closed agent_id={0} sid={1} reason={2}".format(
self.agent_id,
self.sid,
reason,
)
)
self._service_log_event(
"vpn_shell_output_summary agent_id={0} sid={1} lines={2} bytes={3} inputs={4} input_bytes={5}".format(
self.agent_id,
self.sid,
self.output_lines,
self.output_bytes,
self.input_messages,
self.input_bytes,
)
)
self.socketio.emit("vpn_shell_closed", {"agent_id": self.agent_id}, to=self.sid)
try:
self.tcp.close()
@@ -73,8 +123,21 @@ class ShellSession:
pass
def send(self, payload: str) -> None:
data = json.dumps({"type": "stdin", "data": _b64encode(payload.encode("utf-8"))})
self.tcp.sendall(data.encode("utf-8") + b"\n")
payload_bytes = payload.encode("utf-8")
data = json.dumps({"type": "stdin", "data": _b64encode(payload_bytes)})
self.input_messages += 1
self.input_bytes += len(payload_bytes)
try:
self.tcp.sendall(data.encode("utf-8") + b"\n")
except Exception as exc:
self._service_log_event(
"vpn_shell_send_failed agent_id={0} sid={1} error={2}".format(
self.agent_id,
self.sid,
f"{type(exc).__name__}:{exc}",
),
level="WARNING",
)
def close(self) -> None:
try:
@@ -89,11 +152,20 @@ class ShellSession:
class VpnShellBridge:
def __init__(self, socketio, context) -> None:
def __init__(self, socketio, context, service_log=None) -> None:
self.socketio = socketio
self.context = context
self._sessions: Dict[str, ShellSession] = {}
self.logger = context.logger.getChild("vpn_shell")
self.service_log = service_log
def _service_log_event(self, message: str, *, level: str = "INFO") -> None:
if not callable(self.service_log):
return
try:
self.service_log("VPN_Tunnel/remote_shell", message, level=level)
except Exception:
self.logger.debug("vpn_shell service log write failed", exc_info=True)
def open_session(self, sid: str, agent_id: str) -> Optional[ShellSession]:
service = getattr(self.context, "vpn_tunnel_service", None)
@@ -107,6 +179,15 @@ class VpnShellBridge:
tcp = None
last_error: Optional[Exception] = None
for attempt in range(3):
self._service_log_event(
"vpn_shell_connect_attempt agent_id={0} sid={1} host={2} port={3} attempt={4}".format(
agent_id,
sid,
host,
port,
attempt + 1,
)
)
try:
tcp = socket.create_connection((host, port), timeout=5)
break
@@ -115,26 +196,72 @@ class VpnShellBridge:
if attempt == 0:
try:
service.request_agent_start(agent_id)
self._service_log_event(
"vpn_shell_agent_start_emit agent_id={0} sid={1}".format(agent_id, sid)
)
except Exception:
self.logger.debug("Failed to re-emit vpn_tunnel_start for agent=%s", agent_id, exc_info=True)
self._service_log_event(
"vpn_shell_agent_start_failed agent_id={0} sid={1}".format(agent_id, sid),
level="WARNING",
)
time.sleep(1)
if tcp is None:
self._service_log_event(
"vpn_shell_connect_failed agent_id={0} sid={1} host={2} port={3} error={4}".format(
agent_id,
sid,
host,
port,
str(last_error) if last_error else "-",
),
level="WARNING",
)
self.logger.warning("Failed to connect vpn shell to %s:%s", host, port, exc_info=last_error)
return None
session = ShellSession(sid=sid, agent_id=agent_id, socketio=self.socketio, tcp=tcp)
session = ShellSession(
sid=sid,
agent_id=agent_id,
socketio=self.socketio,
tcp=tcp,
service_log=self.service_log,
)
try:
session.tcp.settimeout(15)
except Exception:
pass
self._sessions[sid] = session
self._service_log_event(
"vpn_shell_connect_success agent_id={0} sid={1} host={2} port={3}".format(
agent_id,
sid,
host,
port,
)
)
session.start_reader()
return session
def send(self, sid: str, payload: str) -> None:
    """Relay *payload* to the shell session registered under *sid*.

    If no session is registered for *sid*, a WARNING event is recorded and
    nothing is sent. Otherwise the payload is handed to the session, a
    ``vpn_shell_send`` event is recorded, and the tunnel service (when the
    context exposes one) is told the agent was active.

    Args:
        sid: Key used to look up the session in ``self._sessions``.
        payload: Data passed unchanged to ``session.send``.
    """
    active = self._sessions.get(sid)
    if not active:
        missing_event = "vpn_shell_send_missing sid={0}".format(sid or "-")
        self._service_log_event(missing_event, level="WARNING")
        return

    active.send(payload)

    # The value logged under "bytes=" is the length of str(payload); fall
    # back to 0 if the payload cannot be stringified or measured.
    try:
        size = len(str(payload))
    except Exception:
        size = 0
    sent_event = "vpn_shell_send agent_id={0} sid={1} bytes={2}".format(
        active.agent_id,
        sid,
        size,
    )
    self._service_log_event(sent_event)

    tunnel_service = getattr(self.context, "vpn_tunnel_service", None)
    if tunnel_service:
        tunnel_service.bump_activity(active.agent_id)
@@ -143,4 +270,10 @@ class VpnShellBridge:
session = self._sessions.pop(sid, None)
if not session:
return
self._service_log_event(
"vpn_shell_close_request agent_id={0} sid={1}".format(
session.agent_id,
sid,
)
)
session.close()

View File

@@ -19,12 +19,13 @@ This document is the reference for Borealis reverse VPN tunnels built on WireGua
- Generates server keys, renders config, manages `wireguard.exe` tunnel service, applies ACL rules.
- PowerShell bridge: `Data/Engine/services/WebSocket/vpn_shell.py`
- Proxies UI shell input/output to the agent's TCP shell server over WireGuard.
- Logging: `Engine/Logs/reverse_tunnel.log` plus Device Activity entries.
- Logging: `Engine/Logs/VPN_Tunnel/tunnel.log` plus Device Activity entries; shell I/O is in `Engine/Logs/VPN_Tunnel/remote_shell.log`.
## 3) API Endpoints
- `POST /api/tunnel/connect` → issues session material (tunnel_id, token, virtual_ip, endpoint, allowed_ports, idle_seconds).
- `GET /api/tunnel/status` → returns up/down status for an agent.
- `GET /api/tunnel/connect/status` → alias for status (used by UI before shell open).
- `GET /api/tunnel/active` → lists active VPN tunnel sessions (tunnel_id, agent_id, virtual_ip, last_activity, etc.).
- `DELETE /api/tunnel/disconnect` → immediate teardown (agent + engine cleanup).
- `GET /api/device/vpn_config/<agent_id>` → read per-agent allowed ports.
- `PUT /api/device/vpn_config/<agent_id>` → update allowed ports.
@@ -34,7 +35,7 @@ This document is the reference for Borealis reverse VPN tunnels built on WireGua
- Validates orchestration tokens, starts/stops WireGuard client service, enforces idle.
- Shell server: `Data/Agent/Roles/role_VpnShell.py`
- TCP PowerShell server bound to `0.0.0.0:47002`, restricted to VPN subnet (10.255.x.x).
- Logging: `Agent/Logs/reverse_tunnel.log`.
- Logging: `Agent/Logs/VPN_Tunnel/tunnel.log` (tunnel lifecycle) and `Agent/Logs/VPN_Tunnel/remote_shell.log` (shell I/O).
## 5) Security & Auth
- TLS pinned for Engine API/Socket.IO.

View File

@@ -50,8 +50,8 @@ Do not implement Linux yet.
## Logs to Know
- Agent: `Agent/Logs/reverse_tunnel.log` is the primary signal for VPN tunnel and shell.
- Engine: `Engine/Logs/reverse_tunnel.log`, `Engine/Logs/engine.log`.
- Agent: `Agent/Logs/VPN_Tunnel/tunnel.log` (tunnel lifecycle) and `Agent/Logs/VPN_Tunnel/remote_shell.log` (shell I/O).
- Engine: `Engine/Logs/VPN_Tunnel/tunnel.log`, `Engine/Logs/VPN_Tunnel/remote_shell.log`, `Engine/Logs/engine.log`.
## What Likely Remains

View File

@@ -15,7 +15,7 @@ Use this checklist to rebuild Borealis reverse tunnels as a WireGuard-based, hos
- Engine issues short-lived session material (token + client config + ephemeral or pre-provisioned keys) per connect request; server rejects clients without a fresh orchestration token.
- Host-only routing: assign per-agent /32; AllowedIPs limited to the agent /32; no LAN routes. Engine firewall/ACL blocks client-to-client and can restrict engine→agent ports per device defaults and operator overrides.
- APIs: `/api/tunnel/connect`, `/api/tunnel/status`, `/api/tunnel/disconnect`. Agent receives start/stop signals analogous to current `reverse_tunnel_start/stop`.
- Logging and audit stay in place (use `reverse_tunnel.log` or a renamed equivalent consistently on Engine/Agent).
- Logging and audit stay in place (use `Engine/Logs/VPN_Tunnel/tunnel.log` and `Agent/Logs/VPN_Tunnel/tunnel.log` consistently for tunnel lifecycle).
- UI: `Data/Engine/web-interface/src/Devices/Device_Details.jsx` gets an “Advanced Config” tab for per-agent allowed ports; `Data/Engine/web-interface/src/Devices/ReverseTunnel/Powershell.jsx` is reused for a live PowerShell MVP wired to the new APIs.
## Milestone Checkpoints (commit names, Windows first)
@@ -58,11 +58,11 @@ At each milestone: pause, run the listed checks, talk to the operator, and commi
- Keys/Certs:
- [x] Prefer reusing existing Engine cert infrastructure for signing orchestration tokens. Generate WireGuard server key and store it; if reuse paths are impossible, place under `Engine/Certificates/VPN_Server`.
- [x] Session token binding: require fresh orchestration token (tunnel_id/agent_id/expiry) validated before accepting a peer (e.g., via pre-shared keys or control-plane validation before adding peer).
- Logging: server logs to `Engine/Logs/reverse_tunnel.log` (or renamed consistently). [x]
- Logging: server logs to `Engine/Logs/VPN_Tunnel/tunnel.log` (keep this path consistent across the Engine); shell I/O goes to `Engine/Logs/VPN_Tunnel/remote_shell.log`. [x]
- Checkpoint tests:
- [ ] Engine starts WireGuard listener locally on 30000.
- [ ] Only engine IP reachable; client-to-client blocked.
- [ ] Peers without valid token/key are rejected.
- [x] Engine starts WireGuard listener locally on 30000.
- [x] Only engine IP reachable; client-to-client blocked.
- [x] Peers without valid token/key are rejected.
### 3) Agent VPN Client & Lifecycle — Milestone: Agent VPN Client & Lifecycle (Windows)
- Agents editing this document should mark tasks they complete with `[x]` (leave `[ ]` otherwise).
@@ -77,9 +77,9 @@ At each milestone: pause, run the listed checks, talk to the operator, and commi
- [x] Stop path: remove peer/bring interface down cleanly; adapter remains installed.
- Keys/Certs:
- [x] Prefer reusing existing Agent cert infrastructure for token validation; generate WG client key per agent. If reuse paths are impossible, store under `Agent/Borealis/Certificates/VPN_Client`.
- Logging: `Agent/Logs/reverse_tunnel.log` captures connect/disconnect/errors/idle timeouts. [x]
- Logging: `Agent/Logs/VPN_Tunnel/tunnel.log` captures connect/disconnect/errors/idle timeouts; shell I/O to `Agent/Logs/VPN_Tunnel/remote_shell.log`. [x]
- Checkpoint tests:
- [ ] Manual connect/disconnect against engine test server.
- [x] Manual connect/disconnect against engine test server.
- [x] Idle timeout fires at ~15 minutes of inactivity.
### 4) API & Service Orchestration — Milestone: API & Service Orchestration (Windows)
@@ -95,8 +95,8 @@ At each milestone: pause, run the listed checks, talk to the operator, and commi
- [x] Token issuance: short-lived, binds agent_id/tunnel_id/port/expiry; validated before adding peer.
- [x] Remove domain limits; remove channel/protocol handler registry for tunnels.
- Checkpoint tests:
- [ ] API happy path: connect → status → disconnect.
- [ ] Reject stale/second connect for same agent while active.
- [x] API happy path: connect → status → disconnect.
- [x] Second connect reuses the active tunnel (no duplicate sessions).
### 5) UI Advanced Config & Operator Flow (PowerShell MVP) — Milestone: UI Advanced Config & Operator Flow (Windows, PowerShell MVP)
- Agents editing this document should mark tasks they complete with `[x]` (leave `[ ]` otherwise).
@@ -110,8 +110,8 @@ At each milestone: pause, run the listed checks, talk to the operator, and commi
- [x] Ensure tunnel is up via `/api/tunnel/connect/status` before opening the terminal; call `/api/tunnel/disconnect` on exit/tab close.
- Later protocols (RDP/SSH/etc.) can follow once MVP is proven, but do not block on them for this milestone.
- Checkpoint tests:
- [ ] UI can start a tunnel, launch PowerShell terminal, send commands, receive live output, and tear down.
- [ ] Toggles change ACL behavior (engine→agent reachability) as expected.
- [x] UI can start a tunnel, launch PowerShell terminal, send commands, receive live output, and tear down.
- [x] Toggles change ACL behavior (engine→agent reachability) as expected.
### 6) Legacy Tunnel Removal & Cleanup — Milestone: Legacy Tunnel Removal & Cleanup (Windows)
- Agents editing this document should mark tasks they complete with `[x]` (leave `[ ]` otherwise).
@@ -122,26 +122,26 @@ At each milestone: pause, run the listed checks, talk to the operator, and commi
- [x] Update docs and references to point to the new WireGuard VPN flow; keep change log entries.
- [x] Ensure no lingering domain limits/config knobs remain.
- Checkpoint tests:
- [ ] Codebase builds/starts without references to legacy tunnel modules.
- [ ] UI no longer calls old APIs or Socket.IO tunnel namespace.
- [x] Codebase builds/starts without references to legacy tunnel modules.
- [x] UI no longer calls old APIs or Socket.IO tunnel namespace.
### 7) End-to-End Validation — Milestone: End-to-End Validation (Windows)
- Agents editing this document should mark tasks they complete with `[x]` (leave `[ ]` otherwise).
- Functional:
- [ ] Windows agent: WireGuard connect on port 30000; PowerShell MVP fully live in the web terminal; RDP/WinRM reachable over tunnel as configured.
- [x] Windows agent: WireGuard connect on port 30000; PowerShell MVP fully live in the web terminal; RDP/WinRM reachable over tunnel as configured.
- [x] Idle timeout at 15 minutes of inactivity.
- [ ] Operator disconnect stops tunnel immediately.
- [x] Operator disconnect stops tunnel immediately.
- Security:
- [ ] Client-to-client blocked.
- [ ] Only engine IP reachable; per-agent ACL enforces allowed ports.
- [ ] Token enforcement blocks stale/unauthorized sessions.
- [x] Client-to-client blocked.
- [x] Only engine IP reachable; per-agent ACL enforces allowed ports.
- [x] Token enforcement blocks stale/unauthorized sessions.
- Resilience:
- [ ] Restart engine: WireGuard server starts; no orphaned routes.
- [ ] Restart agent: adapter persists; tunnel stays down until requested.
- [x] Restart engine: WireGuard server starts; no orphaned routes.
- [x] Restart agent: adapter persists; tunnel stays down until requested.
- Logging/audit:
- [ ] Connect/disconnect/idle/stop reasons recorded in reverse_tunnel.log (Engine/Agent) and Device Activity.
- [x] Connect/disconnect/idle/stop reasons recorded in `VPN_Tunnel/tunnel.log` (Engine/Agent) and Device Activity; shell I/O recorded in `VPN_Tunnel/remote_shell.log`.
- Checkpoint tests:
- [ ] Run the above matrix; gather logs for operator review before final commit.
- [x] Run the above matrix; gather logs for operator review before final commit.
## Linux (Deferred) — Do Not Implement Yet
- When greenlit, mirror the structure above for Linux: