Fixed Runaway Agent Supervisor Event

This commit is contained in:
2025-09-04 15:30:27 -06:00
parent f905a50501
commit ee7c6f3062
4 changed files with 233 additions and 11 deletions

View File

@@ -4,6 +4,9 @@ import time
import subprocess import subprocess
import threading import threading
import datetime import datetime
import json
import ctypes
from ctypes import wintypes
# Optional pywin32 imports for per-session launching # Optional pywin32 imports for per-session launching
try: try:
@@ -24,10 +27,35 @@ BOREALIS_DIR = os.path.join(AGENT_DIR, 'Borealis')
LOG_DIR = os.path.join(ROOT, 'Logs', 'Agent') LOG_DIR = os.path.join(ROOT, 'Logs', 'Agent')
os.makedirs(LOG_DIR, exist_ok=True) os.makedirs(LOG_DIR, exist_ok=True)
LOG_FILE = os.path.join(LOG_DIR, 'Supervisor.log') LOG_FILE = os.path.join(LOG_DIR, 'Supervisor.log')
PID_FILE = os.path.join(LOG_DIR, 'script_agent.pid')
# Internal state for process + backoff
_script_proc = None
_spawn_backoff = 5 # seconds (exponential backoff start)
_max_backoff = 300 # cap at 5 minutes
_next_spawn_time = 0.0
_last_disable_log = 0.0
_last_fail_log = 0.0
def log(msg: str): def log(msg: str):
try: try:
# simple size-based rotation (~1MB)
try:
if os.path.isfile(LOG_FILE) and os.path.getsize(LOG_FILE) > 1_000_000:
bak = LOG_FILE + '.1'
try:
if os.path.isfile(bak):
os.remove(bak)
except Exception:
pass
try:
os.replace(LOG_FILE, bak)
except Exception:
pass
except Exception:
pass
ts = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') ts = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
with open(LOG_FILE, 'a', encoding='utf-8') as f: with open(LOG_FILE, 'a', encoding='utf-8') as f:
f.write(f"[{ts}] {msg}\n") f.write(f"[{ts}] {msg}\n")
@@ -57,28 +85,153 @@ def venv_pythonw():
return venv_python() return venv_python()
def _settings_path():
    """Return the path of the agent settings JSON file located under ROOT."""
    settings_name = 'agent_settings.json'
    return os.path.join(ROOT, settings_name)
def load_settings():
    """Load the agent settings file and return its contents as a dict.

    Returns an empty dict when the file is missing, unreadable, not valid
    JSON, or when its top-level JSON value is not an object. Callers use
    ``.get(...)`` on the result, so anything other than a dict is rejected.
    """
    try:
        path = _settings_path()
        if os.path.isfile(path):
            # 'utf-8-sig' accepts files with or without a UTF-8 BOM; a BOM
            # would otherwise make json.load() fail and the settings would be
            # silently ignored.
            with open(path, 'r', encoding='utf-8-sig') as f:
                cfg = json.load(f)
            if isinstance(cfg, dict):
                return cfg
    except Exception:
        # Best-effort: any read/parse problem falls back to defaults.
        pass
    return {}
def _psutil_process_exists(pid: int) -> bool:
try: try:
# best-effort: avoid duplicate spawns
import psutil # type: ignore import psutil # type: ignore
for p in psutil.process_iter(['name', 'cmdline']): if pid <= 0:
return False
p = psutil.Process(pid)
return p.is_running() and (p.status() != psutil.STATUS_ZOMBIE)
except Exception:
return False
def _win_process_exists(pid: int) -> bool:
try:
if pid <= 0:
return False
PROCESS_QUERY_LIMITED_INFORMATION = 0x1000
kernel32 = ctypes.WinDLL('kernel32', use_last_error=True)
OpenProcess = kernel32.OpenProcess
OpenProcess.restype = wintypes.HANDLE
OpenProcess.argtypes = (wintypes.DWORD, wintypes.BOOL, wintypes.DWORD)
CloseHandle = kernel32.CloseHandle
CloseHandle.argtypes = (wintypes.HANDLE,)
h = OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, False, pid)
if h:
try: try:
cl = (p.info.get('cmdline') or []) CloseHandle(h)
if any('script_agent.py' in (part or '') for part in cl):
return
except Exception: except Exception:
pass pass
return True
return False
except Exception:
return False
def process_exists(pid: int) -> bool:
    """Return True if either the psutil probe or the Win32 probe finds *pid*.

    The psutil check runs first; the Win32 API check is consulted only when
    psutil reports the process as absent (or is unavailable).
    """
    if _psutil_process_exists(pid):
        return True
    return _win_process_exists(pid)
def _read_pid_file() -> int:
    """Return the integer PID stored in PID_FILE, or 0 if it cannot be read.

    A missing file, unreadable file, or non-integer content all yield 0.
    """
    try:
        if not os.path.isfile(PID_FILE):
            return 0
        with open(PID_FILE, 'r', encoding='utf-8') as pid_fh:
            raw = pid_fh.read()
            return int(raw.strip())
    except Exception:
        return 0
def _write_pid_file(pid: int):
    """Persist *pid* as text in PID_FILE, overwriting it; errors are ignored."""
    try:
        pid_fh = open(PID_FILE, 'w', encoding='utf-8')
        with pid_fh:
            pid_fh.write(str(pid))
    except Exception:
        # Best-effort: the PID file is only an optimisation for duplicate checks.
        pass
def _clear_pid_file():
    """Delete PID_FILE if it exists; any failure is silently ignored."""
    try:
        if not os.path.isfile(PID_FILE):
            return
        os.remove(PID_FILE)
    except Exception:
        pass
# Time of the most recent successful spawn; used to tell a stable child from
# one that is crash-looping.
_last_spawn_time = 0.0
# A child that has stayed up at least this long is considered healthy, and the
# backoff is reset to its starting value.
_STABLE_RUN_SECONDS = 60


def _arm_spawn_backoff():
    """Block spawning for the current backoff window, then double it (capped)."""
    global _spawn_backoff, _next_spawn_time
    _next_spawn_time = time.time() + _spawn_backoff
    _spawn_backoff = min(_spawn_backoff * 2, _max_backoff)


def ensure_script_agent():
    """Ensure LocalSystem script_agent.py is running; restart if not.

    Order of checks on every call:

    1. Return early (logging at most once per 60 s) when the setting
       ``enable_system_script_agent`` is false.
    2. If the child this supervisor spawned is still running, keep it. If it
       has exited, clear the PID file and arm the backoff so that a child
       which dies right after launch is not respawned on every call.
    3. If the PID file names a living process, do not spawn; if it names a
       dead one, remove the stale file.
    4. Honour the backoff window.
    5. Spawn the agent and record its PID, or arm the backoff on failure
       (failure logs are limited to one per 10 s).

    The backoff starts at 5 s, doubles up to ``_max_backoff``, and is reset
    once a child has been observed running for ``_STABLE_RUN_SECONDS``.
    """
    global _script_proc, _spawn_backoff, _next_spawn_time
    global _last_disable_log, _last_fail_log, _last_spawn_time

    # Allow disabling via config
    try:
        cfg = load_settings()
        if not cfg.get('enable_system_script_agent', True):
            now = time.time()
            if now - _last_disable_log > 60:
                log('System script agent disabled by config (enable_system_script_agent=false)')
                _last_disable_log = now
            return
    except Exception:
        pass

    # If we have a running child process, keep it
    try:
        if _script_proc is not None:
            exit_code = _script_proc.poll()
            if exit_code is None:
                if time.time() - _last_spawn_time >= _STABLE_RUN_SECONDS:
                    # Child has proven stable; next restart starts from 5 s again.
                    _spawn_backoff = 5
                return
            # Child exited: forget it, drop the PID file, and delay the respawn.
            _script_proc = None
            _clear_pid_file()
            _arm_spawn_backoff()
            log(f'script_agent.py exited (code {exit_code})')
    except Exception:
        pass

    # If PID file points to a living process, don't spawn
    try:
        pid = _read_pid_file()
        if pid:
            # Probe once; the answer decides between "leave it" and "stale file".
            if process_exists(pid):
                return
            _clear_pid_file()
    except Exception:
        pass

    # Honor backoff window
    if time.time() < _next_spawn_time:
        return

    py = venv_python()
    script = os.path.join(ROOT, 'Data', 'Agent', 'script_agent.py')
    try:
        proc = subprocess.Popen(
            [py, '-W', 'ignore::SyntaxWarning', script],
            # 0x08000000 is CREATE_NO_WINDOW on Windows
            creationflags=(0x08000000 if os.name == 'nt' else 0),
        )
    except Exception as e:
        now = time.time()
        # rate-limit identical failure logs to once per 10s
        if now - _last_fail_log > 10:
            log(f'Failed to launch script_agent.py: {e}')
            _last_fail_log = now
        _arm_spawn_backoff()
        return

    _script_proc = proc
    _last_spawn_time = time.time()
    _next_spawn_time = 0.0
    _write_pid_file(proc.pid)
    log(f'Launched script_agent.py (pid {proc.pid})')
def _enable_privileges(): def _enable_privileges():
@@ -166,4 +319,3 @@ def main():
if __name__ == '__main__': if __name__ == '__main__':
main() main()

View File

@@ -1065,6 +1065,8 @@ async def _run_powershell_via_user_task(content: str):
ps = "powershell.exe" ps = "powershell.exe"
else: else:
return -999, '', 'Windows only' return -999, '', 'Windows only'
path = None
out_path = None
try: try:
temp_dir = os.path.join(os.path.dirname(__file__), '..', '..', 'Temp') temp_dir = os.path.join(os.path.dirname(__file__), '..', '..', 'Temp')
temp_dir = os.path.abspath(temp_dir) temp_dir = os.path.abspath(temp_dir)
@@ -1110,6 +1112,18 @@ Get-ScheduledTask -TaskName $task | Out-Null
return 0, out_data or '', '' return 0, out_data or '', ''
except Exception as e: except Exception as e:
return -999, '', str(e) return -999, '', str(e)
finally:
# Best-effort cleanup of temp script and output files
try:
if path and os.path.isfile(path):
os.remove(path)
except Exception:
pass
try:
if out_path and os.path.isfile(out_path):
os.remove(out_path)
except Exception:
pass
# ---------------- Dummy Qt Widget to Prevent Exit ---------------- # ---------------- Dummy Qt Widget to Prevent Exit ----------------
class PersistentWindow(QtWidgets.QWidget): class PersistentWindow(QtWidgets.QWidget):

View File

@@ -6,12 +6,14 @@ import asyncio
import json import json
import subprocess import subprocess
import tempfile import tempfile
from typing import Optional
import socketio import socketio
import platform import platform
import time import time
import uuid import uuid
import tempfile import tempfile
import contextlib
def get_project_root(): def get_project_root():
@@ -54,6 +56,13 @@ def run_powershell_script_content(content: str):
return proc.returncode, proc.stdout or "", proc.stderr or "" return proc.returncode, proc.stdout or "", proc.stderr or ""
except Exception as e: except Exception as e:
return -1, "", str(e) return -1, "", str(e)
finally:
# Best-effort cleanup of the ephemeral script
try:
if os.path.isfile(path):
os.remove(path)
except Exception:
pass
async def main(): async def main():
@@ -201,10 +210,49 @@ Get-ScheduledTask -TaskName $task | Out-Null
# Cleanup task (best-effort) # Cleanup task (best-effort)
cleanup_ps = f"try {{ Unregister-ScheduledTask -TaskName '{task_name}' -Confirm:$false }} catch {{}}" cleanup_ps = f"try {{ Unregister-ScheduledTask -TaskName '{task_name}' -Confirm:$false }} catch {{}}"
subprocess.run([ps_exe, '-NoProfile', '-ExecutionPolicy', 'Bypass', '-Command', cleanup_ps], capture_output=True, text=True) subprocess.run([ps_exe, '-NoProfile', '-ExecutionPolicy', 'Bypass', '-Command', cleanup_ps], capture_output=True, text=True)
# Best-effort removal of temp script and output files
try:
if os.path.isfile(script_path):
os.remove(script_path)
except Exception:
pass
try:
if os.path.isfile(out_path):
os.remove(out_path)
except Exception:
pass
return 0, out_data or '', '' return 0, out_data or '', ''
except Exception as e: except Exception as e:
return -999, '', str(e) return -999, '', str(e)
if __name__ == '__main__':
    # Ensure only a single instance of the script agent runs (Windows-only lock)
    def _acquire_singleton_lock() -> bool:
        """Try to take an exclusive lock on Logs/Agent/script_agent.lock.

        Returns False only when the lock attempt itself fails, which is
        treated as another instance holding it. Returns True when the lock
        is acquired, and also when locking cannot be attempted at all
        (``msvcrt`` missing, lock file cannot be created) so that the agent
        is never blocked by an environment problem.
        """
        try:
            import msvcrt  # type: ignore
        except ImportError:
            # No msvcrt means no lock primitive on this platform; previously
            # this path reported "another instance" and exited the agent.
            return True
        try:
            lock_dir = os.path.join(get_project_root(), 'Logs', 'Agent')
            os.makedirs(lock_dir, exist_ok=True)
            lock_path = os.path.join(lock_dir, 'script_agent.lock')
            # Keep handle open for process lifetime
            fh = open(lock_path, 'a')
        except Exception:
            # If we cannot establish a lock, continue (do not prevent agent)
            return True
        try:
            # Lock 1 byte non-blocking; released on handle close/process exit
            msvcrt.locking(fh.fileno(), msvcrt.LK_NBLCK, 1)
        except Exception:
            try:
                fh.close()
            except Exception:
                pass
            return False
        # Stash the handle in module globals so it is not garbage-collected
        # (closing it would release the lock).
        globals()['_LOCK_FH'] = fh
        return True

    if not _acquire_singleton_lock():
        print('[ScriptAgent] Another instance is running; exiting.')
        sys.exit(0)

    asyncio.run(main())

View File

@@ -0,0 +1,8 @@
# Dynamically get the Desktop path of the user this script runs as
# (previously hard-coded to one specific user's profile)
$desktopPath = [Environment]::GetFolderPath('Desktop')
# Define the file path relative to the Desktop
$filePath = Join-Path $desktopPath "Canary.txt"
# Write some content into the file
"USER Canary is alive." | Out-File -FilePath $filePath -Encoding UTF8