mirror of
https://github.com/bunny-lab-io/Borealis.git
synced 2025-09-10 21:18:42 -06:00
Fixed Runaway Agent Supervisor Event
This commit is contained in:
@@ -4,6 +4,9 @@ import time
|
|||||||
import subprocess
|
import subprocess
|
||||||
import threading
|
import threading
|
||||||
import datetime
|
import datetime
|
||||||
|
import json
|
||||||
|
import ctypes
|
||||||
|
from ctypes import wintypes
|
||||||
|
|
||||||
# Optional pywin32 imports for per-session launching
|
# Optional pywin32 imports for per-session launching
|
||||||
try:
|
try:
|
||||||
@@ -24,10 +27,35 @@ BOREALIS_DIR = os.path.join(AGENT_DIR, 'Borealis')
|
|||||||
LOG_DIR = os.path.join(ROOT, 'Logs', 'Agent')
|
LOG_DIR = os.path.join(ROOT, 'Logs', 'Agent')
|
||||||
os.makedirs(LOG_DIR, exist_ok=True)
|
os.makedirs(LOG_DIR, exist_ok=True)
|
||||||
LOG_FILE = os.path.join(LOG_DIR, 'Supervisor.log')
|
LOG_FILE = os.path.join(LOG_DIR, 'Supervisor.log')
|
||||||
|
PID_FILE = os.path.join(LOG_DIR, 'script_agent.pid')
|
||||||
|
|
||||||
|
# Internal state for process + backoff
|
||||||
|
_script_proc = None
|
||||||
|
_spawn_backoff = 5 # seconds (exponential backoff start)
|
||||||
|
_max_backoff = 300 # cap at 5 minutes
|
||||||
|
_next_spawn_time = 0.0
|
||||||
|
_last_disable_log = 0.0
|
||||||
|
_last_fail_log = 0.0
|
||||||
|
|
||||||
|
|
||||||
def log(msg: str):
|
def log(msg: str):
|
||||||
try:
|
try:
|
||||||
|
# simple size-based rotation (~1MB)
|
||||||
|
try:
|
||||||
|
if os.path.isfile(LOG_FILE) and os.path.getsize(LOG_FILE) > 1_000_000:
|
||||||
|
bak = LOG_FILE + '.1'
|
||||||
|
try:
|
||||||
|
if os.path.isfile(bak):
|
||||||
|
os.remove(bak)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
os.replace(LOG_FILE, bak)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
ts = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
ts = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
||||||
with open(LOG_FILE, 'a', encoding='utf-8') as f:
|
with open(LOG_FILE, 'a', encoding='utf-8') as f:
|
||||||
f.write(f"[{ts}] {msg}\n")
|
f.write(f"[{ts}] {msg}\n")
|
||||||
@@ -57,28 +85,153 @@ def venv_pythonw():
|
|||||||
return venv_python()
|
return venv_python()
|
||||||
|
|
||||||
|
|
||||||
def ensure_script_agent():
|
def _settings_path():
    """Return the path of the agent settings file, located directly under ROOT."""
    settings_name = 'agent_settings.json'
    return os.path.join(ROOT, settings_name)
|
||||||
|
|
||||||
|
|
||||||
|
def load_settings():
    """Load the agent settings from the JSON file at ``_settings_path()``.

    Returns:
        dict: The parsed settings. An empty dict is returned when the file is
        missing, unreadable, not valid JSON, or when its top-level JSON value
        is not an object (callers use ``.get`` on the result, so anything other
        than a dict would break them).
    """
    try:
        with open(_settings_path(), 'r', encoding='utf-8') as f:
            cfg = json.load(f)
    except Exception:
        # Best-effort by design: a missing or corrupt settings file must never
        # stop the supervisor, so fall back to defaults.
        return {}
    # json.load may legally return a list, string, number, bool or None.
    return cfg if isinstance(cfg, dict) else {}
|
||||||
|
|
||||||
|
|
||||||
|
def _psutil_process_exists(pid: int) -> bool:
    """Report whether *pid* refers to a live, non-zombie process, using psutil.

    Args:
        pid: Process id to probe. Non-positive or non-numeric values yield False.

    Returns:
        True when psutil reports the process as running and not a zombie, or
        when psutil is denied access to it (the process exists but cannot be
        inspected). False when the process is gone, when psutil is not
        installed, or on any other error.
    """
    try:
        if pid <= 0:
            return False
        import psutil  # type: ignore
    except Exception:
        # Invalid pid type or psutil unavailable: cannot confirm existence.
        return False

    try:
        proc = psutil.Process(pid)
        return proc.is_running() and (proc.status() != psutil.STATUS_ZOMBIE)
    except psutil.AccessDenied:
        # Being denied access implies there is a process to be denied access to.
        return True
    except Exception:
        # Includes psutil.NoSuchProcess (and its ZombieProcess subclass).
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def _win_process_exists(pid: int) -> bool:
    """Report whether *pid* refers to a running process, using the Win32 API.

    A process object stays openable after the process has exited for as long
    as any handle to it remains open, so a successful ``OpenProcess`` alone is
    not proof of life; the exit code is checked as well.

    Args:
        pid: Process id to probe. Non-positive or non-numeric values yield False.

    Returns:
        True when the process can be opened and has not exited, or when opening
        it fails with access denied (it exists but is protected). False
        otherwise, including on platforms without ``ctypes.WinDLL``.
    """
    PROCESS_QUERY_LIMITED_INFORMATION = 0x1000
    ERROR_ACCESS_DENIED = 5
    STILL_ACTIVE = 259  # exit code reported while a process is still running

    try:
        if pid <= 0:
            return False

        kernel32 = ctypes.WinDLL('kernel32', use_last_error=True)

        open_process = kernel32.OpenProcess
        open_process.restype = wintypes.HANDLE
        open_process.argtypes = (wintypes.DWORD, wintypes.BOOL, wintypes.DWORD)

        get_exit_code = kernel32.GetExitCodeProcess
        get_exit_code.restype = wintypes.BOOL
        get_exit_code.argtypes = (wintypes.HANDLE, ctypes.POINTER(wintypes.DWORD))

        close_handle = kernel32.CloseHandle
        close_handle.argtypes = (wintypes.HANDLE,)

        handle = open_process(PROCESS_QUERY_LIMITED_INFORMATION, False, pid)
        if not handle:
            # A non-existent pid fails with a different error; access denied
            # means the process is there but we may not query it.
            return ctypes.get_last_error() == ERROR_ACCESS_DENIED

        try:
            exit_code = wintypes.DWORD()
            if not get_exit_code(handle, ctypes.byref(exit_code)):
                # Opened but not queryable: keep the previous behaviour of
                # treating an openable process as existing.
                return True
            return exit_code.value == STILL_ACTIVE
        finally:
            try:
                close_handle(handle)
            except Exception:
                pass
    except Exception:
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def process_exists(pid: int) -> bool:
    """Return True when either the psutil probe or the Win32 probe finds *pid* alive."""
    # psutil is tried first; the Win32 API is consulted whenever psutil does
    # not report the process as alive (including when psutil is unavailable).
    if _psutil_process_exists(pid):
        return True
    return _win_process_exists(pid)
|
||||||
|
|
||||||
|
|
||||||
|
def _read_pid_file() -> int:
    """Return the integer stored in PID_FILE, or 0 when it is absent or unusable."""
    recorded = 0
    try:
        if os.path.isfile(PID_FILE):
            with open(PID_FILE, 'r', encoding='utf-8') as fh:
                text = fh.read()
            recorded = int(text.strip())
    except Exception:
        # Unreadable file or non-numeric content: report "no PID".
        recorded = 0
    return recorded
|
||||||
|
|
||||||
|
|
||||||
|
def _write_pid_file(pid: int):
    """Overwrite PID_FILE with the text form of *pid*; failures are ignored."""
    try:
        with open(PID_FILE, 'w', encoding='utf-8') as fh:
            contents = str(pid)
            fh.write(contents)
    except Exception:
        # Best-effort: the PID file is only a hint for later runs.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _clear_pid_file():
    """Delete PID_FILE when it exists as a regular file; failures are ignored."""
    try:
        present = os.path.isfile(PID_FILE)
        if not present:
            return None
        os.remove(PID_FILE)
    except Exception:
        # Best-effort cleanup only.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_script_agent():
    """Ensure LocalSystem script_agent.py is running; restart if not, with backoff and PID tracking.

    Safe to call repeatedly: every call either returns early or attempts at
    most one launch. The steps, in order:

    1. Return without launching when ``enable_system_script_agent`` is false in
       the settings (logged at most once per 60 seconds).
    2. Return when the child started by this process is still running. When
       that child has exited, forget it, clear the PID file and delay the next
       launch, doubling the delay each time the child dies before it has been
       seen alive on a later call.
    3. Return when the PID file names a living process; clear a stale PID file.
    4. Return while inside the backoff window.
    5. Launch script_agent.py with the venv interpreter and record its PID. On
       a launch failure, log it (at most once per 10 seconds) and back off.
    """
    global _script_proc, _spawn_backoff, _next_spawn_time, _last_disable_log, _last_fail_log

    initial_backoff = 5  # seconds; same value the module-level backoff starts at

    # Allow disabling via config
    try:
        cfg = load_settings()
        if not cfg.get('enable_system_script_agent', True):
            now = time.time()
            if now - _last_disable_log > 60:
                log('System script agent disabled by config (enable_system_script_agent=false)')
                _last_disable_log = now
            return
    except Exception:
        # Unusable settings are treated as "enabled".
        pass

    # If we have a running child process, keep it
    try:
        if _script_proc is not None:
            if _script_proc.poll() is None:
                # The child survived until this call, so it is not
                # crash-looping; the next restart may be prompt again.
                _spawn_backoff = initial_backoff
                return
            # Child exited. Without a delay here, a child that dies right
            # after starting would be relaunched on every single call.
            exit_code = _script_proc.returncode
            _script_proc = None
            _clear_pid_file()
            log(f'script_agent.py exited (code {exit_code}); next launch in {_spawn_backoff}s')
            _next_spawn_time = time.time() + _spawn_backoff
            _spawn_backoff = min(_spawn_backoff * 2, _max_backoff)
    except Exception:
        pass

    # If PID file points to a living process, don't spawn
    # NOTE(review): only liveness of the PID is checked, not that it still
    # belongs to script_agent.py; a reused PID would block respawning. Confirm
    # this is acceptable.
    try:
        pid = _read_pid_file()
        if pid:
            if process_exists(pid):
                return
            _clear_pid_file()
    except Exception:
        pass

    # Honor backoff window
    if time.time() < _next_spawn_time:
        return

    py = venv_python()
    script = os.path.join(ROOT, 'Data', 'Agent', 'script_agent.py')
    try:
        proc = subprocess.Popen(
            [py, '-W', 'ignore::SyntaxWarning', script],
            # 0x08000000 is subprocess.CREATE_NO_WINDOW (Windows only).
            creationflags=(0x08000000 if os.name == 'nt' else 0),
        )
        _script_proc = proc
        _write_pid_file(proc.pid)
        log(f'Launched script_agent.py (pid {proc.pid})')
        # The backoff is deliberately NOT reset here: a successful Popen does
        # not mean the child stays up. It is reset above once the child has
        # been observed alive on a later call.
        _next_spawn_time = 0.0
    except Exception as e:
        msg = f'Failed to launch script_agent.py: {e}'
        now = time.time()
        # rate-limit identical failure logs to once per 10s
        if now - _last_fail_log > 10:
            log(msg)
            _last_fail_log = now
        # exponential backoff: wait the current delay first, then double it,
        # so the first retry really happens after the initial delay.
        _next_spawn_time = now + _spawn_backoff
        _spawn_backoff = min(_spawn_backoff * 2, _max_backoff)
|
||||||
|
|
||||||
|
|
||||||
def _enable_privileges():
|
def _enable_privileges():
|
||||||
@@ -166,4 +319,3 @@ def main():
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
|
||||||
|
@@ -1065,6 +1065,8 @@ async def _run_powershell_via_user_task(content: str):
|
|||||||
ps = "powershell.exe"
|
ps = "powershell.exe"
|
||||||
else:
|
else:
|
||||||
return -999, '', 'Windows only'
|
return -999, '', 'Windows only'
|
||||||
|
path = None
|
||||||
|
out_path = None
|
||||||
try:
|
try:
|
||||||
temp_dir = os.path.join(os.path.dirname(__file__), '..', '..', 'Temp')
|
temp_dir = os.path.join(os.path.dirname(__file__), '..', '..', 'Temp')
|
||||||
temp_dir = os.path.abspath(temp_dir)
|
temp_dir = os.path.abspath(temp_dir)
|
||||||
@@ -1110,6 +1112,18 @@ Get-ScheduledTask -TaskName $task | Out-Null
|
|||||||
return 0, out_data or '', ''
|
return 0, out_data or '', ''
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return -999, '', str(e)
|
return -999, '', str(e)
|
||||||
|
finally:
|
||||||
|
# Best-effort cleanup of temp script and output files
|
||||||
|
try:
|
||||||
|
if path and os.path.isfile(path):
|
||||||
|
os.remove(path)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
if out_path and os.path.isfile(out_path):
|
||||||
|
os.remove(out_path)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
# ---------------- Dummy Qt Widget to Prevent Exit ----------------
|
# ---------------- Dummy Qt Widget to Prevent Exit ----------------
|
||||||
class PersistentWindow(QtWidgets.QWidget):
|
class PersistentWindow(QtWidgets.QWidget):
|
||||||
|
@@ -6,12 +6,14 @@ import asyncio
|
|||||||
import json
|
import json
|
||||||
import subprocess
|
import subprocess
|
||||||
import tempfile
|
import tempfile
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
import socketio
|
import socketio
|
||||||
import platform
|
import platform
|
||||||
import time
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import contextlib
|
||||||
|
|
||||||
|
|
||||||
def get_project_root():
|
def get_project_root():
|
||||||
@@ -54,6 +56,13 @@ def run_powershell_script_content(content: str):
|
|||||||
return proc.returncode, proc.stdout or "", proc.stderr or ""
|
return proc.returncode, proc.stdout or "", proc.stderr or ""
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return -1, "", str(e)
|
return -1, "", str(e)
|
||||||
|
finally:
|
||||||
|
# Best-effort cleanup of the ephemeral script
|
||||||
|
try:
|
||||||
|
if os.path.isfile(path):
|
||||||
|
os.remove(path)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
@@ -201,10 +210,49 @@ Get-ScheduledTask -TaskName $task | Out-Null
|
|||||||
# Cleanup task (best-effort)
|
# Cleanup task (best-effort)
|
||||||
cleanup_ps = f"try {{ Unregister-ScheduledTask -TaskName '{task_name}' -Confirm:$false }} catch {{}}"
|
cleanup_ps = f"try {{ Unregister-ScheduledTask -TaskName '{task_name}' -Confirm:$false }} catch {{}}"
|
||||||
subprocess.run([ps_exe, '-NoProfile', '-ExecutionPolicy', 'Bypass', '-Command', cleanup_ps], capture_output=True, text=True)
|
subprocess.run([ps_exe, '-NoProfile', '-ExecutionPolicy', 'Bypass', '-Command', cleanup_ps], capture_output=True, text=True)
|
||||||
|
# Best-effort removal of temp script and output files
|
||||||
|
try:
|
||||||
|
if os.path.isfile(script_path):
|
||||||
|
os.remove(script_path)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
if os.path.isfile(out_path):
|
||||||
|
os.remove(out_path)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
return 0, out_data or '', ''
|
return 0, out_data or '', ''
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return -999, '', str(e)
|
return -999, '', str(e)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Ensure only a single instance of the script agent runs (Windows-only lock)
    def _acquire_singleton_lock() -> bool:
        """Try to take the single-instance lock for the script agent.

        The lock is a one-byte, non-blocking ``msvcrt`` lock on
        ``Logs/Agent/script_agent.lock`` under the project root. The file
        handle is stored in the module global ``_LOCK_FH`` so the lock lives
        as long as the process does.

        Returns:
            True when the lock was taken, or when no lock could be attempted
            at all (``msvcrt`` missing, lock file cannot be created): being
            unable to lock must not prevent the agent from running.
            False only when the locking call itself fails, which is taken to
            mean another instance holds the lock.
        """
        try:
            import msvcrt  # type: ignore
        except ImportError:
            # No msvcrt on this platform: there is nothing to lock with, which
            # is different from "another instance is running".
            return True

        try:
            lock_dir = os.path.join(get_project_root(), 'Logs', 'Agent')
            os.makedirs(lock_dir, exist_ok=True)
            lock_path = os.path.join(lock_dir, 'script_agent.lock')
            # Keep handle open for process lifetime
            fh = open(lock_path, 'a')
        except Exception:
            # If we cannot establish a lock, continue (do not prevent agent)
            return True

        try:
            # Lock 1 byte non-blocking; released on handle close/process exit
            msvcrt.locking(fh.fileno(), msvcrt.LK_NBLCK, 1)
        except Exception:
            try:
                fh.close()
            except Exception:
                pass
            return False

        globals()['_LOCK_FH'] = fh
        return True

    if not _acquire_singleton_lock():
        print('[ScriptAgent] Another instance is running; exiting.')
        sys.exit(0)

    asyncio.run(main())
|
||||||
|
@@ -0,0 +1,8 @@
|
|||||||
|
# Resolve the Desktop path of the user running this script at run time,
# rather than hard-coding one account's profile directory.
$desktopPath = [Environment]::GetFolderPath('Desktop')
if ([string]::IsNullOrEmpty($desktopPath)) {
    # Fall back to the conventional location when no Desktop folder is reported.
    $desktopPath = Join-Path $env:USERPROFILE 'Desktop'
}

# Define the file path relative to the Desktop
$filePath = Join-Path $desktopPath "Canary.txt"

# Write some content into the file
"USER Canary is alive." | Out-File -FilePath $filePath -Encoding UTF8
|
Reference in New Issue
Block a user