Fixed Invisible Screenshot Agent Overlay

This commit is contained in:
2025-05-02 21:31:24 -06:00
parent e44425dd37
commit 536e838970
2 changed files with 160 additions and 172 deletions

View File

@@ -3,9 +3,11 @@ requests
python-socketio python-socketio
websocket-client websocket-client
eventlet eventlet
aiohttp
# GUI-related dependencies (Qt for GUI components) # GUI-related dependencies (Qt for GUI components)
PyQt5 PyQt5
qasync
# Computer Vision & OCR Dependencies # Computer Vision & OCR Dependencies
Pillow Pillow

View File

@@ -2,87 +2,113 @@
import sys import sys
import uuid import uuid
import time
import base64
import threading
import socketio
from io import BytesIO
import socket import socket
import os import os
import json import json
import asyncio
import concurrent.futures
from functools import partial
from io import BytesIO
import base64
import socketio
from qasync import QEventLoop
from PyQt5 import QtCore, QtGui, QtWidgets from PyQt5 import QtCore, QtGui, QtWidgets
from PIL import ImageGrab from PIL import ImageGrab
# ---------------- Configuration ---------------- # //////////////////////////////////////////////////////////////////////////
# CORE SECTION: CONFIG MANAGER (do not modify unless you know what you're doing)
# //////////////////////////////////////////////////////////////////////////
CONFIG_PATH = os.path.join(os.path.dirname(__file__), "agent_settings.json") CONFIG_PATH = os.path.join(os.path.dirname(__file__), "agent_settings.json")
DEFAULT_SERVER_URL = "http://localhost:5000" DEFAULT_CONFIG = {
"SERVER_URL": "http://localhost:5000",
"max_workers": 8,
"config_watch_interval": 2
}
def load_config(): class ConfigManager:
""" def __init__(self, path):
Load agent_settings.json or prompt the user for SERVER_URL. self.path = path
Returns a config dictionary with at least {"SERVER_URL": ...} self._last_mtime = None
""" self.data = {}
config = {} self.load()
if os.path.exists(CONFIG_PATH): def load(self):
if not os.path.exists(self.path):
self.data = DEFAULT_CONFIG.copy()
self._write()
else:
try: try:
with open(CONFIG_PATH, "r") as f: with open(self.path, 'r') as f:
config = json.load(f) self.data = json.load(f)
if isinstance(config, dict) and "SERVER_URL" in config:
return config
except Exception as e: except Exception as e:
print(f"[WARN] Failed to parse agent_settings.json: {e}") print(f"[WARN] Failed to parse config: {e}")
self.data = DEFAULT_CONFIG.copy()
try: try:
user_input = input(f"Enter Borealis Server URL [{DEFAULT_SERVER_URL}]: ").strip() self._last_mtime = os.path.getmtime(self.path)
config["SERVER_URL"] = user_input if user_input else DEFAULT_SERVER_URL except Exception:
with open(CONFIG_PATH, "w") as f: self._last_mtime = None
json.dump(config, f, indent=2)
def _write(self):
try:
with open(self.path, 'w') as f:
json.dump(self.data, f, indent=2)
except Exception as e: except Exception as e:
print(f"[ERROR] Prompt failed: {e}") print(f"[ERROR] Could not write config: {e}")
config["SERVER_URL"] = DEFAULT_SERVER_URL
return config def watch(self):
try:
mtime = os.path.getmtime(self.path)
if self._last_mtime is None or mtime != self._last_mtime:
print("[CONFIG] Detected config change, reloading.")
self.load()
return True
except Exception:
pass
return False
config = load_config() CONFIG = ConfigManager(CONFIG_PATH)
SERVER_URL = config["SERVER_URL"] # //////////////////////////////////////////////////////////////////////////
# END CORE SECTION: CONFIG MANAGER
# //////////////////////////////////////////////////////////////////////////
HOSTNAME = socket.gethostname().lower() # //////////////////////////////////////////////////////////////////////////
RANDOM_SUFFIX = uuid.uuid4().hex[:8] # CORE SECTION: WEBSOCKET SETUP & HANDLERS (do not modify unless absolutely necessary)
AGENT_ID = f"{HOSTNAME}-agent-{RANDOM_SUFFIX}" # //////////////////////////////////////////////////////////////////////////
AGENT_ID = f"{socket.gethostname().lower()}-agent-{uuid.uuid4().hex[:8]}"
# ---------------- App State ---------------- sio = socketio.AsyncClient(reconnection=True, reconnection_attempts=0, reconnection_delay=5)
app_instance = None role_tasks = {}
overlay_widgets = {}
region_launchers = {}
running_roles = {}
running_threads = {}
# ---------------- Socket Setup ----------------
# Enable automatic reconnection with retries in background
sio = socketio.Client(reconnection=True, reconnection_attempts=0, reconnection_delay=5)
@sio.event @sio.event
def connect(): async def connect():
print(f"[WebSocket] Agent ID: {AGENT_ID} connected to Borealis.") print(f"[WebSocket] Agent ID: {AGENT_ID} connected to Borealis.")
sio.emit('connect_agent', {"agent_id": AGENT_ID, "hostname": HOSTNAME}) await sio.emit('connect_agent', {"agent_id": AGENT_ID})
sio.emit('request_config', {"agent_id": AGENT_ID}) await sio.emit('request_config', {"agent_id": AGENT_ID})
@sio.event @sio.event
def disconnect(): async def disconnect():
print("[WebSocket] Lost connection to Borealis server.") print("[WebSocket] Lost connection to Borealis server.")
@sio.on('agent_config') @sio.on('agent_config')
def on_agent_config(config): async def on_agent_config(cfg):
print("[PROVISIONED] Received new configuration from Borealis.") print("[PROVISIONED] Received new configuration from Borealis.")
# cancel existing role tasks
for task in list(role_tasks.values()):
task.cancel()
role_tasks.clear()
# start new tasks
for role_cfg in cfg.get('roles', []):
role = role_cfg.get('role')
node_id = role_cfg.get('node_id')
if role == 'screenshot' and node_id:
task = asyncio.create_task(screenshot_task(role_cfg))
role_tasks[node_id] = task
# //////////////////////////////////////////////////////////////////////////
# END CORE SECTION: WEBSOCKET SETUP & HANDLERS
# //////////////////////////////////////////////////////////////////////////
roles = config.get("roles", []) # ---------------- Overlay Widget ----------------
stop_all_roles()
for role in roles:
start_role_thread(role)
# ---------------- Overlay Class ----------------
class ScreenshotRegion(QtWidgets.QWidget): class ScreenshotRegion(QtWidgets.QWidget):
def __init__(self, node_id, x=100, y=100, w=300, h=200): def __init__(self, node_id, x=100, y=100, w=300, h=200):
super().__init__() super().__init__()
@@ -99,7 +125,6 @@ class ScreenshotRegion(QtWidgets.QWidget):
self.label.setText(f"{node_id[:8]}") self.label.setText(f"{node_id[:8]}")
self.label.setStyleSheet("color: lime; background: transparent; font-size: 10px;") self.label.setStyleSheet("color: lime; background: transparent; font-size: 10px;")
self.label.move(8, 4) self.label.move(8, 4)
self.setMouseTracking(True) self.setMouseTracking(True)
def paintEvent(self, event): def paintEvent(self, event):
@@ -108,28 +133,25 @@ class ScreenshotRegion(QtWidgets.QWidget):
painter.setBrush(QtCore.Qt.transparent) painter.setBrush(QtCore.Qt.transparent)
painter.setPen(QtGui.QPen(QtGui.QColor(0, 255, 0), 2)) painter.setPen(QtGui.QPen(QtGui.QColor(0, 255, 0), 2))
painter.drawRect(self.rect()) painter.drawRect(self.rect())
handle = QtCore.QRect(self.width() - self.resize_handle_size,
handle_rect = QtCore.QRect(
self.width() - self.resize_handle_size,
self.height() - self.resize_handle_size, self.height() - self.resize_handle_size,
self.resize_handle_size, self.resize_handle_size, self.resize_handle_size)
self.resize_handle_size painter.fillRect(handle, QtGui.QColor(0, 255, 0))
)
painter.fillRect(handle_rect, QtGui.QColor(0, 255, 0))
def mousePressEvent(self, event): def mousePressEvent(self, event):
if event.button() == QtCore.Qt.LeftButton: if event.button() == QtCore.Qt.LeftButton:
if event.pos().x() > self.width() - self.resize_handle_size and \ px, py = event.pos().x(), event.pos().y()
event.pos().y() > self.height() - self.resize_handle_size: if px > self.width() - self.resize_handle_size and \
py > self.height() - self.resize_handle_size:
self.resizing = True self.resizing = True
else: else:
self.drag_offset = event.globalPos() - self.frameGeometry().topLeft() self.drag_offset = event.globalPos() - self.frameGeometry().topLeft()
def mouseMoveEvent(self, event): def mouseMoveEvent(self, event):
if self.resizing: if self.resizing:
new_width = max(event.pos().x(), 100) nw = max(event.pos().x(), 100)
new_height = max(event.pos().y(), 80) nh = max(event.pos().y(), 80)
self.resize(new_width, new_height) self.resize(nw, nh)
elif event.buttons() & QtCore.Qt.LeftButton and self.drag_offset: elif event.buttons() & QtCore.Qt.LeftButton and self.drag_offset:
self.move(event.globalPos() - self.drag_offset) self.move(event.globalPos() - self.drag_offset)
@@ -141,117 +163,81 @@ class ScreenshotRegion(QtWidgets.QWidget):
geo = self.geometry() geo = self.geometry()
return geo.x(), geo.y(), geo.width(), geo.height() return geo.x(), geo.y(), geo.width(), geo.height()
# ---------------- Region UI Handler ---------------- # ---------------- Helper Functions ----------------
class RegionLauncher(QtCore.QObject): app = None
trigger = QtCore.pyqtSignal(int, int, int, int) overlay_widgets = {}
def __init__(self, node_id): def create_overlay(node_id, region):
super().__init__() if node_id in overlay_widgets:
self.node_id = node_id
self.trigger.connect(self.handle)
def handle(self, x, y, w, h):
print(f"[Overlay] Launching overlay for {self.node_id} at ({x},{y},{w},{h})")
if self.node_id in overlay_widgets:
return return
widget = ScreenshotRegion(self.node_id, x, y, w, h) x, y, w, h = region
overlay_widgets[self.node_id] = widget widget = ScreenshotRegion(node_id, x, y, w, h)
overlay_widgets[node_id] = widget
widget.show() widget.show()
# ---------------- GUI Thread Helpers ---------------- def get_overlay_geometry(node_id):
def gui_create_launcher(node_id, x, y, w, h):
launcher = RegionLauncher(node_id)
region_launchers[node_id] = launcher
launcher.handle(x, y, w, h)
def gui_update_widget(node_id, x, y, w, h, visible):
widget = overlay_widgets.get(node_id) widget = overlay_widgets.get(node_id)
if widget: if widget:
widget.setGeometry(x, y, w, h) return widget.get_geometry()
widget.setVisible(visible) return (0, 0, 0, 0)
# ---------------- Role Management ---------------- # ---------------- Screenshot Task ----------------
def stop_all_roles(): async def screenshot_task(cfg):
for node_id, thread in running_threads.items(): interval = cfg.get('interval', 1000) / 1000.0
if thread and thread.is_alive(): node_id = cfg.get('node_id')
print(f"[Role] Terminating previous task: {node_id}") region = (cfg.get('x', 100), cfg.get('y', 100), cfg.get('w', 300), cfg.get('h', 200))
running_roles.clear() create_overlay(node_id, region)
running_threads.clear() loop = asyncio.get_event_loop()
executor = concurrent.futures.ThreadPoolExecutor(max_workers=CONFIG.data.get('max_workers', 8))
def start_role_thread(role_cfg):
role = role_cfg.get("role")
node_id = role_cfg.get("node_id")
if not role or not node_id:
print("[ERROR] Invalid role configuration (missing role or node_id).")
return
if role == "screenshot":
thread = threading.Thread(target=run_screenshot_loop, args=(node_id, role_cfg), daemon=True)
else:
print(f"[SKIP] Unknown role: {role}")
return
running_roles[node_id] = role_cfg
running_threads[node_id] = thread
thread.start()
print(f"[Role] Started task: {role} ({node_id})")
# ---------------- Screenshot Role Loop ----------------
def run_screenshot_loop(node_id, cfg):
interval = cfg.get("interval", 1000)
visible = cfg.get("visible", True)
x = cfg.get("x", 100)
y = cfg.get("y", 100)
w = cfg.get("w", 300)
h = cfg.get("h", 200)
# Schedule launcher creation in GUI thread
if node_id not in region_launchers:
QtCore.QTimer.singleShot(0, lambda nid=node_id, xx=x, yy=y, ww=w, hh=h: gui_create_launcher(nid, xx, yy, ww, hh))
while True:
try: try:
# Use current widget geometry if available (after user moves/resizes) while True:
if node_id in overlay_widgets: x, y, w, h = get_overlay_geometry(node_id)
widget = overlay_widgets[node_id] grab = partial(ImageGrab.grab, bbox=(x, y, x + w, y + h))
x, y, w, h = widget.get_geometry() img = await loop.run_in_executor(executor, grab)
buf = BytesIO()
print(f"[Capture] Screenshot task {node_id} at ({x},{y},{w},{h})") img.save(buf, format='PNG')
img = ImageGrab.grab(bbox=(x, y, x + w, y + h)) encoded = base64.b64encode(buf.getvalue()).decode('utf-8')
buffer = BytesIO() await sio.emit('agent_screenshot_task', {
img.save(buffer, format="PNG") 'agent_id': AGENT_ID,
encoded = base64.b64encode(buffer.getvalue()).decode("utf-8") 'node_id': node_id,
'image_base64': encoded
sio.emit("agent_screenshot_task", {
"agent_id": AGENT_ID,
"node_id": node_id,
"image_base64": encoded
}) })
await asyncio.sleep(interval)
# Schedule any visibility or geometry updates in GUI thread except asyncio.CancelledError:
QtCore.QTimer.singleShot( return
0,
lambda nid=node_id, xx=x, yy=y, ww=w, hh=h, vis=visible: gui_update_widget(nid, xx, yy, ww, hh, vis)
)
except Exception as e: except Exception as e:
print(f"[ERROR] Screenshot task {node_id} failed: {e}") print(f"[ERROR] Screenshot task {node_id} failed: {e}")
time.sleep(interval / 1000) # ---------------- Config Watcher ----------------
async def config_watcher():
    """Poll the agent settings file for changes, forever.

    Each pass calls ``CONFIG.watch()`` and then sleeps for the number of
    seconds stored under ``config_watch_interval`` in ``CONFIG.data``
    (2 when the key is absent). The coroutine never returns on its own.
    """
    while True:
        # watch() performs the reload itself when the file changed; its
        # result is not needed because nothing here reacts to a reload.
        # Updated settings (e.g. the executor pool size) are picked up the
        # next time a task reads CONFIG.data.
        CONFIG.watch()
        # Looked up on every pass so an edited interval applies to the
        # very next sleep.
        poll_delay = CONFIG.data.get('config_watch_interval', 2)
        await asyncio.sleep(poll_delay)
# ---------------- Main ---------------- # //////////////////////////////////////////////////////////////////////////
if __name__ == "__main__": # CORE SECTION: MAIN & EVENT LOOP (do not modify unless you know what you're doing)
app_instance = QtWidgets.QApplication(sys.argv) # //////////////////////////////////////////////////////////////////////////
retry_interval = 5 # seconds between connection attempts async def connect_loop():
retry = 5
def connect_loop():
while True: while True:
try: try:
print(f"[WebSocket] Connecting to {SERVER_URL}...") print(f"[WebSocket] Connecting to {CONFIG.data['SERVER_URL']}...")
sio.connect(SERVER_URL, transports=["websocket"], wait=False) await sio.connect(CONFIG.data['SERVER_URL'], transports=['websocket'])
break break
except Exception: except Exception:
print(f"[WebSocket] Borealis Server is Not Running - Retrying in {retry_interval} seconds...") print(f"[WebSocket] Server not available, retrying in {retry}s...")
time.sleep(retry_interval) await asyncio.sleep(retry)
threading.Thread(target=connect_loop, daemon=True).start() if __name__ == '__main__':
sys.exit(app_instance.exec_()) app = QtWidgets.QApplication(sys.argv)
loop = QEventLoop(app)
asyncio.set_event_loop(loop)
with loop:
loop.create_task(config_watcher())
loop.create_task(connect_loop())
loop.run_forever()
# //////////////////////////////////////////////////////////////////////////
# END CORE SECTION: MAIN & EVENT LOOP
# //////////////////////////////////////////////////////////////////////////