Fixed Invisible Screenshot Agent Overlay

This commit is contained in:
2025-05-02 21:31:24 -06:00
parent e44425dd37
commit 536e838970
2 changed files with 160 additions and 172 deletions

View File

@ -3,9 +3,11 @@ requests
python-socketio python-socketio
websocket-client websocket-client
eventlet eventlet
aiohttp
# GUI-related dependencies (Qt for GUI components) # GUI-related dependencies (Qt for GUI components)
PyQt5 PyQt5
qasync
# Computer Vision & OCR Dependencies # Computer Vision & OCR Dependencies
Pillow Pillow

View File

@ -2,87 +2,113 @@
import sys import sys
import uuid import uuid
import time
import base64
import threading
import socketio
from io import BytesIO
import socket import socket
import os import os
import json import json
import asyncio
import concurrent.futures
from functools import partial
from io import BytesIO
import base64
import socketio
from qasync import QEventLoop
from PyQt5 import QtCore, QtGui, QtWidgets from PyQt5 import QtCore, QtGui, QtWidgets
from PIL import ImageGrab from PIL import ImageGrab
# ---------------- Configuration ---------------- # //////////////////////////////////////////////////////////////////////////
# CORE SECTION: CONFIG MANAGER (do not modify unless you know what you're doing)
# //////////////////////////////////////////////////////////////////////////
CONFIG_PATH = os.path.join(os.path.dirname(__file__), "agent_settings.json") CONFIG_PATH = os.path.join(os.path.dirname(__file__), "agent_settings.json")
DEFAULT_SERVER_URL = "http://localhost:5000" DEFAULT_CONFIG = {
"SERVER_URL": "http://localhost:5000",
"max_workers": 8,
"config_watch_interval": 2
}
def load_config(): class ConfigManager:
""" def __init__(self, path):
Load agent_settings.json or prompt the user for SERVER_URL. self.path = path
Returns a config dictionary with at least {"SERVER_URL": ...} self._last_mtime = None
""" self.data = {}
config = {} self.load()
if os.path.exists(CONFIG_PATH): def load(self):
if not os.path.exists(self.path):
self.data = DEFAULT_CONFIG.copy()
self._write()
else:
try:
with open(self.path, 'r') as f:
self.data = json.load(f)
except Exception as e:
print(f"[WARN] Failed to parse config: {e}")
self.data = DEFAULT_CONFIG.copy()
try: try:
with open(CONFIG_PATH, "r") as f: self._last_mtime = os.path.getmtime(self.path)
config = json.load(f) except Exception:
if isinstance(config, dict) and "SERVER_URL" in config: self._last_mtime = None
return config
def _write(self):
try:
with open(self.path, 'w') as f:
json.dump(self.data, f, indent=2)
except Exception as e: except Exception as e:
print(f"[WARN] Failed to parse agent_settings.json: {e}") print(f"[ERROR] Could not write config: {e}")
try: def watch(self):
user_input = input(f"Enter Borealis Server URL [{DEFAULT_SERVER_URL}]: ").strip() try:
config["SERVER_URL"] = user_input if user_input else DEFAULT_SERVER_URL mtime = os.path.getmtime(self.path)
with open(CONFIG_PATH, "w") as f: if self._last_mtime is None or mtime != self._last_mtime:
json.dump(config, f, indent=2) print("[CONFIG] Detected config change, reloading.")
except Exception as e: self.load()
print(f"[ERROR] Prompt failed: {e}") return True
config["SERVER_URL"] = DEFAULT_SERVER_URL except Exception:
pass
return False
return config CONFIG = ConfigManager(CONFIG_PATH)
# //////////////////////////////////////////////////////////////////////////
# END CORE SECTION: CONFIG MANAGER
# //////////////////////////////////////////////////////////////////////////
config = load_config() # //////////////////////////////////////////////////////////////////////////
SERVER_URL = config["SERVER_URL"] # CORE SECTION: WEBSOCKET SETUP & HANDLERS (do not modify unless absolutely necessary)
# //////////////////////////////////////////////////////////////////////////
AGENT_ID = f"{socket.gethostname().lower()}-agent-{uuid.uuid4().hex[:8]}"
HOSTNAME = socket.gethostname().lower() sio = socketio.AsyncClient(reconnection=True, reconnection_attempts=0, reconnection_delay=5)
RANDOM_SUFFIX = uuid.uuid4().hex[:8] role_tasks = {}
AGENT_ID = f"{HOSTNAME}-agent-{RANDOM_SUFFIX}"
# ---------------- App State ----------------
app_instance = None
overlay_widgets = {}
region_launchers = {}
running_roles = {}
running_threads = {}
# ---------------- Socket Setup ----------------
# Enable automatic reconnection with retries in background
sio = socketio.Client(reconnection=True, reconnection_attempts=0, reconnection_delay=5)
@sio.event @sio.event
def connect(): async def connect():
print(f"[WebSocket] Agent ID: {AGENT_ID} connected to Borealis.") print(f"[WebSocket] Agent ID: {AGENT_ID} connected to Borealis.")
sio.emit('connect_agent', {"agent_id": AGENT_ID, "hostname": HOSTNAME}) await sio.emit('connect_agent', {"agent_id": AGENT_ID})
sio.emit('request_config', {"agent_id": AGENT_ID}) await sio.emit('request_config', {"agent_id": AGENT_ID})
@sio.event @sio.event
def disconnect(): async def disconnect():
print("[WebSocket] Lost connection to Borealis server.") print("[WebSocket] Lost connection to Borealis server.")
@sio.on('agent_config') @sio.on('agent_config')
def on_agent_config(config): async def on_agent_config(cfg):
print("[PROVISIONED] Received new configuration from Borealis.") print("[PROVISIONED] Received new configuration from Borealis.")
# cancel existing role tasks
for task in list(role_tasks.values()):
task.cancel()
role_tasks.clear()
# start new tasks
for role_cfg in cfg.get('roles', []):
role = role_cfg.get('role')
node_id = role_cfg.get('node_id')
if role == 'screenshot' and node_id:
task = asyncio.create_task(screenshot_task(role_cfg))
role_tasks[node_id] = task
# //////////////////////////////////////////////////////////////////////////
# END CORE SECTION: WEBSOCKET SETUP & HANDLERS
# //////////////////////////////////////////////////////////////////////////
roles = config.get("roles", []) # ---------------- Overlay Widget ----------------
stop_all_roles()
for role in roles:
start_role_thread(role)
# ---------------- Overlay Class ----------------
class ScreenshotRegion(QtWidgets.QWidget): class ScreenshotRegion(QtWidgets.QWidget):
def __init__(self, node_id, x=100, y=100, w=300, h=200): def __init__(self, node_id, x=100, y=100, w=300, h=200):
super().__init__() super().__init__()
@ -99,7 +125,6 @@ class ScreenshotRegion(QtWidgets.QWidget):
self.label.setText(f"{node_id[:8]}") self.label.setText(f"{node_id[:8]}")
self.label.setStyleSheet("color: lime; background: transparent; font-size: 10px;") self.label.setStyleSheet("color: lime; background: transparent; font-size: 10px;")
self.label.move(8, 4) self.label.move(8, 4)
self.setMouseTracking(True) self.setMouseTracking(True)
def paintEvent(self, event): def paintEvent(self, event):
@ -108,28 +133,25 @@ class ScreenshotRegion(QtWidgets.QWidget):
painter.setBrush(QtCore.Qt.transparent) painter.setBrush(QtCore.Qt.transparent)
painter.setPen(QtGui.QPen(QtGui.QColor(0, 255, 0), 2)) painter.setPen(QtGui.QPen(QtGui.QColor(0, 255, 0), 2))
painter.drawRect(self.rect()) painter.drawRect(self.rect())
handle = QtCore.QRect(self.width() - self.resize_handle_size,
handle_rect = QtCore.QRect( self.height() - self.resize_handle_size,
self.width() - self.resize_handle_size, self.resize_handle_size, self.resize_handle_size)
self.height() - self.resize_handle_size, painter.fillRect(handle, QtGui.QColor(0, 255, 0))
self.resize_handle_size,
self.resize_handle_size
)
painter.fillRect(handle_rect, QtGui.QColor(0, 255, 0))
def mousePressEvent(self, event): def mousePressEvent(self, event):
if event.button() == QtCore.Qt.LeftButton: if event.button() == QtCore.Qt.LeftButton:
if event.pos().x() > self.width() - self.resize_handle_size and \ px, py = event.pos().x(), event.pos().y()
event.pos().y() > self.height() - self.resize_handle_size: if px > self.width() - self.resize_handle_size and \
py > self.height() - self.resize_handle_size:
self.resizing = True self.resizing = True
else: else:
self.drag_offset = event.globalPos() - self.frameGeometry().topLeft() self.drag_offset = event.globalPos() - self.frameGeometry().topLeft()
def mouseMoveEvent(self, event): def mouseMoveEvent(self, event):
if self.resizing: if self.resizing:
new_width = max(event.pos().x(), 100) nw = max(event.pos().x(), 100)
new_height = max(event.pos().y(), 80) nh = max(event.pos().y(), 80)
self.resize(new_width, new_height) self.resize(nw, nh)
elif event.buttons() & QtCore.Qt.LeftButton and self.drag_offset: elif event.buttons() & QtCore.Qt.LeftButton and self.drag_offset:
self.move(event.globalPos() - self.drag_offset) self.move(event.globalPos() - self.drag_offset)
@ -141,117 +163,81 @@ class ScreenshotRegion(QtWidgets.QWidget):
geo = self.geometry() geo = self.geometry()
return geo.x(), geo.y(), geo.width(), geo.height() return geo.x(), geo.y(), geo.width(), geo.height()
# ---------------- Region UI Handler ---------------- # ---------------- Helper Functions ----------------
class RegionLauncher(QtCore.QObject): app = None
trigger = QtCore.pyqtSignal(int, int, int, int) overlay_widgets = {}
def __init__(self, node_id): def create_overlay(node_id, region):
super().__init__() if node_id in overlay_widgets:
self.node_id = node_id return
self.trigger.connect(self.handle) x, y, w, h = region
widget = ScreenshotRegion(node_id, x, y, w, h)
overlay_widgets[node_id] = widget
widget.show()
def handle(self, x, y, w, h): def get_overlay_geometry(node_id):
print(f"[Overlay] Launching overlay for {self.node_id} at ({x},{y},{w},{h})")
if self.node_id in overlay_widgets:
return
widget = ScreenshotRegion(self.node_id, x, y, w, h)
overlay_widgets[self.node_id] = widget
widget.show()
# ---------------- GUI Thread Helpers ----------------
def gui_create_launcher(node_id, x, y, w, h):
launcher = RegionLauncher(node_id)
region_launchers[node_id] = launcher
launcher.handle(x, y, w, h)
def gui_update_widget(node_id, x, y, w, h, visible):
widget = overlay_widgets.get(node_id) widget = overlay_widgets.get(node_id)
if widget: if widget:
widget.setGeometry(x, y, w, h) return widget.get_geometry()
widget.setVisible(visible) return (0, 0, 0, 0)
# ---------------- Role Management ---------------- # ---------------- Screenshot Task ----------------
def stop_all_roles(): async def screenshot_task(cfg):
for node_id, thread in running_threads.items(): interval = cfg.get('interval', 1000) / 1000.0
if thread and thread.is_alive(): node_id = cfg.get('node_id')
print(f"[Role] Terminating previous task: {node_id}") region = (cfg.get('x', 100), cfg.get('y', 100), cfg.get('w', 300), cfg.get('h', 200))
running_roles.clear() create_overlay(node_id, region)
running_threads.clear() loop = asyncio.get_event_loop()
executor = concurrent.futures.ThreadPoolExecutor(max_workers=CONFIG.data.get('max_workers', 8))
def start_role_thread(role_cfg): try:
role = role_cfg.get("role") while True:
node_id = role_cfg.get("node_id") x, y, w, h = get_overlay_geometry(node_id)
if not role or not node_id: grab = partial(ImageGrab.grab, bbox=(x, y, x + w, y + h))
print("[ERROR] Invalid role configuration (missing role or node_id).") img = await loop.run_in_executor(executor, grab)
buf = BytesIO()
img.save(buf, format='PNG')
encoded = base64.b64encode(buf.getvalue()).decode('utf-8')
await sio.emit('agent_screenshot_task', {
'agent_id': AGENT_ID,
'node_id': node_id,
'image_base64': encoded
})
await asyncio.sleep(interval)
except asyncio.CancelledError:
return return
except Exception as e:
print(f"[ERROR] Screenshot task {node_id} failed: {e}")
if role == "screenshot": # ---------------- Config Watcher ----------------
thread = threading.Thread(target=run_screenshot_loop, args=(node_id, role_cfg), daemon=True) async def config_watcher():
else: while True:
print(f"[SKIP] Unknown role: {role}") if CONFIG.watch():
return # settings updated, e.g., executor pool size will apply on next task run
pass
running_roles[node_id] = role_cfg await asyncio.sleep(CONFIG.data.get('config_watch_interval', 2))
running_threads[node_id] = thread
thread.start()
print(f"[Role] Started task: {role} ({node_id})")
# ---------------- Screenshot Role Loop ----------------
def run_screenshot_loop(node_id, cfg):
interval = cfg.get("interval", 1000)
visible = cfg.get("visible", True)
x = cfg.get("x", 100)
y = cfg.get("y", 100)
w = cfg.get("w", 300)
h = cfg.get("h", 200)
# Schedule launcher creation in GUI thread
if node_id not in region_launchers:
QtCore.QTimer.singleShot(0, lambda nid=node_id, xx=x, yy=y, ww=w, hh=h: gui_create_launcher(nid, xx, yy, ww, hh))
# //////////////////////////////////////////////////////////////////////////
# CORE SECTION: MAIN & EVENT LOOP (do not modify unless you know what you're doing)
# //////////////////////////////////////////////////////////////////////////
async def connect_loop():
retry = 5
while True: while True:
try: try:
# Use current widget geometry if available (after user moves/resizes) print(f"[WebSocket] Connecting to {CONFIG.data['SERVER_URL']}...")
if node_id in overlay_widgets: await sio.connect(CONFIG.data['SERVER_URL'], transports=['websocket'])
widget = overlay_widgets[node_id] break
x, y, w, h = widget.get_geometry() except Exception:
print(f"[WebSocket] Server not available, retrying in {retry}s...")
await asyncio.sleep(retry)
print(f"[Capture] Screenshot task {node_id} at ({x},{y},{w},{h})") if __name__ == '__main__':
img = ImageGrab.grab(bbox=(x, y, x + w, y + h)) app = QtWidgets.QApplication(sys.argv)
buffer = BytesIO() loop = QEventLoop(app)
img.save(buffer, format="PNG") asyncio.set_event_loop(loop)
encoded = base64.b64encode(buffer.getvalue()).decode("utf-8") with loop:
loop.create_task(config_watcher())
sio.emit("agent_screenshot_task", { loop.create_task(connect_loop())
"agent_id": AGENT_ID, loop.run_forever()
"node_id": node_id, # //////////////////////////////////////////////////////////////////////////
"image_base64": encoded # END CORE SECTION: MAIN & EVENT LOOP
}) # //////////////////////////////////////////////////////////////////////////
# Schedule any visibility or geometry updates in GUI thread
QtCore.QTimer.singleShot(
0,
lambda nid=node_id, xx=x, yy=y, ww=w, hh=h, vis=visible: gui_update_widget(nid, xx, yy, ww, hh, vis)
)
except Exception as e:
print(f"[ERROR] Screenshot task {node_id} failed: {e}")
time.sleep(interval / 1000)
# ---------------- Main ----------------
if __name__ == "__main__":
app_instance = QtWidgets.QApplication(sys.argv)
retry_interval = 5 # seconds between connection attempts
def connect_loop():
while True:
try:
print(f"[WebSocket] Connecting to {SERVER_URL}...")
sio.connect(SERVER_URL, transports=["websocket"], wait=False)
break
except Exception:
print(f"[WebSocket] Borealis Server is Not Running - Retrying in {retry_interval} seconds...")
time.sleep(retry_interval)
threading.Thread(target=connect_loop, daemon=True).start()
sys.exit(app_instance.exec_())