Fixed Invisible Screenshot Agent Overlay

This commit is contained in:
Nicole Rappe 2025-05-02 21:31:24 -06:00
parent e44425dd37
commit 536e838970
2 changed files with 160 additions and 172 deletions

View File

@ -3,9 +3,11 @@ requests
python-socketio
websocket-client
eventlet
aiohttp
# GUI-related dependencies (Qt for GUI components)
PyQt5
qasync
# Computer Vision & OCR Dependencies
Pillow

View File

@ -2,87 +2,113 @@
import sys
import uuid
import time
import base64
import threading
import socketio
from io import BytesIO
import socket
import os
import json
import asyncio
import concurrent.futures
from functools import partial
from io import BytesIO
import base64
import socketio
from qasync import QEventLoop
from PyQt5 import QtCore, QtGui, QtWidgets
from PIL import ImageGrab
# ---------------- Configuration ----------------
# //////////////////////////////////////////////////////////////////////////
# CORE SECTION: CONFIG MANAGER (do not modify unless you know what youre doing)
# //////////////////////////////////////////////////////////////////////////
CONFIG_PATH = os.path.join(os.path.dirname(__file__), "agent_settings.json")
DEFAULT_SERVER_URL = "http://localhost:5000"
DEFAULT_CONFIG = {
"SERVER_URL": "http://localhost:5000",
"max_workers": 8,
"config_watch_interval": 2
}
def load_config():
"""
Load agent_settings.json or prompt the user for SERVER_URL.
Returns a config dictionary with at least {"SERVER_URL": ...}
"""
config = {}
class ConfigManager:
def __init__(self, path):
self.path = path
self._last_mtime = None
self.data = {}
self.load()
if os.path.exists(CONFIG_PATH):
def load(self):
if not os.path.exists(self.path):
self.data = DEFAULT_CONFIG.copy()
self._write()
else:
try:
with open(self.path, 'r') as f:
self.data = json.load(f)
except Exception as e:
print(f"[WARN] Failed to parse config: {e}")
self.data = DEFAULT_CONFIG.copy()
try:
with open(CONFIG_PATH, "r") as f:
config = json.load(f)
if isinstance(config, dict) and "SERVER_URL" in config:
return config
self._last_mtime = os.path.getmtime(self.path)
except Exception:
self._last_mtime = None
def _write(self):
try:
with open(self.path, 'w') as f:
json.dump(self.data, f, indent=2)
except Exception as e:
print(f"[WARN] Failed to parse agent_settings.json: {e}")
print(f"[ERROR] Could not write config: {e}")
try:
user_input = input(f"Enter Borealis Server URL [{DEFAULT_SERVER_URL}]: ").strip()
config["SERVER_URL"] = user_input if user_input else DEFAULT_SERVER_URL
with open(CONFIG_PATH, "w") as f:
json.dump(config, f, indent=2)
except Exception as e:
print(f"[ERROR] Prompt failed: {e}")
config["SERVER_URL"] = DEFAULT_SERVER_URL
def watch(self):
try:
mtime = os.path.getmtime(self.path)
if self._last_mtime is None or mtime != self._last_mtime:
print("[CONFIG] Detected config change, reloading.")
self.load()
return True
except Exception:
pass
return False
return config
CONFIG = ConfigManager(CONFIG_PATH)
# //////////////////////////////////////////////////////////////////////////
# END CORE SECTION: CONFIG MANAGER
# //////////////////////////////////////////////////////////////////////////
config = load_config()
SERVER_URL = config["SERVER_URL"]
# //////////////////////////////////////////////////////////////////////////
# CORE SECTION: WEBSOCKET SETUP & HANDLERS (do not modify unless absolutely necessary)
# //////////////////////////////////////////////////////////////////////////
AGENT_ID = f"{socket.gethostname().lower()}-agent-{uuid.uuid4().hex[:8]}"
HOSTNAME = socket.gethostname().lower()
RANDOM_SUFFIX = uuid.uuid4().hex[:8]
AGENT_ID = f"{HOSTNAME}-agent-{RANDOM_SUFFIX}"
# ---------------- App State ----------------
app_instance = None
overlay_widgets = {}
region_launchers = {}
running_roles = {}
running_threads = {}
# ---------------- Socket Setup ----------------
# Enable automatic reconnection with retries in background
sio = socketio.Client(reconnection=True, reconnection_attempts=0, reconnection_delay=5)
sio = socketio.AsyncClient(reconnection=True, reconnection_attempts=0, reconnection_delay=5)
role_tasks = {}
@sio.event
def connect():
async def connect():
print(f"[WebSocket] Agent ID: {AGENT_ID} connected to Borealis.")
sio.emit('connect_agent', {"agent_id": AGENT_ID, "hostname": HOSTNAME})
sio.emit('request_config', {"agent_id": AGENT_ID})
await sio.emit('connect_agent', {"agent_id": AGENT_ID})
await sio.emit('request_config', {"agent_id": AGENT_ID})
@sio.event
def disconnect():
async def disconnect():
print("[WebSocket] Lost connection to Borealis server.")
@sio.on('agent_config')
def on_agent_config(config):
async def on_agent_config(cfg):
print("[PROVISIONED] Received new configuration from Borealis.")
# cancel existing role tasks
for task in list(role_tasks.values()):
task.cancel()
role_tasks.clear()
# start new tasks
for role_cfg in cfg.get('roles', []):
role = role_cfg.get('role')
node_id = role_cfg.get('node_id')
if role == 'screenshot' and node_id:
task = asyncio.create_task(screenshot_task(role_cfg))
role_tasks[node_id] = task
# //////////////////////////////////////////////////////////////////////////
# END CORE SECTION: WEBSOCKET SETUP & HANDLERS
# //////////////////////////////////////////////////////////////////////////
roles = config.get("roles", [])
stop_all_roles()
for role in roles:
start_role_thread(role)
# ---------------- Overlay Class ----------------
# ---------------- Overlay Widget ----------------
class ScreenshotRegion(QtWidgets.QWidget):
def __init__(self, node_id, x=100, y=100, w=300, h=200):
super().__init__()
@ -99,7 +125,6 @@ class ScreenshotRegion(QtWidgets.QWidget):
self.label.setText(f"{node_id[:8]}")
self.label.setStyleSheet("color: lime; background: transparent; font-size: 10px;")
self.label.move(8, 4)
self.setMouseTracking(True)
def paintEvent(self, event):
@ -108,28 +133,25 @@ class ScreenshotRegion(QtWidgets.QWidget):
painter.setBrush(QtCore.Qt.transparent)
painter.setPen(QtGui.QPen(QtGui.QColor(0, 255, 0), 2))
painter.drawRect(self.rect())
handle_rect = QtCore.QRect(
self.width() - self.resize_handle_size,
self.height() - self.resize_handle_size,
self.resize_handle_size,
self.resize_handle_size
)
painter.fillRect(handle_rect, QtGui.QColor(0, 255, 0))
handle = QtCore.QRect(self.width() - self.resize_handle_size,
self.height() - self.resize_handle_size,
self.resize_handle_size, self.resize_handle_size)
painter.fillRect(handle, QtGui.QColor(0, 255, 0))
def mousePressEvent(self, event):
if event.button() == QtCore.Qt.LeftButton:
if event.pos().x() > self.width() - self.resize_handle_size and \
event.pos().y() > self.height() - self.resize_handle_size:
px, py = event.pos().x(), event.pos().y()
if px > self.width() - self.resize_handle_size and \
py > self.height() - self.resize_handle_size:
self.resizing = True
else:
self.drag_offset = event.globalPos() - self.frameGeometry().topLeft()
def mouseMoveEvent(self, event):
if self.resizing:
new_width = max(event.pos().x(), 100)
new_height = max(event.pos().y(), 80)
self.resize(new_width, new_height)
nw = max(event.pos().x(), 100)
nh = max(event.pos().y(), 80)
self.resize(nw, nh)
elif event.buttons() & QtCore.Qt.LeftButton and self.drag_offset:
self.move(event.globalPos() - self.drag_offset)
@ -141,117 +163,81 @@ class ScreenshotRegion(QtWidgets.QWidget):
geo = self.geometry()
return geo.x(), geo.y(), geo.width(), geo.height()
# ---------------- Region UI Handler ----------------
class RegionLauncher(QtCore.QObject):
trigger = QtCore.pyqtSignal(int, int, int, int)
# ---------------- Helper Functions ----------------
app = None
overlay_widgets = {}
def __init__(self, node_id):
super().__init__()
self.node_id = node_id
self.trigger.connect(self.handle)
def create_overlay(node_id, region):
if node_id in overlay_widgets:
return
x, y, w, h = region
widget = ScreenshotRegion(node_id, x, y, w, h)
overlay_widgets[node_id] = widget
widget.show()
def handle(self, x, y, w, h):
print(f"[Overlay] Launching overlay for {self.node_id} at ({x},{y},{w},{h})")
if self.node_id in overlay_widgets:
return
widget = ScreenshotRegion(self.node_id, x, y, w, h)
overlay_widgets[self.node_id] = widget
widget.show()
# ---------------- GUI Thread Helpers ----------------
def gui_create_launcher(node_id, x, y, w, h):
launcher = RegionLauncher(node_id)
region_launchers[node_id] = launcher
launcher.handle(x, y, w, h)
def gui_update_widget(node_id, x, y, w, h, visible):
def get_overlay_geometry(node_id):
widget = overlay_widgets.get(node_id)
if widget:
widget.setGeometry(x, y, w, h)
widget.setVisible(visible)
return widget.get_geometry()
return (0, 0, 0, 0)
# ---------------- Role Management ----------------
def stop_all_roles():
for node_id, thread in running_threads.items():
if thread and thread.is_alive():
print(f"[Role] Terminating previous task: {node_id}")
running_roles.clear()
running_threads.clear()
def start_role_thread(role_cfg):
role = role_cfg.get("role")
node_id = role_cfg.get("node_id")
if not role or not node_id:
print("[ERROR] Invalid role configuration (missing role or node_id).")
# ---------------- Screenshot Task ----------------
async def screenshot_task(cfg):
interval = cfg.get('interval', 1000) / 1000.0
node_id = cfg.get('node_id')
region = (cfg.get('x', 100), cfg.get('y', 100), cfg.get('w', 300), cfg.get('h', 200))
create_overlay(node_id, region)
loop = asyncio.get_event_loop()
executor = concurrent.futures.ThreadPoolExecutor(max_workers=CONFIG.data.get('max_workers', 8))
try:
while True:
x, y, w, h = get_overlay_geometry(node_id)
grab = partial(ImageGrab.grab, bbox=(x, y, x + w, y + h))
img = await loop.run_in_executor(executor, grab)
buf = BytesIO()
img.save(buf, format='PNG')
encoded = base64.b64encode(buf.getvalue()).decode('utf-8')
await sio.emit('agent_screenshot_task', {
'agent_id': AGENT_ID,
'node_id': node_id,
'image_base64': encoded
})
await asyncio.sleep(interval)
except asyncio.CancelledError:
return
except Exception as e:
print(f"[ERROR] Screenshot task {node_id} failed: {e}")
if role == "screenshot":
thread = threading.Thread(target=run_screenshot_loop, args=(node_id, role_cfg), daemon=True)
else:
print(f"[SKIP] Unknown role: {role}")
return
running_roles[node_id] = role_cfg
running_threads[node_id] = thread
thread.start()
print(f"[Role] Started task: {role} ({node_id})")
# ---------------- Screenshot Role Loop ----------------
def run_screenshot_loop(node_id, cfg):
interval = cfg.get("interval", 1000)
visible = cfg.get("visible", True)
x = cfg.get("x", 100)
y = cfg.get("y", 100)
w = cfg.get("w", 300)
h = cfg.get("h", 200)
# Schedule launcher creation in GUI thread
if node_id not in region_launchers:
QtCore.QTimer.singleShot(0, lambda nid=node_id, xx=x, yy=y, ww=w, hh=h: gui_create_launcher(nid, xx, yy, ww, hh))
# ---------------- Config Watcher ----------------
async def config_watcher():
while True:
if CONFIG.watch():
# settings updated, e.g., executor pool size will apply on next task run
pass
await asyncio.sleep(CONFIG.data.get('config_watch_interval', 2))
# //////////////////////////////////////////////////////////////////////////
# CORE SECTION: MAIN & EVENT LOOP (do not modify unless you know what youre doing)
# //////////////////////////////////////////////////////////////////////////
async def connect_loop():
retry = 5
while True:
try:
# Use current widget geometry if available (after user moves/resizes)
if node_id in overlay_widgets:
widget = overlay_widgets[node_id]
x, y, w, h = widget.get_geometry()
print(f"[WebSocket] Connecting to {CONFIG.data['SERVER_URL']}...")
await sio.connect(CONFIG.data['SERVER_URL'], transports=['websocket'])
break
except Exception:
print(f"[WebSocket] Server not available, retrying in {retry}s...")
await asyncio.sleep(retry)
print(f"[Capture] Screenshot task {node_id} at ({x},{y},{w},{h})")
img = ImageGrab.grab(bbox=(x, y, x + w, y + h))
buffer = BytesIO()
img.save(buffer, format="PNG")
encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
sio.emit("agent_screenshot_task", {
"agent_id": AGENT_ID,
"node_id": node_id,
"image_base64": encoded
})
# Schedule any visibility or geometry updates in GUI thread
QtCore.QTimer.singleShot(
0,
lambda nid=node_id, xx=x, yy=y, ww=w, hh=h, vis=visible: gui_update_widget(nid, xx, yy, ww, hh, vis)
)
except Exception as e:
print(f"[ERROR] Screenshot task {node_id} failed: {e}")
time.sleep(interval / 1000)
# ---------------- Main ----------------
if __name__ == "__main__":
app_instance = QtWidgets.QApplication(sys.argv)
retry_interval = 5 # seconds between connection attempts
def connect_loop():
while True:
try:
print(f"[WebSocket] Connecting to {SERVER_URL}...")
sio.connect(SERVER_URL, transports=["websocket"], wait=False)
break
except Exception:
print(f"[WebSocket] Borealis Server is Not Running - Retrying in {retry_interval} seconds...")
time.sleep(retry_interval)
threading.Thread(target=connect_loop, daemon=True).start()
sys.exit(app_instance.exec_())
if __name__ == '__main__':
app = QtWidgets.QApplication(sys.argv)
loop = QEventLoop(app)
asyncio.set_event_loop(loop)
with loop:
loop.create_task(config_watcher())
loop.create_task(connect_loop())
loop.run_forever()
# //////////////////////////////////////////////////////////////////////////
# END CORE SECTION: MAIN & EVENT LOOP
# //////////////////////////////////////////////////////////////////////////