Fixed Invisible Screenshot Agent Overlay

This commit is contained in:
2025-05-02 21:31:24 -06:00
parent e44425dd37
commit 536e838970
2 changed files with 160 additions and 172 deletions

View File

@ -3,9 +3,11 @@ requests
python-socketio
websocket-client
eventlet
aiohttp
# GUI-related dependencies (Qt for GUI components)
PyQt5
qasync
# Computer Vision & OCR Dependencies
Pillow

View File

@ -2,87 +2,113 @@
import sys
import uuid
import time
import base64
import threading
import socketio
from io import BytesIO
import socket
import os
import json
import asyncio
import concurrent.futures
from functools import partial
from io import BytesIO
import base64
import socketio
from qasync import QEventLoop
from PyQt5 import QtCore, QtGui, QtWidgets
from PIL import ImageGrab
# ---------------- Configuration ----------------
# //////////////////////////////////////////////////////////////////////////
# CORE SECTION: CONFIG MANAGER (do not modify unless you know what you're doing)
# //////////////////////////////////////////////////////////////////////////
# The settings file is looked up in the same directory as this script.
CONFIG_PATH = os.path.join(os.path.dirname(__file__), "agent_settings.json")

# Server address used when the settings file does not provide one.
DEFAULT_SERVER_URL = "http://localhost:5000"

# Baseline settings: written to disk when no settings file exists and used
# as the fallback when the existing file cannot be parsed.
DEFAULT_CONFIG = dict(
    SERVER_URL=DEFAULT_SERVER_URL,
    max_workers=8,
    config_watch_interval=2,
)
def load_config():
"""
Load agent_settings.json or prompt the user for SERVER_URL.
Returns a config dictionary with at least {"SERVER_URL": ...}
"""
config = {}
class ConfigManager:
    """Load, persist and hot-reload the agent's JSON settings file.

    ``data`` always ends up as a dict containing every key of
    ``DEFAULT_CONFIG``; values found in the file override the defaults.
    A missing file is created from the defaults, and an unreadable or
    malformed file falls back to the defaults without being overwritten.
    """

    def __init__(self, path):
        """Remember *path* and perform the initial load."""
        self.path = path
        # Modification time seen at the last successful load; None = unknown.
        self._last_mtime = None
        self.data = {}
        self.load()

    def load(self):
        """(Re)read the settings file into ``self.data``.

        Never raises: read/parse problems are reported on stdout and the
        defaults are used instead.
        """
        if not os.path.exists(self.path):
            self.data = DEFAULT_CONFIG.copy()
            self._write()
        else:
            try:
                with open(self.path, 'r', encoding='utf-8') as f:
                    loaded = json.load(f)
                if not isinstance(loaded, dict):
                    # Callers index the config by key; a list/str/number
                    # at top level would break every one of them.
                    raise ValueError("top-level JSON value must be an object")
                # Fill in keys the file omits (e.g. SERVER_URL) so callers
                # can rely on them being present.
                self.data = {**DEFAULT_CONFIG, **loaded}
            except (OSError, ValueError) as e:
                # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
                print(f"[WARN] Failed to parse config: {e}")
                self.data = DEFAULT_CONFIG.copy()
        try:
            self._last_mtime = os.path.getmtime(self.path)
        except OSError:
            # File could not be stat'ed; watch() will retry the load later.
            self._last_mtime = None

    def _write(self):
        """Serialize ``self.data`` to the settings file (best effort)."""
        try:
            with open(self.path, 'w', encoding='utf-8') as f:
                json.dump(self.data, f, indent=2)
        except (OSError, TypeError, ValueError) as e:
            print(f"[ERROR] Could not write config: {e}")

    def watch(self):
        """Reload the settings if the file's modification time changed.

        Returns:
            True when a reload was performed, False otherwise (including
            when the file cannot be stat'ed, e.g. because it is missing).
        """
        try:
            mtime = os.path.getmtime(self.path)
        except OSError:
            return False
        if self._last_mtime is None or mtime != self._last_mtime:
            print("[CONFIG] Detected config change, reloading.")
            self.load()
            return True
        return False
config = load_config()
SERVER_URL = config["SERVER_URL"]
CONFIG = ConfigManager(CONFIG_PATH)
# //////////////////////////////////////////////////////////////////////////
# END CORE SECTION: CONFIG MANAGER
# //////////////////////////////////////////////////////////////////////////
HOSTNAME = socket.gethostname().lower()
RANDOM_SUFFIX = uuid.uuid4().hex[:8]
AGENT_ID = f"{HOSTNAME}-agent-{RANDOM_SUFFIX}"
# //////////////////////////////////////////////////////////////////////////
# CORE SECTION: WEBSOCKET SETUP & HANDLERS (do not modify unless absolutely necessary)
# //////////////////////////////////////////////////////////////////////////
AGENT_ID = f"{socket.gethostname().lower()}-agent-{uuid.uuid4().hex[:8]}"
# ---------------- App State ----------------
app_instance = None
overlay_widgets = {}
region_launchers = {}
running_roles = {}
running_threads = {}
# ---------------- Socket Setup ----------------
# Enable automatic reconnection with retries in background
sio = socketio.Client(reconnection=True, reconnection_attempts=0, reconnection_delay=5)
sio = socketio.AsyncClient(reconnection=True, reconnection_attempts=0, reconnection_delay=5)
role_tasks = {}
@sio.event
async def connect():
    """Announce this agent to the server and ask for its configuration.

    Registered as the Socket.IO ``connect`` handler; emits
    ``connect_agent`` followed by ``request_config``, each carrying the
    agent id.
    """
    print(f"[WebSocket] Agent ID: {AGENT_ID} connected to Borealis.")
    for event_name in ('connect_agent', 'request_config'):
        # A fresh payload per emit, exactly as two separate literals would be.
        await sio.emit(event_name, {"agent_id": AGENT_ID})
@sio.event
async def disconnect():
    """Socket.IO ``disconnect`` handler: report that the link was lost."""
    print("[WebSocket] Lost connection to Borealis server.")
@sio.on('agent_config')
async def on_agent_config(cfg):
    """Replace all running role tasks with the ones described by *cfg*.

    Every task currently tracked in ``role_tasks`` is cancelled, then one
    ``screenshot_task`` is started per entry of ``cfg['roles']`` whose
    ``role`` is ``'screenshot'`` and that carries a ``node_id``. Other
    roles, and entries without a ``node_id``, are ignored.

    Args:
        cfg: Provisioning payload from the server. Anything that is not a
            dict, or whose ``roles`` is missing/None, is treated as
            "no roles".
    """
    print("[PROVISIONED] Received new configuration from Borealis.")

    # Cancel existing role tasks (iterate a copy; the dict is cleared next).
    for task in list(role_tasks.values()):
        task.cancel()
    role_tasks.clear()

    roles = cfg.get('roles') if isinstance(cfg, dict) else None
    for role_cfg in roles or []:
        if not isinstance(role_cfg, dict):
            continue
        role = role_cfg.get('role')
        node_id = role_cfg.get('node_id')
        if role != 'screenshot' or not node_id:
            continue
        # The same node_id may appear twice in one payload: cancel the
        # earlier task instead of losing its handle and leaving it running.
        previous = role_tasks.pop(node_id, None)
        if previous is not None:
            previous.cancel()
        role_tasks[node_id] = asyncio.create_task(screenshot_task(role_cfg))
# //////////////////////////////////////////////////////////////////////////
# END CORE SECTION: WEBSOCKET SETUP & HANDLERS
# //////////////////////////////////////////////////////////////////////////
roles = config.get("roles", [])
stop_all_roles()
for role in roles:
start_role_thread(role)
# ---------------- Overlay Class ----------------
# ---------------- Overlay Widget ----------------
class ScreenshotRegion(QtWidgets.QWidget):
def __init__(self, node_id, x=100, y=100, w=300, h=200):
super().__init__()
@ -99,7 +125,6 @@ class ScreenshotRegion(QtWidgets.QWidget):
self.label.setText(f"{node_id[:8]}")
self.label.setStyleSheet("color: lime; background: transparent; font-size: 10px;")
self.label.move(8, 4)
self.setMouseTracking(True)
def paintEvent(self, event):
@ -108,28 +133,25 @@ class ScreenshotRegion(QtWidgets.QWidget):
painter.setBrush(QtCore.Qt.transparent)
painter.setPen(QtGui.QPen(QtGui.QColor(0, 255, 0), 2))
painter.drawRect(self.rect())
handle_rect = QtCore.QRect(
self.width() - self.resize_handle_size,
handle = QtCore.QRect(self.width() - self.resize_handle_size,
self.height() - self.resize_handle_size,
self.resize_handle_size,
self.resize_handle_size
)
painter.fillRect(handle_rect, QtGui.QColor(0, 255, 0))
self.resize_handle_size, self.resize_handle_size)
painter.fillRect(handle, QtGui.QColor(0, 255, 0))
def mousePressEvent(self, event):
if event.button() == QtCore.Qt.LeftButton:
if event.pos().x() > self.width() - self.resize_handle_size and \
event.pos().y() > self.height() - self.resize_handle_size:
px, py = event.pos().x(), event.pos().y()
if px > self.width() - self.resize_handle_size and \
py > self.height() - self.resize_handle_size:
self.resizing = True
else:
self.drag_offset = event.globalPos() - self.frameGeometry().topLeft()
def mouseMoveEvent(self, event):
if self.resizing:
new_width = max(event.pos().x(), 100)
new_height = max(event.pos().y(), 80)
self.resize(new_width, new_height)
nw = max(event.pos().x(), 100)
nh = max(event.pos().y(), 80)
self.resize(nw, nh)
elif event.buttons() & QtCore.Qt.LeftButton and self.drag_offset:
self.move(event.globalPos() - self.drag_offset)
@ -141,117 +163,81 @@ class ScreenshotRegion(QtWidgets.QWidget):
geo = self.geometry()
return geo.x(), geo.y(), geo.width(), geo.height()
# ---------------- Region UI Handler ----------------
class RegionLauncher(QtCore.QObject):
trigger = QtCore.pyqtSignal(int, int, int, int)
# ---------------- Helper Functions ----------------
app = None
overlay_widgets = {}
def __init__(self, node_id):
super().__init__()
self.node_id = node_id
self.trigger.connect(self.handle)
def handle(self, x, y, w, h):
print(f"[Overlay] Launching overlay for {self.node_id} at ({x},{y},{w},{h})")
if self.node_id in overlay_widgets:
def create_overlay(node_id, region):
    """Create and show the overlay widget for *node_id*, once.

    Does nothing when an overlay for this node is already registered in
    ``overlay_widgets``.

    Args:
        node_id: Key under which the widget is stored.
        region: ``(x, y, w, h)`` passed to ``ScreenshotRegion``.
    """
    if node_id not in overlay_widgets:
        left, top, width, height = region
        overlay = ScreenshotRegion(node_id, left, top, width, height)
        overlay_widgets[node_id] = overlay
        overlay.show()
# ---------------- GUI Thread Helpers ----------------
def gui_create_launcher(node_id, x, y, w, h):
    """Register a ``RegionLauncher`` for *node_id* and invoke it directly.

    The launcher is stored in ``region_launchers`` before its ``handle``
    method is called with the requested geometry.
    """
    region_launchers[node_id] = RegionLauncher(node_id)
    region_launchers[node_id].handle(x, y, w, h)
def gui_update_widget(node_id, x, y, w, h, visible):
def get_overlay_geometry(node_id):
    """Return the overlay's ``get_geometry()`` result for *node_id*.

    Falls back to ``(0, 0, 0, 0)`` when no (truthy) widget is registered
    under that id.
    """
    overlay = overlay_widgets.get(node_id)
    return overlay.get_geometry() if overlay else (0, 0, 0, 0)
# ---------------- Role Management ----------------
def stop_all_roles():
    """Forget every tracked role and its worker thread.

    Note: threads that are still alive are only reported on stdout; this
    function does not signal or join them, it just clears the registries.
    """
    still_alive = [
        node_id
        for node_id, worker in running_threads.items()
        if worker and worker.is_alive()
    ]
    for node_id in still_alive:
        print(f"[Role] Terminating previous task: {node_id}")
    running_roles.clear()
    running_threads.clear()
def start_role_thread(role_cfg):
    """Start the daemon worker thread for one role configuration.

    Only the ``"screenshot"`` role is known; anything else is skipped with
    a message. On success the config and thread are recorded in
    ``running_roles`` / ``running_threads`` under the node id.

    Args:
        role_cfg: Dict with at least ``"role"`` and ``"node_id"``.
    """
    role, node_id = role_cfg.get("role"), role_cfg.get("node_id")

    # Guard clauses: reject incomplete or unsupported configurations early.
    if not (role and node_id):
        print("[ERROR] Invalid role configuration (missing role or node_id).")
        return
    if role != "screenshot":
        print(f"[SKIP] Unknown role: {role}")
        return

    worker = threading.Thread(
        target=run_screenshot_loop,
        args=(node_id, role_cfg),
        daemon=True,
    )
    running_roles[node_id] = role_cfg
    running_threads[node_id] = worker
    worker.start()
    print(f"[Role] Started task: {role} ({node_id})")
# ---------------- Screenshot Role Loop ----------------
def run_screenshot_loop(node_id, cfg):
interval = cfg.get("interval", 1000)
visible = cfg.get("visible", True)
x = cfg.get("x", 100)
y = cfg.get("y", 100)
w = cfg.get("w", 300)
h = cfg.get("h", 200)
# Schedule launcher creation in GUI thread
if node_id not in region_launchers:
QtCore.QTimer.singleShot(0, lambda nid=node_id, xx=x, yy=y, ww=w, hh=h: gui_create_launcher(nid, xx, yy, ww, hh))
while True:
# ---------------- Screenshot Task ----------------
def _capture_png_base64(bbox):
    """Grab *bbox* from the screen and return it as a base64 PNG string."""
    img = ImageGrab.grab(bbox=bbox)
    buf = BytesIO()
    img.save(buf, format='PNG')
    return base64.b64encode(buf.getvalue()).decode('utf-8')


async def screenshot_task(cfg):
    """Periodically capture the overlay region and send it to the server.

    Creates the overlay for ``cfg['node_id']`` (if not present), then loops
    until cancelled: read the overlay's current geometry, capture and
    encode that screen area in a worker thread, emit it as
    ``agent_screenshot_task``, and sleep for the configured interval.

    Args:
        cfg: Role configuration. Reads ``node_id``, ``interval``
            (milliseconds, default 1000) and the initial region
            ``x``/``y``/``w``/``h`` (defaults 100/100/300/200).

    A failed capture or emit is logged and retried on the next tick
    instead of ending the task. Cancellation ends the task quietly.
    """
    interval = cfg.get('interval', 1000) / 1000.0
    node_id = cfg.get('node_id')
    region = (cfg.get('x', 100), cfg.get('y', 100), cfg.get('w', 300), cfg.get('h', 200))
    create_overlay(node_id, region)

    loop = asyncio.get_running_loop()
    executor = concurrent.futures.ThreadPoolExecutor(
        max_workers=CONFIG.data.get('max_workers', 8)
    )
    try:
        while True:
            try:
                x, y, w, h = get_overlay_geometry(node_id)
                # (0, 0, 0, 0) is what get_overlay_geometry reports when no
                # widget exists; an empty box is not worth grabbing.
                if w > 0 and h > 0:
                    # Capture *and* PNG/base64 encoding run off the event
                    # loop so the Qt GUI sharing this loop stays responsive.
                    encoded = await loop.run_in_executor(
                        executor, _capture_png_base64, (x, y, x + w, y + h)
                    )
                    await sio.emit('agent_screenshot_task', {
                        'agent_id': AGENT_ID,
                        'node_id': node_id,
                        'image_base64': encoded
                    })
            except asyncio.CancelledError:
                # Never treat cancellation as an ordinary failure.
                raise
            except Exception as e:
                print(f"[ERROR] Screenshot task {node_id} failed: {e}")
            await asyncio.sleep(interval)
    except asyncio.CancelledError:
        return
    finally:
        # Release the worker threads; without this every re-provisioning
        # would leave another idle pool behind.
        executor.shutdown(wait=False)
time.sleep(interval / 1000)
# ---------------- Config Watcher ----------------
def _watch_delay(raw, default=2.0):
    """Coerce a configured poll interval to a positive, finite float."""
    try:
        delay = float(raw)
    except (TypeError, ValueError):
        return default
    # Rejects 0, negatives, inf and NaN (NaN fails both comparisons).
    return delay if 0 < delay < float('inf') else default


async def config_watcher():
    """Poll the settings file forever, reloading it when it changes.

    ``CONFIG.watch()`` performs the reload itself, so nothing else has to
    happen here: updated settings (e.g. the executor pool size) are picked
    up the next time a task reads ``CONFIG.data``.

    The pause between polls comes from ``config_watch_interval`` in the
    settings. Because that file is user-editable, a missing, non-numeric,
    zero or negative value falls back to 2 seconds rather than spinning
    the event loop or crashing this task.
    """
    while True:
        CONFIG.watch()
        data = CONFIG.data if isinstance(CONFIG.data, dict) else {}
        await asyncio.sleep(_watch_delay(data.get('config_watch_interval', 2)))
# ---------------- Main ----------------
if __name__ == "__main__":
app_instance = QtWidgets.QApplication(sys.argv)
retry_interval = 5 # seconds between connection attempts
def connect_loop():
# //////////////////////////////////////////////////////////////////////////
# CORE SECTION: MAIN & EVENT LOOP (do not modify unless you know what you're doing)
# //////////////////////////////////////////////////////////////////////////
async def connect_loop():
    """Keep trying to connect to the server until it succeeds.

    The server URL is re-read from ``CONFIG.data`` before every attempt,
    so an edited settings file takes effect on the next retry. When the
    settings carry no usable ``SERVER_URL`` the default from
    ``DEFAULT_CONFIG`` is used instead of failing the lookup on every
    attempt.
    """
    retry = 5  # seconds between connection attempts
    while True:
        data = CONFIG.data if isinstance(CONFIG.data, dict) else {}
        url = data.get('SERVER_URL') or DEFAULT_CONFIG['SERVER_URL']
        try:
            print(f"[WebSocket] Connecting to {url}...")
            await sio.connect(url, transports=['websocket'])
            break
        except Exception:
            # Top-level retry boundary: any connection failure is retried.
            print(f"[WebSocket] Server not available, retrying in {retry}s...")
            await asyncio.sleep(retry)
threading.Thread(target=connect_loop, daemon=True).start()
sys.exit(app_instance.exec_())
if __name__ == '__main__':
    # Run asyncio on top of the Qt event loop so GUI widgets and the
    # async Socket.IO client share one thread.
    app = QtWidgets.QApplication(sys.argv)
    loop = QEventLoop(app)
    asyncio.set_event_loop(loop)
    with loop:
        # Hold strong references to the background tasks: asyncio itself
        # only keeps weak references, so an unreferenced task may be
        # garbage-collected before it finishes.
        background_tasks = [
            loop.create_task(config_watcher()),
            loop.create_task(connect_loop()),
        ]
        loop.run_forever()
# //////////////////////////////////////////////////////////////////////////
# END CORE SECTION: MAIN & EVENT LOOP
# //////////////////////////////////////////////////////////////////////////