Added Identification Overlay Node

2025-02-26 01:15:25 -07:00
parent c7cbd1ae0d
commit 981d5cb573
6 changed files with 720 additions and 15 deletions

@@ -25,6 +25,7 @@ regions = {}
app_instance = None
def _ensure_qapplication():
"""
Ensures that QApplication is initialized before creating widgets.
@@ -34,12 +35,14 @@ def _ensure_qapplication():
app_instance = QApplication(sys.argv)
threading.Thread(target=app_instance.exec_, daemon=True).start()
-def create_ocr_region(region_id, x=250, y=50, w=DEFAULT_WIDTH, h=DEFAULT_HEIGHT):
+def create_ocr_region(region_id, x=250, y=50, w=DEFAULT_WIDTH, h=DEFAULT_HEIGHT, color=(255, 255, 0)):
"""
Creates an OCR region with a visible, resizable box on the screen.
The color parameter allows customization (default yellow, blue for overlays).
"""
-_ensure_qapplication() # Ensure QApplication is running first
+_ensure_qapplication()
collector_mutex.lock()
if region_id in regions:
@@ -48,10 +51,11 @@ def create_ocr_region(region_id, x=250, y=50, w=DEFAULT_WIDTH, h=DEFAULT_HEIGHT)
regions[region_id] = {
'bbox': [x, y, w, h],
'raw_text': "",
-'widget': OCRRegionWidget(x, y, w, h, region_id)
+'widget': OCRRegionWidget(x, y, w, h, region_id, color)
}
collector_mutex.unlock()
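
A minimal usage sketch for the new color parameter (not part of this commit); the import name ocr_collector is an assumption, since the diff does not show this file's path:

    import ocr_collector as ocr  # hypothetical module name for this file

    ocr.create_ocr_region("quest_log")                      # default yellow border
    ocr.create_ocr_region("target_panel", x=600, y=120,
                          w=320, h=160, color=(0, 0, 255))  # blue border for overlays
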
def get_raw_text(region_id):
collector_mutex.lock()
if region_id not in regions:
@@ -61,10 +65,12 @@ def get_raw_text(region_id):
collector_mutex.unlock()
return text
def start_collector():
t = threading.Thread(target=_update_ocr_loop, daemon=True)
t.start()
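
A sketch of starting the background collector and polling the latest OCR text, under the same hypothetical import as above:

    import time
    import ocr_collector as ocr  # hypothetical module name

    ocr.create_ocr_region("target_panel", color=(0, 0, 255))
    ocr.start_collector()          # daemon thread re-OCRs each region every ~0.7 s

    for _ in range(5):
        time.sleep(1.0)
        print(ocr.get_raw_text("target_panel"))
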
def _update_ocr_loop():
while True:
collector_mutex.lock()
@@ -79,25 +85,102 @@ def _update_ocr_loop():
x, y, w, h = bbox
screenshot = ImageGrab.grab(bbox=(x, y, x + w, y + h))
processed = _preprocess_image(screenshot)
-raw_text = pytesseract.image_to_string(processed, config='--psm 4 --oem 1')
+raw_text = pytesseract.image_to_string(processed, config='--psm 6 --oem 1')
collector_mutex.lock()
if rid in regions:
regions[rid]['raw_text'] = raw_text
collector_mutex.unlock()
# print(f"OCR Text for {rid}: {raw_text}") # SHOW RAW OCR OUTPUT IN TERMINAL FOR DEBUGGING
time.sleep(0.7)
def _preprocess_image(image):
gray = image.convert("L")
scaled = gray.resize((gray.width * 3, gray.height * 3))
thresh = scaled.point(lambda p: 255 if p > 200 else 0)
return thresh.filter(ImageFilter.MedianFilter(3))
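
_preprocess_image upscales the grayscale capture 3x before thresholding, and find_word_positions below undoes that by rescaling pytesseract's coordinates with original/processed size ratios. A quick Pillow-only sanity check of that relationship (illustrative, not part of this commit):

    from PIL import Image

    capture = Image.new("L", (200, 80), color=255)   # stand-in for a screenshot
    processed = capture.resize((capture.width * 3, capture.height * 3))

    scale_x = capture.width / processed.width        # 1/3
    scale_y = capture.height / processed.height      # 1/3
    print(scale_x, scale_y)  # ~0.333 each: maps processed-image boxes back to capture pixels
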
def find_word_positions(region_id, word, offset_x=0, offset_y=0, margin=5):
"""
Finds positions of a specific word within the OCR region.
Applies user-defined offset and margin adjustments.
Returns a list of bounding box coordinates relative to the OCR box.
"""
collector_mutex.lock()
if region_id not in regions:
collector_mutex.unlock()
return []
bbox = regions[region_id]['bbox']
collector_mutex.unlock()
# Extract OCR region position and size
x, y, w, h = bbox
left, top, right, bottom = x, y, x + w, y + h
if right <= left or bottom <= top:
print(f"[ERROR] Invalid OCR region bounds: {bbox}")
return []
try:
image = ImageGrab.grab(bbox=(left, top, right, bottom))
processed = _preprocess_image(image)
# Get original and processed image sizes
orig_width, orig_height = image.size
proc_width, proc_height = processed.size
# Scale factor between processed image and original screenshot
scale_x = orig_width / proc_width
scale_y = orig_height / proc_height
data = pytesseract.image_to_data(processed, config='--psm 6 --oem 1', output_type=pytesseract.Output.DICT)
word_positions = []
for i in range(len(data['text'])):
if re.search(rf"\b{word}\b", data['text'][i], re.IGNORECASE):
# Scale the detected coordinates back to region-relative positions
x_scaled = int(data['left'][i] * scale_x)
y_scaled = int(data['top'][i] * scale_y)
w_scaled = int(data['width'][i] * scale_x)
h_scaled = int(data['height'][i] * scale_y)
# Apply user-configured margin
x_margin = max(0, x_scaled - margin)
y_margin = max(0, y_scaled - margin)
w_margin = w_scaled + (margin * 2)
h_margin = h_scaled + (margin * 2)
# Apply user-configured offset
x_final = x_margin + offset_x
y_final = y_margin + offset_y
word_positions.append((x_final, y_final, w_margin, h_margin))
return word_positions
except Exception as e:
print(f"[ERROR] Failed to capture OCR region: {e}")
return []
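
A worked example of the scaling, margin, and offset arithmetic above, with made-up numbers: a pytesseract box of (90, 36, 120, 24) in processed-image pixels, the 3x preprocessing upscale, margin=5, and offset (10, -2):

    scale_x = scale_y = 1 / 3                     # capture size / processed size
    left, top, width, height = 90, 36, 120, 24    # illustrative pytesseract box
    margin, offset_x, offset_y = 5, 10, -2

    x_s, y_s = int(left * scale_x), int(top * scale_y)        # 30, 12
    w_s, h_s = int(width * scale_x), int(height * scale_y)    # 40, 8
    box = (max(0, x_s - margin) + offset_x,                   # 35
           max(0, y_s - margin) + offset_y,                   # 5
           w_s + 2 * margin,                                  # 50
           h_s + 2 * margin)                                  # 18
    print(box)  # (35, 5, 50, 18), relative to the OCR region's top-left corner
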
def draw_identification_boxes(region_id, positions, color=(0, 0, 255)):
"""
Draws non-interactive rectangles at specified positions within the given OCR region.
"""
collector_mutex.lock()
if region_id in regions and 'widget' in regions[region_id]:
widget = regions[region_id]['widget']
widget.set_draw_positions(positions, color)
collector_mutex.unlock()
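
A sketch of the end-to-end flow these two helpers enable, roughly what an identification-overlay caller might run on a timer (import name again hypothetical):

    import time
    import ocr_collector as ocr  # hypothetical module name

    ocr.create_ocr_region("target_panel", color=(0, 0, 255))
    ocr.start_collector()

    while True:
        hits = ocr.find_word_positions("target_panel", "Rare", margin=5)
        ocr.draw_identification_boxes("target_panel", hits, color=(0, 0, 255))
        time.sleep(1.0)
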
class OCRRegionWidget(QWidget):
-def __init__(self, x, y, w, h, region_id):
+def __init__(self, x, y, w, h, region_id, color):
super().__init__()
self.setGeometry(x, y, w, h)
@@ -108,23 +191,41 @@ class OCRRegionWidget(QWidget):
self.drag_offset = None
self.selected_handle = None
self.region_id = region_id
self.box_color = QColor(*color)
self.draw_positions = []
self.show()
def paintEvent(self, event):
painter = QPainter(self)
-pen = QPen(QColor(255, 255, 0)) # COLOR OF THE BOX ITSELF
-pen.setWidth(5) # WIDTH OF THE BOX BORDER
+pen = QPen(self.box_color)
+pen.setWidth(5)
painter.setPen(pen)
# Draw main rectangle
painter.drawRect(0, 0, self.width(), self.height())
# Draw detected word overlays
pen.setWidth(2)
pen.setColor(QColor(0, 0, 255))
painter.setPen(pen)
for x, y, w, h in self.draw_positions:
painter.drawRect(x, y, w, h)
# Draw resize handles
-painter.setBrush(QColor(255, 255, 0)) # COLOR OF THE RESIZE HANDLES
+painter.setBrush(self.box_color)
for handle in self._resize_handles():
painter.drawRect(handle)
def set_draw_positions(self, positions, color):
"""
Update the positions where identification boxes should be drawn.
"""
self.draw_positions = positions
self.box_color = QColor(*color)
self.update()
def _resize_handles(self):
w, h = self.width(), self.height()
return [
@@ -174,7 +275,3 @@ class OCRRegionWidget(QWidget):
if self.region_id in regions:
regions[self.region_id]['bbox'] = [new_x, new_y, self.width(), self.height()]
collector_mutex.unlock()
def mouseReleaseEvent(self, event):
self.selected_handle = None
self.drag_offset = None