Successfully implemented multi-core OCR functionality.
Binary file not shown.
@@ -6,6 +6,7 @@ import re
 import sys
 import numpy as np
 import cv2
+import concurrent.futures
 
 # Vision-related Imports
 import pytesseract
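
Context for the new import: the slicing code added below leans on concurrent.futures.ThreadPoolExecutor.map, which yields results in submission order even when workers finish out of order. That ordering guarantee is what lets the per-strip detections be concatenated without re-sorting. A minimal sketch (illustrative names, not part of the commit):

    import concurrent.futures

    def work(n):
        return n * n

    # map() returns results in input order, so strip 0's detections
    # always precede strip 1's in the merged list.
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
        results = list(executor.map(work, range(4)))

    print(results)  # [0, 1, 4, 9]
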
@@ -111,11 +112,9 @@ def _preprocess_image(image):
     return thresh.filter(ImageFilter.MedianFilter(3))
 
 
-def find_word_positions(region_id, word, offset_x=0, offset_y=0, margin=5, ocr_engine="CPU"):
+def find_word_positions(region_id, word, offset_x=0, offset_y=0, margin=5, ocr_engine="CPU", num_slices=1):
     """
-    Optimized function to detect word positions in an OCR region.
-    Uses raw screen data without preprocessing for max performance.
-    Uses Tesseract (CPU) or EasyOCR (GPU) depending on user selection.
+    Uses user-defined horizontal slices and threading for faster inference.
     """
     collector_mutex.lock()
     if region_id not in regions:
@@ -125,7 +124,6 @@ def find_word_positions(region_id, word, offset_x=0, offset_y=0, margin=5, ocr_e
     bbox = regions[region_id]['bbox']
     collector_mutex.unlock()
 
-    # Extract OCR region position and size
     x, y, w, h = bbox
     left, top, right, bottom = x, y, x + w, y + h
 
@@ -134,61 +132,84 @@ def find_word_positions(region_id, word, offset_x=0, offset_y=0, margin=5, ocr_e
         return []
 
     try:
-        # Capture raw screen image (NO preprocessing)
         image = ImageGrab.grab(bbox=(left, top, right, bottom))
 
-        # Get original image size
         orig_width, orig_height = image.size
 
         word_positions = []
 
-        if ocr_engine == "CPU":
-            # Use Tesseract directly on raw PIL image (no preprocessing)
-            data = pytesseract.image_to_data(image, config='--psm 6 --oem 1', output_type=pytesseract.Output.DICT)
-
-            for i in range(len(data['text'])):
-                if re.search(rf"\b{word}\b", data['text'][i], re.IGNORECASE):
-                    x_scaled = int(data['left'][i])
-                    y_scaled = int(data['top'][i])
-                    w_scaled = int(data['width'][i])
-                    h_scaled = int(data['height'][i])
-
-                    word_positions.append((x_scaled + offset_x, y_scaled + offset_y, w_scaled + (margin * 2), h_scaled + (margin * 2)))
-
-        else:
-            # Convert PIL image to NumPy array for EasyOCR
-            image_np = np.array(image)
-
-            # Run GPU OCR
-            results = reader_gpu.readtext(image_np)
-
-            for (bbox, text, _) in results:
-                if re.search(rf"\b{word}\b", text, re.IGNORECASE):
-                    (x_min, y_min), (x_max, y_max) = bbox[0], bbox[2]
-
-                    x_scaled = int(x_min)
-                    y_scaled = int(y_min)
-                    w_scaled = int(x_max - x_min)
-                    h_scaled = int(y_max - y_min)
-
-                    word_positions.append((x_scaled + offset_x, y_scaled + offset_y, w_scaled + (margin * 2), h_scaled + (margin * 2)))
+        # Ensure number of slices does not exceed image height
+        num_slices = min(num_slices, orig_height)
+        strip_height = max(1, orig_height // num_slices)
+
+        def process_strip(strip_id):
+            strip_y = strip_id * strip_height
+            strip = image.crop((0, strip_y, orig_width, strip_y + strip_height))
+            strip_np = np.array(strip)
+            detected_positions = []
+
+            if ocr_engine == "CPU":
+                ocr_data = pytesseract.image_to_data(strip, config='--psm 6 --oem 1', output_type=pytesseract.Output.DICT)
+
+                for i in range(len(ocr_data['text'])):
+                    if re.search(rf"\b{word}\b", ocr_data['text'][i], re.IGNORECASE):
+                        x_scaled = int(ocr_data['left'][i])
+                        y_scaled = int(ocr_data['top'][i]) + strip_y
+                        w_scaled = int(ocr_data['width'][i])
+                        h_scaled = int(ocr_data['height'][i])
+
+                        detected_positions.append((x_scaled + offset_x, y_scaled + offset_y, w_scaled + (margin * 2), h_scaled + (margin * 2)))
+
+            else:
+                results = reader_gpu.readtext(strip_np)
+
+                for (bbox, text, _) in results:
+                    if re.search(rf"\b{word}\b", text, re.IGNORECASE):
+                        (x_min, y_min), (x_max, y_max) = bbox[0], bbox[2]
+
+                        x_scaled = int(x_min)
+                        y_scaled = int(y_min) + strip_y
+                        w_scaled = int(x_max - x_min)
+                        h_scaled = int(y_max - y_min)
+
+                        detected_positions.append((x_scaled + offset_x, y_scaled + offset_y, w_scaled + (margin * 2), h_scaled + (margin * 2)))
+
+            return detected_positions
+
+        with concurrent.futures.ThreadPoolExecutor(max_workers=num_slices) as executor:
+            strip_results = list(executor.map(process_strip, range(num_slices)))
+
+        for strip_result in strip_results:
+            word_positions.extend(strip_result)
 
         return word_positions
 
     except Exception as e:
         print(f"[ERROR] Failed to capture OCR region: {e}")
         return []
 
 
 def draw_identification_boxes(region_id, positions, color=(0, 0, 255), thickness=2):
     """
     Draws non-interactive rectangles at specified positions within the given OCR region.
     Uses a separate rendering thread to prevent blocking OCR processing.
     """
     collector_mutex.lock()
     if region_id in regions and 'widget' in regions[region_id]:
         widget = regions[region_id]['widget']
-        widget.set_draw_positions(positions, color, thickness)
+        widget.update_draw_positions(positions, color, thickness)
     collector_mutex.unlock()
 
+def update_region_slices(region_id, num_slices):
+    """
+    Updates the number of visual slices in the OCR region.
+    """
+    collector_mutex.lock()
+    if region_id in regions and 'widget' in regions[region_id]:
+        widget = regions[region_id]['widget']
+        widget.set_num_slices(num_slices)
+    collector_mutex.unlock()
+
 class OCRRegionWidget(QWidget):
     def __init__(self, x, y, w, h, region_id, color, thickness):
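
The hunk above is the heart of the commit: the captured region is cut into num_slices horizontal strips, each strip is OCR'd on its own thread, and every hit's y coordinate is shifted back by strip_y into full-image space. Threads (rather than processes) work here because pytesseract shells out to the tesseract binary per call, so the GIL is not the bottleneck. Two caveats follow from the design: integer division in orig_height // num_slices leaves the bottom orig_height % num_slices pixel rows outside every strip, and a word straddling a strip boundary can be cut in half and missed by both neighbors. A hedged standalone sketch of the same splitting logic, with the remainder folded into the last strip (ocr_in_strips and the ocr_fn callback are illustrative names, not part of the commit):

    import concurrent.futures

    def ocr_in_strips(image, num_slices, ocr_fn):
        """Run ocr_fn over horizontal strips of a PIL image in parallel.

        ocr_fn(strip) must return (x, y, w, h) boxes in strip-local coordinates.
        """
        width, height = image.size
        num_slices = min(num_slices, height)
        strip_height = max(1, height // num_slices)

        def process(strip_id):
            top = strip_id * strip_height
            # Unlike the commit, the last strip absorbs the remainder rows.
            bottom = height if strip_id == num_slices - 1 else top + strip_height
            strip = image.crop((0, top, width, bottom))
            # Shift strip-local y coordinates back into full-image space.
            return [(x, y + top, w, h) for (x, y, w, h) in ocr_fn(strip)]

        with concurrent.futures.ThreadPoolExecutor(max_workers=num_slices) as ex:
            results = ex.map(process, range(num_slices))
        return [box for boxes in results for box in boxes]
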
@@ -199,12 +220,12 @@ class OCRRegionWidget(QWidget):
         self.setAttribute(Qt.WA_TranslucentBackground, True)
         self.setAttribute(Qt.WA_TransparentForMouseEvents, False)
 
-        self.drag_offset = None
-        self.selected_handle = None
         self.region_id = region_id
         self.box_color = QColor(*color)
         self.line_thickness = thickness
         self.draw_positions = []
+        self.previous_positions = []  # Prevents redundant redraws
+        self.num_slices = 1  # Ensures slice count is initialized
 
         self.show()
 
@@ -221,6 +242,16 @@ class OCRRegionWidget(QWidget):
         for x, y, w, h in self.draw_positions:
             painter.drawRect(x, y, w, h)
 
+        # Draw faint slice division lines
+        if self.num_slices > 1:
+            strip_height = self.height() // self.num_slices
+            pen.setColor(QColor(150, 150, 150, 100))  # Light gray, semi-transparent
+            pen.setWidth(1)
+            painter.setPen(pen)
+
+            for i in range(1, self.num_slices):  # No line at the bottom edge
+                painter.drawLine(0, i * strip_height, self.width(), i * strip_height)
+
     def set_draw_positions(self, positions, color, thickness):
         """
         Updates the overlay positions and visual settings.
@@ -230,6 +261,27 @@ class OCRRegionWidget(QWidget):
         self.line_thickness = thickness
         self.update()
 
+    def update_draw_positions(self, positions, color, thickness):
+        """
+        Updates the overlay positions and redraws only if the positions have changed.
+        This prevents unnecessary flickering.
+        """
+        if positions == self.previous_positions:
+            return  # No change, do not update
+
+        self.previous_positions = positions  # Store last known positions
+        self.draw_positions = positions
+        self.box_color = QColor(*color)
+        self.line_thickness = thickness
+        self.update()  # Redraw only if needed
+
+    def set_num_slices(self, num_slices):
+        """
+        Updates the number of horizontal slices for visualization.
+        """
+        self.num_slices = num_slices
+        self.update()
+
     def _resize_handles(self):
         w, h = self.width(), self.height()
         return [
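
update_draw_positions, added above, is a straightforward compare-and-skip cache: repaint only when the detected boxes actually changed. The same idea in isolation (RedrawGate is an illustrative class, not part of the commit):

    class RedrawGate:
        """Report True only when the box list differs from the last call."""
        def __init__(self):
            self.previous = None

        def should_redraw(self, positions):
            if positions == self.previous:
                return False
            self.previous = positions
            return True

    gate = RedrawGate()
    print(gate.should_redraw([(1, 2, 3, 4)]))  # True  -> repaint
    print(gate.should_redraw([(1, 2, 3, 4)]))  # False -> skip; no flicker
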
@@ -254,19 +306,23 @@ class OCRRegionWidget(QWidget):
                 new_h = h + (self.y() - event.globalY())
                 new_x = event.globalX()
                 new_y = event.globalY()
-                if new_w < 20: new_w = 20
-                if new_h < 20: new_h = 20
+                if new_w < 20:
+                    new_w = 20
+                if new_h < 20:
+                    new_h = 20
                 self.setGeometry(new_x, new_y, new_w, new_h)
             elif self.selected_handle == 1:  # Bottom-right
                 new_w = event.globalX() - self.x()
                 new_h = event.globalY() - self.y()
-                if new_w < 20: new_w = 20
-                if new_h < 20: new_h = 20
+                if new_w < 20:
+                    new_w = 20
+                if new_h < 20:
+                    new_h = 20
                 self.setGeometry(self.x(), self.y(), new_w, new_h)
 
             collector_mutex.lock()
             if self.region_id in regions:
-                regions[self.region_id]['bbox'] = [self.x(), self.y(), self.width(), self.height()]
+                regions[self.region_id]["bbox"] = [self.x(), self.y(), self.width(), self.height()]
             collector_mutex.unlock()
 
             self.update()
@@ -277,5 +333,7 @@ class OCRRegionWidget(QWidget):
 
             collector_mutex.lock()
             if self.region_id in regions:
-                regions[self.region_id]['bbox'] = [new_x, new_y, self.width(), self.height()]
+                regions[self.region_id]["bbox"] = [new_x, new_y, self.width(), self.height()]
             collector_mutex.unlock()
+
+
										
Binary file not shown.
@@ -1,9 +1,7 @@
 #!/usr/bin/env python3
 """
 Identification Overlay Node:
-- Creates an OCR region in data_collector with a blue overlay.
-- Detects instances of a specified word and draws adjustable overlays.
-- Users can configure offset, margin, polling frequency, overlay color, and thickness.
+- Users can configure threads/slices for parallel processing.
 """
 
 import re
@@ -31,6 +29,7 @@ class IdentificationOverlayNode(BaseNode):
         # Custom overlay options
         self.add_text_input("overlay_color", "Overlay Color (RGB)", text="0,0,255")  # Default blue
         self.add_text_input("thickness", "Line Thickness", text="2")  # Default 2px
+        self.add_text_input("threads_slices", "Threads / Slices", text="8")  # Default 8 threads/slices
 
         self.region_id = "identification_overlay"
         data_collector.create_ocr_region(self.region_id, x=250, y=50, w=300, h=200, color=(0, 0, 255), thickness=2)
@@ -46,62 +45,59 @@ class IdentificationOverlayNode(BaseNode):
         self.update_polling_frequency()
 
     def update_polling_frequency(self):
-        """
-        Reads the user-defined polling frequency and updates the timer interval.
-        """
         polling_text = self.get_property("polling_freq")
         try:
-            polling_interval = max(50, int(polling_text))  # Minimum 50ms for near real-time
+            polling_interval = max(50, int(polling_text))
         except ValueError:
-            polling_interval = 500  # Default to 500ms
+            polling_interval = 500
 
         self.timer.start(polling_interval)
 
     def update_overlay(self):
-        """
-        Updates the overlay with detected word positions.
-        """
         search_term = self.get_property("search_term")
         offset_text = self.get_property("offset_value")
         margin_text = self.get_property("margin")
         ocr_engine = self.get_property("ocr_engine")
+        threads_slices_text = self.get_property("threads_slices")
 
-        # Read and apply polling frequency updates
         self.update_polling_frequency()
 
-        # Parse user-defined offset
         try:
             offset_x, offset_y = map(int, offset_text.split(","))
         except ValueError:
-            offset_x, offset_y = 0, 0  # Default to no offset if invalid input
+            offset_x, offset_y = 0, 0
 
-        # Parse user-defined margin
         try:
             margin = int(margin_text)
         except ValueError:
-            margin = 5  # Default margin if invalid input
+            margin = 5
 
-        # Parse overlay color
         color_text = self.get_property("overlay_color")
         try:
-            color = tuple(map(int, color_text.split(",")))  # Convert "255,0,0" -> (255,0,0)
+            color = tuple(map(int, color_text.split(",")))
         except ValueError:
-            color = (0, 0, 255)  # Default to blue if invalid input
+            color = (0, 0, 255)
 
-        # Parse thickness
         thickness_text = self.get_property("thickness")
         try:
-            thickness = max(1, int(thickness_text))  # Ensure at least 1px thickness
+            thickness = max(1, int(thickness_text))
         except ValueError:
-            thickness = 2  # Default thickness
+            thickness = 2
 
+        try:
+            num_slices = max(1, int(threads_slices_text))  # Ensure at least 1 slice
+        except ValueError:
+            num_slices = 1
+
         if not search_term:
             return
 
-        # Get detected word positions using the selected OCR engine
         detected_positions = data_collector.find_word_positions(
-            self.region_id, search_term, offset_x, offset_y, margin, ocr_engine
+            self.region_id, search_term, offset_x, offset_y, margin, ocr_engine, num_slices
         )
 
-        # Draw detected word boxes with custom color & thickness
+        # Ensure slice count is updated visually in the region widget
+        data_collector.update_region_slices(self.region_id, num_slices)
+
         data_collector.draw_identification_boxes(self.region_id, detected_positions, color=color, thickness=thickness)
 
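
The node now repeats the same try/int/except-default dance for five properties. A hedged refactoring sketch of a shared helper (not part of the commit; parse_int is a hypothetical name):

    def parse_int(text, default, minimum=None):
        """Parse a text property as an int, falling back to a default."""
        try:
            value = int(text)
        except (TypeError, ValueError):
            return default
        return value if minimum is None else max(minimum, value)

    # Mirrors the parsing above (sample values):
    polling_interval = parse_int("50", default=500, minimum=50)   # -> 50
    num_slices = parse_int("8", default=1, minimum=1)             # -> 8
    thickness = parse_int("not a number", default=2, minimum=1)   # -> 2
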
Workflows/Testing/Identification_Overlay.json (new file, 57 lines)

@@ -0,0 +1,57 @@
+{
+  "graph":{
+    "layout_direction":0,
+    "acyclic":true,
+    "pipe_collision":false,
+    "pipe_slicing":true,
+    "pipe_style":1,
+    "accept_connection_types":{},
+    "reject_connection_types":{}
+  },
+  "nodes":{
+    "0x20c129abb30":{
+      "type_":"bunny-lab.io.identification_overlay_node.IdentificationOverlayNode",
+      "icon":null,
+      "name":"Identification Overlay",
+      "color":[
+        13,
+        18,
+        23,
+        255
+      ],
+      "border_color":[
+        74,
+        84,
+        85,
+        255
+      ],
+      "text_color":[
+        255,
+        255,
+        255,
+        180
+      ],
+      "disabled":false,
+      "selected":false,
+      "visible":true,
+      "width":271.0,
+      "height":330.40000000000003,
+      "pos":[
+        44.64929777820301,
+        256.49596595988965
+      ],
+      "layout_direction":0,
+      "port_deletion_allowed":false,
+      "subgraph_session":{},
+      "custom":{
+        "search_term":"Aibatt",
+        "offset_value":"-10,-10",
+        "margin":"10",
+        "polling_freq":"50",
+        "ocr_engine":"GPU",
+        "overlay_color":"255,255,255",
+        "thickness":"5"
+      }
+    }
+  }
+}
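
One detail worth noting in the saved workflow: the custom block stores every text-input property as a string, and this particular graph predates the new field, so it has no "threads_slices" key; loaders should fall back to 1 slice. A minimal sketch of reading the properties back (standard json module; the path comes from the commit):

    import json

    with open("Workflows/Testing/Identification_Overlay.json", encoding="utf-8") as f:
        workflow = json.load(f)

    for node in workflow["nodes"].values():
        props = node.get("custom", {})
        # Properties are stored as strings; "threads_slices" may be absent.
        num_slices = int(props.get("threads_slices", "1"))
        print(node["name"], props.get("search_term"), num_slices)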