Further optimized identification_overlay node GPU acceleration / processing.

Nicole Rappe 2025-02-26 02:08:00 -07:00
parent 0515f8feeb
commit ce392d7a04
2 changed files with 19 additions and 22 deletions


@@ -113,9 +113,9 @@ def _preprocess_image(image):
 def find_word_positions(region_id, word, offset_x=0, offset_y=0, margin=5, ocr_engine="CPU"):
     """
-    Finds positions of a specific word within the OCR region.
-    Applies user-defined offset and margin adjustments.
-    Uses Tesseract (CPU) or EasyOCR (GPU) depending on the selected engine.
+    Optimized function to detect word positions in an OCR region.
+    Uses raw screen data without preprocessing for max performance.
+    Uses Tesseract (CPU) or EasyOCR (GPU) depending on user selection.
     """
     collector_mutex.lock()
     if region_id not in regions:
@@ -134,45 +134,42 @@ def find_word_positions(region_id, word, offset_x=0, offset_y=0, margin=5, ocr_e
         return []
     try:
+        # Capture raw screen image (NO preprocessing)
         image = ImageGrab.grab(bbox=(left, top, right, bottom))
-        processed = _preprocess_image(image)
-        # Get original and processed image sizes
+        # Get original image size
         orig_width, orig_height = image.size
-        proc_width, proc_height = processed.size
-        # Scale factor between processed image and original screenshot
-        scale_x = orig_width / proc_width
-        scale_y = orig_height / proc_height
         word_positions = []
         if ocr_engine == "CPU":
-            # Use Tesseract (CPU)
-            data = pytesseract.image_to_data(processed, config='--psm 6 --oem 1', output_type=pytesseract.Output.DICT)
+            # Use Tesseract directly on raw PIL image (no preprocessing)
+            data = pytesseract.image_to_data(image, config='--psm 6 --oem 1', output_type=pytesseract.Output.DICT)
             for i in range(len(data['text'])):
                 if re.search(rf"\b{word}\b", data['text'][i], re.IGNORECASE):
-                    x_scaled = int(data['left'][i] * scale_x)
-                    y_scaled = int(data['top'][i] * scale_y)
-                    w_scaled = int(data['width'][i] * scale_x)
-                    h_scaled = int(data['height'][i] * scale_y)
+                    x_scaled = int(data['left'][i])
+                    y_scaled = int(data['top'][i])
+                    w_scaled = int(data['width'][i])
+                    h_scaled = int(data['height'][i])
                     word_positions.append((x_scaled + offset_x, y_scaled + offset_y, w_scaled + (margin * 2), h_scaled + (margin * 2)))
         else:
-            # Use EasyOCR (GPU) - Convert PIL image to NumPy array
-            image_np = np.array(processed)
+            # Convert PIL image to NumPy array for EasyOCR
+            image_np = np.array(image)
             # Run GPU OCR
            results = reader_gpu.readtext(image_np)
            for (bbox, text, _) in results:
                if re.search(rf"\b{word}\b", text, re.IGNORECASE):
                    (x_min, y_min), (x_max, y_max) = bbox[0], bbox[2]
-                    x_scaled = int(x_min * scale_x)
-                    y_scaled = int(y_min * scale_y)
-                    w_scaled = int((x_max - x_min) * scale_x)
-                    h_scaled = int((y_max - y_min) * scale_y)
+                    x_scaled = int(x_min)
+                    y_scaled = int(y_min)
+                    w_scaled = int(x_max - x_min)
+                    h_scaled = int(y_max - y_min)
                     word_positions.append((x_scaled + offset_x, y_scaled + offset_y, w_scaled + (margin * 2), h_scaled + (margin * 2)))
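
Taken together, the two hunks drop the _preprocess_image step and the scale_x / scale_y correction: OCR now runs on the unscaled screenshot, so both Tesseract and EasyOCR already report coordinates in screen space. The sketch below reassembles that post-commit fast path as a standalone function for illustration only; the imports, the easyocr.Reader setup, the region_bbox parameter, and the function name are assumptions standing in for the module's real collector_mutex / regions plumbing, which the hunks do not show.

# Minimal sketch of the post-commit code path (illustrative, under the assumptions above).
import re

import numpy as np
import pytesseract
import easyocr
from PIL import ImageGrab

reader_gpu = easyocr.Reader(['en'], gpu=True)  # assumed GPU reader initialization

def find_word_positions_sketch(region_bbox, word, offset_x=0, offset_y=0, margin=5, ocr_engine="CPU"):
    """Grab the raw screen region and return (x, y, w, h) boxes for `word`."""
    left, top, right, bottom = region_bbox
    # Capture the raw screen image; with no preprocessing there is no resized
    # intermediate image, so no scale correction is needed.
    image = ImageGrab.grab(bbox=(left, top, right, bottom))
    word_positions = []
    if ocr_engine == "CPU":
        # Tesseract runs directly on the PIL image; coordinates come back in screenshot space.
        data = pytesseract.image_to_data(image, config='--psm 6 --oem 1',
                                         output_type=pytesseract.Output.DICT)
        for i in range(len(data['text'])):
            if re.search(rf"\b{word}\b", data['text'][i], re.IGNORECASE):
                x, y = data['left'][i], data['top'][i]
                w, h = data['width'][i], data['height'][i]
                word_positions.append((x + offset_x, y + offset_y, w + margin * 2, h + margin * 2))
    else:
        # EasyOCR takes a NumPy array and yields (corner points, text, confidence) per detection.
        results = reader_gpu.readtext(np.array(image))
        for box_pts, text, _ in results:
            if re.search(rf"\b{word}\b", text, re.IGNORECASE):
                (x_min, y_min), (x_max, y_max) = box_pts[0], box_pts[2]
                word_positions.append((int(x_min) + offset_x, int(y_min) + offset_y,
                                       int(x_max - x_min) + margin * 2, int(y_max - y_min) + margin * 2))
    return word_positions

# Example call with a hypothetical region and search word:
# find_word_positions_sketch((0, 0, 800, 600), "Health", ocr_engine="GPU")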