diff --git a/Installation Requirements.txt b/Installation Requirements.txt
new file mode 100644
index 0000000..94ef1d6
--- /dev/null
+++ b/Installation Requirements.txt
@@ -0,0 +1,7 @@
+We need to run the following commands to install the prerequisites for the project.
+
+This command installs PyTorch, TorchVision, and Torchaudio built against CUDA 12.1 for GPU-accelerated vision tasks:
+pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
+
+This command installs EasyOCR, which provides the GPU OCR engine:
+pip install easyocr
diff --git a/Modules/__pycache__/data_collector.cpython-312.pyc b/Modules/__pycache__/data_collector.cpython-312.pyc
index f99d78d..107d094 100644
Binary files a/Modules/__pycache__/data_collector.cpython-312.pyc and b/Modules/__pycache__/data_collector.cpython-312.pyc differ
diff --git a/Modules/data_collector.py b/Modules/data_collector.py
index 5f9df55..0f598df 100644
--- a/Modules/data_collector.py
+++ b/Modules/data_collector.py
@@ -6,13 +6,22 @@
 import re
 import sys
 import numpy as np
 import cv2
+
+# Vision-related imports
 import pytesseract
+import easyocr
+import torch
+
 from PIL import Image, ImageGrab, ImageFilter
 from PyQt5.QtWidgets import QApplication, QWidget
 from PyQt5.QtCore import QRect, QPoint, Qt, QMutex, QTimer
 from PyQt5.QtGui import QPainter, QPen, QColor, QFont
 
+# Initialize EasyOCR readers; the GPU reader falls back to CPU when CUDA is unavailable
+reader_cpu = easyocr.Reader(['en'], gpu=False)
+reader_gpu = easyocr.Reader(['en'], gpu=torch.cuda.is_available())
+
 pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
 
 DEFAULT_WIDTH = 180
@@ -29,11 +38,11 @@ app_instance = None
 def _ensure_qapplication():
     """
     Ensures that QApplication is initialized before creating widgets.
+    Must be called from the main thread.
     """
     global app_instance
-    if QApplication.instance() is None:
-        app_instance = QApplication(sys.argv)
-        threading.Thread(target=app_instance.exec_, daemon=True).start()
+    if app_instance is None:
+        app_instance = QApplication(sys.argv)  # Start in the main thread
 
 
 def create_ocr_region(region_id, x=250, y=50, w=DEFAULT_WIDTH, h=DEFAULT_HEIGHT, color=(255, 255, 0)):
@@ -102,11 +111,12 @@
     return thresh.filter(ImageFilter.MedianFilter(3))
 
 
-def find_word_positions(region_id, word, offset_x=0, offset_y=0, margin=5):
+def find_word_positions(region_id, word, offset_x=0, offset_y=0, margin=5, ocr_engine="CPU"):
     """
     Finds positions of a specific word within the OCR region.
     Applies user-defined offset and margin adjustments.
-    Returns a list of bounding box coordinates relative to the OCR box.
+    Uses Tesseract (CPU) or EasyOCR (GPU) depending on the selected engine.
+    Returns a list of bounding box coordinates relative to the OCR box.
""" collector_mutex.lock() if region_id not in regions: @@ -136,28 +145,36 @@ def find_word_positions(region_id, word, offset_x=0, offset_y=0, margin=5): scale_x = orig_width / proc_width scale_y = orig_height / proc_height - data = pytesseract.image_to_data(processed, config='--psm 6 --oem 1', output_type=pytesseract.Output.DICT) - word_positions = [] - for i in range(len(data['text'])): - if re.search(rf"\b{word}\b", data['text'][i], re.IGNORECASE): - # Scale the detected coordinates back to region-relative positions - x_scaled = int(data['left'][i] * scale_x) - y_scaled = int(data['top'][i] * scale_y) - w_scaled = int(data['width'][i] * scale_x) - h_scaled = int(data['height'][i] * scale_y) - # Apply user-configured margin - x_margin = max(0, x_scaled - margin) - y_margin = max(0, y_scaled - margin) - w_margin = w_scaled + (margin * 2) - h_margin = h_scaled + (margin * 2) + if ocr_engine == "CPU": + # Use Tesseract (CPU) + data = pytesseract.image_to_data(processed, config='--psm 6 --oem 1', output_type=pytesseract.Output.DICT) - # Apply user-configured offset - x_final = x_margin + offset_x - y_final = y_margin + offset_y + for i in range(len(data['text'])): + if re.search(rf"\b{word}\b", data['text'][i], re.IGNORECASE): + x_scaled = int(data['left'][i] * scale_x) + y_scaled = int(data['top'][i] * scale_y) + w_scaled = int(data['width'][i] * scale_x) + h_scaled = int(data['height'][i] * scale_y) - word_positions.append((x_final, y_final, w_margin, h_margin)) + word_positions.append((x_scaled + offset_x, y_scaled + offset_y, w_scaled + (margin * 2), h_scaled + (margin * 2))) + + else: + # Use EasyOCR (GPU) - Convert PIL image to NumPy array + image_np = np.array(processed) + results = reader_gpu.readtext(image_np) + + for (bbox, text, _) in results: + if re.search(rf"\b{word}\b", text, re.IGNORECASE): + (x_min, y_min), (x_max, y_max) = bbox[0], bbox[2] + + x_scaled = int(x_min * scale_x) + y_scaled = int(y_min * scale_y) + w_scaled = int((x_max - x_min) * scale_x) + h_scaled = int((y_max - y_min) * scale_y) + + word_positions.append((x_scaled + offset_x, y_scaled + offset_y, w_scaled + (margin * 2), h_scaled + (margin * 2))) return word_positions except Exception as e: @@ -165,9 +182,6 @@ def find_word_positions(region_id, word, offset_x=0, offset_y=0, margin=5): return [] - - - def draw_identification_boxes(region_id, positions, color=(0, 0, 255)): """ Draws non-interactive rectangles at specified positions within the given OCR region. diff --git a/Nodes/General Purpose/__pycache__/identification_overlay.cpython-312.pyc b/Nodes/General Purpose/__pycache__/identification_overlay.cpython-312.pyc index 8fbc8dd..681be49 100644 Binary files a/Nodes/General Purpose/__pycache__/identification_overlay.cpython-312.pyc and b/Nodes/General Purpose/__pycache__/identification_overlay.cpython-312.pyc differ diff --git a/Nodes/General Purpose/identification_overlay.py b/Nodes/General Purpose/identification_overlay.py index 8bccf67..1f70b00 100644 --- a/Nodes/General Purpose/identification_overlay.py +++ b/Nodes/General Purpose/identification_overlay.py @@ -3,14 +3,13 @@ Identification Overlay Node: - Creates an OCR region in data_collector with a blue overlay. - Detects instances of a specified word and draws adjustable overlays. -- Users can configure offset, margin, and polling frequency dynamically. +- Users can configure offset, margin, polling frequency, and select OCR engine. 
""" import re from OdenGraphQt import BaseNode -from PyQt5.QtWidgets import QMessageBox from PyQt5.QtCore import QTimer -from Modules import data_manager, data_collector +from Modules import data_collector class IdentificationOverlayNode(BaseNode): @@ -24,7 +23,9 @@ class IdentificationOverlayNode(BaseNode): self.add_text_input("search_term", "Search Term", text="Aibatt") self.add_text_input("offset_value", "Offset Value (X,Y)", text="0,0") # X,Y Offset self.add_text_input("margin", "Margin", text="5") # Box Margin - self.add_text_input("polling_freq", "Polling Frequency (ms)", text="500") # New input + self.add_text_input("polling_freq", "Polling Frequency (ms)", text="500") # Polling Rate + self.add_combo_menu("ocr_engine", "Type", items=["CPU", "GPU"]) + self.set_property("ocr_engine", "CPU") # Set default value after adding the menu self.region_id = "identification_overlay" data_collector.create_ocr_region(self.region_id, x=250, y=50, w=300, h=200, color=(0, 0, 255)) @@ -45,7 +46,7 @@ class IdentificationOverlayNode(BaseNode): """ polling_text = self.get_property("polling_freq") try: - polling_interval = max(100, int(polling_text)) # Minimum 100ms to avoid overloading + polling_interval = max(50, int(polling_text)) # Minimum 50ms for near real-time except ValueError: polling_interval = 500 # Default to 500ms @@ -58,6 +59,7 @@ class IdentificationOverlayNode(BaseNode): search_term = self.get_property("search_term") offset_text = self.get_property("offset_value") margin_text = self.get_property("margin") + ocr_engine = self.get_property("ocr_engine") # Read and apply polling frequency updates self.update_polling_frequency() @@ -77,8 +79,10 @@ class IdentificationOverlayNode(BaseNode): if not search_term: return - # Get detected word positions - detected_positions = data_collector.find_word_positions(self.region_id, search_term, offset_x, offset_y, margin) + # Get detected word positions using the selected OCR engine + detected_positions = data_collector.find_word_positions( + self.region_id, search_term, offset_x, offset_y, margin, ocr_engine + ) # Draw detected word boxes data_collector.draw_identification_boxes(self.region_id, detected_positions, color=(0, 0, 255))