diff --git a/Modules/__pycache__/data_collector.cpython-312.pyc b/Modules/__pycache__/data_collector.cpython-312.pyc index 107d094..2341494 100644 Binary files a/Modules/__pycache__/data_collector.cpython-312.pyc and b/Modules/__pycache__/data_collector.cpython-312.pyc differ diff --git a/Modules/data_collector.py b/Modules/data_collector.py index 0f598df..82d9760 100644 --- a/Modules/data_collector.py +++ b/Modules/data_collector.py @@ -113,9 +113,9 @@ def _preprocess_image(image): def find_word_positions(region_id, word, offset_x=0, offset_y=0, margin=5, ocr_engine="CPU"): """ - Finds positions of a specific word within the OCR region. - Applies user-defined offset and margin adjustments. - Uses Tesseract (CPU) or EasyOCR (GPU) depending on the selected engine. + Optimized function to detect word positions in an OCR region. + Uses raw screen data without preprocessing for max performance. + Uses Tesseract (CPU) or EasyOCR (GPU) depending on user selection. """ collector_mutex.lock() if region_id not in regions: @@ -134,45 +134,42 @@ def find_word_positions(region_id, word, offset_x=0, offset_y=0, margin=5, ocr_e return [] try: + # Capture raw screen image (NO preprocessing) image = ImageGrab.grab(bbox=(left, top, right, bottom)) - processed = _preprocess_image(image) - # Get original and processed image sizes + # Get original image size orig_width, orig_height = image.size - proc_width, proc_height = processed.size - - # Scale factor between processed image and original screenshot - scale_x = orig_width / proc_width - scale_y = orig_height / proc_height word_positions = [] if ocr_engine == "CPU": - # Use Tesseract (CPU) - data = pytesseract.image_to_data(processed, config='--psm 6 --oem 1', output_type=pytesseract.Output.DICT) + # Use Tesseract directly on raw PIL image (no preprocessing) + data = pytesseract.image_to_data(image, config='--psm 6 --oem 1', output_type=pytesseract.Output.DICT) for i in range(len(data['text'])): if re.search(rf"\b{word}\b", data['text'][i], re.IGNORECASE): - x_scaled = int(data['left'][i] * scale_x) - y_scaled = int(data['top'][i] * scale_y) - w_scaled = int(data['width'][i] * scale_x) - h_scaled = int(data['height'][i] * scale_y) + x_scaled = int(data['left'][i]) + y_scaled = int(data['top'][i]) + w_scaled = int(data['width'][i]) + h_scaled = int(data['height'][i]) word_positions.append((x_scaled + offset_x, y_scaled + offset_y, w_scaled + (margin * 2), h_scaled + (margin * 2))) else: - # Use EasyOCR (GPU) - Convert PIL image to NumPy array - image_np = np.array(processed) + # Convert PIL image to NumPy array for EasyOCR + image_np = np.array(image) + + # Run GPU OCR results = reader_gpu.readtext(image_np) for (bbox, text, _) in results: if re.search(rf"\b{word}\b", text, re.IGNORECASE): (x_min, y_min), (x_max, y_max) = bbox[0], bbox[2] - x_scaled = int(x_min * scale_x) - y_scaled = int(y_min * scale_y) - w_scaled = int((x_max - x_min) * scale_x) - h_scaled = int((y_max - y_min) * scale_y) + x_scaled = int(x_min) + y_scaled = int(y_min) + w_scaled = int(x_max - x_min) + h_scaled = int(y_max - y_min) word_positions.append((x_scaled + offset_x, y_scaled + offset_y, w_scaled + (margin * 2), h_scaled + (margin * 2)))