Further optimized identification_overlay node GPU acceleration / processing.

Nicole Rappe 2025-02-26 02:08:00 -07:00
parent 0515f8feeb
commit ce392d7a04
2 changed files with 19 additions and 22 deletions


@@ -113,9 +113,9 @@ def _preprocess_image(image):
 def find_word_positions(region_id, word, offset_x=0, offset_y=0, margin=5, ocr_engine="CPU"):
     """
-    Finds positions of a specific word within the OCR region.
-    Applies user-defined offset and margin adjustments.
-    Uses Tesseract (CPU) or EasyOCR (GPU) depending on the selected engine.
+    Optimized function to detect word positions in an OCR region.
+    Uses raw screen data without preprocessing for max performance.
+    Uses Tesseract (CPU) or EasyOCR (GPU) depending on user selection.
     """
     collector_mutex.lock()
     if region_id not in regions:
@@ -134,45 +134,42 @@ def find_word_positions(region_id, word, offset_x=0, offset_y=0, margin=5, ocr_e
         return []
     try:
+        # Capture raw screen image (NO preprocessing)
         image = ImageGrab.grab(bbox=(left, top, right, bottom))
-        processed = _preprocess_image(image)
-        # Get original and processed image sizes
+        # Get original image size
         orig_width, orig_height = image.size
-        proc_width, proc_height = processed.size
-        # Scale factor between processed image and original screenshot
-        scale_x = orig_width / proc_width
-        scale_y = orig_height / proc_height
         word_positions = []
         if ocr_engine == "CPU":
-            # Use Tesseract (CPU)
-            data = pytesseract.image_to_data(processed, config='--psm 6 --oem 1', output_type=pytesseract.Output.DICT)
+            # Use Tesseract directly on raw PIL image (no preprocessing)
+            data = pytesseract.image_to_data(image, config='--psm 6 --oem 1', output_type=pytesseract.Output.DICT)
             for i in range(len(data['text'])):
                 if re.search(rf"\b{word}\b", data['text'][i], re.IGNORECASE):
-                    x_scaled = int(data['left'][i] * scale_x)
-                    y_scaled = int(data['top'][i] * scale_y)
-                    w_scaled = int(data['width'][i] * scale_x)
-                    h_scaled = int(data['height'][i] * scale_y)
+                    x_scaled = int(data['left'][i])
+                    y_scaled = int(data['top'][i])
+                    w_scaled = int(data['width'][i])
+                    h_scaled = int(data['height'][i])
                     word_positions.append((x_scaled + offset_x, y_scaled + offset_y, w_scaled + (margin * 2), h_scaled + (margin * 2)))
         else:
-            # Use EasyOCR (GPU) - Convert PIL image to NumPy array
-            image_np = np.array(processed)
+            # Convert PIL image to NumPy array for EasyOCR
+            image_np = np.array(image)
             # Run GPU OCR
            results = reader_gpu.readtext(image_np)
            for (bbox, text, _) in results:
                if re.search(rf"\b{word}\b", text, re.IGNORECASE):
                    (x_min, y_min), (x_max, y_max) = bbox[0], bbox[2]
-                    x_scaled = int(x_min * scale_x)
-                    y_scaled = int(y_min * scale_y)
-                    w_scaled = int((x_max - x_min) * scale_x)
-                    h_scaled = int((y_max - y_min) * scale_y)
+                    x_scaled = int(x_min)
+                    y_scaled = int(y_min)
+                    w_scaled = int(x_max - x_min)
+                    h_scaled = int(y_max - y_min)
                     word_positions.append((x_scaled + offset_x, y_scaled + offset_y, w_scaled + (margin * 2), h_scaled + (margin * 2)))
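
Taken together, the two hunks drop the _preprocess_image step and the scale_x / scale_y correction: OCR now runs on the unscaled screenshot, so both Tesseract and EasyOCR already report coordinates in screen space. The sketch below reassembles that post-commit fast path as a standalone function for illustration only; the imports, the easyocr.Reader setup, the region_bbox parameter, and the function name are assumptions standing in for the module's real collector_mutex / regions plumbing, which the hunks do not show.

# Minimal sketch of the post-commit code path (illustrative, under the assumptions above).
import re

import numpy as np
import pytesseract
import easyocr
from PIL import ImageGrab

reader_gpu = easyocr.Reader(['en'], gpu=True)  # assumed GPU reader initialization

def find_word_positions_sketch(region_bbox, word, offset_x=0, offset_y=0, margin=5, ocr_engine="CPU"):
    """Grab the raw screen region and return (x, y, w, h) boxes for `word`."""
    left, top, right, bottom = region_bbox
    # Capture the raw screen image; with no preprocessing there is no resized
    # intermediate image, so no scale correction is needed.
    image = ImageGrab.grab(bbox=(left, top, right, bottom))
    word_positions = []
    if ocr_engine == "CPU":
        # Tesseract runs directly on the PIL image; coordinates come back in screenshot space.
        data = pytesseract.image_to_data(image, config='--psm 6 --oem 1',
                                         output_type=pytesseract.Output.DICT)
        for i in range(len(data['text'])):
            if re.search(rf"\b{word}\b", data['text'][i], re.IGNORECASE):
                x, y = data['left'][i], data['top'][i]
                w, h = data['width'][i], data['height'][i]
                word_positions.append((x + offset_x, y + offset_y, w + margin * 2, h + margin * 2))
    else:
        # EasyOCR takes a NumPy array and yields (corner points, text, confidence) per detection.
        results = reader_gpu.readtext(np.array(image))
        for box_pts, text, _ in results:
            if re.search(rf"\b{word}\b", text, re.IGNORECASE):
                (x_min, y_min), (x_max, y_max) = box_pts[0], box_pts[2]
                word_positions.append((int(x_min) + offset_x, int(y_min) + offset_y,
                                       int(x_max - x_min) + margin * 2, int(y_max - y_min) + margin * 2))
    return word_positions

# Example call with a hypothetical region and search word:
# find_word_positions_sketch((0, 0, 800, 600), "Health", ocr_engine="GPU")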