Further optimized GPU acceleration and processing in the identification_overlay node.

This commit is contained in:
Nicole Rappe 2025-02-26 02:08:00 -07:00
parent 0515f8feeb
commit ce392d7a04
2 changed files with 19 additions and 22 deletions

View File

@@ -113,9 +113,9 @@ def _preprocess_image(image):
def find_word_positions(region_id, word, offset_x=0, offset_y=0, margin=5, ocr_engine="CPU"): def find_word_positions(region_id, word, offset_x=0, offset_y=0, margin=5, ocr_engine="CPU"):
""" """
Finds positions of a specific word within the OCR region. Optimized function to detect word positions in an OCR region.
Applies user-defined offset and margin adjustments. Uses raw screen data without preprocessing for max performance.
Uses Tesseract (CPU) or EasyOCR (GPU) depending on the selected engine. Uses Tesseract (CPU) or EasyOCR (GPU) depending on user selection.
""" """
collector_mutex.lock() collector_mutex.lock()
if region_id not in regions: if region_id not in regions:
@@ -134,45 +134,42 @@ def find_word_positions(region_id, word, offset_x=0, offset_y=0, margin=5, ocr_e
return [] return []
try: try:
# Capture raw screen image (NO preprocessing)
image = ImageGrab.grab(bbox=(left, top, right, bottom)) image = ImageGrab.grab(bbox=(left, top, right, bottom))
processed = _preprocess_image(image)
# Get original and processed image sizes # Get original image size
orig_width, orig_height = image.size orig_width, orig_height = image.size
proc_width, proc_height = processed.size
# Scale factor between processed image and original screenshot
scale_x = orig_width / proc_width
scale_y = orig_height / proc_height
word_positions = [] word_positions = []
if ocr_engine == "CPU": if ocr_engine == "CPU":
# Use Tesseract (CPU) # Use Tesseract directly on raw PIL image (no preprocessing)
data = pytesseract.image_to_data(processed, config='--psm 6 --oem 1', output_type=pytesseract.Output.DICT) data = pytesseract.image_to_data(image, config='--psm 6 --oem 1', output_type=pytesseract.Output.DICT)
for i in range(len(data['text'])): for i in range(len(data['text'])):
if re.search(rf"\b{word}\b", data['text'][i], re.IGNORECASE): if re.search(rf"\b{word}\b", data['text'][i], re.IGNORECASE):
x_scaled = int(data['left'][i] * scale_x) x_scaled = int(data['left'][i])
y_scaled = int(data['top'][i] * scale_y) y_scaled = int(data['top'][i])
w_scaled = int(data['width'][i] * scale_x) w_scaled = int(data['width'][i])
h_scaled = int(data['height'][i] * scale_y) h_scaled = int(data['height'][i])
word_positions.append((x_scaled + offset_x, y_scaled + offset_y, w_scaled + (margin * 2), h_scaled + (margin * 2))) word_positions.append((x_scaled + offset_x, y_scaled + offset_y, w_scaled + (margin * 2), h_scaled + (margin * 2)))
else: else:
# Use EasyOCR (GPU) - Convert PIL image to NumPy array # Convert PIL image to NumPy array for EasyOCR
image_np = np.array(processed) image_np = np.array(image)
# Run GPU OCR
results = reader_gpu.readtext(image_np) results = reader_gpu.readtext(image_np)
for (bbox, text, _) in results: for (bbox, text, _) in results:
if re.search(rf"\b{word}\b", text, re.IGNORECASE): if re.search(rf"\b{word}\b", text, re.IGNORECASE):
(x_min, y_min), (x_max, y_max) = bbox[0], bbox[2] (x_min, y_min), (x_max, y_max) = bbox[0], bbox[2]
x_scaled = int(x_min * scale_x) x_scaled = int(x_min)
y_scaled = int(y_min * scale_y) y_scaled = int(y_min)
w_scaled = int((x_max - x_min) * scale_x) w_scaled = int(x_max - x_min)
h_scaled = int((y_max - y_min) * scale_y) h_scaled = int(y_max - y_min)
word_positions.append((x_scaled + offset_x, y_scaled + offset_y, w_scaled + (margin * 2), h_scaled + (margin * 2))) word_positions.append((x_scaled + offset_x, y_scaled + offset_y, w_scaled + (margin * 2), h_scaled + (margin * 2)))