Further optimized identification_overlay node GPU acceleration / processing.
parent 0515f8feeb
commit ce392d7a04
Binary file not shown.
@@ -113,9 +113,9 @@ def _preprocess_image(image):
 
 def find_word_positions(region_id, word, offset_x=0, offset_y=0, margin=5, ocr_engine="CPU"):
     """
-    Finds positions of a specific word within the OCR region.
-    Applies user-defined offset and margin adjustments.
-    Uses Tesseract (CPU) or EasyOCR (GPU) depending on the selected engine.
+    Optimized function to detect word positions in an OCR region.
+    Uses raw screen data without preprocessing for max performance.
+    Uses Tesseract (CPU) or EasyOCR (GPU) depending on user selection.
     """
     collector_mutex.lock()
     if region_id not in regions:
@@ -134,45 +134,42 @@ def find_word_positions(region_id, word, offset_x=0, offset_y=0, margin=5, ocr_e
         return []
 
     try:
+        # Capture raw screen image (NO preprocessing)
         image = ImageGrab.grab(bbox=(left, top, right, bottom))
-        processed = _preprocess_image(image)
 
-        # Get original and processed image sizes
+        # Get original image size
         orig_width, orig_height = image.size
-        proc_width, proc_height = processed.size
 
-        # Scale factor between processed image and original screenshot
-        scale_x = orig_width / proc_width
-        scale_y = orig_height / proc_height
 
        word_positions = []
 
        if ocr_engine == "CPU":
-            # Use Tesseract (CPU)
-            data = pytesseract.image_to_data(processed, config='--psm 6 --oem 1', output_type=pytesseract.Output.DICT)
+            # Use Tesseract directly on raw PIL image (no preprocessing)
+            data = pytesseract.image_to_data(image, config='--psm 6 --oem 1', output_type=pytesseract.Output.DICT)
 
            for i in range(len(data['text'])):
                if re.search(rf"\b{word}\b", data['text'][i], re.IGNORECASE):
-                    x_scaled = int(data['left'][i] * scale_x)
-                    y_scaled = int(data['top'][i] * scale_y)
-                    w_scaled = int(data['width'][i] * scale_x)
-                    h_scaled = int(data['height'][i] * scale_y)
+                    x_scaled = int(data['left'][i])
+                    y_scaled = int(data['top'][i])
+                    w_scaled = int(data['width'][i])
+                    h_scaled = int(data['height'][i])
 
                    word_positions.append((x_scaled + offset_x, y_scaled + offset_y, w_scaled + (margin * 2), h_scaled + (margin * 2)))
 
        else:
-            # Use EasyOCR (GPU) - Convert PIL image to NumPy array
-            image_np = np.array(processed)
+            # Convert PIL image to NumPy array for EasyOCR
+            image_np = np.array(image)
 
+            # Run GPU OCR
            results = reader_gpu.readtext(image_np)
 
            for (bbox, text, _) in results:
                if re.search(rf"\b{word}\b", text, re.IGNORECASE):
                    (x_min, y_min), (x_max, y_max) = bbox[0], bbox[2]
 
-                    x_scaled = int(x_min * scale_x)
-                    y_scaled = int(y_min * scale_y)
-                    w_scaled = int((x_max - x_min) * scale_x)
-                    h_scaled = int((y_max - y_min) * scale_y)
+                    x_scaled = int(x_min)
+                    y_scaled = int(y_min)
+                    w_scaled = int(x_max - x_min)
+                    h_scaled = int(y_max - y_min)
 
                    word_positions.append((x_scaled + offset_x, y_scaled + offset_y, w_scaled + (margin * 2), h_scaled + (margin * 2)))
 
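For context, a minimal usage sketch of the patched function, assuming an EasyOCR reader named reader_gpu (the diff references it, but its construction is outside this hunk) and an already-registered region id; the region name, word, and offsets below are illustrative, not part of this commit:

    import easyocr

    # Assumed module-level GPU reader referenced as reader_gpu in the patch;
    # easyocr falls back to CPU with a warning if no CUDA device is available.
    reader_gpu = easyocr.Reader(['en'], gpu=True)

    # Hypothetical call: find "Health" inside a registered OCR region,
    # shifting each hit by the offsets and padding it by the margin.
    positions = find_word_positions("overlay_region", "Health",
                                    offset_x=2, offset_y=2, margin=5,
                                    ocr_engine="GPU")
    for x, y, w, h in positions:
        print(f"match at ({x}, {y}) size {w}x{h}")

With the preprocessing and rescaling removed, both branches return coordinates directly in raw screenshot space, so only the user-defined offset and margin are applied to each hit.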