Added Identification Overlay Node
@@ -25,6 +25,7 @@ regions = {}
app_instance = None


def _ensure_qapplication():
    """
    Ensures that QApplication is initialized before creating widgets.
@@ -34,12 +35,14 @@ def _ensure_qapplication():
        app_instance = QApplication(sys.argv)
        threading.Thread(target=app_instance.exec_, daemon=True).start()


-def create_ocr_region(region_id, x=250, y=50, w=DEFAULT_WIDTH, h=DEFAULT_HEIGHT):
+def create_ocr_region(region_id, x=250, y=50, w=DEFAULT_WIDTH, h=DEFAULT_HEIGHT, color=(255, 255, 0)):
    """
    Creates an OCR region with a visible, resizable box on the screen.
    The color parameter allows customization (default yellow, blue for overlays).
    """
-    _ensure_qapplication()  # Ensure QApplication is running first
+    _ensure_qapplication()

    collector_mutex.lock()
    if region_id in regions:
@@ -48,10 +51,11 @@ def create_ocr_region(region_id, x=250, y=50, w=DEFAULT_WIDTH, h=DEFAULT_HEIGHT)
    regions[region_id] = {
        'bbox': [x, y, w, h],
        'raw_text': "",
-        'widget': OCRRegionWidget(x, y, w, h, region_id)
+        'widget': OCRRegionWidget(x, y, w, h, region_id, color)
    }
    collector_mutex.unlock()


def get_raw_text(region_id):
    collector_mutex.lock()
    if region_id not in regions:
@@ -61,10 +65,12 @@ def get_raw_text(region_id):
    collector_mutex.unlock()
    return text


def start_collector():
    t = threading.Thread(target=_update_ocr_loop, daemon=True)
    t.start()


def _update_ocr_loop():
    while True:
        collector_mutex.lock()
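For orientation, a minimal usage sketch of the collector API touched above; it is not part of the commit, and the module name "ocr_collector" and the region id are assumptions made for the example:

import time
import ocr_collector as ocr  # assumed import name for the file shown in this diff

ocr.create_ocr_region("chat_log", x=250, y=50, color=(0, 0, 255))  # blue region, using the new color parameter
ocr.start_collector()                # starts _update_ocr_loop on a daemon thread
time.sleep(2)                        # let the loop OCR the region a couple of times
print(ocr.get_raw_text("chat_log"))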
@@ -79,25 +85,102 @@ def _update_ocr_loop():
            x, y, w, h = bbox
            screenshot = ImageGrab.grab(bbox=(x, y, x + w, y + h))
            processed = _preprocess_image(screenshot)
-            raw_text = pytesseract.image_to_string(processed, config='--psm 4 --oem 1')
+            raw_text = pytesseract.image_to_string(processed, config='--psm 6 --oem 1')

            collector_mutex.lock()
            if rid in regions:
                regions[rid]['raw_text'] = raw_text
            collector_mutex.unlock()

            # print(f"OCR Text for {rid}: {raw_text}")  # SHOW RAW OCR OUTPUT IN TERMINAL FOR DEBUGGING

        time.sleep(0.7)


def _preprocess_image(image):
    gray = image.convert("L")
    scaled = gray.resize((gray.width * 3, gray.height * 3))
    thresh = scaled.point(lambda p: 255 if p > 200 else 0)
    return thresh.filter(ImageFilter.MedianFilter(3))
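The preprocessing above (grayscale, 3x upscale, hard threshold at 200, median filter) can be tuned offline against a saved capture. A small sketch, assuming a hypothetical sample_region.png and the module's own namespace:

from PIL import Image

sample = Image.open("sample_region.png")  # hypothetical saved screenshot of an OCR region
print(pytesseract.image_to_string(_preprocess_image(sample), config='--psm 6 --oem 1'))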

def find_word_positions(region_id, word, offset_x=0, offset_y=0, margin=5):
    """
    Finds positions of a specific word within the OCR region.
    Applies user-defined offset and margin adjustments.
    Returns a list of bounding box coordinates relative to the OCR box.
    """
    collector_mutex.lock()
    if region_id not in regions:
        collector_mutex.unlock()
        return []

    bbox = regions[region_id]['bbox']
    collector_mutex.unlock()

    # Extract OCR region position and size
    x, y, w, h = bbox
    left, top, right, bottom = x, y, x + w, y + h

    if right <= left or bottom <= top:
        print(f"[ERROR] Invalid OCR region bounds: {bbox}")
        return []

    try:
        image = ImageGrab.grab(bbox=(left, top, right, bottom))
        processed = _preprocess_image(image)

        # Get original and processed image sizes
        orig_width, orig_height = image.size
        proc_width, proc_height = processed.size

        # Scale factor between processed image and original screenshot
        scale_x = orig_width / proc_width
        scale_y = orig_height / proc_height

        data = pytesseract.image_to_data(processed, config='--psm 6 --oem 1', output_type=pytesseract.Output.DICT)

        word_positions = []
        for i in range(len(data['text'])):
            if re.search(rf"\b{word}\b", data['text'][i], re.IGNORECASE):
                # Scale the detected coordinates back to region-relative positions
                x_scaled = int(data['left'][i] * scale_x)
                y_scaled = int(data['top'][i] * scale_y)
                w_scaled = int(data['width'][i] * scale_x)
                h_scaled = int(data['height'][i] * scale_y)

                # Apply user-configured margin
                x_margin = max(0, x_scaled - margin)
                y_margin = max(0, y_scaled - margin)
                w_margin = w_scaled + (margin * 2)
                h_margin = h_scaled + (margin * 2)

                # Apply user-configured offset
                x_final = x_margin + offset_x
                y_final = y_margin + offset_y

                word_positions.append((x_final, y_final, w_margin, h_margin))

        return word_positions
    except Exception as e:
        print(f"[ERROR] Failed to capture OCR region: {e}")
        return []


def draw_identification_boxes(region_id, positions, color=(0, 0, 255)):
    """
    Draws non-interactive rectangles at specified positions within the given OCR region.
    """
    collector_mutex.lock()
    if region_id in regions and 'widget' in regions[region_id]:
        widget = regions[region_id]['widget']
        widget.set_draw_positions(positions, color)
    collector_mutex.unlock()


class OCRRegionWidget(QWidget):
-    def __init__(self, x, y, w, h, region_id):
+    def __init__(self, x, y, w, h, region_id, color):
        super().__init__()

        self.setGeometry(x, y, w, h)
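A sketch of how the two new helpers combine to highlight every match of a word inside a region; the region id and search word are illustrative, not from the commit:

positions = find_word_positions("chat_log", "error", margin=5)
if positions:
    draw_identification_boxes("chat_log", positions, color=(0, 0, 255))  # blue, non-interactive boxes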
@@ -108,23 +191,41 @@ class OCRRegionWidget(QWidget):
        self.drag_offset = None
        self.selected_handle = None
        self.region_id = region_id
        self.box_color = QColor(*color)
        self.draw_positions = []

        self.show()

    def paintEvent(self, event):
        painter = QPainter(self)
-        pen = QPen(QColor(255, 255, 0))  # COLOR OF THE BOX ITSELF
-        pen.setWidth(5)  # WIDTH OF THE BOX BORDER
+        pen = QPen(self.box_color)
+        pen.setWidth(5)
        painter.setPen(pen)

        # Draw main rectangle
        painter.drawRect(0, 0, self.width(), self.height())

        # Draw detected word overlays
        pen.setWidth(2)
        pen.setColor(QColor(0, 0, 255))
        painter.setPen(pen)

        for x, y, w, h in self.draw_positions:
            painter.drawRect(x, y, w, h)

        # Draw resize handles
-        painter.setBrush(QColor(255, 255, 0))  # COLOR OF THE RESIZE HANDLES
+        painter.setBrush(self.box_color)
        for handle in self._resize_handles():
            painter.drawRect(handle)

    def set_draw_positions(self, positions, color):
        """
        Update the positions where identification boxes should be drawn.
        """
        self.draw_positions = positions
        self.box_color = QColor(*color)
        self.update()

    def _resize_handles(self):
        w, h = self.width(), self.height()
        return [
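set_draw_positions is normally reached through draw_identification_boxes, but it shows the repaint contract directly: store the positions, swap box_color, then call update() so paintEvent redraws. A sketch, assuming a region named "chat_log" already exists (mutex locking omitted for brevity):

widget = regions["chat_log"]['widget']
widget.set_draw_positions([(10, 12, 80, 24)], (0, 0, 255))  # one blue box over the region
widget.set_draw_positions([], (255, 255, 0))                # clear overlays, restore the yellow border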
@@ -174,7 +275,3 @@ class OCRRegionWidget(QWidget):
        if self.region_id in regions:
            regions[self.region_id]['bbox'] = [new_x, new_y, self.width(), self.height()]
        collector_mutex.unlock()

    def mouseReleaseEvent(self, event):
        self.selected_handle = None
        self.drag_offset = None