From 943967be60050530c78a6312b8a002f03ef92a15 Mon Sep 17 00:00:00 2001 From: Nicole Rappe Date: Fri, 7 Feb 2025 00:25:55 -0700 Subject: [PATCH] Added Additional OCR Engine --- borealis_overlay.py | 257 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 220 insertions(+), 37 deletions(-) diff --git a/borealis_overlay.py b/borealis_overlay.py index 3e9c684..ae8f7e5 100644 --- a/borealis_overlay.py +++ b/borealis_overlay.py @@ -1,21 +1,32 @@ import sys -from PyQt5.QtWidgets import QApplication, QWidget, QLabel, QCheckBox, QPushButton -from PyQt5.QtCore import Qt, QRect, QPoint +import pytesseract +import easyocr +import numpy as np +from PyQt5.QtWidgets import QApplication, QWidget, QLabel, QCheckBox, QTextEdit, QComboBox, QVBoxLayout +from PyQt5.QtCore import Qt, QRect, QPoint, QTimer from PyQt5.QtGui import QPainter, QPen, QColor, QFont -from ctypes import windll +from PIL import Image, ImageGrab, ImageEnhance, ImageFilter # For screen capture and image processing HANDLE_SIZE = 10 # Size of the resize handle squares LABEL_HEIGHT = 20 # Height of the label area above the rectangle +DEFAULT_WIDTH = 150 # Default width for the regions +DEFAULT_HEIGHT = 50 # Default height for the regions +DEFAULT_SPACING = 20 # Default horizontal spacing between regions + +# Set the path for tesseract manually +pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe" class Region: - def __init__(self, x, y, w, h, label="Region", color=QColor(0, 0, 255)): + def __init__(self, x, y, label="Region", color=QColor(0, 0, 255)): self.x = x self.y = y - self.w = w - self.h = h + self.w = DEFAULT_WIDTH + self.h = DEFAULT_HEIGHT self.label = label self.color = color + self.visible = True # Track the visibility of the region + self.data = "" # Store OCR data for this region def rect(self): return QRect(self.x, self.y, self.w, self.h) @@ -51,21 +62,22 @@ class OverlayCanvas(QWidget): painter.setRenderHint(QPainter.Antialiasing) for region in self.regions: - # Draw the rectangle - pen = QPen(region.color) - pen.setWidth(3) - painter.setPen(pen) - painter.drawRect(region.x, region.y, region.w, region.h) + if region.visible: # Only draw visible regions + # Draw the rectangle + pen = QPen(region.color) + pen.setWidth(3) + painter.setPen(pen) + painter.drawRect(region.x, region.y, region.w, region.h) - # Draw the label above the rectangle - painter.setFont(QFont("Arial", 12, QFont.Bold)) - painter.setPen(region.color) - painter.drawText(region.x + 5, region.y - 5, region.label) + # Draw the label above the rectangle, aligned with the left edge of the region + painter.setFont(QFont("Arial", 12, QFont.Bold)) + painter.setPen(region.color) + painter.drawText(region.x, region.y - 5, region.label) # Aligned to the left of the region - # Draw resize handles if in edit mode - if self.edit_mode: - for handle in region.resize_handles(): - painter.fillRect(handle, region.color) + # Draw resize handles if in edit mode + if self.edit_mode: + for handle in region.resize_handles(): + painter.fillRect(handle, region.color) def mousePressEvent(self, event): if not self.edit_mode: @@ -146,49 +158,220 @@ class BorealisOverlay(QWidget): self.setWindowFlags(Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint) self.setAttribute(Qt.WA_TranslucentBackground, True) # Transparent background - # Use Win32 API to keep the window on top - hwnd = self.winId().__int__() - windll.user32.SetWindowPos(hwnd, -1, 0, 0, 0, 0, 0x0001 | 0x0002) - - # Create regions to draw and interact with + # Create regions to draw and interact with, all regions have the same size and are moved higher up self.regions = [ - Region(200, 200, 150, 50, label="Region 01"), - Region(400, 300, 200, 80, label="Region 02"), + Region(250, 50, label="Experience"), # Moved slightly to the right + Region(450, 50, label="Region 02"), # Moved slightly to the right + Region(650, 50, label="Region 03") # Moved slightly to the right ] # Create canvas and attach to window self.canvas = OverlayCanvas(self.regions, self) self.canvas.setGeometry(self.rect()) # Match the canvas size to the full window - # Add title, Edit Mode UI, and close button + # Add title, Edit Mode UI, and buttons (Overlay Visibility checkbox) self.init_ui() + # Timer for polling OCR data + self.timer = QTimer(self) + self.timer.timeout.connect(self.collect_ocr_data) + self.timer.start(1000) # Poll every second (default) + + self.reader = None # Default OCR reader (None until an engine is selected) + def init_ui(self): """Initialize UI components.""" # Title label self.title_label = QLabel("Borealis Overlay", self) - self.title_label.setStyleSheet("QLabel { color: white; font-size: 18px; font-weight: bold; }") + self.title_label.setStyleSheet("QLabel { color: white; font-size: 20px; font-weight: bold; }") # Adjusted title size self.title_label.move(10, 5) + # OCR Engine label and selection dropdown + self.engine_label = QLabel("OCR Engine:", self) + self.engine_label.setStyleSheet("QLabel { color: white; font-size: 14px; }") + self.engine_label.move(10, 60) # Moved OCR Engine label slightly higher + + self.engine_dropdown = QComboBox(self) + self.engine_dropdown.setStyleSheet(""" + QComboBox { + color: white; + background-color: #2b2b2b; + border: 1px solid #3c3f41; + font-size: 14px; + } + QComboBox QAbstractItemView { + color: white; + background-color: #2b2b2b; + } + """) + self.engine_dropdown.addItem("Select OCR Engine") # Placeholder option + self.engine_dropdown.addItem("Tesseract") # Only Tesseract for now + self.engine_dropdown.addItem("EasyOCR") # Adding EasyOCR option + self.engine_dropdown.move(100, 90) # Dropdown moved slightly up and aligned with label + self.engine_dropdown.currentIndexChanged.connect(self.on_engine_selected) + + # Polling rate dropdown + self.polling_rate_label = QLabel("Polling Rate:", self) + self.polling_rate_label.setStyleSheet("QLabel { color: white; font-size: 14px; }") + self.polling_rate_label.move(10, 120) + + self.polling_rate_dropdown = QComboBox(self) + self.polling_rate_dropdown.setStyleSheet(""" + QComboBox { + color: white; + background-color: #2b2b2b; + border: 1px solid #3c3f41; + font-size: 14px; + } + QComboBox QAbstractItemView { + color: white; + background-color: #2b2b2b; + } + """) + self.polling_rate_dropdown.addItem("0.1 Seconds") + self.polling_rate_dropdown.addItem("0.5 Seconds") + self.polling_rate_dropdown.addItem("1 Second") + self.polling_rate_dropdown.addItem("2 Seconds") + self.polling_rate_dropdown.addItem("5 Seconds") + self.polling_rate_dropdown.move(100, 150) # Dropdown moved slightly up + self.polling_rate_dropdown.currentIndexChanged.connect(self.on_polling_rate_selected) + + # Options label + self.options_label = QLabel("Options", self) + self.options_label.setStyleSheet("QLabel { color: white; font-size: 16px; font-weight: bold; }") + self.options_label.move(10, 180) # Positioned above checkboxes + # Edit mode checkbox self.mode_toggle = QCheckBox("Edit Mode", self) self.mode_toggle.setStyleSheet("QCheckBox { color: white; }") - self.mode_toggle.move(10, 30) + self.mode_toggle.move(10, 210) self.mode_toggle.stateChanged.connect(self.toggle_edit_mode) - # Close button - self.close_button = QPushButton("Close", self) - self.close_button.setStyleSheet( - "QPushButton { background-color: #40E0D0; color: white; font-weight: bold; border-radius: 5px; }" - ) # Turquoise color - self.close_button.move(10, 60) # Place it below the Edit Mode checkbox - self.close_button.clicked.connect(self.close) + # Overlay Visibility checkbox + self.visibility_checkbox = QCheckBox("Overlay Visibility", self) + self.visibility_checkbox.setStyleSheet("QCheckBox { color: white; }") + self.visibility_checkbox.move(10, 240) # Positioned below Edit Mode + self.visibility_checkbox.setChecked(True) # Default to visible + self.visibility_checkbox.stateChanged.connect(self.toggle_overlay_visibility) + + # Collect Data checkbox for OCR functionality (disabled initially) + self.collect_data_checkbox = QCheckBox("Collect Data", self) + self.collect_data_checkbox.setStyleSheet("QCheckBox { color: white; }") + self.collect_data_checkbox.move(10, 270) # Positioned below Overlay Visibility + self.collect_data_checkbox.setEnabled(False) # Initially disabled + self.collect_data_checkbox.stateChanged.connect(self.toggle_ocr) + + # Data Collection Output label + self.output_label = QLabel("Data Collection Output:", self) + self.output_label.setStyleSheet("QLabel { color: white; font-size: 14px; font-weight: bold; }") + self.output_label.move(10, 330) # Moved down by 50px + + # Text area for OCR data display (with transparent background) + self.ocr_display = QTextEdit(self) + self.ocr_display.setStyleSheet("QTextEdit { color: white; background-color: transparent; font-size: 14px; }") + self.ocr_display.setReadOnly(True) + self.ocr_display.setGeometry(10, 360, 300, 400) + + def on_engine_selected(self): + """Enable the Collect Data checkbox when an OCR engine is selected.""" + selected_engine = self.engine_dropdown.currentText() + if selected_engine == "Tesseract": + self.reader = pytesseract + elif selected_engine == "EasyOCR": + self.reader = easyocr.Reader(['en']) # Initialize EasyOCR reader for English language + elif selected_engine == "Select OCR Engine": + self.reader = None + + if self.reader: + self.collect_data_checkbox.setEnabled(True) + else: + self.collect_data_checkbox.setEnabled(False) + + def on_polling_rate_selected(self): + """Update the polling rate based on dropdown selection.""" + polling_rate = self.polling_rate_dropdown.currentText() + if polling_rate == "0.1 Seconds": + self.timer.setInterval(100) + elif polling_rate == "0.5 Seconds": + self.timer.setInterval(500) + elif polling_rate == "1 Second": + self.timer.setInterval(1000) + elif polling_rate == "2 Seconds": + self.timer.setInterval(2000) + elif polling_rate == "5 Seconds": + self.timer.setInterval(5000) def toggle_edit_mode(self, state): """Enable or disable edit mode for dragging and resizing rectangles.""" editing = (state == 2) self.canvas.edit_mode = editing # Pass the state to the canvas - print(f"Borealis Overlay Edit Mode: {'ON' if editing else 'OFF'}") # Debugging output + + def toggle_overlay_visibility(self): + """Toggle the visibility of the regions.""" + visible = self.visibility_checkbox.isChecked() + for region in self.regions: + region.visible = visible # Toggle visibility based on checkbox + self.update() # Trigger a repaint + + def toggle_ocr(self, state): + """Enable or disable OCR collection.""" + if state == Qt.Checked: + self.collect_ocr_data() + else: + self.clear_ocr_data() + + def collect_ocr_data(self): + """Collect OCR data from each visible region.""" + if self.collect_data_checkbox.isChecked() and self.reader: # Only collect data if checkbox is checked + for region in self.regions: + if region.visible: + # Capture the image of the region + screenshot = ImageGrab.grab(bbox=(region.x, region.y, region.x + region.w, region.y + region.h)) + + # Preprocess the image (Convert to grayscale and apply threshold) + processed_image = self.preprocess_image(screenshot) + + # Convert the processed image to a numpy array for EasyOCR + numpy_image = np.array(processed_image) + + # Perform OCR on the preprocessed numpy image + if self.reader == pytesseract: + text = pytesseract.image_to_string(processed_image, config='--psm 6 --oem 1') + else: + # Get text from EasyOCR + results = self.reader.readtext(numpy_image) + + if results: + text = results[0][1] # If OCR detects text, use it + else: + text = "No text detected" # Fallback if no text is detected + + region.data = text.strip() + + self.display_ocr_data() + + def preprocess_image(self, image): + """Preprocess the image to enhance text recognition.""" + # Convert image to grayscale + gray_image = image.convert("L") + + # Apply threshold to make the image binary (black and white) + threshold_image = gray_image.point(lambda p: p > 200 and 255) + + # Apply noise reduction (filter) + processed_image = threshold_image.filter(ImageFilter.MedianFilter(3)) + return processed_image + + def clear_ocr_data(self): + """Clear the displayed OCR data.""" + self.ocr_display.clear() + + def display_ocr_data(self): + """Display OCR data in the text area on the left-hand side.""" + ocr_text = "" + for region in self.regions: + ocr_text += f"{region.label} Output:\n{region.data}\n\n" # Updated headers with "Output" + self.ocr_display.setText(ocr_text) def main():