Added Additional OCR Engine

This commit is contained in:
Nicole Rappe 2025-02-07 00:25:55 -07:00
parent 6625d29a0e
commit 943967be60

View File

@ -1,21 +1,32 @@
import sys import sys
from PyQt5.QtWidgets import QApplication, QWidget, QLabel, QCheckBox, QPushButton import pytesseract
from PyQt5.QtCore import Qt, QRect, QPoint import easyocr
import numpy as np
from PyQt5.QtWidgets import QApplication, QWidget, QLabel, QCheckBox, QTextEdit, QComboBox, QVBoxLayout
from PyQt5.QtCore import Qt, QRect, QPoint, QTimer
from PyQt5.QtGui import QPainter, QPen, QColor, QFont from PyQt5.QtGui import QPainter, QPen, QColor, QFont
from ctypes import windll from PIL import Image, ImageGrab, ImageEnhance, ImageFilter # For screen capture and image processing
HANDLE_SIZE = 10 # Size of the resize handle squares HANDLE_SIZE = 10 # Size of the resize handle squares
LABEL_HEIGHT = 20 # Height of the label area above the rectangle LABEL_HEIGHT = 20 # Height of the label area above the rectangle
# Size given to every Region when it is constructed (Region.__init__ copies these into w/h).
DEFAULT_WIDTH = 150 # Default width for the regions
DEFAULT_HEIGHT = 50 # Default height for the regions
# NOTE(review): DEFAULT_SPACING is not referenced in the visible part of this file - confirm it is used.
DEFAULT_SPACING = 20 # Default horizontal spacing between regions
# Set the path for tesseract manually
# NOTE(review): hard-coded default Windows install location; OCR via pytesseract will fail
# on machines where tesseract.exe lives elsewhere - confirm this is acceptable.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
class Region: class Region:
def __init__(self, x, y, w, h, label="Region", color=QColor(0, 0, 255)): def __init__(self, x, y, label="Region", color=QColor(0, 0, 255)):
self.x = x self.x = x
self.y = y self.y = y
self.w = w self.w = DEFAULT_WIDTH
self.h = h self.h = DEFAULT_HEIGHT
self.label = label self.label = label
self.color = color self.color = color
self.visible = True # Track the visibility of the region
self.data = "" # Store OCR data for this region
def rect(self): def rect(self):
return QRect(self.x, self.y, self.w, self.h) return QRect(self.x, self.y, self.w, self.h)
@ -51,21 +62,22 @@ class OverlayCanvas(QWidget):
painter.setRenderHint(QPainter.Antialiasing) painter.setRenderHint(QPainter.Antialiasing)
for region in self.regions: for region in self.regions:
# Draw the rectangle if region.visible: # Only draw visible regions
pen = QPen(region.color) # Draw the rectangle
pen.setWidth(3) pen = QPen(region.color)
painter.setPen(pen) pen.setWidth(3)
painter.drawRect(region.x, region.y, region.w, region.h) painter.setPen(pen)
painter.drawRect(region.x, region.y, region.w, region.h)
# Draw the label above the rectangle # Draw the label above the rectangle, aligned with the left edge of the region
painter.setFont(QFont("Arial", 12, QFont.Bold)) painter.setFont(QFont("Arial", 12, QFont.Bold))
painter.setPen(region.color) painter.setPen(region.color)
painter.drawText(region.x + 5, region.y - 5, region.label) painter.drawText(region.x, region.y - 5, region.label) # Aligned to the left of the region
# Draw resize handles if in edit mode # Draw resize handles if in edit mode
if self.edit_mode: if self.edit_mode:
for handle in region.resize_handles(): for handle in region.resize_handles():
painter.fillRect(handle, region.color) painter.fillRect(handle, region.color)
def mousePressEvent(self, event): def mousePressEvent(self, event):
if not self.edit_mode: if not self.edit_mode:
@ -146,49 +158,220 @@ class BorealisOverlay(QWidget):
self.setWindowFlags(Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint) self.setWindowFlags(Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint)
self.setAttribute(Qt.WA_TranslucentBackground, True) # Transparent background self.setAttribute(Qt.WA_TranslucentBackground, True) # Transparent background
# Use Win32 API to keep the window on top # Create regions to draw and interact with, all regions have the same size and are moved higher up
hwnd = self.winId().__int__()
windll.user32.SetWindowPos(hwnd, -1, 0, 0, 0, 0, 0x0001 | 0x0002)
# Create regions to draw and interact with
self.regions = [ self.regions = [
Region(200, 200, 150, 50, label="Region 01"), Region(250, 50, label="Experience"), # Moved slightly to the right
Region(400, 300, 200, 80, label="Region 02"), Region(450, 50, label="Region 02"), # Moved slightly to the right
Region(650, 50, label="Region 03") # Moved slightly to the right
] ]
# Create canvas and attach to window # Create canvas and attach to window
self.canvas = OverlayCanvas(self.regions, self) self.canvas = OverlayCanvas(self.regions, self)
self.canvas.setGeometry(self.rect()) # Match the canvas size to the full window self.canvas.setGeometry(self.rect()) # Match the canvas size to the full window
# Add title, Edit Mode UI, and close button # Add title, Edit Mode UI, and buttons (Overlay Visibility checkbox)
self.init_ui() self.init_ui()
# Timer for polling OCR data
self.timer = QTimer(self)
self.timer.timeout.connect(self.collect_ocr_data)
self.timer.start(1000) # Poll every second (default)
self.reader = None # Default OCR reader (None until an engine is selected)
def init_ui(self):
    """Create and position the overlay's control widgets.

    Builds, in order: the title, the OCR engine selector, the polling-rate
    selector, the "Options" checkboxes (Edit Mode, Overlay Visibility,
    Collect Data) and the read-only OCR output pane. Every widget is a
    direct child of this window and is placed with absolute coordinates.
    """
    label_style = "QLabel { color: white; font-size: 14px; }"
    checkbox_style = "QCheckBox { color: white; }"
    # Both dropdowns share one dark theme, so the stylesheet is defined once.
    combo_style = """
        QComboBox {
            color: white;
            background-color: #2b2b2b;
            border: 1px solid #3c3f41;
            font-size: 14px;
        }
        QComboBox QAbstractItemView {
            color: white;
            background-color: #2b2b2b;
        }
    """

    # Title label
    self.title_label = QLabel("Borealis Overlay", self)
    self.title_label.setStyleSheet("QLabel { color: white; font-size: 20px; font-weight: bold; }")
    self.title_label.move(10, 5)

    # OCR Engine label and selection dropdown
    self.engine_label = QLabel("OCR Engine:", self)
    self.engine_label.setStyleSheet(label_style)
    self.engine_label.move(10, 60)

    self.engine_dropdown = QComboBox(self)
    self.engine_dropdown.setStyleSheet(combo_style)
    # The first entry is a placeholder; on_engine_selected maps it to "no reader".
    for engine_name in ("Select OCR Engine", "Tesseract", "EasyOCR"):
        self.engine_dropdown.addItem(engine_name)
    self.engine_dropdown.move(100, 90)
    self.engine_dropdown.currentIndexChanged.connect(self.on_engine_selected)

    # Polling rate label and dropdown
    self.polling_rate_label = QLabel("Polling Rate:", self)
    self.polling_rate_label.setStyleSheet(label_style)
    self.polling_rate_label.move(10, 120)

    self.polling_rate_dropdown = QComboBox(self)
    self.polling_rate_dropdown.setStyleSheet(combo_style)
    for rate_text in ("0.1 Seconds", "0.5 Seconds", "1 Second", "2 Seconds", "5 Seconds"):
        self.polling_rate_dropdown.addItem(rate_text)
    # The polling timer is started at 1000 ms, so show the matching entry
    # instead of the first one ("0.1 Seconds"). This must happen BEFORE the
    # signal is connected: the timer is created only after init_ui() returns,
    # and on_polling_rate_selected() dereferences self.timer.
    self.polling_rate_dropdown.setCurrentText("1 Second")
    self.polling_rate_dropdown.move(100, 150)
    self.polling_rate_dropdown.currentIndexChanged.connect(self.on_polling_rate_selected)

    # Options header, positioned above the checkboxes
    self.options_label = QLabel("Options", self)
    self.options_label.setStyleSheet("QLabel { color: white; font-size: 16px; font-weight: bold; }")
    self.options_label.move(10, 180)

    # Edit mode checkbox
    self.mode_toggle = QCheckBox("Edit Mode", self)
    self.mode_toggle.setStyleSheet(checkbox_style)
    self.mode_toggle.move(10, 210)
    self.mode_toggle.stateChanged.connect(self.toggle_edit_mode)

    # Overlay Visibility checkbox, positioned below Edit Mode
    self.visibility_checkbox = QCheckBox("Overlay Visibility", self)
    self.visibility_checkbox.setStyleSheet(checkbox_style)
    self.visibility_checkbox.move(10, 240)
    # Checked before the signal is connected, so the default does not fire the slot.
    self.visibility_checkbox.setChecked(True)
    self.visibility_checkbox.stateChanged.connect(self.toggle_overlay_visibility)

    # Collect Data checkbox; stays disabled until an OCR engine is selected
    self.collect_data_checkbox = QCheckBox("Collect Data", self)
    self.collect_data_checkbox.setStyleSheet(checkbox_style)
    self.collect_data_checkbox.move(10, 270)
    self.collect_data_checkbox.setEnabled(False)
    self.collect_data_checkbox.stateChanged.connect(self.toggle_ocr)

    # Data Collection Output label
    self.output_label = QLabel("Data Collection Output:", self)
    self.output_label.setStyleSheet("QLabel { color: white; font-size: 14px; font-weight: bold; }")
    self.output_label.move(10, 330)

    # Read-only text area for OCR results (transparent background)
    self.ocr_display = QTextEdit(self)
    self.ocr_display.setStyleSheet("QTextEdit { color: white; background-color: transparent; font-size: 14px; }")
    self.ocr_display.setReadOnly(True)
    self.ocr_display.setGeometry(10, 360, 300, 400)
def on_engine_selected(self):
    """Switch the active OCR backend to match the engine dropdown.

    Sets ``self.reader`` to the ``pytesseract`` module for "Tesseract", to an
    ``easyocr.Reader`` for "EasyOCR", and to ``None`` for the placeholder or
    any unrecognised entry. The "Collect Data" checkbox is enabled only when
    a backend is available.
    """
    selected_engine = self.engine_dropdown.currentText()

    if selected_engine == "Tesseract":
        reader = pytesseract
    elif selected_engine == "EasyOCR":
        # Constructing a Reader loads the recognition models, so build it once
        # and reuse it when the user switches back to EasyOCR later.
        reader = getattr(self, "_easyocr_reader", None)
        if reader is None:
            reader = easyocr.Reader(['en'])  # English only
            self._easyocr_reader = reader
    else:
        # Placeholder ("Select OCR Engine") or unknown text: never keep a stale reader.
        reader = None

    self.reader = reader
    self.collect_data_checkbox.setEnabled(reader is not None)
def on_polling_rate_selected(self):
    """Apply the interval chosen in the polling-rate dropdown to the OCR timer.

    Entries that are not in the table below leave the timer untouched.
    """
    # Dropdown text -> timer interval in milliseconds
    interval_ms_by_label = {
        "0.1 Seconds": 100,
        "0.5 Seconds": 500,
        "1 Second": 1000,
        "2 Seconds": 2000,
        "5 Seconds": 5000,
    }
    chosen_label = self.polling_rate_dropdown.currentText()
    interval_ms = interval_ms_by_label.get(chosen_label)
    if interval_ms is not None:
        self.timer.setInterval(interval_ms)
def toggle_edit_mode(self, state):
    """Enable or disable edit mode for dragging and resizing rectangles.

    Args:
        state: Check state delivered by the checkbox's ``stateChanged``
            signal; the value 2 (fully checked) turns edit mode on, anything
            else turns it off.
    """
    editing = (state == 2)
    self.canvas.edit_mode = editing  # Pass the state to the canvas
    # Resize handles are drawn only while edit_mode is set, so ask the canvas
    # to repaint now rather than waiting for an unrelated paint event.
    self.canvas.update()
    print(f"Borealis Overlay Edit Mode: {'ON' if editing else 'OFF'}")  # Debugging output
def toggle_overlay_visibility(self):
    """Show or hide every region to match the "Overlay Visibility" checkbox."""
    show_regions = self.visibility_checkbox.isChecked()
    for overlay_region in self.regions:
        # Each region mirrors the checkbox state
        overlay_region.visible = show_regions
    # Request a repaint so the change shows up
    self.update()
def toggle_ocr(self, state):
    """React to the "Collect Data" checkbox: collect once immediately, or clear the output.

    Args:
        state: Check state delivered by the checkbox's ``stateChanged`` signal.
    """
    if state != Qt.Checked:
        # Anything other than fully checked wipes the displayed results
        self.clear_ocr_data()
        return
    self.collect_ocr_data()
def collect_ocr_data(self):
    """Run OCR over every visible region and refresh the output pane.

    Does nothing unless the "Collect Data" checkbox is checked and an OCR
    backend has been selected. For each visible region the screen area is
    captured, preprocessed, and recognised with the active backend; the
    stripped text is stored in ``region.data``. Hidden regions keep whatever
    data they already had.
    """
    if not self.collect_data_checkbox.isChecked() or self.reader is None:
        return

    for region in self.regions:
        if not region.visible:
            continue

        # Capture the image of the region.
        # NOTE(review): region x/y are passed straight through as screen
        # coordinates - confirm the overlay window sits at the screen origin.
        bbox = (region.x, region.y, region.x + region.w, region.y + region.h)
        screenshot = ImageGrab.grab(bbox=bbox)

        # Grayscale + threshold + denoise before recognition
        processed_image = self.preprocess_image(screenshot)

        if self.reader is pytesseract:
            text = pytesseract.image_to_string(processed_image, config='--psm 6 --oem 1')
        else:
            # EasyOCR takes a numpy array; convert only on this path.
            results = self.reader.readtext(np.array(processed_image))
            if results:
                # Each result is (bounding box, text, confidence). Keep every
                # detected fragment, not just the first one.
                text = " ".join(fragment[1] for fragment in results)
            else:
                text = "No text detected"  # Fallback if no text is detected

        region.data = text.strip()

    self.display_ocr_data()
def preprocess_image(self, image, threshold=200, median_size=3):
    """Preprocess a captured image to improve text recognition.

    Args:
        image: PIL image to process.
        threshold: Grayscale cut-off; pixels strictly brighter than this
            become white (255), all others black (0).
        median_size: Window size of the median filter used for noise reduction.

    Returns:
        The binarised, median-filtered grayscale image.
    """
    # Convert image to grayscale
    gray_image = image.convert("L")
    # Binarise with an explicit 0/255 mapping (no booleans in the lookup table)
    threshold_image = gray_image.point(lambda p: 255 if p > threshold else 0)
    # Apply noise reduction (filter)
    return threshold_image.filter(ImageFilter.MedianFilter(median_size))
def clear_ocr_data(self):
    """Empty the OCR output text area."""
    output_pane = self.ocr_display
    output_pane.clear()
def display_ocr_data(self):
    """Render each region's label and stored OCR text into the output text area."""
    # One "<label> Output:" section per region, each followed by a blank line
    sections = (
        f"{region.label} Output:\n{region.data}\n\n"
        for region in self.regions
    )
    self.ocr_display.setText("".join(sections))
def main(): def main():