95 lines
3.5 KiB
Python
95 lines
3.5 KiB
Python
#////////// PROJECT FILE SEPARATION LINE ////////// CODE AFTER THIS LINE ARE FROM: <ProjectRoot>/Data/Python_API_Endpoints/ocr_engines.py
|
|
|
|
import os
|
|
import io
|
|
import base64
|
|
import torch
|
|
import pytesseract
|
|
import easyocr
|
|
import numpy as np
|
|
import platform
|
|
from PIL import Image
|
|
|
|
# ---------------------------------------------------------------------
|
|
# Configure cross-platform Tesseract path
|
|
# ---------------------------------------------------------------------
|
|
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
SYSTEM = platform.system()
|
|
|
|
if SYSTEM == "Windows":
|
|
TESSERACT_FOLDER = os.path.join(BASE_DIR, "Tesseract-OCR")
|
|
TESSERACT_EXE = os.path.join(TESSERACT_FOLDER, "tesseract.exe")
|
|
TESSDATA_DIR = os.path.join(TESSERACT_FOLDER, "tessdata")
|
|
|
|
if not os.path.isfile(TESSERACT_EXE):
|
|
raise EnvironmentError("Missing tesseract.exe in /Tesseract-OCR. Ensure the full folder is copied.")
|
|
|
|
pytesseract.pytesseract.tesseract_cmd = TESSERACT_EXE
|
|
os.environ["TESSDATA_PREFIX"] = TESSDATA_DIR
|
|
else:
|
|
# Assume Linux/macOS with system-installed Tesseract
|
|
pytesseract.pytesseract.tesseract_cmd = "tesseract"
|
|
|
|
# ---------------------------------------------------------------------
|
|
# EasyOCR Global Instances
|
|
# ---------------------------------------------------------------------
|
|
easyocr_reader_cpu = None
|
|
easyocr_reader_gpu = None
|
|
|
|
def initialize_ocr_engines():
|
|
global easyocr_reader_cpu, easyocr_reader_gpu
|
|
if easyocr_reader_cpu is None:
|
|
easyocr_reader_cpu = easyocr.Reader(['en'], gpu=False)
|
|
if easyocr_reader_gpu is None:
|
|
easyocr_reader_gpu = easyocr.Reader(['en'], gpu=torch.cuda.is_available())
|
|
|
|
# ---------------------------------------------------------------------
|
|
# Main OCR Handler
|
|
# ---------------------------------------------------------------------
|
|
def run_ocr_on_base64(image_b64: str, engine: str = "tesseract", backend: str = "cpu") -> list[str]:
|
|
if not image_b64:
|
|
raise ValueError("No base64 image data provided.")
|
|
|
|
try:
|
|
raw_bytes = base64.b64decode(image_b64)
|
|
image = Image.open(io.BytesIO(raw_bytes)).convert("RGB")
|
|
except Exception as e:
|
|
raise ValueError(f"Invalid base64 image input: {e}")
|
|
|
|
engine = engine.lower().strip()
|
|
backend = backend.lower().strip()
|
|
|
|
if engine in ["tesseract", "tesseractocr"]:
|
|
try:
|
|
text = pytesseract.image_to_string(image, config="--psm 6 --oem 1")
|
|
except pytesseract.TesseractNotFoundError:
|
|
raise RuntimeError("Tesseract binary not found or not available on this platform.")
|
|
elif engine == "easyocr":
|
|
initialize_ocr_engines()
|
|
reader = easyocr_reader_gpu if backend == "gpu" else easyocr_reader_cpu
|
|
result = reader.readtext(np.array(image), detail=1)
|
|
|
|
# Group by Y position (line-aware sorting)
|
|
result = sorted(result, key=lambda r: r[0][0][1])
|
|
lines = []
|
|
current_line = []
|
|
last_y = None
|
|
line_threshold = 10
|
|
|
|
for (bbox, text, _) in result:
|
|
y = bbox[0][1]
|
|
if last_y is None or abs(y - last_y) < line_threshold:
|
|
current_line.append(text)
|
|
else:
|
|
lines.append(" ".join(current_line))
|
|
current_line = [text]
|
|
last_y = y
|
|
|
|
if current_line:
|
|
lines.append(" ".join(current_line))
|
|
text = "\n".join(lines)
|
|
else:
|
|
raise ValueError(f"OCR engine '{engine}' not recognized.")
|
|
|
|
return [line.strip() for line in text.splitlines() if line.strip()]
|