mirror of
https://github.com/bunny-lab-io/Borealis.git
synced 2025-12-16 19:25:48 -07:00
Removed Legacy Server Codebase
This commit is contained in:
@@ -1,104 +0,0 @@
|
||||
#////////// PROJECT FILE SEPARATION LINE ////////// CODE AFTER THIS LINE IS FROM: <ProjectRoot>/Data/Python_API_Endpoints/ocr_engines.py
|
||||
|
||||
import os
|
||||
import io
|
||||
import sys
|
||||
import base64
|
||||
import torch
|
||||
import pytesseract
|
||||
import easyocr
|
||||
import numpy as np
|
||||
import platform
|
||||
from PIL import Image
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Configure cross-platform Tesseract path
|
||||
# ---------------------------------------------------------------------
|
||||
# Host operating system name as returned by platform.system(); the Tesseract
# setup further down compares it against "Windows".
SYSTEM = platform.system()
|
||||
|
||||
def get_tesseract_folder():
    """Return the directory expected to contain the Tesseract-OCR files.

    When ``sys.frozen`` is set (PyInstaller EXE), the folder is looked up
    under ``sys._MEIPASS`` at ``Borealis/Python_API_Endpoints/Tesseract-OCR``.
    Otherwise it is the ``Tesseract-OCR`` folder that sits next to this
    source file.
    """
    running_frozen = getattr(sys, 'frozen', False)

    if not running_frozen:
        # Normal Python environment: resolve relative to this module.
        module_path = os.path.abspath(__file__)
        return os.path.join(os.path.dirname(module_path), "Tesseract-OCR")

    # PyInstaller EXE: bundled files live under the extraction root.
    bundled_parts = ("Borealis", "Python_API_Endpoints", "Tesseract-OCR")
    return os.path.join(sys._MEIPASS, *bundled_parts)
|
||||
|
||||
if SYSTEM != "Windows":
    # Linux/macOS: assume a system-installed Tesseract and invoke it by its
    # bare command name.
    pytesseract.pytesseract.tesseract_cmd = "tesseract"
else:
    # Windows: use the Tesseract copy located by get_tesseract_folder().
    TESSERACT_FOLDER = get_tesseract_folder()
    TESSERACT_EXE = os.path.join(TESSERACT_FOLDER, "tesseract.exe")
    TESSDATA_DIR = os.path.join(TESSERACT_FOLDER, "tessdata")

    if os.path.isfile(TESSERACT_EXE):
        pytesseract.pytesseract.tesseract_cmd = TESSERACT_EXE
        os.environ["TESSDATA_PREFIX"] = TESSDATA_DIR
    else:
        # Fail at import time so a broken install is noticed immediately.
        raise EnvironmentError(f"Missing tesseract.exe at expected path: {TESSERACT_EXE}")
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# EasyOCR Global Instances
|
||||
# ---------------------------------------------------------------------
|
||||
# Shared EasyOCR reader built with gpu=False; stays None until
# initialize_ocr_engines() creates it.
easyocr_reader_cpu = None
|
||||
# Shared EasyOCR reader built with gpu=torch.cuda.is_available(); stays None
# until initialize_ocr_engines() creates it.
easyocr_reader_gpu = None
|
||||
|
||||
def initialize_ocr_engines():
    """Create the module-level EasyOCR readers that have not been built yet.

    Both readers are English-only. The CPU reader is always built with
    ``gpu=False``; the GPU reader is built with ``gpu`` set to whatever
    ``torch.cuda.is_available()`` reports at creation time. Readers that
    already exist are left untouched, so repeated calls are cheap.
    """
    global easyocr_reader_cpu, easyocr_reader_gpu

    cpu_missing = easyocr_reader_cpu is None
    if cpu_missing:
        easyocr_reader_cpu = easyocr.Reader(['en'], gpu=False)

    gpu_missing = easyocr_reader_gpu is None
    if gpu_missing:
        easyocr_reader_gpu = easyocr.Reader(
            ['en'],
            gpu=torch.cuda.is_available(),
        )
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Main OCR Handler
|
||||
# ---------------------------------------------------------------------
|
||||
def _group_easyocr_lines(detections, line_threshold=10):
    """Join EasyOCR detections into text lines in reading order.

    Each detection is unpacked as ``(bbox, text, confidence)`` and positioned
    by ``bbox[0]``, the first corner of its box (top-left per the EasyOCR
    documentation -- confirm if the EasyOCR version changes).

    Detections are walked top to bottom; a new line starts whenever the Y
    coordinate differs from the previous detection's by ``line_threshold``
    pixels or more. Within a line the fragments are ordered left to right by
    X, so small vertical jitter between words cannot scramble the word order.

    Returns a list with one joined string per detected line.
    """
    ordered = sorted(detections, key=lambda det: det[0][0][1])

    grouped = []
    current = []
    last_y = None
    for bbox, text, _confidence in ordered:
        x, y = bbox[0][0], bbox[0][1]
        if last_y is not None and abs(y - last_y) >= line_threshold:
            grouped.append(current)
            current = []
        current.append((x, text))
        last_y = y
    if current:
        grouped.append(current)

    # sorted() is stable, so fragments sharing an X keep their Y order.
    return [
        " ".join(text for _x, text in sorted(line, key=lambda item: item[0]))
        for line in grouped
    ]


def run_ocr_on_base64(image_b64: str, engine: str = "tesseract", backend: str = "cpu") -> list[str]:
    """Run OCR on a base64-encoded image and return its non-empty text lines.

    Args:
        image_b64: Base64 text of an image file readable by PIL.
        engine: ``"tesseract"`` / ``"tesseractocr"`` or ``"easyocr"``
            (case-insensitive, surrounding whitespace ignored).
        backend: ``"gpu"`` selects the GPU EasyOCR reader; any other value
            selects the CPU reader. Only consulted for the EasyOCR engine.

    Returns:
        The recognized lines, each stripped, with blank lines removed.

    Raises:
        ValueError: If ``image_b64`` is empty, cannot be decoded into an
            image, or ``engine`` is not recognized.
        RuntimeError: If the Tesseract binary cannot be found.
    """
    if not image_b64:
        raise ValueError("No base64 image data provided.")

    try:
        raw_bytes = base64.b64decode(image_b64)
        image = Image.open(io.BytesIO(raw_bytes)).convert("RGB")
    except Exception as e:
        # Keep the original failure attached as the cause for debugging.
        raise ValueError(f"Invalid base64 image input: {e}") from e

    engine = engine.lower().strip()
    backend = backend.lower().strip()

    if engine in ("tesseract", "tesseractocr"):
        try:
            text = pytesseract.image_to_string(image, config="--psm 6 --oem 1")
        except pytesseract.TesseractNotFoundError as e:
            raise RuntimeError("Tesseract binary not found or not available on this platform.") from e
    elif engine == "easyocr":
        initialize_ocr_engines()
        reader = easyocr_reader_gpu if backend == "gpu" else easyocr_reader_cpu
        detections = reader.readtext(np.array(image), detail=1)
        text = "\n".join(_group_easyocr_lines(detections))
    else:
        raise ValueError(f"OCR engine '{engine}' not recognized.")

    return [line.strip() for line in text.splitlines() if line.strip()]
|
||||
@@ -1,57 +0,0 @@
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import platform
|
||||
|
||||
|
||||
def run_powershell_script(script_path: str):
    """
    Execute a PowerShell script and capture its output.

    On Windows the script runs under Windows PowerShell with
    ``-ExecutionPolicy Bypass`` and without showing a console window; on
    other systems it runs under ``pwsh``. ``-NoProfile`` is always passed.

    Output is decoded as text with undecodable bytes replaced, so a script
    that emits bytes outside the locale encoding cannot crash the caller.

    Returns (returncode, stdout, stderr)

    Raises:
        FileNotFoundError: If ``script_path`` is empty or not an existing
            file (also raised by the OS if the PowerShell binary is missing).
        ValueError: If ``script_path`` does not end with ``.ps1``.
    """
    if not script_path or not os.path.isfile(script_path):
        raise FileNotFoundError(f"Script not found: {script_path}")

    if not script_path.lower().endswith(".ps1"):
        raise ValueError("run_powershell_script only accepts .ps1 files")

    if platform.system() == "Windows":
        # Prefer the Windows PowerShell that ships with the OS; fall back to
        # whatever "powershell.exe" resolves to if that path is absent.
        system_root = os.environ.get("SystemRoot", r"C:\Windows")
        ps_bin = os.path.join(
            system_root, "System32", "WindowsPowerShell", "v1.0", "powershell.exe"
        )
        if not os.path.isfile(ps_bin):
            ps_bin = "powershell.exe"
        cmd = [ps_bin, "-ExecutionPolicy", "Bypass"]

        # Hide the console window that would otherwise be shown.
        creationflags = 0x08000000  # CREATE_NO_WINDOW
        startupinfo = subprocess.STARTUPINFO()
        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        # PowerShell Core (pwsh) may exist cross-platform.
        cmd = ["pwsh"]
        creationflags = 0
        startupinfo = None

    cmd += ["-NoProfile", "-File", script_path]

    completed = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        errors="replace",
        creationflags=creationflags,
        startupinfo=startupinfo,
    )
    return completed.returncode, completed.stdout or "", completed.stderr or ""
|
||||
|
||||
Reference in New Issue
Block a user