Added Linux OCR Dependencies

2026-02-04 09:30:30 -07:00 · 2025-04-13 03:18:13 -06:00
parent c741d81a45
commit e5fedab25b
2 changed files with 20 additions and 13 deletions
--- a/Data/Python_API_Endpoints/ocr_engines.py
+++ b/Data/Python_API_Endpoints/ocr_engines.py
@@ -5,21 +5,28 @@ import torch
 import pytesseract
 import easyocr
 import numpy as np
 import platform
 from PIL import Image
 # ---------------------------------------------------------------------
-# Configure internal Tesseract path
+# Configure cross-platform Tesseract path
 # ---------------------------------------------------------------------
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-TESSERACT_FOLDER = os.path.join(BASE_DIR, "Tesseract-OCR")
+SYSTEM = platform.system()
 TESSERACT_EXE = os.path.join(TESSERACT_FOLDER, "tesseract.exe")
 TESSDATA_DIR = os.path.join(TESSERACT_FOLDER, "tessdata")
-if not os.path.isfile(TESSERACT_EXE):
+if SYSTEM == "Windows":
-    raise EnvironmentError("Missing tesseract.exe in /Tesseract-OCR. Ensure the full folder is copied.")
+    TESSERACT_FOLDER = os.path.join(BASE_DIR, "Tesseract-OCR")
    TESSERACT_EXE = os.path.join(TESSERACT_FOLDER, "tesseract.exe")
    TESSDATA_DIR = os.path.join(TESSERACT_FOLDER, "tessdata")
-pytesseract.pytesseract.tesseract_cmd = TESSERACT_EXE
+    if not os.path.isfile(TESSERACT_EXE):
-os.environ["TESSDATA_PREFIX"] = TESSDATA_DIR
+        raise EnvironmentError("Missing tesseract.exe in /Tesseract-OCR. Ensure the full folder is copied.")
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_EXE
    os.environ["TESSDATA_PREFIX"] = TESSDATA_DIR
 else:
    # Assume Linux/macOS with system-installed Tesseract
    pytesseract.pytesseract.tesseract_cmd = "tesseract"
 # ---------------------------------------------------------------------
 # EasyOCR Global Instances
@@ -54,7 +61,7 @@ def run_ocr_on_base64(image_b64: str, engine: str = "tesseract", backend: str =
        try:
            text = pytesseract.image_to_string(image, config="--psm 6 --oem 1")
        except pytesseract.TesseractNotFoundError:
-            raise RuntimeError("Tesseract binary not found in internal folder.")
+            raise RuntimeError("Tesseract binary not found or not available on this platform.")
    elif engine == "easyocr":
        initialize_ocr_engines()
        reader = easyocr_reader_gpu if backend == "gpu" else easyocr_reader_cpu
@@ -82,4 +89,4 @@ def run_ocr_on_base64(image_b64: str, engine: str = "tesseract", backend: str =
    else:
        raise ValueError(f"OCR engine '{engine}' not recognized.")
-    return [line.strip() for line in text.splitlines() if line.strip()]
+    return [line.strip() for line in text.splitlines() if line.strip()]
--- a/Launch-Borealis.sh
+++ b/Launch-Borealis.sh
@@ -42,13 +42,13 @@ install_core_dependencies() {
    case "$DISTRO_ID" in
        ubuntu|debian)
            sudo apt update -qq
-            sudo apt install -y python3 python3-venv python3-pip nodejs npm git curl
+            sudo apt install -y python3 python3-venv python3-pip nodejs npm git curl tesseract-ocr
            ;;
        rhel|centos|fedora|rocky)
-            sudo dnf install -y python3 python3-pip nodejs npm git curl
+            sudo dnf install -y python3 python3-pip nodejs npm git curl tesseract
            ;;
        arch)
-            sudo pacman -Sy --noconfirm python python-venv python-pip nodejs npm git curl
+            sudo pacman -Sy --noconfirm python python-venv python-pip nodejs npm git curl tesseract
            ;;
        *)
            echo -e "${RED}${CROSSMARK} Unsupported Linux distribution: ${DISTRO_ID}${RESET}"