mirror of
https://github.com/bunny-lab-io/Borealis.git
synced 2025-07-27 05:08:29 -06:00
Added Linux OCR Dependencies
This commit is contained in:
@ -5,21 +5,28 @@ import torch
|
|||||||
import pytesseract
|
import pytesseract
|
||||||
import easyocr
|
import easyocr
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import platform
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
# ---------------------------------------------------------------------
|
# ---------------------------------------------------------------------
|
||||||
# Configure internal Tesseract path
|
# Configure cross-platform Tesseract path
|
||||||
# ---------------------------------------------------------------------
|
# ---------------------------------------------------------------------
|
||||||
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
TESSERACT_FOLDER = os.path.join(BASE_DIR, "Tesseract-OCR")
|
SYSTEM = platform.system()
|
||||||
TESSERACT_EXE = os.path.join(TESSERACT_FOLDER, "tesseract.exe")
|
|
||||||
TESSDATA_DIR = os.path.join(TESSERACT_FOLDER, "tessdata")
|
|
||||||
|
|
||||||
if not os.path.isfile(TESSERACT_EXE):
|
if SYSTEM == "Windows":
|
||||||
raise EnvironmentError("Missing tesseract.exe in /Tesseract-OCR. Ensure the full folder is copied.")
|
TESSERACT_FOLDER = os.path.join(BASE_DIR, "Tesseract-OCR")
|
||||||
|
TESSERACT_EXE = os.path.join(TESSERACT_FOLDER, "tesseract.exe")
|
||||||
|
TESSDATA_DIR = os.path.join(TESSERACT_FOLDER, "tessdata")
|
||||||
|
|
||||||
pytesseract.pytesseract.tesseract_cmd = TESSERACT_EXE
|
if not os.path.isfile(TESSERACT_EXE):
|
||||||
os.environ["TESSDATA_PREFIX"] = TESSDATA_DIR
|
raise EnvironmentError("Missing tesseract.exe in /Tesseract-OCR. Ensure the full folder is copied.")
|
||||||
|
|
||||||
|
pytesseract.pytesseract.tesseract_cmd = TESSERACT_EXE
|
||||||
|
os.environ["TESSDATA_PREFIX"] = TESSDATA_DIR
|
||||||
|
else:
|
||||||
|
# Assume Linux/macOS with system-installed Tesseract
|
||||||
|
pytesseract.pytesseract.tesseract_cmd = "tesseract"
|
||||||
|
|
||||||
# ---------------------------------------------------------------------
|
# ---------------------------------------------------------------------
|
||||||
# EasyOCR Global Instances
|
# EasyOCR Global Instances
|
||||||
@ -54,7 +61,7 @@ def run_ocr_on_base64(image_b64: str, engine: str = "tesseract", backend: str =
|
|||||||
try:
|
try:
|
||||||
text = pytesseract.image_to_string(image, config="--psm 6 --oem 1")
|
text = pytesseract.image_to_string(image, config="--psm 6 --oem 1")
|
||||||
except pytesseract.TesseractNotFoundError:
|
except pytesseract.TesseractNotFoundError:
|
||||||
raise RuntimeError("Tesseract binary not found in internal folder.")
|
raise RuntimeError("Tesseract binary not found or not available on this platform.")
|
||||||
elif engine == "easyocr":
|
elif engine == "easyocr":
|
||||||
initialize_ocr_engines()
|
initialize_ocr_engines()
|
||||||
reader = easyocr_reader_gpu if backend == "gpu" else easyocr_reader_cpu
|
reader = easyocr_reader_gpu if backend == "gpu" else easyocr_reader_cpu
|
||||||
@ -82,4 +89,4 @@ def run_ocr_on_base64(image_b64: str, engine: str = "tesseract", backend: str =
|
|||||||
else:
|
else:
|
||||||
raise ValueError(f"OCR engine '{engine}' not recognized.")
|
raise ValueError(f"OCR engine '{engine}' not recognized.")
|
||||||
|
|
||||||
return [line.strip() for line in text.splitlines() if line.strip()]
|
return [line.strip() for line in text.splitlines() if line.strip()]
|
||||||
|
@ -42,13 +42,13 @@ install_core_dependencies() {
|
|||||||
case "$DISTRO_ID" in
|
case "$DISTRO_ID" in
|
||||||
ubuntu|debian)
|
ubuntu|debian)
|
||||||
sudo apt update -qq
|
sudo apt update -qq
|
||||||
sudo apt install -y python3 python3-venv python3-pip nodejs npm git curl
|
sudo apt install -y python3 python3-venv python3-pip nodejs npm git curl tesseract-ocr
|
||||||
;;
|
;;
|
||||||
rhel|centos|fedora|rocky)
|
rhel|centos|fedora|rocky)
|
||||||
sudo dnf install -y python3 python3-pip nodejs npm git curl
|
sudo dnf install -y python3 python3-pip nodejs npm git curl tesseract
|
||||||
;;
|
;;
|
||||||
arch)
|
arch)
|
||||||
sudo pacman -Sy --noconfirm python python-venv python-pip nodejs npm git curl
|
sudo pacman -Sy --noconfirm python python-venv python-pip nodejs npm git curl tesseract
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
echo -e "${RED}${CROSSMARK} Unsupported Linux distribution: ${DISTRO_ID}${RESET}"
|
echo -e "${RED}${CROSSMARK} Unsupported Linux distribution: ${DISTRO_ID}${RESET}"
|
||||||
|
Reference in New Issue
Block a user