Mirage2/src/core/widgets/ocr_window.py

164 lines
5.0 KiB
Python

# Python imports
import os
import requests
import subprocess
# Lib imports
import gi
gi.require_version('Gtk', '3.0')
gi.require_version('GtkSource', '4')
from gi.repository import Gtk
from gi.repository import GLib
from gi.repository import GtkSource
# Application imports
class OCRWindow(Gtk.Window):
    """Window that downloads the Tesseract OCR AppImage and runs it on an image.

    The window shows a download button while the AppImage is missing from
    ``self.tesseract_path``; once the file exists it shows a "Run OCR" button
    and a text view that receives the OCR output.

    NOTE(review): ``settings``, ``event_system``, ``logger`` and
    ``daemon_threaded`` are not defined or imported in this file; presumably
    the application injects them (e.g. through builtins) - confirm.
    """

    def __init__(self):
        super(OCRWindow, self).__init__()

        self.tesseract_path = f"{settings.get_home_config_path()}/tesseract-ocr.AppImage"
        self.download_url   = "https://github.com/AlexanderP/tesseract-appimage/releases/download/v5.3.3/tesseract-5.3.3-x86_64.AppImage"

        # Widget handles, populated by _load_widgets. Keeping explicit
        # references avoids locating widgets by their child index later on.
        self._download_button = None
        self._run_button      = None
        self._text_view       = None
        # Spinner of the download currently in flight, or None when idle.
        self._spinner         = None

        self._setup_styling()
        self._setup_signals()
        self._subscribe_to_events()
        self._load_widgets()

    def _setup_styling(self):
        """Set the window title, icon, placement and fixed minimum size."""
        self.set_title("Tesseract OCR")
        self.set_icon_from_file( settings.get_window_icon() )
        self.set_gravity(5)                           # 5 = CENTER
        self.set_position(Gtk.WindowPosition.CENTER)  # 1 = CENTER, 4 = CENTER_ALWAYS
        self.set_default_size(480, 600)
        self.set_size_request(480, 600)

    def _setup_signals(self):
        """Route the window close request to _tear_down."""
        self.connect("delete-event", self._tear_down)

    def _subscribe_to_events(self):
        """Register _show_ocr as the handler of the "show_ocr" event."""
        event_system.subscribe("show_ocr", self._show_ocr)

    def _load_widgets(self):
        """Build the window content and show only what fits the install state."""
        scrolled_window = Gtk.ScrolledWindow()
        box             = Gtk.Box()
        download_button = Gtk.Button(label = "Download Tesseract OCR")
        run_ocr_button  = Gtk.Button(label = "Run OCR")
        text_view       = GtkSource.View()

        download_button.set_tooltip_text("Download Tesseract OCR")
        run_ocr_button.set_tooltip_text("Run OCR")

        download_button.connect("clicked", self._download_tesseract_ocr)
        run_ocr_button.connect("clicked", self._run_ocr_button)

        box.set_orientation(Gtk.Orientation.VERTICAL)
        text_view.set_vexpand(True)
        text_view.set_show_line_numbers(True)

        ctx = scrolled_window.get_style_context()
        ctx.add_class("container-padding-5px")

        scrolled_window.add(text_view)
        box.add(download_button)
        box.add(run_ocr_button)
        box.add(scrolled_window)
        self.add(box)

        self._download_button = download_button
        self._run_button      = run_ocr_button
        self._text_view       = text_view

        box.show_all()
        if os.path.exists(self.tesseract_path):
            download_button.hide()
        else:
            run_ocr_button.hide()
            text_view.hide()

    def _show_ocr(self):
        """Show the window; subscribed to the "show_ocr" event."""
        self.show()

    def _tear_down(self, widget = None, eve = None):
        """Hide the window instead of destroying it when it is closed."""
        self.hide()
        # Return True to NOT propagate the call (would actually destroy the window and its children)
        return True

    def _download_tesseract_ocr(self, button):
        """Start downloading the AppImage and show a spinner while it runs.

        Does nothing when a download is already in flight, so repeated clicks
        cannot stack several spinners or start concurrent downloads.
        """
        if self._spinner is not None: return

        spinner = Gtk.Spinner.new()
        button.get_parent().add(spinner)

        # Widgets are only touched here, on the main loop, never in the worker.
        spinner.show()
        spinner.start()
        button.set_sensitive(False)
        self._spinner = spinner

        # Called directly rather than through GLib.idle_add: an idle source
        # re-runs while its callback returns a truthy value, and what the
        # daemon_threaded wrapper returns is not visible from this file.
        self.__download_file(self.download_url, spinner)

    @daemon_threaded
    def __download_file(self, url = None, spinner = None):
        """Download ``url`` to ``self.tesseract_path``.

        The data is written to a ``.part`` file that is renamed into place
        only after the transfer completed, so a failed or interrupted download
        never leaves a truncated file at ``self.tesseract_path``. Failures are
        logged. Whatever the outcome, __process_dl_finished is scheduled on
        the main loop to update the widgets.

        Parameters:
            url:     address to fetch; only ``self.download_url`` is accepted.
            spinner: spinner handed on to __process_dl_finished; it is not
                     touched from within this method.
        """
        if spinner is None: return
        if not url or not url == self.download_url:
            GLib.idle_add(self.__process_dl_finished, spinner)
            return

        partial_path = f"{self.tesseract_path}.part"
        try:
            # stream = True keeps the body out of memory; the timeout stops a
            # stalled connection from leaving the spinner running forever.
            with requests.get(url, stream = True, timeout = 30) as r:
                r.raise_for_status()
                with open(partial_path, 'wb') as f:
                    for chunk in r.iter_content(chunk_size = 8192):
                        f.write(chunk)

            os.replace(partial_path, self.tesseract_path)
        except (requests.RequestException, OSError) as e:
            logger.error(f"Couldn't download Tesseract OCR\nURL: {url}")
            logger.error( repr(e) )
            try:
                os.remove(partial_path)
            except OSError:
                pass  # nothing was written, or it is already gone
        finally:
            GLib.idle_add(self.__process_dl_finished, spinner)

    def __process_dl_finished(self, spinner = None):
        """Remove the spinner and reveal the OCR widgets if the file exists.

        Scheduled through GLib.idle_add; returns False so the idle source is
        removed after a single call.
        """
        if spinner is None: return False

        spinner.stop()
        parent = spinner.get_parent()
        if parent is not None:
            parent.remove(spinner)

        if self._spinner is spinner:
            self._spinner = None

        # Re-enable the button so a failed download can be retried.
        self._download_button.set_sensitive(True)

        if os.path.exists(self.tesseract_path):
            self.__set_as_executable(self.tesseract_path)
            self._run_button.show()
            self._text_view.show()
            self._download_button.hide()

        return False

    def __set_as_executable(self, tesseract_path):
        """Set mode 544 (r-xr--r--) on ``tesseract_path``; log on failure."""
        if not tesseract_path: return

        try:
            os.chmod(tesseract_path, 0o544)
        except OSError as e:
            logger.error(f"Couldn't chmod\nFile: {tesseract_path}")
            logger.error( repr(e) )

    def _run_ocr_button(self, button):
        """Run Tesseract on the active image and show its output.

        The image path is requested through the "get_active_image_path"
        event; nothing happens when no path is returned. The command runs
        synchronously, so the handler blocks until Tesseract exits.
        """
        active_image = event_system.emit_and_await("get_active_image_path")
        if not active_image: return

        command = [self.tesseract_path, active_image, "stdout"]
        logger.debug(command)

        try:
            result = subprocess.run(command, stdout = subprocess.PIPE)
        except OSError as e:
            # e.g. the AppImage was removed or is not executable
            logger.error(f"Couldn't run Tesseract OCR\nFile: {self.tesseract_path}")
            logger.error( repr(e) )
            return

        # Replace undecodable bytes instead of raising inside a signal handler.
        data = result.stdout.decode('utf-8', errors = 'replace')
        logger.debug(data)

        self._text_view.get_buffer().set_text(data, -1)