# Python imports
import os
import requests
import subprocess

# Lib imports
import gi
gi.require_version('Gtk', '3.0')
gi.require_version('GtkSource', '4')
from gi.repository import Gtk
from gi.repository import GLib
from gi.repository import GtkSource

# Application imports
# NOTE(review): `settings`, `event_system`, `logger` and `daemon_threaded` are
#               used below without being imported here; presumably the
#               application injects them as builtins -- confirm.



class OCRWindow(Gtk.Window):
    """Window that downloads the Tesseract OCR AppImage and runs it on an image.

    The window offers a download button while the AppImage is missing; once
    the file exists it offers a "Run OCR" button plus a text view that
    receives the recognised text. The window is shown when the "show_ocr"
    event is emitted and is hidden (never destroyed) when closed.
    """

    def __init__(self):
        super().__init__()

        self.tesseract_path = f"{settings.get_home_config_path()}/tesseract-ocr.AppImage"
        self.download_url   = "https://github.com/AlexanderP/tesseract-appimage/releases/download/v5.3.3/tesseract-5.3.3-x86_64.AppImage"

        # Widget references, filled in by _load_widgets(). They are kept on the
        # instance so callbacks do not have to rely on child ordering.
        self._download_btn  = None
        self._run_btn       = None
        self._ocr_text_view = None

        self._setup_styling()
        self._setup_signals()
        self._subscribe_to_events()
        self._load_widgets()

    def _setup_styling(self):
        """Set title, icon, placement and the fixed minimum size."""
        self.set_title("Tesseract OCR")
        self.set_icon_from_file( settings.get_window_icon() )
        self.set_gravity(5)  # 5 = CENTER
        self.set_position(Gtk.WindowPosition.CENTER)
        self.set_default_size(480, 600)
        self.set_size_request(480, 600)

    def _setup_signals(self):
        """Hide instead of destroy when the window manager closes the window."""
        self.connect("delete-event", self._tear_down)

    def _subscribe_to_events(self):
        """Register for the application event that opens this window."""
        event_system.subscribe("show_ocr", self._show_ocr)

    def _load_widgets(self):
        """Build the button / text view layout and set initial visibility."""
        scrolled_window = Gtk.ScrolledWindow()
        box             = Gtk.Box()
        download_button = Gtk.Button(label = "Download Tesseract OCR")
        run_ocr_button  = Gtk.Button(label = "Run OCR")
        text_view       = GtkSource.View()

        download_button.set_tooltip_text("Download Tesseract OCR")
        run_ocr_button.set_tooltip_text("Run OCR")

        download_button.connect("clicked", self._download_tesseract_ocr)
        run_ocr_button.connect("clicked", self._run_ocr_button)

        box.set_orientation(Gtk.Orientation.VERTICAL)
        text_view.set_vexpand(True)
        text_view.set_show_line_numbers(True)

        ctx = scrolled_window.get_style_context()
        ctx.add_class("container-padding-5px")

        scrolled_window.add(text_view)
        box.add(download_button)
        box.add(run_ocr_button)
        box.add(scrolled_window)
        self.add(box)

        self._download_btn  = download_button
        self._run_btn       = run_ocr_button
        self._ocr_text_view = text_view

        box.show_all()
        self._sync_visibility()

    def _sync_visibility(self):
        """Show the widgets that match whether the AppImage is present."""
        if os.path.exists(self.tesseract_path):
            self._download_btn.hide()
            self._run_btn.show()
            self._ocr_text_view.show()
        else:
            self._download_btn.show()
            self._run_btn.hide()
            self._ocr_text_view.hide()

    def _show_ocr(self):
        """Event handler for "show_ocr": present the window."""
        self.show()

    def _tear_down(self, widget = None, eve = None):
        """Handle "delete-event" by hiding the window."""
        self.hide()
        # Return True to NOT propagate the call (which would actually destroy
        # the window and its children).
        return True

    def _download_tesseract_ocr(self, button):
        """Start the AppImage download and show a spinner while it runs.

        The button is made insensitive until the download finishes so a
        second click cannot start a concurrent download.
        """
        button.set_sensitive(False)

        parent  = button.get_parent()
        spinner = Gtk.Spinner.new()
        parent.add(spinner)

        # Widget calls stay on the GTK main thread; the worker only does I/O.
        spinner.show()
        spinner.start()

        GLib.idle_add(self.__download_file, self.download_url, spinner)

    @daemon_threaded
    def __download_file(self, url = None, spinner = None):
        """Stream `url` to `self.tesseract_path` (worker side of the download).

        The data is written to a temporary ".part" file and moved into place
        only after the transfer completed, so an interrupted download never
        leaves a truncated AppImage at the final path. Completion (success or
        failure) is reported back to the main loop via
        `__process_dl_finished`.
        """
        if spinner is None: return

        if not url or url != self.download_url:
            GLib.idle_add(self.__process_dl_finished, spinner)
            return

        partial_path = f"{self.tesseract_path}.part"
        try:
            # NOTE the stream = True parameter below
            with requests.get(url, stream = True, timeout = 30) as r:
                r.raise_for_status()
                with open(partial_path, 'wb') as f:
                    for chunk in r.iter_content(chunk_size = 8192):
                        f.write(chunk)

            os.replace(partial_path, self.tesseract_path)
        except (requests.RequestException, OSError) as e:
            logger.error(f"Couldn't download Tesseract OCR\nURL: {url}")
            logger.error( repr(e) )

            try:
                if os.path.exists(partial_path):
                    os.remove(partial_path)
            except OSError as cleanup_error:
                logger.error( repr(cleanup_error) )
        finally:
            # Always hand control back to the main thread so the spinner is
            # removed and the button state is restored.
            GLib.idle_add(self.__process_dl_finished, spinner)

    def __process_dl_finished(self, spinner = None):
        """Main-thread follow-up of a download: clean up and update the UI.

        Returns None, so GLib does not reschedule the idle callback.
        """
        if spinner is None: return

        spinner.stop()
        parent = spinner.get_parent()
        if parent is not None:
            parent.remove(spinner)

        self._download_btn.set_sensitive(True)

        if os.path.exists(self.tesseract_path):
            self.__set_as_executable(self.tesseract_path)

        self._sync_visibility()

    def __set_as_executable(self, tesseract_path):
        """Set mode 544 (r-xr--r--) on `tesseract_path`; log on failure."""
        if not tesseract_path: return

        try:
            os.chmod(tesseract_path, 0o544)
        except OSError as e:
            logger.error(f"Couldn't chmod\nFile: {tesseract_path}")
            logger.error( repr(e) )

    def _run_ocr_button(self, button):
        """Run Tesseract on the active image and display the recognised text.

        The image path is requested through the "get_active_image_path"
        event; nothing happens when no path is returned. The call blocks
        until the Tesseract process exits.
        """
        active_image = event_system.emit_and_await("get_active_image_path")
        if not active_image: return

        command = [self.tesseract_path, active_image, "stdout"]
        try:
            result = subprocess.run(command, stdout = subprocess.PIPE, stderr = subprocess.PIPE)
        except OSError as e:
            # E.g. the AppImage is missing or not executable.
            logger.error(f"Couldn't run Tesseract OCR\nCommand: {command}")
            logger.error( repr(e) )
            return

        # errors = "replace" keeps odd bytes from aborting the whole result.
        data = result.stdout.decode('utf-8', errors = "replace")

        logger.debug(command)
        logger.debug(data)
        if result.returncode != 0:
            logger.error( result.stderr.decode('utf-8', errors = "replace") )

        self._ocr_text_view.get_buffer().set_text(data, -1)