From 1d671d8f239d36ed5dd8d73b184ea6784e9b7a8c Mon Sep 17 00:00:00 2001 From: itdominator <1itdominator@gmail.com> Date: Fri, 15 Mar 2024 23:34:58 -0500 Subject: [PATCH] Added OCR processor --- src/core/containers/right_box.py | 10 +- src/core/widgets/button_controls.py | 9 + src/core/widgets/image_view.py | 7 +- src/core/widgets/image_view_mixin.py | 8 +- src/core/widgets/ocr_window.py | 163 +++++++++++++++++++ user_config/usr/share/mirage2/icons/ocr.png | Bin 0 -> 8263 bytes user_config/usr/share/mirage2/stylesheet.css | 2 +- 7 files changed, 191 insertions(+), 8 deletions(-) create mode 100644 src/core/widgets/ocr_window.py create mode 100644 user_config/usr/share/mirage2/icons/ocr.png diff --git a/src/core/containers/right_box.py b/src/core/containers/right_box.py index 6d4d2fd..a4cbd38 100644 --- a/src/core/containers/right_box.py +++ b/src/core/containers/right_box.py @@ -3,14 +3,14 @@ # Lib imports import gi gi.require_version('Gtk', '3.0') -gi.require_version('Gdk', '3.0') from gi.repository import Gtk -from gi.repository import Gdk # Application imports +from .image_view_scroll import ImageViewScroll + from ..widgets.button_controls import ButtonControls from ..widgets.path_label import PathLabel -from .image_view_scroll import ImageViewScroll +from ..widgets.ocr_window import OCRWindow @@ -38,10 +38,12 @@ class RightBox(Gtk.Box): event_system.subscribe("background_fill", self._toggle_background) def _load_widgets(self): + window = OCRWindow() + self.add(ButtonControls()) self.add(PathLabel()) self.add(ImageViewScroll()) def _toggle_background(self): ctx = self.get_style_context() - ctx.remove_class("background-fill") if ctx.has_class("background-fill") else ctx.add_class("background-fill") + ctx.remove_class("background-fill") if ctx.has_class("background-fill") else ctx.add_class("background-fill") \ No newline at end of file diff --git a/src/core/widgets/button_controls.py b/src/core/widgets/button_controls.py index df692ec..819cc15 100644 --- a/src/core/widgets/button_controls.py +++ b/src/core/widgets/button_controls.py @@ -40,6 +40,7 @@ class ButtonControls(Gtk.ButtonBox): hflip_button = Gtk.Button() rrotate_button = Gtk.Button() zoomin_button = Gtk.Button() + ocr_button = Gtk.Button() self._set_class(self.fit_button) @@ -51,6 +52,7 @@ class ButtonControls(Gtk.ButtonBox): hflip_button.set_tooltip_text("Flip Horizontal") rrotate_button.set_tooltip_text("Rotate Right") zoomin_button.set_tooltip_text("Zoom In") + ocr_button.set_tooltip_text("OCR") zoomout_button.set_image( Gtk.Image.new_from_file(f"{icons_path}/zoom-out.png") ) lrotate_button.set_image( Gtk.Image.new_from_file(f"{icons_path}/rotate-left.png") ) @@ -60,6 +62,7 @@ class ButtonControls(Gtk.ButtonBox): hflip_button.set_image( Gtk.Image.new_from_file(f"{icons_path}/flip-horizontal.png") ) rrotate_button.set_image( Gtk.Image.new_from_file(f"{icons_path}/rotate-right.png") ) zoomin_button.set_image( Gtk.Image.new_from_file(f"{icons_path}/zoom-in.png") ) + ocr_button.set_image( Gtk.Image.new_from_file(f"{icons_path}/ocr.png") ) zoomout_button.set_always_show_image(True) lrotate_button.set_always_show_image(True) @@ -69,6 +72,7 @@ class ButtonControls(Gtk.ButtonBox): hflip_button.set_always_show_image(True) rrotate_button.set_always_show_image(True) zoomin_button.set_always_show_image(True) + ocr_button.set_always_show_image(True) zoomout_button.connect("clicked", self._zoom_out) lrotate_button.connect("clicked", self._rotate_left) @@ -78,6 +82,7 @@ class ButtonControls(Gtk.ButtonBox): hflip_button.connect("clicked", self._horizontal_flip) rrotate_button.connect("clicked", self._rotate_right) zoomin_button.connect("clicked", self._zoom_in) + ocr_button.connect("clicked", self._show_ocr) center_widget.add(zoomout_button) center_widget.add(lrotate_button) @@ -87,6 +92,7 @@ class ButtonControls(Gtk.ButtonBox): center_widget.add(hflip_button) center_widget.add(rrotate_button) center_widget.add(zoomin_button) + center_widget.add(ocr_button) self.set_center_widget(center_widget) @@ -125,3 +131,6 @@ class ButtonControls(Gtk.ButtonBox): def _unset_class(self, target): ctx = target.get_style_context() ctx.remove_class("button-highlighted") + + def _show_ocr(self, widget): + event_system.emit("show_ocr") diff --git a/src/core/widgets/image_view.py b/src/core/widgets/image_view.py index a810e1d..5d17dfb 100644 --- a/src/core/widgets/image_view.py +++ b/src/core/widgets/image_view.py @@ -50,6 +50,7 @@ class ImageView(ImageViewMixin, Gtk.Image): event_system.subscribe("size_allocate", self._size_allocate) event_system.subscribe("handle_file_from_dnd", self._handle_file_from_dnd) + event_system.subscribe("get_active_image_path", self._get_active_image_path) event_system.subscribe("zoom_out", self._zoom_out) event_system.subscribe("rotate_left", self._rotate_left) event_system.subscribe("vertical_flip", self._vertical_flip) @@ -85,7 +86,9 @@ class ImageView(ImageViewMixin, Gtk.Image): if not self.work_pixbuff: self.set_as_static(path) - self.pixbuff = self.work_pixbuff.copy() + self.pixbuff = self.work_pixbuff.copy() + self.pixbuff.path = path + width = self.pixbuff.get_width() height = self.pixbuff.get_height() size = sizeof_fmt( getsize(path) ) @@ -124,4 +127,4 @@ class ImageView(ImageViewMixin, Gtk.Image): w, h = im.size return GdkPixbuf.Pixbuf.new_from_bytes(data, GdkPixbuf.Colorspace.RGB, - False, 8, w, h, w * 3) + False, 8, w, h, w * 3) \ No newline at end of file diff --git a/src/core/widgets/image_view_mixin.py b/src/core/widgets/image_view_mixin.py index 60a4e81..53f5dc0 100644 --- a/src/core/widgets/image_view_mixin.py +++ b/src/core/widgets/image_view_mixin.py @@ -10,6 +10,12 @@ from gi.repository import GdkPixbuf class ImageViewMixin: + def _get_active_image_path(self): + if self.pixbuff and self.pixbuff.path: + return self.pixbuff.path + + return None + def _zoom_out(self): if self.work_pixbuff and self.pixbuff: # TODO: Setup scale factor setting to pull from settings... @@ -122,4 +128,4 @@ class ImageViewMixin: time.sleep(delay) def _stop_animation(self): - self.playing_animation = False + self.playing_animation = False \ No newline at end of file diff --git a/src/core/widgets/ocr_window.py b/src/core/widgets/ocr_window.py new file mode 100644 index 0000000..1d0c084 --- /dev/null +++ b/src/core/widgets/ocr_window.py @@ -0,0 +1,163 @@ +# Python imports +import os +import requests +import subprocess + +# Lib imports +import gi +gi.require_version('Gtk', '3.0') +gi.require_version('GtkSource', '4') +from gi.repository import Gtk +from gi.repository import GLib +from gi.repository import GtkSource + +# Application imports + + + +class OCRWindow(Gtk.Window): + def __init__(self): + super(OCRWindow, self).__init__() + + self.tesseract_path = f"{settings.get_home_config_path()}/tesseract-ocr.AppImage" + self.download_url = "https://github.com/AlexanderP/tesseract-appimage/releases/download/v5.3.3/tesseract-5.3.3-x86_64.AppImage" + + self._setup_styling() + self._setup_signals() + self._subscribe_to_events() + self._load_widgets() + + + def _setup_styling(self): + self.set_title(f"Tesseract OCR") + self.set_icon_from_file( settings.get_window_icon() ) + self.set_gravity(5) # 5 = CENTER + self.set_position(1) # 1 = CENTER, 4 = CENTER_ALWAYS + + self.set_default_size(480, 600) + self.set_size_request(480, 600) + + + def _setup_signals(self): + self.connect("delete-event", self._tear_down) + + def _subscribe_to_events(self): + event_system.subscribe("show_ocr", self._show_ocr) + + def _load_widgets(self): + scrolled_window = Gtk.ScrolledWindow() + box = Gtk.Box() + download_button = Gtk.Button(label = "Download Tesseract OCR") + run_ocr_button = Gtk.Button(label = "Run OCR") + text_view = GtkSource.View() + + download_button.set_tooltip_text("Download Tesseract OCR") + run_ocr_button.set_tooltip_text("Run OCR") + + download_button.connect("clicked", self._download_tesseract_ocr) + run_ocr_button.connect("clicked", self._run_ocr_button) + + box.set_orientation(Gtk.Orientation.VERTICAL) + text_view.set_vexpand(True) + text_view.set_show_line_numbers(True) + ctx = scrolled_window.get_style_context() + ctx.add_class("container-padding-5px") + + scrolled_window.add(text_view) + box.add(download_button) + box.add(run_ocr_button) + box.add(scrolled_window) + self.add(box) + + box.show_all() + + if os.path.exists(self.tesseract_path): + download_button.hide() + else: + run_ocr_button.hide() + text_view.hide() + + + def _show_ocr(self): + self.show() + + def _tear_down(self, widget = None, eve = None): + self.hide() + + # Return True to NOT propigate call (would actually destroy window n children) + return True + + def _download_tesseract_ocr(self, button): + parent = button.get_parent() + spinner = Gtk.Spinner.new() + + parent.add(spinner) + GLib.idle_add(self.__download_file, self.download_url, spinner) + + @daemon_threaded + def __download_file(self, url = None, spinner = None): + if not url: return + if not spinner: return + if not url == self.download_url: return + + spinner.show() + spinner.start() + + # NOTE the stream = True parameter below + with requests.get(url, stream = True) as r: + r.raise_for_status() + + with open(self.tesseract_path, 'wb') as f: + for chunk in r.iter_content(chunk_size = 8192): + # If chunk encoded response uncomment and set chunk_size parameter to None. + # if chunk: + f.write(chunk) + + spinner.stop() + + GLib.idle_add(self.__process_dl_finished, spinner) + + def __process_dl_finished(self, spinner = None): + if not spinner: return + + parent = spinner.get_parent() + download_button, \ + run_ocr_button, \ + text_view, \ + spinner = parent.get_children() + + parent.remove(spinner) + if os.path.exists(self.tesseract_path): + self.__set_as_executable(self.tesseract_path) + run_ocr_button.show() + text_view.show() + download_button.hide() + + def __set_as_executable(self, tesseract_path): + if not tesseract_path: return + + os.access(tesseract_path, os.X_OK) + try: + command = ["chmod", "544", tesseract_path] + with subprocess.Popen(command, stdout = subprocess.PIPE) as proc: + result = proc.stdout.read().decode("UTF-8").strip() + except Exception as e: + logger.error(f"Couldn't chmod\nFile: {properties.file_uri}") + logger.error( repr(e) ) + + + def _run_ocr_button(self, button): + active_image = event_system.emit_and_await("get_active_image_path") + + if not active_image: return + + scrolled_window = button.get_parent().get_children()[2] + text_view = scrolled_window.get_children()[0] + + command = [self.tesseract_path, active_image, "stdout"] + result = subprocess.run(command, stdout = subprocess.PIPE) + data = result.stdout.decode('utf-8') + + logger.debug(command) + logger.debug(data) + text_view.get_buffer().set_text(data, -1) diff --git a/user_config/usr/share/mirage2/icons/ocr.png b/user_config/usr/share/mirage2/icons/ocr.png new file mode 100644 index 0000000000000000000000000000000000000000..89630fb4a9732292a8a3d9e11d5b8a838b35b8bd GIT binary patch literal 8263 zcmeHKc{J4h_s7_>ON&t1CB_V9FvB3mzJ{z>#w?6&EQ3L|kX^|VDO>ifL?KyAvPO|z zBzvI}BK(GWo~P&hzUTbTbI$Mk-!tcY=JWa7_r0(8y{~)U_k3=&p}qz)0~Z4Y1qHK~ zCeny}S0-QL^t9yvLEFZCvYf)-#GGh^^5u2+B;fEaSYD!^JC+wq!s93?NTZ+9tWvpQ zEXe)37z>q~yhl7?S2CsL(`H}Mh^CZwt|+dg@40m3>lR9VSEi<-toxP~OxOh3&r!ka){WccRefAtYu`|iPh&4X)U~UY-LvXVrSjacapKbI;*aM)IZw2H^0Sh z{5b#M?EP(pfQIZHpX)m;tL-DQq@AzMMB{p2If*N3Lcu)2eiMa+q1guhXG?FozXzUZ zw7EXbylz(gk+5npu9$&43B*0_+>kmm3@MMJDMKXE8jP$RlT+KRZnN z7jQR%Kd+CUQV8?Qr0-SSOV~WiQe88fuz8|82;KIiH7Bd)Tuh+n$&jtxxr=bhpr)oK zCNs(wV(aB$yMXP}xgz`TQ$K66^tHxE&*)j+{|cqxvtM~`|1_jIsY57h_a0x-=JU!B z19Y~^iwG8K#eA>@Hj4k0yY_=RKlR-@**tfUKoCWuU@Vnh3au%7M7RMJJW+JE=yTCh06oF;CX+N9V}DpJ7Zn?v6KC*Q zlGdT`6wZJdqsCAsbxQ6Hw>>ZCEmr&v>&GD8O&ZygGy4%%B!PLRCrtpDL*Fi?r_#&@ z<3K@5`R6EkdJ8T~!?S$5wWyR6+Eq(iV;Y-gXSW=(-zi;PzISf-cHm~w*rVFg9EHsa zi-22jACUaxM`mdQp^6)$R!fr>G9e=7&ihM2T%)P(jNFrT$7Ku33m4y%$zL3+k*FR{ z_L!ZRs|do|?u{4-8>?Pbf7WCs>% zghy(xrtU?KbiIVrUFc4VX|UG&D0@A>>e~s zpk}PA50U%Q=)(PQ-nz7BKH|fw=M6?@Q(-;@yPL<%S(|O!{awx-^Q;{F8faSJ7LuMA zRlTDkBYsnzhPG67+9n3AccJV4C*QLif8!vmzBeR$M<3JL%Vg+-W-XhCh0*TMUi(~R@GNEgv(dPTgQy^ZIhU6{WqP4&tu4%S%Av=Tnudf(YrS zPcE9Pf4#*5w70quBEo!>yA64zWbw?=BSNVO^og&MO|Do!%WboMQrYew!Do}79IY*- zYF8%;ISQl6jVePbom(iq5id+lIex8;C+lIonS*5=qVzZ}JUqM&P?K-Yx`5g=ogRVg z%7b3?yx7RO_Cr%kP;Jf`a=QBKI0T1z@0f$V(wy0}ucY?BWD*0;B? zZc1_2;ErreVGVXj&0PMg#i@e@bx4}Fij#RCV!uu)7Nuo8zPuO_=Ly|>t$ERUn!cU0 zXyf)Lx6>1m<@xA76%>h3JamWaJ2;C@0#Wuv3xB8 zPKuty*IMGY)&yxYZwc6LmH2}N>sDlF?g~mYVtLa|L~RB7iRbj096m31y>GZvtM0=6 zv50xXnp$|6fuZwg50PEu>vhiXXZMakOx*;gg+}R7ISt>!x;K0sVqC7HW`nhbU3K!t z45JrLR>p5UEfwmw?g!X}GP+BToVvmH)wJ1d5NgCz7i@t<#@gqIXZLnXokm>m?&rn= zq$9qI0vqn$(ApOf31Z zV|$oSYO++Gb~6%uBTwFkphDVFu0*zk48MZ$ZOrlYh+X`VZDMvaZL#JO`<=@$N}Ea9 zq#yCF8JO+$2h+*ArTZoeR(m5=DO=-J7s}CgwGW_C9hL_7@~v+PZ}4586yMJqKDUp zw&?|y7sJ5fZ|QoDbMpPM2s$EAIZ8MGn4Jysj|cePKeXH@XGr679{+ z`CxnH`O~f%_IpZp>3hVI+8y;Ct}aY1)3T;*eu7M-)7(iwGc910efj;d)ttG71v5Z&*hMEz_FX3?IqjkJ-E|k@iOooW!Rh7=b2_9% zrOV>?Pge*!x)1t@R3{!jVjEH5=-sD4E5_;{2Zp?B3xYqc7+T95@fp8geHrSyrIt54 zh(UFIs$pm4b9<+qVpd#pES;9wKkxB6ojm`gmgx8i?%4_rIGGf1=JDU!d`7u}Lp2$C)Tte}*m zTg@tZJ}X*$$Vq?hGRFgf*TQOmhnq%UB;CxG8*dlv?{2+P4tC|$sax7(EO7i3)yJUh zM?paufmc;E)KXRbV-_M$Kz9NY5t`2ok3|?5=kp(@k&lw2F4i|is_{HxP%C0{iZDH` zYqKo?!D|{_=Ajw78m1I`iSDF>_36tr7uM*WkF;p@t7$(4iB?B_Zk-9d{TZ-)@xMiXtM)ZlkngZXpfwA9G= zr*LU9pW&Ye3S8YMMG_w5mL1LFxSmG5@Lr=r>`aq~M6LK*cT~E554%QGn+l^Cusfze zT|tZI67=quh`9Fi^T1xq;#RKB45?03IHyXEdEQU;TVFmdE!#;k#GqT<1OWD^r{l$-^lLPaaOq^>pEAf~y1y zL$Jq6kX+r#!z%@af)dFcg?7dgdF`CpZcw%sHBc%E-2=W_(-;qdkhl4=AzP=K^(h>wu2aqHT1_Obm zKvGgbvIfx0&y9#80o}X=4j_JFAhBL(PrN%3PjKTsz(m;-yom^Ye)2l+AO5+z>*@Uo z@8A!Y5x)Xp)y%ZPY;eHpuG>=(?TNn5B7&+2xvS8e)woF2?j%Vyr%3{F3L20>p5m9bv>;V)RTmnzVk(7eS%0gr%fiQb8 z1PDRHq=9m>G8mu?4u_LLp)uq|ncpA`Jn`hJM7jL#)d3WS3?&6YVbL%&4rninvIjz> zq{*OivS6Sb8f|YcBWVwT$jKf;VbE}Of~PBroKCze$^i>VQp2SxGV#(lRg^8JIL!<~M6otfv>b5)UvX z!4lF^hvo-qfs=zF6N@^iQ!>DzoE!^W)f0;%5$ou;pihiCb zW30!|x1X;r_`{>b%X@ez;3)LZ5WG-6Sj?fHWUQY{Xh)Qr1D4!9ewFJVa{PZN1sSk3 zP8x%P0PUgDU?2o03nOPz4h_WEV^OlwP#H872Kzg@7Xe4~MR{T`IFNZHb44!CL#}ww z9VSZj@7lhO*n=#P2?K(qfqxMu^NTRhuYf@ZJ>!pv6+r)s6NN*C-J4%0~z|?P5u$T|I+m@UH^!If290xb^S}%KVsk?DgRqt|8I0L z{54Kt-N;`-zT{!4n$WmN9<*rfbu^F^h7=AIuRk2;I*=T3*R=4WpkU-UxF{)Z-{B++ z>4;i-YIF+#HcBwtdo81E3JO4#7V?6L!=hp&i3Gd&ijs!vROsVPT(1JI80j^ ztdn?@_*G1W7dvC~j)A)Q79>DV+4DkGbeQ;Cj-6tQJ%=!MtKLdnzWI1ieOA`~c8t-D z$7RbsUiO@GJnyRNtzk_WRjWJQKeGJh?mEn+N9YPCbzr2@4{qZ@kYo6~xQrtTbh?zn zR03@SbayEILkXcQgm3Y=y(u2|)M}VXJZWrcJ0WkH4MGSZVc1f-ym!>u2&c1N+An;X zt#%zIII`ooDa)vt`b@wDvCaNd2*S5VT%+jFnozUQ00yU38XGFEP}>j>njL0}`Vi>k zQl<*oB!k+8CiB$ulI@dnEY$&}0UV>Qxf2ljPZrJ4Y-hH>`v4HBbo1>U3hX$b`J&ww z%o5#WfGuTHSN#dQGiy@r8UuUcYh^Xt#B2oJ@aa55IXrfXd+NL))$P!cW~Tl_4%;MZW|N$&pA@QggzmoJ)qPfrrC6bN zDw$?4^lmkoy`)g-rTkh#0~!rXnObhKnxM;jam8%QFeZ-iD8B#nyx#H1$WYZIZ(_gH zgHdBs<&9F~8W~b<)lzOp9w`K_1833<@)+*qs)Jg}}=%(0AL6@YZGsj9|Yq^Maz0XvX zl$&MFQ14Qqv^wB zU)H(s$?^fp&RTUl2jZTYeu!+t;Oa?WM2(QodhR+^Y-w^UW2iw$09}rA2+P!t69=!N zIwf}q_DK9t!(fK<%IKFt9A)XO`laO9W-Ddq^Q8X0qoiZQ=WO%O)rYc%>UbJ6!A^Xy zWqC?lrn3NH`34{7xpQXPdu#a`Exc=WAryZ9*;loCq4O>Kv_Tx7?oKWxU1&1369sFp zu*yHx+qv{YwAf9jS&{JV?GM`