diff --git a/.gitmodules b/.gitmodules index 5eb1217..4fec0c6 100644 --- a/.gitmodules +++ b/.gitmodules @@ -22,6 +22,9 @@ [submodule "ox-chameleon"] path = lisp/ox-chameleon url = https://github.com/tecosaur/ox-chameleon.git +[submodule "pdftotext"] + path = lisp/pdftotext + url = https://github.com/tecosaur/pdftotext.el.git [submodule "ob-julia"] path = lisp/ob-julia url = https://github.com/nico202/ob-julia.git diff --git a/config.org b/config.org index 1202f97..1d5d1c7 100644 --- a/config.org +++ b/config.org @@ -10313,156 +10313,39 @@ priority of =mypyls= Sometimes I'm in a terminal and I still want to see the content. Additionally, sometimes I'd like to act on the textual content and so would like a plaintext version. - -#+begin_info -This is a candidate for a dedicated package. -Let me know if you'd like to see this. -#+end_info - -Thanks to src_shell{pdftotext} we have a convenient way of performing this -conversion. - -#+begin_src emacs-lisp -(defun pdf-text--update (&optional _window) - (when (eq major-mode 'pdf-text-mode) - (let* ((converted-file (expand-file-name (concat - (file-name-base buffer-file-name) - "-" - (substring (secure-hash 'sha1 (expand-file-name buffer-file-name)) 0 6) - ".txt") - temporary-file-directory)) - (width (number-to-string (- (min (window-width) fill-column) - (if display-line-numbers display-line-numbers-width 0)))) - (width-adjusted-file (concat (file-name-sans-extension converted-file) "-w" width ".txt"))) - (unless (and (file-exists-p converted-file) - (> (time-convert (file-attribute-modification-time (file-attributes converted-file)) 'integer) - (time-convert (file-attribute-modification-time (file-attributes buffer-file-name)) 'integer))) - (call-process "pdftotext" nil nil nil "-layout" "-eol" "unix" buffer-file-name converted-file)) - (unless (and (file-exists-p width-adjusted-file) - (>= (time-convert (file-attribute-modification-time (file-attributes width-adjusted-file)) 'integer) - (time-convert 
(file-attribute-modification-time (file-attributes converted-file)) 'integer))) - (call-process "fmt" nil (list :file width-adjusted-file) nil "-w" width converted-file)) - (unless (and (boundp 'pdf-text--file) - (string= pdf-text--file width-adjusted-file)) - (let ((pos (when (boundp 'pdf-text--file) (pdf-text--position-info)))) - (with-silent-modifications - (let ((inhibit-read-only t) - (coding-system-for-read 'utf-8)) - (erase-buffer) - (insert-file-contents width-adjusted-file) - (while (re-search-forward "\n?\f\n?" nil t) - (replace-match "\n\f\n")) - (goto-char (point-min))) - (setq-local pdf-text--file width-adjusted-file)) - (setq-default saved-pos pos) - (when pos (ignore-errors (pdf-text--goto-pos pos)))))))) +Thanks to src_shell{pdftotext} we have a convenient way of performing this conversion. +I've integrated this into a little package, =pdftotext.el=. +#+begin_src emacs-lisp :tangle packages.el +(package! pdftotext :recipe (:local-repo "lisp/pdftotext")) #+end_src -Now we just need to make a mode to use this. - -#+begin_src emacs-lisp -(define-derived-mode pdf-text-mode so-long-mode "PDF Text" ; so-long for the initial buffer load time - "Major mode for viewing the plaintext version of a PDF." - (set-buffer-multibyte t) - (read-only-mode t) - (add-hook 'before-save-hook (lambda () (user-error "Will not overwrite PDF with plaintext version"))) - (dolist (hook '(window-configuration-change-hook - window-size-change-functions - display-line-numbers-mode-hook)) - (add-hook hook 'pdf-text--update)) - (pdf-text--update) - (text-mode) - (setq mode-name "PDF Text")) -#+end_src - -In src_elisp{(pdf-text--update)} there's mention of position saving and -restoring. This needs to be implemented, and it's a bit difficult since the line -numbers and buffer positions are liable to change. So, instead we can try to -take note of some markers (such as the line breaks) and try to make our way to -them. 
- -#+begin_src emacs-lisp -(defun pdf-text--position-info () - (list :page-no (let ((current-point (point)) - (page-no 0)) - (save-excursion - (while (search-forward "\f" current-point t) - (setq page-no (1+ page-no)))) - page-no) - :par-start (save-excursion - (forward-paragraph -1) - (forward-line 1) - (thing-at-point 'line t)) - :previous-line-content (save-excursion - (forward-line -1) - (thing-at-point 'line t)))) - -(defun pdf-text--goto-pos (pos) - (goto-char (point-min)) - (search-forward "\f" nil nil (plist-get pos :page-no)) - (re-search-forward (replace-regexp-in-string " +" "[ \n]+" (regexp-quote (plist-get pos :par-start)))) - (unless (string= (plist-get pos :par-start) - (plist-get pos :previous-line-content)) - (re-search-forward (replace-regexp-in-string " +" "[ \n]+" (regexp-quote (plist-get pos :previous-line-content))) - (save-excursion (forward-paragraph 1) (point))))) -#+end_src - -Unfortunately while in isolated testing this position restoring works well, for -some reason as it's currently used it doesn't seem to work at all. - The output can be slightly nicer without spelling errors, and with prettier page feeds (=^L= by default). #+begin_src emacs-lisp -(add-hook 'pdf-text-mode-hook #'spell-fu-mode-disable) -(add-hook 'pdf-text-mode-hook (lambda () (page-break-lines-mode 1))) #+end_src This is very nice, now we just need to associate it with =.pdf= files, and make sure =pdf-tools= doesn't take priority. -#+begin_src emacs-lisp -(defconst pdf-text-auto-mode-alist-entry - '("\\.[pP][dD][fF]\\'" . pdf-text-mode) - "The entry to use for `auto-mode-alist'.") -(defconst pdf-text-magic-mode-alist-entry - '("%PDF" . pdf-text-mode) - "The entry to use for `magic-mode-alist'.") - -(defun pdf-text-install () - "Add a \".pdf\" associaton for all future buffers." 
- (interactive) - (add-to-list 'auto-mode-alist pdf-text-auto-mode-alist-entry) - (add-to-list 'magic-mode-alist pdf-text-magic-mode-alist-entry) - (when (featurep 'pdf-tools) - (setq-default auto-mode-alist - (remove pdf-tools-auto-mode-alist-entry auto-mode-alist)) - (setq-default magic-mode-alist - (remove pdf-tools-magic-mode-alist-entry magic-mode-alist)))) - -(defun pdf-text-uninstall () - "Remove the \".pdf\" associaton for all future buffers." - (interactive) - (setq-default auto-mode-alist - (remove pdf-text-auto-mode-alist-entry auto-mode-alist)) - (setq-default magic-mode-alist - (remove pdf-text-magic-mode-alist-entry auto-mode-alist))) -#+end_src - Lastly, whenever Emacs is non-graphical (i.e. a TUI), we want to use this by default. #+begin_src emacs-lisp :tangle (if (executable-find "pdftotext") "yes" "no") -(unless (display-graphic-p) - (pdf-text-install) - ;; From Doom's :tools pdf (use-package! pdf-tools) - (setq-default auto-mode-alist - (remove '("\\.pdf\\'" . pdf-view-mode) auto-mode-alist)) - (setq-default magic-mode-alist - (remove '("%PDF" . pdf-view-mode) magic-mode-alist)) +(use-package! pdftotext + :init + (unless (display-graphic-p) + (add-to-list 'auto-mode-alist '("\\.[pP][dD][fF]\\'" . pdftotext-mode)) + (add-to-list 'magic-mode-alist '("%PDF" . pdftotext-mode))) + :config + (unless (display-graphic-p) (after! pdf-tools (pdftotext-install))) + ;; For prettiness + (add-hook 'pdftotext-mode-hook #'spell-fu-mode-disable) + (add-hook 'pdftotext-mode-hook (lambda () (page-break-lines-mode 1))) ;; I have no idea why this is needed - (map! :map pdf-text-mode-map + (map! :map pdftotext-mode-map "" (cmd! (scroll-down mouse-wheel-scroll-amount-horizontal)) "" (cmd! (scroll-up mouse-wheel-scroll-amount-horizontal)))) + #+end_src ** R diff --git a/lisp/pdftotext b/lisp/pdftotext new file mode 160000 index 0000000..459b451 --- /dev/null +++ b/lisp/pdftotext @@ -0,0 +1 @@ +Subproject commit 459b4517e19d7d3bf8ee655ad95c4bcda5577dee