From 4e749697600bab526b1cf51bcca493f6d31d2e34 Mon Sep 17 00:00:00 2001
From: nathansmith
Date: Fri, 23 May 2025 07:43:34 -0600
Subject: Almost done with this stupidness

---
 old/html-cleaners/clean_all.scm     | 20 +++++++++++
 old/html-cleaners/clean_up_html.scm | 68 +++++++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+)
 create mode 100755 old/html-cleaners/clean_all.scm
 create mode 100755 old/html-cleaners/clean_up_html.scm

(limited to 'old/html-cleaners')

diff --git a/old/html-cleaners/clean_all.scm b/old/html-cleaners/clean_all.scm
new file mode 100755
index 0000000..82091d9
--- /dev/null
+++ b/old/html-cleaners/clean_all.scm
@@ -0,0 +1,20 @@
+#! /usr/bin/guile \
+-e main -s
+!#
+
+;; Converts each page named below from HTML to Org.  For every name the
+;; shell pipeline runs ./clean_up_html.scm on NAME.html, pipes the result
+;; through tidy and pandoc, and redirects the output to org/NAME.org.
+;; All paths are relative to the current directory, and the redirect
+;; does not create directories, so org/distro_guides and org/error must
+;; already exist.  ARGS (the command line) is not used.
+(define (main args)
+  (for-each
+   (lambda (file)
+     (system (string-append "./clean_up_html.scm " file ".html"
+                            " | tidy | pandoc -f html -t org > "
+                            "org/" file ".org")))
+   '("index" "linux_room" "music_room" "nathans_philosophy"
+     "truth" "ttr" "tv_corner" "distro_guides/arch"
+     "distro_guides/fedora" "distro_guides/xfce"
+     "error/404")))
diff --git a/old/html-cleaners/clean_up_html.scm b/old/html-cleaners/clean_up_html.scm
new file mode 100755
index 0000000..6ee81c5
--- /dev/null
+++ b/old/html-cleaners/clean_up_html.scm
@@ -0,0 +1,68 @@
+#! /usr/bin/guile \
+-e main -s
+!#
+
+;; Reads every remaining character from the input port FP and returns
+;; them as one string ("" when the port is already at end of file).
+;; Characters are collected in a list and joined once at the end, so the
+;; work is linear in the input size instead of re-copying a growing
+;; string for every character read.
+(define (read-entire-file-port fp)
+  (let loop ((chars '()))
+    (let ((character (read-char fp)))
+      (if (eof-object? character)
+          (list->string (reverse chars))
+          (loop (cons character chars))))))
+
+;; Returns CONTENT with every occurrence of WORD removed.  The search
+;; restarts from the beginning after each removal, so an occurrence that
+;; only forms once an earlier one is cut out is removed as well.
+;; An empty WORD matches at index 0 without shortening CONTENT, which
+;; would recurse forever; CONTENT is returned unchanged in that case.
+(define (trim-words-all content word)
+  (if (string-null? word)
+      content
+      (let ((index (string-contains content word)))
+        (if index
+            (trim-words-all (string-append
+                             (substring content 0 index)
+                             (substring content
+                                        (+ index (string-length word))
+                                        (string-length content)))
+                            word)
+            content))))
+
+;; Prints the contents of FILE-NAME to the current output port with
+;; every string in REMOVE-LIST stripped out, in list order.
+;; NOTE(review): the REMOVE-LIST literals below are empty or a bare
+;; newline in this copy of the patch; they look like they lost their
+;; contents in transit -- confirm against the repository before use.
+(define (clean-up-html file-name)
+  (call-with-input-file file-name
+    (lambda (fp)
+      (let ((content (read-entire-file-port fp))
+            (remove-list '(""
+                           "
+"
+                           "
+"
+                           ""
+                           ""
+                           ""
+                           "")))
+        (for-each
+         (lambda (item)
+           (set! content (trim-words-all content item)))
+         remove-list)
+        (display content)))))
+
+;; Script entry point.  ARGS is the command line; its second element
+;; names the HTML file to clean.  Without one, a usage message is
+;; written to the error port and the script exits with status 1
+;; instead of failing inside cadr.
+(define (main args)
+  (if (and (pair? args) (pair? (cdr args)))
+      (clean-up-html (cadr args))
+      (begin
+        (display "usage: clean_up_html.scm FILE\n" (current-error-port))
+        (exit 1))))
-- 
cgit v1.2.3