about summary refs log tree commit diff
path: root/old/html-cleaners
diff options
context:
space:
mode:
author nathansmith <nathansmith@posteo.com> 2025-05-23 07:43:34 -0600
committer nathansmith <nathansmith@posteo.com> 2025-05-23 07:43:34 -0600
commit 4e749697600bab526b1cf51bcca493f6d31d2e34 (patch)
tree 10a0274c96667d1482dd7f07d768feaf518d6187 /old/html-cleaners
parent d194694f31002068868a378d121ed0e2db01c378 (diff)
Almost done with this stupidness
Diffstat (limited to 'old/html-cleaners')
-rwxr-xr-x old/html-cleaners/clean_all.scm 14
-rwxr-xr-x old/html-cleaners/clean_up_html.scm 45
2 files changed, 59 insertions, 0 deletions
diff --git a/old/html-cleaners/clean_all.scm b/old/html-cleaners/clean_all.scm
new file mode 100755
index 0000000..82091d9
--- /dev/null
+++ b/old/html-cleaners/clean_all.scm
@@ -0,0 +1,14 @@
+#! /usr/bin/guile \
+-e main -s
+!#
+
+;; Entry point: converts each listed site page from HTML to Org.
+;; For every page name NAME it runs ./clean_up_html.scm on "NAME.html",
+;; pipes the output through tidy and pandoc, and writes "org/NAME.org".
+;; ARGS is the command line; it is not used.
+(define (main args)
+  (for-each
+   (lambda (file)
+     (let* ((command (string-append "./clean_up_html.scm " file ".html"
+                                    " | tidy | pandoc -f html -t org > "
+                                    "org/" file ".org"))
+            (status (system command)))
+       ;; Report a failed conversion instead of silently dropping it,
+       ;; then carry on with the remaining pages.
+       (if (not (zero? status))
+           (simple-format (current-error-port)
+                          "clean_all: converting ~A.html failed (status ~A)\n"
+                          file status))))
+   '("index" "linux_room" "music_room" "nathans_philosophy"
+     "truth" "ttr" "tv_corner" "distro_guides/arch"
+     "distro_guides/fedora" "distro_guides/xfce"
+     "error/404")))
diff --git a/old/html-cleaners/clean_up_html.scm b/old/html-cleaners/clean_up_html.scm
new file mode 100755
index 0000000..6ee81c5
--- /dev/null
+++ b/old/html-cleaners/clean_up_html.scm
@@ -0,0 +1,45 @@
+#! /usr/bin/guile \
+-e main -s
+!#
+
+;; Reads every remaining character from the input port FP and returns
+;; them as a single string ("" if the port is already at end of file).
+;; Characters are collected in a list by a tail-recursive loop and
+;; turned into a string once at the end, so the work is linear in the
+;; input size and the stack does not grow with it.
+(define (read-entire-file-port fp)
+  (let loop ((chars '()))
+    (let ((character (read-char fp)))
+      (if (eof-object? character)
+          ;; CHARS was built newest-first; restore reading order.
+          (list->string (reverse chars))
+          (loop (cons character chars))))))
+
+;; Returns CONTENT with every occurrence of the substring WORD removed.
+;; After each removal the search restarts from the beginning of the
+;; result, so an occurrence formed by joining the text on either side
+;; of a removed match is removed as well.
+;; An empty WORD matches at index 0 without ever shortening CONTENT;
+;; it is treated as "nothing to remove" so the loop cannot run forever.
+(define (trim-words-all content word)
+  (if (string-null? word)
+      content
+      (let loop ((content content))
+        (let ((index (string-contains content word)))
+          (if index
+              (loop (string-append
+                     (substring content 0 index)
+                     (substring content
+                                (+ index (string-length word))
+                                (string-length content))))
+              content)))))
+
+;; Writes the contents of the file FILE-NAME to the current output
+;; port after deleting each string in TABLE-TAGS from it, one tag at a
+;; time in list order.
+(define (clean-up-html file-name)
+  (define table-tags
+    (list "<table border=\"1\" width=\"60%\">"
+          "<table>"
+          "</table>"
+          "<tr>"
+          "<td>"
+          "</tr>"
+          "</td>"))
+  (call-with-input-file file-name
+    (lambda (fp)
+      ;; Thread the text through one removal per tag, then print it.
+      (let strip ((text (read-entire-file-port fp))
+                  (tags table-tags))
+        (if (null? tags)
+            (display text)
+            (strip (trim-words-all text (car tags))
+                   (cdr tags)))))))
+
+;; Entry point.  ARGS is the full command line with the program name
+;; first; the argument after it names the HTML file to clean, and any
+;; further arguments are ignored.  When no file argument is given, a
+;; usage message is printed to standard error and the script exits
+;; with status 1 instead of failing inside `cadr'.
+(define (main args)
+  (if (or (null? args) (null? (cdr args)))
+      (begin
+        (display "usage: clean_up_html.scm FILE\n" (current-error-port))
+        (exit 1))
+      (clean-up-html (cadr args))))