aboutsummaryrefslogtreecommitdiff
path: root/old/html-cleaners/clean_up_html.scm
diff options
context:
space:
mode:
Diffstat (limited to 'old/html-cleaners/clean_up_html.scm')
-rwxr-xr-x old/html-cleaners/clean_up_html.scm 45
1 files changed, 45 insertions, 0 deletions
diff --git a/old/html-cleaners/clean_up_html.scm b/old/html-cleaners/clean_up_html.scm
new file mode 100755
index 0000000..6ee81c5
--- /dev/null
+++ b/old/html-cleaners/clean_up_html.scm
@@ -0,0 +1,45 @@
+#! /usr/bin/guile \
+-e main -s
+!#
+
+;; Reads every remaining character from the input port FP and returns them
+;; as a single string ("" when the port is already at end-of-file).
+;;
+;; Characters are collected in a tail-recursive loop and converted to a
+;; string once at the end, so the work is linear in the input size and the
+;; recursion depth does not grow with the length of the file.
+(define (read-entire-file-port fp)
+  (let loop ((chars '()))
+    (let ((character (read-char fp)))
+      (if (eof-object? character)
+          ;; CHARS was built newest-first; restore reading order.
+          (list->string (reverse chars))
+          (loop (cons character chars))))))
+
+;; Returns CONTENT with every occurrence of the string WORD removed.
+;;
+;; After each removal the search restarts from the beginning of the
+;; shortened string, so an occurrence that only comes into existence when
+;; the text on either side of a removed match is joined is removed as well.
+;; An empty WORD matches at index 0 without shortening the string, which
+;; would recurse forever; CONTENT is returned unchanged in that case.
+(define (trim-words-all content word)
+  (if (string-null? word)
+      content
+      (let ((index (string-contains content word)))
+        (if index
+            (trim-words-all (string-append
+                             (substring content 0 index)
+                             (substring content
+                                        (+ index (string-length word))
+                                        (string-length content)))
+                            word)
+            content))))
+
+;; Prints the contents of the file FILE-NAME to the current output port
+;; with the table-related HTML tags listed below stripped out.  The tags
+;; are removed one after another, in the order in which they are listed.
+(define (clean-up-html file-name)
+  (call-with-input-file file-name
+    (lambda (port)
+      ;; Thread the text through one removal pass per tag, then print it.
+      (let strip ((text (read-entire-file-port port))
+                  (tags '("<table border=\"1\" width=\"60%\">"
+                          "<table>"
+                          "</table>"
+                          "<tr>"
+                          "<td>"
+                          "</tr>"
+                          "</td>")))
+        (if (null? tags)
+            (display text)
+            (strip (trim-words-all text (car tags))
+                   (cdr tags)))))))
+
+;; Script entry point (selected by the "-e main" switch in the header).
+;; ARGS is expected to be the command line as a list: the script name
+;; followed by its arguments.  The first argument names the HTML file to
+;; clean; any further arguments are ignored.  When no file is given, a
+;; usage line is written to the error port and the script exits with
+;; status 1 instead of failing inside `cadr'.
+(define (main args)
+  (if (and (pair? args) (pair? (cdr args)))
+      (clean-up-html (cadr args))
+      (let ((err (current-error-port)))
+        (display "Usage: " err)
+        (display (if (pair? args) (car args) "clean_up_html.scm") err)
+        (display " FILE" err)
+        (newline err)
+        (exit 1))))