aboutsummaryrefslogtreecommitdiff
path: root/old/html-cleaners/clean_up_html.scm
blob: 6ee81c5443d2425907a7da2664b5733d1a84d228 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#! /usr/bin/guile \
-e main -s
!#

;; Loads an entire text file into a string.
;;
;; FP is an open input port.  Characters are read with `read-char' until
;; end-of-file, and the collected text is returned as a fresh string (the
;; empty string when the port is already at end-of-file).  The port is not
;; closed here.
(define (read-entire-file-port fp)
  ;; Accumulate characters in a list (newest first) in a tail-recursive
  ;; loop and build the string once at the end.  Appending one character
  ;; at a time with `string-append' would recopy the accumulated text on
  ;; every step and keep one pending call per character.
  (let loop ((chars '()))
    (let ((character (read-char fp)))
      (if (eof-object? character)
          (list->string (reverse chars))
          (loop (cons character chars))))))

;; Returns CONTENT with every occurrence of the string WORD removed.
;;
;; The leftmost occurrence is cut out repeatedly until none remains, so an
;; occurrence that only comes into existence when the text on both sides
;; of a removed match is joined is removed as well.  Matching is an exact
;; substring comparison.  An empty WORD leaves CONTENT unchanged (it would
;; otherwise match at index 0 forever).
(define (trim-words-all content word)
  (let ((word-length (string-length word)))
    (if (zero? word-length)
        content
        (let loop ((content content)
                   (start 0))
          (let ((index (string-contains content word start)))
            (if index
                (loop (string-append
                       (substring content 0 index)
                       (substring content (+ index word-length)))
                      ;; Nothing before INDEX matched, so a new match can
                      ;; only straddle the splice point; resume the search
                      ;; just far enough back to catch it.
                      (max 0 (+ 1 (- index word-length))))
                content))))))

;; Reads the file named FILE-NAME, strips the table markup strings listed
;; in `unwanted-tags' from its text (exact substring matches, processed in
;; list order), and writes the remaining text with `display'.
(define (clean-up-html file-name)
  (define unwanted-tags
    '("<table border=\"1\" width=\"60%\">"
      "<table>"
      "</table>"
      "<tr>"
      "<td>"
      "</tr>"
      "</td>"))
  (call-with-input-file file-name
    (lambda (port)
      ;; Thread the text through one removal pass per tag, then print it.
      (let strip ((text (read-entire-file-port port))
                  (tags unwanted-tags))
        (if (null? tags)
            (display text)
            (strip (trim-words-all text (car tags))
                   (cdr tags)))))))

;; Script entry point (invoked through the `-e main' switch).
;;
;; ARGS is the command line as a list of strings: the program name
;; followed by its arguments.  The first argument names the HTML file to
;; clean; any further arguments are ignored.  When no file is given, a
;; usage line is written to the error port and the script exits with
;; status 1 instead of failing inside `cadr'.
(define (main args)
  (if (or (null? args) (null? (cdr args)))
      (let ((err (current-error-port)))
        (display "Usage: " err)
        (display (if (null? args) "clean_up_html.scm" (car args)) err)
        (display " FILE" err)
        (newline err)
        (exit 1))
      (clean-up-html (cadr args))))