I'm making a syntax analyzer for school that reads a c test file in scheme and outputs another in html with the program now colored based on their id. However, I'm having trouble with it recognizing the \
as a character. The teacher said we don't have to worry about respecting the formatting, so line breaks and indents aren't necessary. Here is my code as well as the test file:
#lang racket
(require 2htdp/batch-io)
;shortcuts
(define htmlformat "<!DOCTYPE html> <html> <head> <link rel='stylesheet' href='styles.css'> <title>output.html</title> </head> <body>")
(define htmlformat2 "</body> </html>")
(define number "<span class='number'>")
(define specials "<span class='special'>")
(define ids "<span class='identifier'>")
(define reserved "<span class='reserved'>")
(define comments "<span class='comment'>")
(define libraries "<span class='library'>")
(define endspan "</span>")
;opens input file
(define infile "test.cpp")
;read input file
;char by char
(define file->list-of-chars
(lambda (filename)
(flatten
(map string->list
(read-1strings filename)))))
;char to strings, shows up in reverse order
(define list-of-chars->list-of-strings
(lambda (loc aux result)
(cond
[(empty? loc) result]
[(char-whitespace? (car loc))
(list-of-chars->list-of-strings (cdr loc)
'()
(cons
(list->string aux)
result))]
[(char-punctuation? (car loc))
(list-of-chars->list-of-strings (cdr loc)
'()
(cons
(list->string
(cons (car loc) '()))
(cons
(list->string aux)
result)))]
[else
(list-of-chars->list-of-strings (cdr loc)
(append aux (cons (car loc) '()))
result)])))
; char to list of strings, shows up in corret order
(define file->list-of-strings
(lambda (filename)
(reverse
(list-of-chars->list-of-strings
(file->list-of-chars infile) '() '()))))
(define lst (file->list-of-strings infile))
;regex matching
(define match
(λ (strng)
(cond
[(regexp-match #rx"^[ -]?([0-9] \\.?[0-9]*|\\.[0-9] )$" strng) (string-append number strng endspan)]
[(equal? strng "<iostream>") (string-append libraries "<" endspan libraries "iostream" endspan libraries ">" endspan)]
[(regexp-match #rx"^\\<(.) \\>$" strng) (string-append libraries "strng" endspan)]
[(regexp-match #rx"^(asm|double|new|switch|auto|else|operator|template|break|enum|private|this|case|extern|printf|protected|throw|catch|float|public|try|char|for|register|typedef|class|friend|return|union|const|goto|short|unsigned|continue|if|signed|virtual|default|inline|sizeof|void|delete|int|static|volatile|do|long|struct|while) $" strng) (string-append reserved strng endspan)]
[(regexp-match #rx"^([A-Z]|[a-z]|\\_)(.)*$" strng) (string-append ids strng endspan)]
[(regexp-match #rx"^(\\#|\\[|\\]|\\{|\\}|\\ |\\-|\\/|\\<|\\>|\\<=|\\>=|\\=|\\(|\\)|\\*|\\'|\\;|\\!|\\$|\\%|\\^|\\&|\\?|\"|\\||,)$" strng) (string-append specials strng endspan)]
[(regexp-match #rx"^\\//(.)*$" strng) (string-append comments strng endspan)]
[else ""])))
;applies match to all elements of a list of strings
(define mapp
(λ (lst)
(cond
[(null? lst) '()]
[else (cons (match (car lst))(mapp (cdr lst)))])))
;assigns html format to list of strings
(define htmlList (mapp lst))
; list of strings to single string
(define list-of-strings->string
(lambda (strlst)
(string-join strlst " ")))
;; usage example
;; (define strlst (file->list-of-strings input-filename))
;; (list-of-strings->string strlst)
; converts list of strings to a single string
(define singlestring (list-of-strings->string htmlList))
;creates output file
(define outfile "output.html")
;creates single string adding html header and footer
(define finalstring (string-append htmlformat singlestring htmlformat2))
;writes everything into output file
(write-file outfile finalstring)
Test code:
#include <iostream>
int main(int argc, char *argv[])
{
int i = 0;
for (i = 0; i < 10; i )
{
printf("%i", i);
}
printf("\n");
return 0;
}
CodePudding user response:
What do you mean "recognize \"?
You can "escape the escape code" by using "\\"
printf("\\");
That should print "\".
If you want to print 2, you can do printf("\\\\")
... it goes on.
Other than that, I don't understand your question.
CodePudding user response:
Use
[(char-whitespace? (car loc))
...]
[(eqv? (car loc) #\\)
...]
to recognize backslash.
Note: #\a
means the character a
and thus #\\
means the character \
.