Home > Back-end > Using flex to convert HTML to TXT
Using flex to convert HTML to TXT

Time:03-21

This is a course assignment for Compiler Principles; I can't quite work it out and would appreciate some help.
I need to use flex to convert an HTML file into a TXT file.
The requirements are:
(1) Remove all HTML tags from the HTML file and print the converted text directly to the screen.
(2) Delete everything between the start tags <script...>, <style...> and <form...>
and the corresponding end tags </script>, </style> and </form>.
(3) Extract the anchors of all hyperlinks and save them in a new text file, hyplink.txt.

/* Following the NONEED, SCPT and FORM examples, please complete the regular expressions
and the corresponding actions for the TAG and LINK start conditions. */

html2txt.l is as follows:

 /* Definitions section: the C prologue between %{ and %} is copied verbatim into the generated scanner. */
%{
/* The header names were lost when the page was extracted; these two cover
   FILE/printf/fopen and exit, which the rest of the file uses. */
#include <stdio.h>
#include <stdlib.h>
FILE *hyplink;  /* output file opened in main() as hyplink.txt for the hyperlink anchors */
int lastchar;   /* if the last output char is newline */
%}
/* Exclusive start conditions (%x): while one is active, only rules prefixed
   with that condition are eligible to match.
     NONEED  - entered by the <style ...> and <iframe ...> start tags
     FORM    - entered by the <form ...> start tag
     LINK    - entered by "<a"/"<A"
     SCPT    - entered by the <script ...> start tag
     TAG     - entered by any other "<"
     COMMENT - declared, but no rule in this file enters or uses it */
%x NONEED
%x FORM
%x LINK
%x SCPT
%x TAG
%x COMMENT
%%
    /* Rules section of html2txt.l.
     *
     * INITIAL rules switch into an exclusive start condition when a start tag
     * is seen; the <SCPT>, <NONEED> and <FORM> rules discard everything up to
     * the matching end tag and then return to INITIAL.
     *
     * NOTE(review): the <COND> prefixes and parts of the NONEED end-tag rule
     * were lost in the copy this was restored from; they are reconstructed
     * here from the surrounding rules and the assignment text - confirm
     * against the original hand-out.
     */

("<"[sS][cC][rR][iI][pP][tT][^>]*">")       BEGIN(SCPT);
("<"[sS][tT][yY][lL][eE][^>]*">")           BEGIN(NONEED);
"<"[iI][fF][rR][aA][mM][eE][^>]*">"         BEGIN(NONEED);
("<!"[sS][cC][rR][iI][pP][tT][^>]*">")      BEGIN(SCPT);
("<"[Ff][oO][rR][Mm][^>]*">")               BEGIN(FORM);

"<"[aA]                                     BEGIN(LINK);
"<"                                         BEGIN(TAG);

<NONEED>"</"([sS][tT][yY][lL][eE]|[iI][fF][rR][aA][mM][eE])[^>]*">"    BEGIN(INITIAL);
<NONEED>.|\n                                ;


<SCPT>"</"[sS][cC][rR][iI][pP][tT][^>]*">"+(\n)*    BEGIN(INITIAL);
<SCPT>.|\n                                  {;}

<FORM>("</"[Ff][oO][rR][Mm][^>]*">"+(\n)*)  BEGIN(INITIAL);
<FORM>.|\n                                  {;

    /* Following the NONEED, SCPT and FORM rules above, please complete the
       regular expressions and the corresponding actions for the TAG and LINK
       start conditions. No <TAG> or <LINK> rule exists yet, so nothing in
       this file returns the scanner to INITIAL once "<" or "<a" is seen. */
}
(&[qQ][uU][oO][tT]";")                      {
    printf("\"");   /* &quot; is the double-quote character */
}
(&[gG][tT]";")                              {
    printf(">");
}
(&[lL][tT]";")                              {
    printf("<");
}
(&[aA][mM][pP]";")                          {
    printf("&");
}
(&[nN][bB][sS][pP]";")                      {
    printf(" ");
}

[\r\t]+                                     {;}     /* drop carriage returns and tabs */
("</"[lL][iI]">")                           {;}
("</"[pP]">")                               {;}
("-->")                                     {;}




%%

Int main (int arg c, char * * argv)
{
+ + argv - arg c;
If (arg c & gt; 0 {
If ((yyin=fopen (argv [0], "rb"))==NULL) {
Printf (" the file % s could 'nt open! \ n ", argv [0]);
exit (1);
}
}
The else
Yyin=stdin;
If ((hyplink=fopen (" hyplink. TXT ", "w"))==NULL) {
Printf (" coud 'nt create the hyperlink to the file! \n");
exit(1);
}
Yylex ();
The fclose (yyin);
The fclose (hyplink);
return 0;
}

/*
 * End-of-input hook called by the flex scanner.
 * Returning 1 tells the scanner there is no further input, so yylex() ends.
 */
int yywrap(void)
{
    return 1;
}
  • Related