I am using nearley.js and moo.js to make a programming language. With moo.js, there is a NL, which means new line regex matching, and mine is `/[\r\n] / but there seems to be a problem. With the tutorial that I am following, it says
Unexpected WS token: " ". Instead, I was expecting to see one of the following:
A identifier token based on:
var_assign → ● %identifier _ "=" _ expr
A identifier token based on:
fun_call → ● %identifier _ "(" _ fun_call$ebnf$1 ")"
at Parser.feed (C:\Users\mcqui\OneDrive\Desktop\Programming language for capstones\node_modules\nearley\lib\nearley.js:343:27)
at main (C:\Users\mcqui\OneDrive\Desktop\Programming language for capstones\parse.js:15:12)
I don't know what to do because I checked everything and It doesn't seem to work. Here is my lexer.js file:
const moo = require('moo')
const fs = require("mz/fs")
let lexer = moo.compile({
WS: /[ \t] /,
comment: /\/\/.*?$/,
number: /0|[1-9][0-9]*/,
string: /"(?:\\["\\]|[^\n"\\])*"/,
lparen: '(',
rparen: ')',
lbrace: '{',
rbrace: '}',
identifier: /[a-zA-Z][a-zA-Z_0-9]*/,
fatarrow: '=>',
assign: '=',
NL: { match: /[\r\n] /, lineBreaks: true }
});
module.exports = lexer;
async function main(){
const code = (await fs.readFile("main.kpp")).toString()
lexer.reset(code)
while (true){
const token = lexer.next();
if(!token){
break;
}
console.log(token);
}
}
Here is my parse.js file:
const nearley = require("nearley");
const grammar = require("./kpp.js");
const fs = require('mz/fs');
async function main() {
const filename = process.argv[2];
if(!filename){
console.log("Provide a .kpp file");
return;
}
const code = (await fs.readFile(filename)).toString();
const parser = new nearley.Parser(nearley.Grammar.fromCompiled(grammar));
parser.feed(code);
if(parser.results.length > 1){
console.log('Error');
} else if (parser.results.length == 1){
const ast = parser.results[0];
const outputFilename = filename.replace('.kpp', '.ast');
await fs.writeFile(outputFilename, JSON.stringify(ast, null, " "))
console.log(`WROTE ${outputFilename}.`)
} else{
console.log("Parse error")
}
}
main().catch(err => console.log(err.stack));
Here is the file that I am running(in my own programming lanuage:
f = () => 1
g = (a b) => add(multiply(2 a) b)
h = (x y) => {
show("x=" x)
show("y=" y)
g(x y)
}
result = h(3 4)
show("result =" result)
show("f =" f())
show("f(gf f) =" g(f() f()))
And here is the nearley.js config file:
@{%
const myLexer = require("./lexer")
%}
@lexer myLexer
statements
->statement
{%
(data) => {
return [data[0]]
}
%}
| statements %NL statement
{%
(data) => {
return [...data[0], data[2]]
}
%}
statement
-> var_assign {% id %}
| fun_call {% id %}
var_assign
-> %identifier _ "=" _ expr
{%
(data) => {
return {
type: "var_assign",
var_name: data[0],
value: data[4]
}
}
%}
fun_call
-> %identifier _ "(" _ (arg_list _):? ")"
{%
(data) => {
return {
type: 'fun_call',
fun_name: data[0],
arguments: data[4] ? data[4][0] : []
}
}
%}
arg_list
-> expr
{%
(data) => {
return [data[0]]
}
%}
| arg_list __ expr
{%
(data) => {
return [...data[0], data[2]]
}
%}
expr
-> %string {% id %}
| %number {% id %}
| %identifier {% id %}
| fun_call {% id %}
| lamba {% id %}
lamba -> "(" _ (param_list _):? ")" _ "=>" _ lamba_body
{%
(data) => {
return {
type: "lamba",
parameters: data[2] ? data[2][0] : [],
body: data[7]
}
}
%}
param_list
-> %identifier (__ %identifier):*
{%
(data) => {
const repeatedPieces = data[1];
const restParams = repeatedPieces.map(piece => piece[1])
return [data[0], ...restParams]
}
%}
lamba_body
-> expr
{%
(data) => {
return [data[0]];
}
%}
| "{" _ %NL statements %NL _ "}"
{%
(data) => {
return data[3];
}
%}
_ -> %WS:*
__ -> %WS:
CodePudding user response:
The problem here comes from the fact, that you did not specify anything about the expectation of tabulation.
statements %NL statement
clearly tells the parser, that you are expecting nothing else, than one or more new line characters.
I would suggest you to not specify new line by itself as the end of the string, but instead expect whitespace (newline whitespace):* instead.
You could also use a similar technic, that this lexer and this whitespace rule uses.