パーサコンビネータもどきを書いてみた
JavaScriptでパーサコンビネータもどきを書いてみました。
一応、Scalaのパーサコンビネータを参考にしました。
左再帰にも対応できないし、効率なんてこれっぽっちも考えてません。
parsec.js
if(!unyaunya) { var unyaunya = {}; } if(!unyaunya.parsec) { unyaunya.parsec = {}; } /** * unyaunya.parsecパッケージ */ (function(){ var P = unyaunya.parsec; /** * Successクラス * Parseが成功した場合の結果 */ (function(){ P.Success = function(rslt, input) { this.rslt = rslt; this.input = input; } var proto = P.Success.prototype; proto.toString = function(){ return "Success("+(this.rslt == null ? "null" : this.rslt.toString()) + "," + this.input+")"; } })(); /** * Failureクラス * Parseが失敗した場合の結果 */ (function(){ P.Failure = function(errmsg, input) { this.errmsg = errmsg; this.input = input; } var proto = P.Failure.prototype; proto.toString = function(){ return "Failure(" + (this.errmsg == null ? "null" : this.errmsg) + "," + this.input+")"; } })(); /** * Parserクラス * parse - パースを実行する関数。SuccessまたはFailureを返す * conv - パース結果のSuccessを変換する関数 */ P.Parser = function(parse, conv) { this.parse = parse; this.conv = conv; } P.Parser.prototype.copy = function(parser) { this.parse = parser.parse; this.conv = parser.conv; } /** * parserを実行してinputを構文解析する。 */ P.exec = function(parser, input) { var rslt = parser.parse(input); if(rslt instanceof P.Success) { if(parser.conv) { rslt.rslt = parser.conv(rslt.rslt); } return rslt; } return rslt; } /** */ P.success = function(v, c){ return new P.Parser( function(input) { return new P.Success(v, input); }, c ); } /** */ P.failure = function(msg,c){ return new P.Parser( function(input) { return new P.Failure(msg, input); }, c ); } /* * リテラル・パーサ */ P.literal = function(s, c) { return new P.Parser( function(input) { if(s == input.substring(0, s.length)) { return new P.Success(s, input.substring(s.length)); } return new P.Failure(s + " expected", input); }, c ); } /* * 正規表現パーサ */ P.regex = function(re, c){ return new P.Parser( function(input) { var rslt = input.match(re); if(rslt != null) { var s = rslt[0]; if(s == input.substring(0, s.length)) { return new P.Success(s, input.substring(s.length)); } } return new P.Failure(re + " expected", input); }, c ); } /** */ P.seq = function(parsers, c){ if(parsers == undefined) { parsers = new Array(); } return new P.Parser( function(input) { var i var ret; var rslt = []; ret = new P.Success(null, input); for(i = 0; i < parsers.length; i++) { ret = P.exec(parsers[i], (ret.input)); if(ret instanceof P.Failure) { return new P.Failure(ret.errmsg, input); } else if(!(ret instanceof P.Success)) { return new P.Failure("sub-parser returned illegal result.", input); } rslt.push(ret.rslt); } return new P.Success(rslt, ret.input); }, c ); } /** */ P.alt = function(parsers, c){ if(parsers == undefined) { parsers = new Array(); } return new P.Parser( function(input) { var i var ret; var rslt = []; ret = new P.Success(null, input); for(i = 0; i < parsers.length; i++) { ret = P.exec(parsers[i], ret.input); if(ret instanceof P.Success) { return ret; } } return ret; }, c ); } /** */ P.opt = function(p, c) { return P.alt([p, P.success(null)], c); } /** */ P.rep = function(p, c) { return new P.Parser( function(input) { var ret; var rslt = []; ret = P.exec(p, input); while(ret instanceof P.Success) { rslt.push(ret.rslt); ret = P.exec(p, ret.input); } if(rslt.length > 0) { return new P.Success(rslt, ret.input); } else { return new P.Success([], input); } }, c ); } })();
ついでにJsUnitもはじめて使ってみました。
最後のテストケースは、お約束の四則演算のパーサです。
testParser.html
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> <html> <head> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> <title>Sample Test File</title> <script language="JavaScript" type="text/javascript" src="./app/jsUnitCore.js"></script> <script language="JavaScript" type="text/javascript" src="../js/parsec.js"></script> <script language="JavaScript" type="text/javascript"> var parser = unyaunya.parsec; var exec = parser.exec; function testSuccessParser() { var p, rslt; p = parser.success(999); rslt = exec(p, "abcdef1238isl"); assertTrue(rslt instanceof parser.Success); assertTrue(rslt.rslt == 999); } function testFailureParser() { var p, rslt; p = parser.failure("error message"); rslt = exec(p, "abcdef1238isl"); assertTrue(rslt instanceof parser.Failure); assertTrue(rslt.errmsg == "error message"); } function testLiteral() { var p, rslt; p = parser.literal("DIV"); rslt = exec(p, "DIVabc"); assertTrue(rslt.toString(), rslt instanceof parser.Success); assertTrue(rslt.rslt == "DIV"); assertTrue(rslt.input == "abc"); } function testRegex() { var p, rslt; p = parser.regex("DIV"); rslt = exec(p, "DIVabc"); assertTrue(rslt.toString(), rslt instanceof parser.Success); assertTrue(rslt.rslt == "DIV"); assertTrue(rslt.input == "abc"); } function testRegex1() { var p, rslt; p = parser.regex("[0]|([1-9][0-9]*)"); rslt = exec(p, "123abc"); assertTrue(rslt.toString(), rslt instanceof parser.Success); assertTrue(rslt.rslt == "123"); assertTrue(rslt.input == "abc"); rslt = exec(p, "abc123"); assertTrue(rslt.toString(), rslt instanceof parser.Failure); assertTrue(rslt.input == "abc123"); rslt = exec(p, "0123"); assertTrue(rslt.toString(), rslt instanceof parser.Success); assertTrue(rslt.rslt == "0"); assertTrue(rslt.input == "123"); rslt = exec(p, "-0123"); info(rslt.rslt); info(rslt.input); assertTrue(rslt.toString(), rslt instanceof parser.Failure); assertTrue(rslt.input == "-0123"); } function testSeq0() { var p, rslt; p = parser.seq(); rslt = exec(p, "<DIV>"); assertTrue(rslt instanceof parser.Success); assertTrue(rslt.rslt.length == 0); assertTrue(rslt.input == "<DIV>"); p = parser.seq([]); rslt = exec(p, "<DIV>"); assertTrue(rslt instanceof parser.Success); assertTrue(rslt.rslt.length == 0); assertTrue(rslt.input == "<DIV>"); } function testSeq1() { var p, rslt; p = parser.seq([parser.literal("<")]); assertNotUndefined(p.toString(), p); rslt = exec(p, "<DIV>"); assertTrue(rslt instanceof parser.Success); assertTrue(rslt.rslt == "<"); assertTrue(rslt.input == "DIV>"); } function testSeq3() { var p, rslt; p = parser.seq([parser.literal("<"), parser.literal("DIV"), parser.literal(">")]); rslt = exec(p, "<DIV>"); assertTrue(rslt instanceof parser.Success); assertTrue(rslt.input == ""); assertTrue(rslt.rslt.toString() == "<,DIV,>"); } function testAlt0() { var p, rslt; p = parser.alt([]); rslt = exec(p, "<DIV>"); assertTrue(rslt instanceof parser.Success); assertNull(rslt.rslt); assertTrue(rslt.input == "<DIV>"); p = parser.alt(); rslt = exec(p, "<DIV>"); assertTrue(rslt instanceof parser.Success); assertNull(rslt.rslt); assertTrue(rslt.input == "<DIV>"); } function testAlt1() { var p, rslt; p = parser.alt([parser.literal("<")]); rslt = exec(p, "<DIV>"); assertTrue(rslt instanceof parser.Success); assertTrue(rslt.rslt == "<"); assertTrue(rslt.input == "DIV>"); } function testAlt3() { var p, rslt; p = parser.alt([parser.literal("SPAN"), parser.literal("DIV"), parser.literal("TR")]); rslt = exec(p, "DIVSPANTR"); assertTrue(rslt instanceof parser.Success); assertTrue(rslt.input == "SPANTR"); assertTrue(rslt.rslt.toString() == "DIV"); rslt = exec(p, "SPANDIVTR"); assertTrue(rslt instanceof parser.Success); assertTrue(rslt.input == "DIVTR"); assertTrue(rslt.rslt.toString() == "SPAN"); rslt = exec(p, "TRDIV"); assertTrue(rslt instanceof parser.Success); assertTrue(rslt.input == "DIV"); assertTrue(rslt.rslt.toString() == "TR"); rslt = exec(p, "STRDIV"); assertTrue(rslt instanceof parser.Failure); assertTrue(rslt.input == "STRDIV"); } function testOpt() { var p, rslt; p = parser.opt(parser.literal("OPT")); rslt = exec(p, "OPT"); assertTrue(rslt instanceof parser.Success); assertTrue(rslt.rslt == "OPT"); assertTrue(rslt.input == ""); rslt = exec(p, "OP1T"); assertTrue(rslt instanceof parser.Success); assertTrue(rslt.rslt == null); assertTrue(rslt.input == "OP1T"); } function testRep() { var p, rslt; p = parser.rep(parser.literal("A")); rslt = exec(p, ""); info(rslt.toString()); assertTrue(rslt instanceof parser.Success); assertTrue(rslt.input == ""); assertTrue(rslt.rslt == ""); rslt = exec(p, "ABB"); info(rslt.toString()); assertTrue(rslt instanceof parser.Success); assertTrue(rslt.input == "BB"); assertTrue(rslt.rslt == "A"); rslt = exec(p, "AABB"); info(rslt.toString()); assertTrue(rslt instanceof parser.Success); assertTrue(rslt.input == "BB"); assertTrue(rslt.rslt == "A,A"); rslt = exec(p, "AAABB"); info(rslt.toString()); assertTrue(rslt instanceof parser.Success); assertTrue(rslt.input == "BB"); assertTrue(rslt.rslt == "A,A,A"); } /* exp ::= exp + term | exp - term | term term ::= term * factor | term / factor | factor factor ::= num | ( exp ) 左再帰を除去して、 exp ::= term exp_r exp_r ::= + term exp_r | - term exp_r | ε term ;;= factor term_r term_r ::= * factor term_r | / factor term_r | ε factor ::= num | ( exp ) */ function testCalc() { var S = new Array(); var C = parser.c var LITERAL = parser.literal; var REGEX = parser.regex; var SEQ = parser.seq var ALT = parser.alt var rslt; var lparen = LITERAL("("); var rparen = LITERAL(")"); var plus = LITERAL("+"); var minus = LITERAL("-"); var star = LITERAL("*"); var div = LITERAL("/"); var eps = parser.success(null); var ws = parser.rep(REGEX("\\s")); var num = REGEX("(0|\\d+)\(.\\d*)?", function(s){S.push(parseFloat(s));}); var exp = new parser.Parser(); var factor = ALT([ SEQ([lparen, ws, exp, ws, rparen]), num ]); var term_r = new parser.Parser(); term_r.copy(ALT([ SEQ([ws, star, ws, factor, term_r], function(v){ var r = S.pop(); var l = S.pop(); S.push(l*r); }), SEQ([ws, div, ws, factor, term_r], function(v){ var r = S.pop(); var l = S.pop(); S.push(l/r); }), eps ]) ); var term = SEQ([factor, term_r]); var exp_r = new parser.Parser(); exp_r.copy(ALT([ SEQ([ws, plus, ws, term, exp_r], function(v){ var r = S.pop(); var l = S.pop(); S.push(l+r); }), SEQ([ws, minus, ws, term, exp_r], function(v){ var r = S.pop(); var l = S.pop(); S.push(l-r); }), eps ]) ); exp.parse = SEQ([term, exp_r]).parse; rslt = exec(exp, "1.60 * 4.5"); info(rslt); info(S); assertTrue(rslt instanceof parser.Success); assertTrue(rslt.input == ""); //assertTrue(S.toString() == "1.6,4.5,*"); assertTrue(S[0] == (1.60 * 4.5)); S = new Array(); rslt = exec(exp, "9.3 / 0.3"); info(rslt); info(S); //assertTrue(S.toString() == "9.3,0.3,/"); assertTrue(S[0] == (9.3 / 0.3)); S = new Array(); rslt = exec(exp, "5 * 3 * 4"); info(rslt); info(S); //assertTrue(S.toString() == "5,3,4,*,*"); assertTrue(S[0] == (5*3*4)); S = new Array(); rslt = exec(exp, "5 * 3 / 4"); info(rslt); info(S); //assertTrue(S.toString() == "5,3,4,/,*"); assertTrue(S[0] == (5 * 3 / 4)); S = new Array(); rslt = exec(exp, "( 5 * 3 ) / 4"); info(rslt); info(S); //assertTrue(S.toString() == "5,3,*,4,/"); assertTrue(S[0] == (( 5 * 3 ) / 4)); S = new Array(); rslt = exec(exp, "1 + ( 5 * 3 ) / 4"); info(rslt); info(S); //assertTrue(S.toString() == "1,5,3,*,4,/,+"); assertTrue(S[0] == (1 + ( 5 * 3 ) / 4)); } </script> </head> <body> <h1>ParserCombinator Test File</h1> <input type="button" id="btn" value="calc" onClick="testCalc();"/> </body> </html>