パーサコンビネータもどきを書いてみた

JavaScriptでパーサコンビネータもどきを書いてみました。
一応、Scalaのパーサコンビネータを参考にしました。
再帰にも対応できないし、効率なんてこれっぽっちも考えてません。

parsec.js

if(!unyaunya) {
  var unyaunya = {};
}
if(!unyaunya.parsec) {
  unyaunya.parsec = {};
}

/**
* unyaunya.parsecパッケージ
*/
(function(){
var P = unyaunya.parsec;

/**
 * Successクラス
 *  Parseが成功した場合の結果
 */
(function(){
  P.Success = function(rslt, input) {
    this.rslt = rslt;
    this.input = input;
  }

  var proto = P.Success.prototype;
  proto.toString = function(){
    return "Success("+(this.rslt == null ? "null" : this.rslt.toString()) + 
           "," + this.input+")";
  }
})();

/**
 * Failureクラス
 *  Parseが失敗した場合の結果
 */
(function(){
  P.Failure = function(errmsg, input) {
    this.errmsg = errmsg;
    this.input = input;
  }
  var proto = P.Failure.prototype;
  proto.toString = function(){
    return  "Failure(" +
            (this.errmsg == null ? "null" : this.errmsg) +
            "," + this.input+")";
  }
})();

/**
 * Parserクラス
 *    parse - パースを実行する関数。SuccessまたはFailureを返す
 *    conv  - パース結果のSuccessを変換する関数
 */
P.Parser = function(parse, conv) {
  this.parse = parse;
  this.conv = conv;
}

P.Parser.prototype.copy = function(parser) {
  this.parse = parser.parse;
  this.conv = parser.conv;
}

/**
 * parserを実行してinputを構文解析する。
 */
P.exec = function(parser, input) {
  var rslt = parser.parse(input);
  if(rslt instanceof P.Success) {
    if(parser.conv) {
      rslt.rslt = parser.conv(rslt.rslt);
    }
    return rslt;
  }
  return rslt;
}

/**
*/
P.success = function(v, c){
  return new P.Parser(
    function(input) {
      return new P.Success(v, input);
    },
    c
  );
}

/**
*/
P.failure = function(msg,c){
  return new P.Parser(
    function(input) {
      return new P.Failure(msg, input);
    },
    c
  );
}

/*
* リテラル・パーサ
*/
P.literal = function(s, c) {
  return new P.Parser(
    function(input) {
      if(s == input.substring(0, s.length)) {
        return new P.Success(s, input.substring(s.length));
      }
      return new P.Failure(s + " expected", input);
    },
    c
  );
}

/*
* 正規表現パーサ
*/
P.regex = function(re, c){
  return new P.Parser(
    function(input) {
      var rslt = input.match(re);
      if(rslt != null) {
        var s = rslt[0];
        if(s == input.substring(0, s.length)) {
          return new P.Success(s, input.substring(s.length));
        }
      }
      return new P.Failure(re + " expected", input);
    },
    c
  );
}

/**
*/
P.seq = function(parsers, c){
  if(parsers == undefined) {
    parsers = new Array();
  }
  return new P.Parser(
    function(input) {
      var i
      var ret;
      var rslt = [];
      ret = new P.Success(null, input);
      for(i = 0; i < parsers.length; i++) {
        ret = P.exec(parsers[i], (ret.input));
        if(ret instanceof P.Failure) {
          return new P.Failure(ret.errmsg, input);
        }
        else if(!(ret instanceof P.Success)) {
          return new P.Failure("sub-parser returned illegal result.", input);
        }
        rslt.push(ret.rslt);
      }
      return new P.Success(rslt, ret.input);
    },
    c
  );
}

/**
*/
P.alt = function(parsers, c){
  if(parsers == undefined) {
    parsers = new Array();
  }
  return new P.Parser(
    function(input) {
      var i
      var ret;
      var rslt = [];
      ret = new P.Success(null, input);
      for(i = 0; i < parsers.length; i++) {
        ret = P.exec(parsers[i], ret.input);
        if(ret instanceof P.Success) {
          return ret;
        }
      }
      return ret;
    },
    c
  );
}

/**
*/
P.opt = function(p, c) {
  return P.alt([p, P.success(null)], c);
}

/**
*/
P.rep = function(p, c) {
  return new P.Parser(
    function(input) {
      var ret;
      var rslt = [];

      ret = P.exec(p, input);
      while(ret instanceof P.Success) {
        rslt.push(ret.rslt);
        ret = P.exec(p, ret.input);
      }
      if(rslt.length > 0) {
        return new P.Success(rslt, ret.input);
      }
      else {
        return new P.Success([], input);
      }
    },
    c
  );
}

})();


ついでにJsUnitもはじめて使ってみました。
最後のテストケースは、お約束の四則演算のパーサです。

testParser.html

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">

<html>

<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>Sample Test File</title>
<script language="JavaScript" type="text/javascript" src="./app/jsUnitCore.js"></script>
<script language="JavaScript" type="text/javascript" src="../js/parsec.js"></script>
<script language="JavaScript" type="text/javascript">
var parser = unyaunya.parsec;
var exec = parser.exec;

function testSuccessParser() {
  var p, rslt;
  p = parser.success(999);
  rslt = exec(p, "abcdef1238isl");
  assertTrue(rslt instanceof parser.Success);
  assertTrue(rslt.rslt == 999);
}

function testFailureParser() {
  var p, rslt;
  p = parser.failure("error message");
  rslt = exec(p, "abcdef1238isl");
  assertTrue(rslt instanceof parser.Failure);
  assertTrue(rslt.errmsg == "error message");
}

function testLiteral() {
  var p, rslt;
  p = parser.literal("DIV");
  rslt = exec(p, "DIVabc");
  assertTrue(rslt.toString(), rslt instanceof parser.Success);
  assertTrue(rslt.rslt == "DIV");
  assertTrue(rslt.input == "abc");
}

function testRegex() {
  var p, rslt;
  p = parser.regex("DIV");
  rslt = exec(p, "DIVabc");
  assertTrue(rslt.toString(), rslt instanceof parser.Success);
  assertTrue(rslt.rslt == "DIV");
  assertTrue(rslt.input == "abc");
}

function testRegex1() {
  var p, rslt;
  p = parser.regex("[0]|([1-9][0-9]*)");
  rslt = exec(p, "123abc");
  assertTrue(rslt.toString(), rslt instanceof parser.Success);
  assertTrue(rslt.rslt == "123");
  assertTrue(rslt.input == "abc");

  rslt = exec(p, "abc123");
  assertTrue(rslt.toString(), rslt instanceof parser.Failure);
  assertTrue(rslt.input == "abc123");

  rslt = exec(p, "0123");
  assertTrue(rslt.toString(), rslt instanceof parser.Success);
  assertTrue(rslt.rslt == "0");
  assertTrue(rslt.input == "123");

  rslt = exec(p, "-0123");
  info(rslt.rslt);
  info(rslt.input);
  assertTrue(rslt.toString(), rslt instanceof parser.Failure);
  assertTrue(rslt.input == "-0123");
}

function testSeq0() {
  var p, rslt;

  p = parser.seq();
  rslt = exec(p, "<DIV>");
  assertTrue(rslt instanceof parser.Success);
  assertTrue(rslt.rslt.length == 0);
  assertTrue(rslt.input == "<DIV>");

  p = parser.seq([]);
  rslt = exec(p, "<DIV>");
  assertTrue(rslt instanceof parser.Success);
  assertTrue(rslt.rslt.length == 0);
  assertTrue(rslt.input == "<DIV>");
}

function testSeq1() {
  var p, rslt;
  p = parser.seq([parser.literal("<")]);
  assertNotUndefined(p.toString(), p);
  rslt = exec(p, "<DIV>");
  assertTrue(rslt instanceof parser.Success);
  assertTrue(rslt.rslt == "<");
  assertTrue(rslt.input == "DIV>");
}

function testSeq3() {
  var p, rslt;
  p = parser.seq([parser.literal("<"), parser.literal("DIV"), parser.literal(">")]);
  rslt = exec(p, "<DIV>");
  assertTrue(rslt instanceof parser.Success);
  assertTrue(rslt.input == "");
  assertTrue(rslt.rslt.toString() == "<,DIV,>");
}

function testAlt0() {
  var p, rslt;

  p = parser.alt([]);
  rslt = exec(p, "<DIV>");
  assertTrue(rslt instanceof parser.Success);
  assertNull(rslt.rslt);
  assertTrue(rslt.input == "<DIV>");

  p = parser.alt();
  rslt = exec(p, "<DIV>");
  assertTrue(rslt instanceof parser.Success);
  assertNull(rslt.rslt);
  assertTrue(rslt.input == "<DIV>");
}

function testAlt1() {
  var p, rslt;
  p = parser.alt([parser.literal("<")]);
  rslt = exec(p, "<DIV>");
  assertTrue(rslt instanceof parser.Success);
  assertTrue(rslt.rslt == "<");
  assertTrue(rslt.input == "DIV>");
}

function testAlt3() {
  var p, rslt;
  p = parser.alt([parser.literal("SPAN"), parser.literal("DIV"), parser.literal("TR")]);
  rslt = exec(p, "DIVSPANTR");
  assertTrue(rslt instanceof parser.Success);
  assertTrue(rslt.input == "SPANTR");
  assertTrue(rslt.rslt.toString() == "DIV");

  rslt = exec(p, "SPANDIVTR");
  assertTrue(rslt instanceof parser.Success);
  assertTrue(rslt.input == "DIVTR");
  assertTrue(rslt.rslt.toString() == "SPAN");

  rslt = exec(p, "TRDIV");
  assertTrue(rslt instanceof parser.Success);
  assertTrue(rslt.input == "DIV");
  assertTrue(rslt.rslt.toString() == "TR");

  rslt = exec(p, "STRDIV");
  assertTrue(rslt instanceof parser.Failure);
  assertTrue(rslt.input == "STRDIV");
}


function testOpt() {
  var p, rslt;
  p = parser.opt(parser.literal("OPT"));

  rslt = exec(p, "OPT");
  assertTrue(rslt instanceof parser.Success);
  assertTrue(rslt.rslt == "OPT");
  assertTrue(rslt.input == "");

  rslt = exec(p, "OP1T");
  assertTrue(rslt instanceof parser.Success);
  assertTrue(rslt.rslt == null);
  assertTrue(rslt.input == "OP1T");
}

function testRep() {
  var p, rslt;
  p = parser.rep(parser.literal("A"));

  rslt = exec(p, "");
  info(rslt.toString());
  assertTrue(rslt instanceof parser.Success);
  assertTrue(rslt.input == "");
  assertTrue(rslt.rslt == "");

  rslt = exec(p, "ABB");
  info(rslt.toString());
  assertTrue(rslt instanceof parser.Success);
  assertTrue(rslt.input == "BB");
  assertTrue(rslt.rslt == "A");

  rslt = exec(p, "AABB");
  info(rslt.toString());
  assertTrue(rslt instanceof parser.Success);
  assertTrue(rslt.input == "BB");
  assertTrue(rslt.rslt == "A,A");

  rslt = exec(p, "AAABB");
  info(rslt.toString());
  assertTrue(rslt instanceof parser.Success);
  assertTrue(rslt.input == "BB");
  assertTrue(rslt.rslt == "A,A,A");
}

/*
  exp     ::= exp + term |
              exp - term |
              term
  term    ::= term * factor |
              term / factor |
              factor
  factor  ::= num | ( exp )

左再帰を除去して、

  exp     ::= term exp_r
  exp_r   ::= + term exp_r |
              - term exp_r |
              ε
  term    ;;= factor term_r
  term_r  ::= * factor term_r |
              / factor term_r |
              ε
  factor  ::= num | ( exp )
*/
function testCalc() {
  var S = new Array();
  var C = parser.c
  var LITERAL = parser.literal;
  var REGEX = parser.regex;
  var SEQ = parser.seq
  var ALT = parser.alt
  var rslt;
  var lparen = LITERAL("(");
  var rparen = LITERAL(")");
  var plus   = LITERAL("+");
  var minus  = LITERAL("-");
  var star   = LITERAL("*");
  var div    = LITERAL("/");
  var eps    = parser.success(null);

  var ws = parser.rep(REGEX("\\s"));
  var num = REGEX("(0|\\d+)\(.\\d*)?", function(s){S.push(parseFloat(s));});

  var exp   = new parser.Parser();
  var factor       = ALT([
                      SEQ([lparen, ws, exp, ws, rparen]),
                      num
                      ]);
  var term_r   = new parser.Parser();
  term_r.copy(ALT([
      SEQ([ws, star, ws, factor, term_r],
          function(v){
            var r = S.pop();
            var l = S.pop();
            S.push(l*r);
          }),
      SEQ([ws, div,  ws, factor, term_r],
          function(v){
            var r = S.pop();
            var l = S.pop();
            S.push(l/r);
          }),
      eps
    ])
  );
  var term     = SEQ([factor, term_r]);
  var exp_r = new parser.Parser();
  exp_r.copy(ALT([
      SEQ([ws, plus,  ws, term, exp_r],
          function(v){
            var r = S.pop();
            var l = S.pop();
            S.push(l+r);
          }),
      SEQ([ws, minus, ws, term, exp_r],
          function(v){
            var r = S.pop();
            var l = S.pop();
            S.push(l-r);
          }),
      eps
    ])
  );
  exp.parse  = SEQ([term, exp_r]).parse;

  rslt = exec(exp, "1.60 * 4.5");
  info(rslt);
  info(S);
  assertTrue(rslt instanceof parser.Success);
  assertTrue(rslt.input == "");
  //assertTrue(S.toString() == "1.6,4.5,*");
  assertTrue(S[0] == (1.60 * 4.5));

  S = new Array();
  rslt = exec(exp, "9.3 / 0.3");
  info(rslt);
  info(S);
  //assertTrue(S.toString() == "9.3,0.3,/");
  assertTrue(S[0] == (9.3 / 0.3));

  S = new Array();
  rslt = exec(exp, "5 * 3 * 4");
  info(rslt);
  info(S);
  //assertTrue(S.toString() == "5,3,4,*,*");
  assertTrue(S[0] == (5*3*4));

  S = new Array();
  rslt = exec(exp, "5 * 3 / 4");
  info(rslt);
  info(S);
  //assertTrue(S.toString() == "5,3,4,/,*");
  assertTrue(S[0] == (5 * 3 / 4));

  S = new Array();
  rslt = exec(exp, "( 5 * 3 ) / 4");
  info(rslt);
  info(S);
  //assertTrue(S.toString() == "5,3,*,4,/");
  assertTrue(S[0] == (( 5 * 3 ) / 4));

  S = new Array();
  rslt = exec(exp, "1 + ( 5 * 3 ) / 4");
  info(rslt);
  info(S);
  //assertTrue(S.toString() == "1,5,3,*,4,/,+");
  assertTrue(S[0] == (1 + ( 5 * 3 ) / 4));
}

</script>
</head>

<body>
<h1>ParserCombinator Test File</h1>
<input type="button" id="btn" value="calc" onClick="testCalc();"/>
</body>

</html>