Boost Spirit V2+Universal Tree における POSIX shell スクリプトパーサの例

ID: 26
creation date: 2013/05/14 13:22
modification date: 2013/05/14 13:22
owner: taiji
tags: C++, Boost, Spirit, Spirit V2, Spirit utree, AST, shell, ksh

以前の Boost Spirit V2+Universal Tree における計算機の例 では一行毎の構文解析であったが、複数行に渡る構文解析の例として、シェルスクリプト構文解析を段階的に実装してみよう。但し、先の Boost Spirit V2+Universal Tree における bash スクリプトパーサの例 では pre-skipping を前提としたが、ここでは明示的に skipping を指定しつつ、今度は POSIX shell の BNF を最終的に目指した例を示していく。

Spirit V2+Universal Tree の基本形

複数行に対応した Spirit V2+Universal Tree の基本形で pre-skipping を前提としないものは以下のようになる。

#include <iostream>
#include <string>
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/support_utree.hpp>
#include <boost/spirit/include/qi.hpp>

namespace qi = boost::spirit::qi;
namespace spirit = boost::spirit;

// Skeleton grammar: the starting point that the later sections flesh out.
namespace client {
  // Qi grammar whose synthesized attribute is a spirit::utree.
  // No skipper type is given in the grammar/rule template arguments.
  template <typename Iterator>
  struct a_parser : qi::grammar<Iterator, spirit::utree()> {
    qi::rule<Iterator, spirit::utree()> top; // start rule, handed to base_type below
    a_parser() : a_parser::base_type(top)
    {
      top = *~qi::lit('\t'); // write the real parser rule here (placeholder: zero or more characters other than a tab)

      // Registers `top` with Spirit's debug facility.
      // NOTE(review): presumably only produces trace output when
      // BOOST_SPIRIT_DEBUG is defined before the Spirit headers — confirm.
      BOOST_SPIRIT_DEBUG_NODE(top);
    }
  };
}
// Driver: reads all of standard input, parses it with client::a_parser and
// prints either the resulting utree or the unparsed remainder.
// Returns 0 on a complete parse, 1 otherwise.
int main()
{
  client::a_parser<std::string::const_iterator> a_parser;
  std::string str;
  // Keep whitespace: with skipws cleared, istream_iterator<char> yields every
  // character of the input, including blanks and newlines.
  std::cin.unsetf(std::ios::skipws);
  std::copy(std::istream_iterator<char>(std::cin), std::istream_iterator<char>(), std::back_inserter(str));
  {
    std::string::const_iterator it = str.begin(), end = str.end();
    spirit::utree ut;
    // qi::ascii::space is passed as the skipper; the commented-out argument
    // shows where a skip flag could be supplied.
    bool r = phrase_parse(it, end, a_parser, qi::ascii::space/*, qi::skip_flag::dont_postskip*/, ut);
    // Success requires both a match and that the whole input was consumed.
    if (r && it == end) {
      std::cout << "succeeded:\t" << ut << std::endl;
    }
    else {
      // Show the part of the input left unconsumed (from `it` to the end).
      std::cout << "failed:\t" << std::string(it, end) << std::endl;
      return 1;
    }
  }
  return 0;
}

これを雛形としてシェルスクリプトの構文解析を肉付けしていこう。以降、Unified diff 形式でコードの変遷を表現するものとする。

コメント処理、コマンド行、変数代入、シングルクォート

例えば、以下のようなスクリプトを受理できるようなパーサ規則を書く。

#!/bin/sh
i=0
message='hello world'!
echo $i $message
LC_ALL=C time
--- a_multiline_parser+utree00.cc       2013-05-14 14:24:45.000000000 +0900
+++ xsh00.cc    2013-05-14 14:24:45.000000000 +0900
@@ -9,11 +9,43 @@
 
 namespace client {
   template <typename Iterator>
-  struct a_parser : qi::grammar<Iterator, spirit::utree()> {
-    qi::rule<Iterator, spirit::utree()> top;
-    a_parser() : a_parser::base_type(top)
+  struct sh_parser : qi::grammar<Iterator, spirit::utree()> {
+    qi::rule<Iterator, spirit::utree()> top,
+      variable_name,
+      single_quoted_string,
+      unquoted_string,
+      command_name,
+      command_arguments,
+      argument,
+      arguments,
+      assignment,
+      assignments,
+      nl,
+      nl_list,
+      comment;
+    sh_parser() : sh_parser::base_type(top)
     {
-      top = *~qi::lit('\t');
+      top = *(*nl_list >>
+              ((qi::omit[*qi::ascii::blank] >>
+                (assignments >> command_arguments | assignments | command_arguments)) %
+               nl_list) >> *nl_list || nl);
+
+      variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))];
+      single_quoted_string = qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")];
+      unquoted_string = qi::as_string[+~qi::char_("\t\n \"#&'();<=>\\`|")];
+
+      command_name = +(single_quoted_string | unquoted_string);
+      command_arguments = command_name >> *(qi::omit[+qi::ascii::blank] >> argument);
+
+      argument = +(single_quoted_string | unquoted_string);
+      arguments = argument >> *(qi::omit[+qi::ascii::blank] >> argument);
+
+      assignment = variable_name >> '=' >> -argument;
+      assignments = assignment >> *(!qi::eps | qi::omit[+qi::ascii::blank] >> assignment);
+
+      nl = qi::omit[*comment] >> qi::eol;
+      nl_list = +(qi::omit[*comment] >> qi::eol);
+      comment = qi::as_string[qi::skip(qi::ascii::blank)['#'] >> *(~qi::char_("\r\n"))];
 
       BOOST_SPIRIT_DEBUG_NODE(top);
     }
@@ -21,20 +53,20 @@
 }
 int main()
 {
-  client::a_parser<std::string::const_iterator> a_parser;
+  client::sh_parser<std::string::const_iterator> sh_parser;
   std::string str;
   std::cin.unsetf(std::ios::skipws);
   std::copy(std::istream_iterator<char>(std::cin), std::istream_iterator<char>(), std::back_inserter(str));
   {
     std::string::const_iterator it = str.begin(), end = str.end();
     spirit::utree ut;
-    bool r = phrase_parse(it, end, a_parser, qi::ascii::space/*, qi::skip_flag::dont_postskip*/, ut);
+    bool r = phrase_parse(it, end, sh_parser, qi::ascii::space/*, qi::skip_flag::dont_postskip*/, ut);
+    std::cout << str << std::endl;
     if (r && it == end) {
       std::cout << "succeeded:\t" << ut << std::endl;
     }
     else {
       std::cout << "failed:\t" << std::string(it, end) << std::endl;
-      return 1;
     }
   }
   return 0;

これでコメント処理、コマンド行、変数代入、シングルクォートされた任意の文字列を含む文字列や、空白区切りによる文字列のリストのパースが可能となっている。

ダブルクォート

次に以下のような、ダブルクォートされた文字列を受理できるようなパーサ規則を書く。

message="hello \"world\"!"
echo "$message"
--- xsh00.cc    2013-05-14 14:24:45.000000000 +0900
+++ xsh01.cc    2013-05-14 14:24:45.000000000 +0900
@@ -13,6 +13,7 @@
     qi::rule<Iterator, spirit::utree()> top,
       variable_name,
       single_quoted_string,
+      double_quoted_string,
       unquoted_string,
       command_name,
       command_arguments,
@@ -32,12 +33,16 @@
 
       variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))];
       single_quoted_string = qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")];
+      double_quoted_string = qi::as_string[qi::char_('"') >> *(
+                                                               (qi::lit('\\') >> qi::char_) | (!qi::lit('\\') >> ~qi::lit('"'))
+                                                               ) >>
+                                           qi::char_('"')];
       unquoted_string = qi::as_string[+~qi::char_("\t\n \"#&'();<=>\\`|")];
 
-      command_name = +(single_quoted_string | unquoted_string);
+      command_name = +(single_quoted_string | double_quoted_string | unquoted_string);
       command_arguments = command_name >> *(qi::omit[+qi::ascii::blank] >> argument);
 
-      argument = +(single_quoted_string | unquoted_string);
+      argument = +(single_quoted_string | double_quoted_string | unquoted_string);
       arguments = argument >> *(qi::omit[+qi::ascii::blank] >> argument);
 
       assignment = variable_name >> '=' >> -argument;

これで、バックスラッシュでエスケープされていないダブルクォーテーションで閉じられるダブルクォート文字列（内部にバックスラッシュでエスケープされたダブルクォーテーションを含んでいてもよい）のパースが可能となっている。

ヒアドキュメント

次に、以下のようなヒアドキュメントの入力を受理するようなパーサ規則を書くが、まずは終端文字列を「EOH」に固定したものから試みる。

cat <<EOH
usage:
        $0 < filename
EOH

ちなみに、ヒアドキュメントでタブを除去する「<<-」のときには真、さもなくば偽を付加するものとする。

--- xsh01.cc    2013-05-14 14:24:45.000000000 +0900
+++ xsh02.cc    2013-05-14 14:24:45.000000000 +0900
@@ -21,14 +21,19 @@
       arguments,
       assignment,
       assignments,
+      io_here,
       nl,
       nl_list,
       comment;
+    qi::rule<Iterator, spirit::utree()> DLESS, MINUS;
     sh_parser() : sh_parser::base_type(top)
     {
+      DLESS = qi::as<std::string>()["<<"];
+      MINUS = '-';
+
       top = *(*nl_list >>
               ((qi::omit[*qi::ascii::blank] >>
-                (assignments >> command_arguments | assignments | command_arguments)) %
+                (assignments >> command_arguments | assignments | command_arguments) >> -io_here) %
                nl_list) >> *nl_list || nl);
 
       variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))];
@@ -48,6 +53,12 @@
       assignment = variable_name >> '=' >> -argument;
       assignments = assignment >> *(!qi::eps | qi::omit[+qi::ascii::blank] >> assignment);
 
+      io_here = qi::skip(qi::ascii::blank)[DLESS] >> qi::matches[MINUS] >>
+        qi::lit("EOH") >> nl >>
+        (*(qi::as_string[+(qi::char_ - (qi::eol >> qi::lit("EOH") >> qi::eol))]) >>
+         qi::omit[qi::eol >> qi::lit("EOH")] |
+         qi::omit[qi::lit("EOH")]);
+
       nl = qi::omit[*comment] >> qi::eol;
       nl_list = +(qi::omit[*comment] >> qi::eol);
       comment = qi::as_string[qi::skip(qi::ascii::blank)['#'] >> *(~qi::char_("\r\n"))];

このままでは「EOH」以外の終端文字列には対応できていない。よって、io_here にローカル変数 qi::locals<std::string> を使用可能にして、それを参照するようにする。

--- xsh02.cc    2013-05-14 14:24:45.000000000 +0900
+++ xsh03.cc    2013-05-14 14:24:45.000000000 +0900
@@ -3,14 +3,15 @@
 //#define BOOST_SPIRIT_DEBUG
 #include <boost/spirit/include/support_utree.hpp>
 #include <boost/spirit/include/qi.hpp>
+#include <boost/spirit/include/phoenix.hpp>
 
 namespace qi = boost::spirit::qi;
 namespace spirit = boost::spirit;
 
 namespace client {
   template <typename Iterator>
-  struct sh_parser : qi::grammar<Iterator, spirit::utree()> {
-    qi::rule<Iterator, spirit::utree()> top,
+  struct sh_parser : qi::grammar<Iterator, spirit::utree(), qi::locals<std::string> > {
+    qi::rule<Iterator, spirit::utree(), qi::locals<std::string> > top,
       variable_name,
       single_quoted_string,
       double_quoted_string,
@@ -53,11 +54,11 @@
       assignment = variable_name >> '=' >> -argument;
       assignments = assignment >> *(!qi::eps | qi::omit[+qi::ascii::blank] >> assignment);
 
-      io_here = qi::skip(qi::ascii::blank)[DLESS] >> qi::matches[MINUS] >>
-        qi::lit("EOH") >> nl >>
-        (*(qi::as_string[+(qi::char_ - (qi::eol >> qi::lit("EOH") >> qi::eol))]) >>
-         qi::omit[qi::eol >> qi::lit("EOH")] |
-         qi::omit[qi::lit("EOH")]);
+      io_here %= qi::skip(qi::ascii::blank)[DLESS] >> qi::matches[MINUS] >>
+        qi::as_string[(+(qi::char_ - qi::ascii::space))][qi::labels::_a = qi::labels::_1] >> nl >>
+        (*(qi::as_string[+(qi::char_ - (qi::eol >> qi::string(qi::labels::_a) >> qi::eol))]) >>
+         qi::omit[qi::eol >> qi::string(qi::labels::_a)] |
+         qi::omit[qi::string(qi::labels::_a)]);
 
       nl = qi::omit[*comment] >> qi::eol;
       nl_list = +(qi::omit[*comment] >> qi::eol);

これで任意の終端文字列でヒアドキュメントが受理できるようになる。さらに、以下のような変数展開を抑止するヒアドキュメントにも対応しよう。

cat <<'EOF'
$1 $2 ...
EOF
--- xsh03.cc    2013-05-14 14:24:45.000000000 +0900
+++ xsh04.cc    2013-05-14 14:24:45.000000000 +0900
@@ -26,6 +26,7 @@
       nl,
       nl_list,
       comment;
+    qi::rule<Iterator, std::string()> here_end, quoted_here_end, unquoted_here_end;
     qi::rule<Iterator, spirit::utree()> DLESS, MINUS;
     sh_parser() : sh_parser::base_type(top)
     {
@@ -54,8 +55,12 @@
       assignment = variable_name >> '=' >> -argument;
       assignments = assignment >> *(!qi::eps | qi::omit[+qi::ascii::blank] >> assignment);
 
+      quoted_here_end = '\'' >> qi::as_string[(+(qi::char_ - '\''))][qi::labels::_val = qi::labels::_1] >> '\'';
+      unquoted_here_end %= qi::as_string[(+(qi::char_ - qi::ascii::space))];
+      here_end %= quoted_here_end | unquoted_here_end;
+
       io_here %= qi::skip(qi::ascii::blank)[DLESS] >> qi::matches[MINUS] >>
-        qi::as_string[(+(qi::char_ - qi::ascii::space))][qi::labels::_a = qi::labels::_1] >> nl >>
+        here_end[qi::labels::_a = qi::labels::_1] >> nl >>
         (*(qi::as_string[+(qi::char_ - (qi::eol >> qi::string(qi::labels::_a) >> qi::eol))]) >>
          qi::omit[qi::eol >> qi::string(qi::labels::_a)] |
          qi::omit[qi::string(qi::labels::_a)]);

ちなみに規則で「=」の代わりに「%=」とすると「_val = _1」を省略できる。

リダイレクション

次に、以下のようなリダイレクションを受理するパーサ規則を書く。

sort < filename
printf '' > filename
--- xsh04.cc    2013-05-14 14:24:45.000000000 +0900
+++ xsh05.cc    2013-05-14 14:24:45.000000000 +0900
@@ -23,19 +23,43 @@
       assignment,
       assignments,
       io_here,
+      filename,
+      io_redirect,
+      io_file,
       nl,
       nl_list,
       comment;
     qi::rule<Iterator, std::string()> here_end, quoted_here_end, unquoted_here_end;
-    qi::rule<Iterator, spirit::utree()> DLESS, MINUS;
+    qi::rule<Iterator, spirit::utree()> IO_NUMBER;
+    qi::rule<Iterator, spirit::utree()> AMP, VLINE, SEMI;
+    qi::rule<Iterator, spirit::utree()> AND_IF, OR_IF, DSEMI;
+    qi::rule<Iterator, spirit::utree()> LESS, GREAT, DLESS, DGREAT, LESSAND, GREATAND, LESSGREAT, CLOBBER, ANDGREAT, MINUS;
     sh_parser() : sh_parser::base_type(top)
     {
+      IO_NUMBER = +qi::digit;
+
+      AMP = '&';
+      VLINE = '|';
+      SEMI = ';';
+
+      AND_IF = qi::as<std::string>()["&&"];
+      OR_IF = qi::as<std::string>()["||"];
+      DSEMI = qi::as<std::string>()[";;"];
+
+      LESS = '<';
+      GREAT = '>';
       DLESS = qi::as<std::string>()["<<"];
+      DGREAT = qi::as<std::string>()[">>"];
+      LESSAND = qi::as<std::string>()["<&"];
+      GREATAND = qi::as<std::string>()[">&"];
+      LESSGREAT = qi::as<std::string>()["<>"];
+      CLOBBER = qi::as<std::string>()[">|"];
+      ANDGREAT = qi::as<std::string>()["&>"];
       MINUS = '-';
 
       top = *(*nl_list >>
               ((qi::omit[*qi::ascii::blank] >>
-                (assignments >> command_arguments | assignments | command_arguments) >> -io_here) %
+                (assignments >> command_arguments | assignments | command_arguments) >> *io_redirect) %
                nl_list) >> *nl_list || nl);
 
       variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))];
@@ -65,6 +89,11 @@
          qi::omit[qi::eol >> qi::string(qi::labels::_a)] |
          qi::omit[qi::string(qi::labels::_a)]);
 
+      filename = qi::omit[*qi::ascii::blank] >> argument;
+
+      io_redirect = io_file | io_here;
+      io_file = qi::skip(qi::ascii::blank)[LESS | LESSAND | GREAT | GREATAND | DGREAT | LESSGREAT | CLOBBER] >> filename;
+
       nl = qi::omit[*comment] >> qi::eol;
       nl_list = +(qi::omit[*comment] >> qi::eol);
       comment = qi::as_string[qi::skip(qi::ascii::blank)['#'] >> *(~qi::char_("\r\n"))];

これでリダイレクションの構文を受理できるようになる。

パイプライン

次に、以下のようなパイプラインを受理できるパーサ規則を書く。

echo | cat | sort | uniq
--- xsh05.cc    2013-05-14 14:24:46.000000000 +0900
+++ xsh06.cc    2013-05-14 14:24:46.000000000 +0900
@@ -24,6 +24,7 @@
       assignments,
       io_here,
       filename,
+      pipeline,
       io_redirect,
       io_file,
       nl,
@@ -57,10 +58,7 @@
       ANDGREAT = qi::as<std::string>()["&>"];
       MINUS = '-';
 
-      top = *(*nl_list >>
-              ((qi::omit[*qi::ascii::blank] >>
-                (assignments >> command_arguments | assignments | command_arguments) >> *io_redirect) %
-               nl_list) >> *nl_list || nl);
+      top = *(*nl >> (pipeline % nl_list) >> *nl || nl);
 
       variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))];
       single_quoted_string = qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")];
@@ -91,6 +89,10 @@
 
       filename = qi::omit[*qi::ascii::blank] >> argument;
 
+      pipeline = (qi::omit[*qi::ascii::blank] >>
+                  (assignments >> command_arguments | assignments | command_arguments) >> *io_redirect) %
+        (qi::skip(qi::ascii::blank)[VLINE] >> *nl);
+
       io_redirect = io_file | io_here;
       io_file = qi::skip(qi::ascii::blank)[LESS | LESSAND | GREAT | GREATAND | DGREAT | LESSGREAT | CLOBBER] >> filename;
 

しかしこれでは、以下のようなヒアドキュメント直後のパイプラインには未対応である。

cat <<EOF | sort
        :
EOF

よって、以下のように pipe_here 規則を io_here 規則に挿入する。

--- xsh06.cc    2013-05-14 14:24:46.000000000 +0900
+++ xsh07.cc    2013-05-14 14:24:46.000000000 +0900
@@ -22,6 +22,7 @@
       arguments,
       assignment,
       assignments,
+      pipe_here,
       io_here,
       filename,
       pipeline,
@@ -77,12 +78,14 @@
       assignment = variable_name >> '=' >> -argument;
       assignments = assignment >> *(!qi::eps | qi::omit[+qi::ascii::blank] >> assignment);
 
+      pipe_here = qi::skip(qi::ascii::blank)[VLINE] >> *pipeline;
+
       quoted_here_end = '\'' >> qi::as_string[(+(qi::char_ - '\''))][qi::labels::_val = qi::labels::_1] >> '\'';
       unquoted_here_end %= qi::as_string[(+(qi::char_ - qi::ascii::space))];
       here_end %= quoted_here_end | unquoted_here_end;
 
       io_here %= qi::skip(qi::ascii::blank)[DLESS] >> qi::matches[MINUS] >>
-        here_end[qi::labels::_a = qi::labels::_1] >> nl >>
+        here_end[qi::labels::_a = qi::labels::_1] >> -pipe_here >> nl >>
         (*(qi::as_string[+(qi::char_ - (qi::eol >> qi::string(qi::labels::_a) >> qi::eol))]) >>
          qi::omit[qi::eol >> qi::string(qi::labels::_a)] |
          qi::omit[qi::string(qi::labels::_a)]);

これでパイプラインの構文を受理できるようになる。

リスト

次に、以下のようなリストを受理できるようなパーサ規則を書く。

true && echo yes || echo no
--- xsh07.cc    2013-05-14 14:24:46.000000000 +0900
+++ xsh08.cc    2013-05-14 14:24:46.000000000 +0900
@@ -26,6 +26,7 @@
       io_here,
       filename,
       pipeline,
+      and_or,
       io_redirect,
       io_file,
       nl,
@@ -59,7 +60,7 @@
       ANDGREAT = qi::as<std::string>()["&>"];
       MINUS = '-';
 
-      top = *(*nl >> (pipeline % nl_list) >> *nl || nl);
+      top = *(*nl >> (and_or % nl_list) >> *nl || nl);
 
       variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))];
       single_quoted_string = qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")];
@@ -95,6 +96,7 @@
       pipeline = (qi::omit[*qi::ascii::blank] >>
                   (assignments >> command_arguments | assignments | command_arguments) >> *io_redirect) %
         (qi::skip(qi::ascii::blank)[VLINE] >> *nl);
+      and_or = pipeline % (qi::skip(qi::ascii::blank)[AND_IF | OR_IF] >> *nl);
 
       io_redirect = io_file | io_here;
       io_file = qi::skip(qi::ascii::blank)[LESS | LESSAND | GREAT | GREATAND | DGREAT | LESSGREAT | CLOBBER] >> filename;

さらに、次のようなリストを受理できるようなパーサ規則を書く。

echo & i=0; LC_ALL=C date
--- xsh08.cc    2013-05-14 14:24:46.000000000 +0900
+++ xsh09.cc    2013-05-14 14:24:46.000000000 +0900
@@ -25,8 +25,11 @@
       pipe_here,
       io_here,
       filename,
+      separator_op,
+      separator,
       pipeline,
       and_or,
+      list,
       io_redirect,
       io_file,
       nl,
@@ -60,7 +63,7 @@
       ANDGREAT = qi::as<std::string>()["&>"];
       MINUS = '-';
 
-      top = *(*nl >> (and_or % nl_list) >> *nl || nl);
+      top = *(list || nl);
 
       variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))];
       single_quoted_string = qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")];
@@ -93,10 +96,14 @@
 
       filename = qi::omit[*qi::ascii::blank] >> argument;
 
+      separator_op = qi::skip(qi::ascii::blank)[AMP | SEMI];
+      separator = (separator_op >> *nl | nl_list);
+
       pipeline = (qi::omit[*qi::ascii::blank] >>
                   (assignments >> command_arguments | assignments | command_arguments) >> *io_redirect) %
         (qi::skip(qi::ascii::blank)[VLINE] >> *nl);
       and_or = pipeline % (qi::skip(qi::ascii::blank)[AND_IF | OR_IF] >> *nl);
+      list = and_or % separator_op;
 
       io_redirect = io_file | io_here;
       io_file = qi::skip(qi::ascii::blank)[LESS | LESSAND | GREAT | GREATAND | DGREAT | LESSGREAT | CLOBBER] >> filename;

これでリスト構文を受理できるようになる。

サブシェル、グループ

次に、以下のようなサブシェルやグループを受理できるようなパーサ規則を書く。

true && (echo yes) || (echo no)
true && {
  echo yes
} || {
  echo no
}

また、ついでにリダイレクションにてファイルデスクリプタ番号に対応させておく。

--- xsh09.cc    2013-05-14 14:24:46.000000000 +0900
+++ xsh10.cc    2013-05-14 14:24:46.000000000 +0900
@@ -30,8 +30,11 @@
       pipeline,
       and_or,
       list,
+      term,
+      compound_list,
       io_redirect,
       io_file,
+      command,
       nl,
       nl_list,
       comment;
@@ -40,6 +43,7 @@
     qi::rule<Iterator, spirit::utree()> AMP, VLINE, SEMI;
     qi::rule<Iterator, spirit::utree()> AND_IF, OR_IF, DSEMI;
     qi::rule<Iterator, spirit::utree()> LESS, GREAT, DLESS, DGREAT, LESSAND, GREATAND, LESSGREAT, CLOBBER, ANDGREAT, MINUS;
+    qi::rule<Iterator, spirit::utree()> If, Then, Elif, Else, Fi, For, While, Until, Do, Done, Case, Esac, In, Lparen, Rparen, Lbrace, Rbrace, Parens, Bang;
     sh_parser() : sh_parser::base_type(top)
     {
       IO_NUMBER = +qi::digit;
@@ -63,7 +67,27 @@
       ANDGREAT = qi::as<std::string>()["&>"];
       MINUS = '-';
 
-      top = *(list || nl);
+      If = qi::as<std::string>()["if"];
+      Then = qi::as<std::string>()["then"];
+      Elif = qi::as<std::string>()["elif"];
+      Else = qi::as<std::string>()["else"];
+      Fi = qi::as<std::string>()["fi"];
+      For = qi::as<std::string>()["for"];
+      While = qi::as<std::string>()["while"];
+      Until = qi::as<std::string>()["until"];
+      Do = qi::as<std::string>()["do"];
+      Done = qi::as<std::string>()["done"];
+      Case = qi::as<std::string>()["case"];
+      Esac = qi::as<std::string>()["esac"];
+      In = qi::as<std::string>()["in"];
+      Lparen = '(';
+      Rparen = ')';
+      Lbrace = '{';
+      Rbrace = '}';
+      Parens = qi::as<std::string>()["()"];
+      Bang = '!';
+
+      top = *compound_list;
 
       variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))];
       single_quoted_string = qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")];
@@ -99,15 +123,23 @@
       separator_op = qi::skip(qi::ascii::blank)[AMP | SEMI];
       separator = (separator_op >> *nl | nl_list);
 
-      pipeline = (qi::omit[*qi::ascii::blank] >>
-                  (assignments >> command_arguments | assignments | command_arguments) >> *io_redirect) %
+      pipeline = command %
         (qi::skip(qi::ascii::blank)[VLINE] >> *nl);
       and_or = pipeline % (qi::skip(qi::ascii::blank)[AND_IF | OR_IF] >> *nl);
       list = and_or % separator_op;
+      term = and_or % separator;
+      compound_list = *nl >> term >> -separator;
 
-      io_redirect = io_file | io_here;
+      io_redirect = -IO_NUMBER >> (io_file | io_here);
       io_file = qi::skip(qi::ascii::blank)[LESS | LESSAND | GREAT | GREATAND | DGREAT | LESSGREAT | CLOBBER] >> filename;
 
+      command = qi::omit[*qi::ascii::blank] >>
+        (
+         (assignments >> command_arguments | assignments | command_arguments) |
+         qi::skip(qi::ascii::space)[Lbrace] >> compound_list >> qi::skip(qi::ascii::space)[Rbrace] |
+         qi::skip(qi::ascii::space)[Lparen] >> compound_list >> qi::skip(qi::ascii::space)[Rparen]
+         ) >> *io_redirect;
+
       nl = qi::omit[*comment] >> qi::eol;
       nl_list = +(qi::omit[*comment] >> qi::eol);
       comment = qi::as_string[qi::skip(qi::ascii::blank)['#'] >> *(~qi::char_("\r\n"))];

ここまで受理できるようになったので、残りは if などの制御構文のみとなった。

制御構文

if then elif else fi,

for in do done,

while do done,

until do done,

case in ) ;; esac,

function()

の制御構文を受理できるパーサ規則を書く。

--- xsh10.cc    2013-05-14 14:24:46.000000000 +0900
+++ xsh.cc      2013-05-14 14:24:46.000000000 +0900
@@ -24,9 +24,13 @@
       assignments,
       pipe_here,
       io_here,
+      name,
+      word,
+      wordlist,
       filename,
       separator_op,
       separator,
+      sequential_sep,
       pipeline,
       and_or,
       list,
@@ -34,6 +38,13 @@
       compound_list,
       io_redirect,
       io_file,
+      do_group,
+      for_clause,
+      case_item, case_item_ns, pattern,
+      case_clause,
+      if_clause,
+      compound_command,
+      function_definition,
       command,
       nl,
       nl_list,
@@ -97,7 +108,8 @@
                                            qi::char_('"')];
       unquoted_string = qi::as_string[+~qi::char_("\t\n \"#&'();<=>\\`|")];
 
-      command_name = +(single_quoted_string | double_quoted_string | unquoted_string);
+      command_name = +(single_quoted_string | double_quoted_string | unquoted_string) -
+        (/*Bang | */Lbrace | Rbrace/* | Lparen | Rparen*/ | Case | Do | Done | Elif | Else | Esac | Fi | For | If | In | Then | Until | While);
       command_arguments = command_name >> *(qi::omit[+qi::ascii::blank] >> argument);
 
       argument = +(single_quoted_string | double_quoted_string | unquoted_string);
@@ -118,12 +130,16 @@
          qi::omit[qi::eol >> qi::string(qi::labels::_a)] |
          qi::omit[qi::string(qi::labels::_a)]);
 
+      name = qi::omit[+qi::ascii::blank] >> variable_name;
+      word = qi::omit[+qi::ascii::blank] >> argument;
+      wordlist = qi::omit[+qi::ascii::blank] >> arguments;
       filename = qi::omit[*qi::ascii::blank] >> argument;
 
-      separator_op = qi::skip(qi::ascii::blank)[AMP | SEMI];
+      separator_op = qi::skip(qi::ascii::blank)[AMP | !DSEMI >> SEMI];
       separator = (separator_op >> *nl | nl_list);
+      sequential_sep = qi::skip(qi::ascii::blank)[/*!DSEMI >> */SEMI] >> *nl | nl_list;
 
-      pipeline = command %
+      pipeline = -Bang >> command %
         (qi::skip(qi::ascii::blank)[VLINE] >> *nl);
       and_or = pipeline % (qi::skip(qi::ascii::blank)[AND_IF | OR_IF] >> *nl);
       list = and_or % separator_op;
@@ -133,11 +149,47 @@
       io_redirect = -IO_NUMBER >> (io_file | io_here);
       io_file = qi::skip(qi::ascii::blank)[LESS | LESSAND | GREAT | GREATAND | DGREAT | LESSGREAT | CLOBBER] >> filename;
 
+      do_group = qi::skip(qi::ascii::space)[Do] >> compound_list >> qi::skip(qi::ascii::space)[Done];
+      for_clause = qi::skip(qi::ascii::blank)[For] >> name >>
+        (SEMI >> *nl | *nl >> -(qi::skip(qi::ascii::blank)[In] >> -wordlist >> -sequential_sep)) >> do_group;
+      pattern = argument >> *(qi::omit[VLINE] >> argument);
+
+      case_item = !qi::eps | -qi::skip(qi::ascii::blank)['('] >> pattern >> qi::lit(')') >>
+        (compound_list | *nl) >> qi::skip(qi::ascii::space)[DSEMI] >> *nl;
+      case_item_ns = !qi::eps | -qi::skip(qi::ascii::blank)['('] >> pattern >> qi::lit(')') >>
+        -compound_list >> *nl;
+
+      case_clause = qi::skip(qi::ascii::blank)[Case] >> word >> qi::omit[+qi::ascii::blank] >> *nl >>
+        qi::skip(qi::ascii::space)[In] >> *nl >>
+        *case_item >> -case_item_ns >>
+        qi::skip(qi::ascii::space)[Esac];
+
+      if_clause = qi::skip(qi::ascii::blank)[If] >> compound_list >>
+        qi::skip(qi::ascii::blank)[Then] >> compound_list >>
+        *(
+          qi::skip(qi::ascii::blank)[Elif] >> compound_list >>
+          qi::skip(qi::ascii::blank)[Then] >> compound_list
+          ) >>
+        -(
+          qi::skip(qi::ascii::blank)[Else] >> compound_list
+          ) >>
+        qi::skip(qi::ascii::space)[Fi];
+
+      compound_command = 
+        qi::skip(qi::ascii::space)[Lbrace] >> compound_list >> qi::skip(qi::ascii::space)[Rbrace] |
+        qi::skip(qi::ascii::space)[Lparen] >> compound_list >> qi::skip(qi::ascii::space)[Rparen] |
+        for_clause |
+        case_clause |
+        if_clause |
+        qi::skip(qi::ascii::blank)[While] >> compound_list >> do_group |
+        qi::skip(qi::ascii::blank)[Until] >> compound_list >> do_group;
+      function_definition =
+        variable_name >> *qi::ascii::blank >> Parens >> *nl >> compound_command;
       command = qi::omit[*qi::ascii::blank] >>
         (
+         function_definition |
          (assignments >> command_arguments | assignments | command_arguments) |
-         qi::skip(qi::ascii::space)[Lbrace] >> compound_list >> qi::skip(qi::ascii::space)[Rbrace] |
-         qi::skip(qi::ascii::space)[Lparen] >> compound_list >> qi::skip(qi::ascii::space)[Rparen]
+         compound_command
          ) >> *io_redirect;
 
       nl = qi::omit[*comment] >> qi::eol;

これで POSIX シェルスクリプト構文を受理できるようになった。

そしてこれは Shell Command Language 記載の BNF を Boost Spirit Qi で書き直したものになっている。最後に、全体像としてすべてのコードを載せておく。ちなみに、!qi::eps を選択肢の先頭に置いている箇所が稀にあるが、これは、それがないと属性が重複してしまう現象が起きるため、その現象を防ぐ目的で入れているものである。

#include <iostream>
#include <string>
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/support_utree.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>

namespace qi = boost::spirit::qi;
namespace spirit = boost::spirit;

namespace client {
  /// Qi grammar that accepts POSIX-shell-style scripts and builds a spirit::utree.
  ///
  /// No skipper type is declared in the grammar/rule template arguments;
  /// blanks and newlines are consumed explicitly inside the rules through
  /// qi::omit[...] and qi::skip(...)[...].
  /// The main rules all carry one std::string local (qi::labels::_a); only
  /// io_here uses it, to remember the here-document terminator.
  template <typename Iterator>
  struct sh_parser : qi::grammar<Iterator, spirit::utree(), qi::locals<std::string> > {
    // Structural rules (utree attribute, one std::string local each).
    qi::rule<Iterator, spirit::utree(), qi::locals<std::string> > top,
      variable_name,
      single_quoted_string,
      double_quoted_string,
      unquoted_string,
      command_name,
      command_arguments,
      argument,
      arguments,
      assignment,
      assignments,
      pipe_here,
      io_here,
      name,
      word,
      wordlist,
      filename,
      separator_op,
      separator,
      sequential_sep,
      pipeline,
      and_or,
      list,
      term,
      compound_list,
      io_redirect,
      io_file,
      do_group,
      for_clause,
      case_item, case_item_ns, pattern,
      case_clause,
      if_clause,
      compound_command,
      function_definition,
      command,
      nl,
      nl_list,
      comment;
    // Here-document terminator rules; they synthesize a plain std::string so
    // the result can be stored in io_here's local.
    qi::rule<Iterator, std::string()> here_end, quoted_here_end, unquoted_here_end;
    // Token rules: file-descriptor number, operators and reserved words.
    qi::rule<Iterator, spirit::utree()> IO_NUMBER;
    qi::rule<Iterator, spirit::utree()> AMP, VLINE, SEMI;
    qi::rule<Iterator, spirit::utree()> AND_IF, OR_IF, DSEMI;
    qi::rule<Iterator, spirit::utree()> LESS, GREAT, DLESS, DGREAT, LESSAND, GREATAND, LESSGREAT, CLOBBER, ANDGREAT, MINUS;
    qi::rule<Iterator, spirit::utree()> If, Then, Elif, Else, Fi, For, While, Until, Do, Done, Case, Esac, In, Lparen, Rparen, Lbrace, Rbrace, Parens, Bang;

    /// Defines every rule; `top` is the start rule.
    sh_parser() : sh_parser::base_type(top)
    {
      // ---- tokens -------------------------------------------------------
      IO_NUMBER = +qi::digit;

      AMP = '&';
      VLINE = '|';
      SEMI = ';';

      AND_IF = qi::as<std::string>()["&&"];
      OR_IF = qi::as<std::string>()["||"];
      DSEMI = qi::as<std::string>()[";;"];

      LESS = '<';
      GREAT = '>';
      DLESS = qi::as<std::string>()["<<"];
      DGREAT = qi::as<std::string>()[">>"];
      LESSAND = qi::as<std::string>()["<&"];
      GREATAND = qi::as<std::string>()[">&"];
      LESSGREAT = qi::as<std::string>()["<>"];
      CLOBBER = qi::as<std::string>()[">|"];
      ANDGREAT = qi::as<std::string>()["&>"];
      MINUS = '-';

      // Reserved words. These match the bare character sequence only; they
      // perform no word-boundary check themselves.
      If = qi::as<std::string>()["if"];
      Then = qi::as<std::string>()["then"];
      Elif = qi::as<std::string>()["elif"];
      Else = qi::as<std::string>()["else"];
      Fi = qi::as<std::string>()["fi"];
      For = qi::as<std::string>()["for"];
      While = qi::as<std::string>()["while"];
      Until = qi::as<std::string>()["until"];
      Do = qi::as<std::string>()["do"];
      Done = qi::as<std::string>()["done"];
      Case = qi::as<std::string>()["case"];
      Esac = qi::as<std::string>()["esac"];
      In = qi::as<std::string>()["in"];
      Lparen = '(';
      Rparen = ')';
      Lbrace = '{';
      Rbrace = '}';
      Parens = qi::as<std::string>()["()"];
      Bang = '!';

      // ---- start rule ---------------------------------------------------
      top = *compound_list;

      // ---- words --------------------------------------------------------
      // Identifier: a letter or '_' followed by letters, digits or '_'.
      variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))];
      // '...' with the quotes kept in the attribute; no escapes inside.
      single_quoted_string = qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")];
      // "..." with the quotes kept; a backslash escapes the next character
      // (the backslash itself is matched with lit and so not captured).
      double_quoted_string = qi::as_string[qi::char_('"') >> *(
                                                               (qi::lit('\\') >> qi::char_) | (!qi::lit('\\') >> ~qi::lit('"'))
                                                               ) >>
                                           qi::char_('"')];
      // A run of characters that are neither blanks, quotes nor operators.
      unquoted_string = qi::as_string[+~qi::char_("\t\n \"#&'();<=>\\`|")];

      // A command name is a word that is not a reserved word.
      //
      // Qi's difference `a - b` fails as soon as `b` matches at the current
      // position, so subtracting the bare keyword parsers would also reject
      // every command that merely begins with a keyword ("find", "file",
      // "docker", "install", "ifconfig", "format", ...). The reserved word
      // must therefore be followed by a word delimiter or the end of input.
      // The delimiter set is the set excluded from unquoted_string, minus the
      // quote characters (a quote continues the word), plus '\r' so that a
      // reserved word at the end of a CRLF line is still recognized.
      // Done is tried before Do: alternatives do not backtrack once the
      // following boundary check fails, so the longer keyword must come first.
      // This deliberately differs from the xsh.cc hunk shown earlier in the
      // article, which subtracts the keywords without a boundary check.
      // (Bang, Lparen and Rparen are intentionally not part of the list.)
      command_name = +(single_quoted_string | double_quoted_string | unquoted_string) -
        ((Lbrace | Rbrace | Case | Done | Do | Elif | Else | Esac | Fi | For | If | In | Then | Until | While) >>
         !(~qi::char_("\t\n\r #&();<=>\\`|")));
      command_arguments = command_name >> *(qi::omit[+qi::ascii::blank] >> argument);

      argument = +(single_quoted_string | double_quoted_string | unquoted_string);
      arguments = argument >> *(qi::omit[+qi::ascii::blank] >> argument);

      // NAME=value, where the value may be empty.
      assignment = variable_name >> '=' >> -argument;
      // `!qi::eps` never matches, so this alternative always falls through to
      // the second one; per the accompanying article it is there to keep
      // attributes from being duplicated.
      assignments = assignment >> *(!qi::eps | qi::omit[+qi::ascii::blank] >> assignment);

      // ---- here-documents -----------------------------------------------
      // A pipeline that follows the terminator word on the `<<` line
      // (e.g. `cat <<EOF | sort`).
      pipe_here = qi::skip(qi::ascii::blank)[VLINE] >> *pipeline;

      // Terminator word, either 'quoted' (quotes stripped) or bare.
      quoted_here_end = '\'' >> qi::as_string[(+(qi::char_ - '\''))][qi::labels::_val = qi::labels::_1] >> '\'';
      unquoted_here_end %= qi::as_string[(+(qi::char_ - qi::ascii::space))];
      here_end %= quoted_here_end | unquoted_here_end;

      // `<<` [`-`] terminator [| pipeline] newline body terminator.
      // qi::matches[MINUS] yields true for `<<-` and false otherwise.
      // The terminator is stored in the local _a and matched again with
      // qi::string(_a); the body is either text up to "eol terminator eol",
      // or empty when the terminator follows immediately.
      io_here %= qi::skip(qi::ascii::blank)[DLESS] >> qi::matches[MINUS] >>
        here_end[qi::labels::_a = qi::labels::_1] >> -pipe_here >> nl >>
        (*(qi::as_string[+(qi::char_ - (qi::eol >> qi::string(qi::labels::_a) >> qi::eol))]) >>
         qi::omit[qi::eol >> qi::string(qi::labels::_a)] |
         qi::omit[qi::string(qi::labels::_a)]);

      // ---- blank-prefixed operands --------------------------------------
      // name/word/wordlist require at least one leading blank; filename
      // allows none.
      name = qi::omit[+qi::ascii::blank] >> variable_name;
      word = qi::omit[+qi::ascii::blank] >> argument;
      wordlist = qi::omit[+qi::ascii::blank] >> arguments;
      filename = qi::omit[*qi::ascii::blank] >> argument;

      // ---- separators ---------------------------------------------------
      // `&` or a single `;` (`!DSEMI` keeps `;;` for case items).
      separator_op = qi::skip(qi::ascii::blank)[AMP | !DSEMI >> SEMI];
      separator = (separator_op >> *nl | nl_list);
      sequential_sep = qi::skip(qi::ascii::blank)[/*!DSEMI >> */SEMI] >> *nl | nl_list;

      // ---- pipelines and lists ------------------------------------------
      // [!] command { | command }, newlines allowed after each `|`.
      pipeline = -Bang >> command %
        (qi::skip(qi::ascii::blank)[VLINE] >> *nl);
      // pipeline { (&& | ||) pipeline }, newlines allowed after the operator.
      and_or = pipeline % (qi::skip(qi::ascii::blank)[AND_IF | OR_IF] >> *nl);
      list = and_or % separator_op;
      term = and_or % separator;
      compound_list = *nl >> term >> -separator;

      // ---- redirections -------------------------------------------------
      // Optional file-descriptor number, then a file redirect or here-doc.
      io_redirect = -IO_NUMBER >> (io_file | io_here);
      io_file = qi::skip(qi::ascii::blank)[LESS | LESSAND | GREAT | GREATAND | DGREAT | LESSGREAT | CLOBBER] >> filename;

      // ---- compound commands --------------------------------------------
      do_group = qi::skip(qi::ascii::space)[Do] >> compound_list >> qi::skip(qi::ascii::space)[Done];
      // for NAME [;] | for NAME [in [words] [sep]], followed by do ... done.
      for_clause = qi::skip(qi::ascii::blank)[For] >> name >>
        (SEMI >> *nl | *nl >> -(qi::skip(qi::ascii::blank)[In] >> -wordlist >> -sequential_sep)) >> do_group;
      // pat { | pat }
      pattern = argument >> *(qi::omit[VLINE] >> argument);

      // case item terminated by `;;` ...
      case_item = !qi::eps | -qi::skip(qi::ascii::blank)['('] >> pattern >> qi::lit(')') >>
        (compound_list | *nl) >> qi::skip(qi::ascii::space)[DSEMI] >> *nl;
      // ... and the optional last item without `;;`.
      case_item_ns = !qi::eps | -qi::skip(qi::ascii::blank)['('] >> pattern >> qi::lit(')') >>
        -compound_list >> *nl;

      case_clause = qi::skip(qi::ascii::blank)[Case] >> word >> qi::omit[+qi::ascii::blank] >> *nl >>
        qi::skip(qi::ascii::space)[In] >> *nl >>
        *case_item >> -case_item_ns >>
        qi::skip(qi::ascii::space)[Esac];

      // if ... then ... { elif ... then ... } [ else ... ] fi
      if_clause = qi::skip(qi::ascii::blank)[If] >> compound_list >>
        qi::skip(qi::ascii::blank)[Then] >> compound_list >>
        *(
          qi::skip(qi::ascii::blank)[Elif] >> compound_list >>
          qi::skip(qi::ascii::blank)[Then] >> compound_list
          ) >>
        -(
          qi::skip(qi::ascii::blank)[Else] >> compound_list
          ) >>
        qi::skip(qi::ascii::space)[Fi];

      // Brace group, subshell, for, case, if, while and until.
      compound_command =
        qi::skip(qi::ascii::space)[Lbrace] >> compound_list >> qi::skip(qi::ascii::space)[Rbrace] |
        qi::skip(qi::ascii::space)[Lparen] >> compound_list >> qi::skip(qi::ascii::space)[Rparen] |
        for_clause |
        case_clause |
        if_clause |
        qi::skip(qi::ascii::blank)[While] >> compound_list >> do_group |
        qi::skip(qi::ascii::blank)[Until] >> compound_list >> do_group;
      // NAME () compound_command
      function_definition =
        variable_name >> *qi::ascii::blank >> Parens >> *nl >> compound_command;
      // A command is tried as a function definition first, then as a simple
      // command, then as a compound command; redirections may follow any.
      command = qi::omit[*qi::ascii::blank] >>
        (
         function_definition |
         (assignments >> command_arguments | assignments | command_arguments) |
         compound_command
         ) >> *io_redirect;

      // ---- newlines and comments ----------------------------------------
      // End of line, optionally preceded by `#` comments (dropped).
      nl = qi::omit[*comment] >> qi::eol;
      nl_list = +(qi::omit[*comment] >> qi::eol);
      // `#` (after optional blanks) up to, but not including, the line end.
      comment = qi::as_string[qi::skip(qi::ascii::blank)['#'] >> *(~qi::char_("\r\n"))];

      BOOST_SPIRIT_DEBUG_NODE(top);
    }
  };
}
int main()
{
  client::sh_parser<std::string::const_iterator> sh_parser;
  std::string str;
  std::cin.unsetf(std::ios::skipws);
  std::copy(std::istream_iterator<char>(std::cin), std::istream_iterator<char>(), std::back_inserter(str));
  {
    std::string::const_iterator it = str.begin(), end = str.end();
    spirit::utree ut;
    bool r = phrase_parse(it, end, sh_parser, qi::ascii::space/*, qi::skip_flag::dont_postskip*/, ut);
    std::cout << str << std::endl;
    if (r && it == end) {
      std::cout << "succeeded:\t" << ut << std::endl;
    }
    else {
      std::cout << "failed:\t" << std::string(it, end) << std::endl;
    }
  }
  return 0;
}
0 コメント
ゲストコメント認証用なぞなぞ:
キーボードのLから左に全部打って下さい。それを二回やって下さい。 ...