以前の Boost Spirit V2+Universal Tree における計算機の例 では一行毎の構文解析であったが、複数行に渡る構文解析の例として、シェルスクリプト構文解析を段階的に実装してみよう。但し、先の Boost Spirit V2+Universal Tree における bash スクリプトパーサの例 では pre-skipping
を前提としたが、ここでは明示的に skipping
を指定しつつ、今度は POSIX shell の BNF を最終的に目指した例を示していく。
Spirit V2+Universal Tree の基本形
複数行に対応した Spirit V2+Universal Tree の基本形で pre-skipping
を前提としないものは以下のようになる。
#include <iostream> #include <string> //#define BOOST_SPIRIT_DEBUG #include <boost/spirit/include/support_utree.hpp> #include <boost/spirit/include/qi.hpp> namespace qi = boost::spirit::qi; namespace spirit = boost::spirit; namespace client { template <typename Iterator> struct a_parser : qi::grammar<Iterator, spirit::utree()> { qi::rule<Iterator, spirit::utree()> top; a_parser() : a_parser::base_type(top) { top = *~qi::lit('\t'); // ここのパーサ規則を書く BOOST_SPIRIT_DEBUG_NODE(top); } }; } int main() { client::a_parser<std::string::const_iterator> a_parser; std::string str; std::cin.unsetf(std::ios::skipws); std::copy(std::istream_iterator<char>(std::cin), std::istream_iterator<char>(), std::back_inserter(str)); { std::string::const_iterator it = str.begin(), end = str.end(); spirit::utree ut; bool r = phrase_parse(it, end, a_parser, qi::ascii::space/*, qi::skip_flag::dont_postskip*/, ut); if (r && it == end) { std::cout << "succeeded:\t" << ut << std::endl; } else { std::cout << "failed:\t" << std::string(it, end) << std::endl; return 1; } } return 0; }
これを雛形としてシェルスクリプトの構文解析を肉付けしていこう。以降、Unified diff 形式でコードの変遷を表現するものとする。
コメント処理、コマンド行、変数代入、シングルクォート
例えば、以下のようなスクリプトを受理できるようなパーサ規則を書く。
#!/bin/sh i=0 message='hello world'! echo $i $message LC_ALL=C time
--- a_multiline_parser+utree00.cc 2013-05-14 14:24:45.000000000 +0900 +++ xsh00.cc 2013-05-14 14:24:45.000000000 +0900 @@ -9,11 +9,43 @@ namespace client { template <typename Iterator> - struct a_parser : qi::grammar<Iterator, spirit::utree()> { - qi::rule<Iterator, spirit::utree()> top; - a_parser() : a_parser::base_type(top) + struct sh_parser : qi::grammar<Iterator, spirit::utree()> { + qi::rule<Iterator, spirit::utree()> top, + variable_name, + single_quoted_string, + unquoted_string, + command_name, + command_arguments, + argument, + arguments, + assignment, + assignments, + nl, + nl_list, + comment; + sh_parser() : sh_parser::base_type(top) { - top = *~qi::lit('\t'); + top = *(*nl_list >> + ((qi::omit[*qi::ascii::blank] >> + (assignments >> command_arguments | assignments | command_arguments)) % + nl_list) >> *nl_list || nl); + + variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))]; + single_quoted_string = qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")]; + unquoted_string = qi::as_string[+~qi::char_("\t\n \"#&'();<=>\\`|")]; + + command_name = +(single_quoted_string | unquoted_string); + command_arguments = command_name >> *(qi::omit[+qi::ascii::blank] >> argument); + + argument = +(single_quoted_string | unquoted_string); + arguments = argument >> *(qi::omit[+qi::ascii::blank] >> argument); + + assignment = variable_name >> '=' >> -argument; + assignments = assignment >> *(!qi::eps | qi::omit[+qi::ascii::blank] >> assignment); + + nl = qi::omit[*comment] >> qi::eol; + nl_list = +(qi::omit[*comment] >> qi::eol); + comment = qi::as_string[qi::skip(qi::ascii::blank)['#'] >> *(~qi::char_("\r\n"))]; BOOST_SPIRIT_DEBUG_NODE(top); } @@ -21,20 +53,20 @@ } int main() { - client::a_parser<std::string::const_iterator> a_parser; + client::sh_parser<std::string::const_iterator> sh_parser; std::string str; std::cin.unsetf(std::ios::skipws); std::copy(std::istream_iterator<char>(std::cin), 
std::istream_iterator<char>(), std::back_inserter(str)); { std::string::const_iterator it = str.begin(), end = str.end(); spirit::utree ut; - bool r = phrase_parse(it, end, a_parser, qi::ascii::space/*, qi::skip_flag::dont_postskip*/, ut); + bool r = phrase_parse(it, end, sh_parser, qi::ascii::space/*, qi::skip_flag::dont_postskip*/, ut); + std::cout << str << std::endl; if (r && it == end) { std::cout << "succeeded:\t" << ut << std::endl; } else { std::cout << "failed:\t" << std::string(it, end) << std::endl; - return 1; } } return 0;
これでコメント処理、コマンド行、変数代入、シングルクォートされた任意の文字列を含む文字列や、空白区切りによる文字列のリストのパースが可能となっている。
ダブルクォート
次に以下のような、ダブルクォートされた文字列を受理できるようなパーサ規則を書く。
message="hello \"world\"!" echo "$message"
--- xsh00.cc 2013-05-14 14:24:45.000000000 +0900 +++ xsh01.cc 2013-05-14 14:24:45.000000000 +0900 @@ -13,6 +13,7 @@ qi::rule<Iterator, spirit::utree()> top, variable_name, single_quoted_string, + double_quoted_string, unquoted_string, command_name, command_arguments, @@ -32,12 +33,16 @@ variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))]; single_quoted_string = qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")]; + double_quoted_string = qi::as_string[qi::char_('"') >> *( + (qi::lit('\\') >> qi::char_) | (!qi::lit('\\') >> ~qi::lit('"')) + ) >> + qi::char_('"')]; unquoted_string = qi::as_string[+~qi::char_("\t\n \"#&'();<=>\\`|")]; - command_name = +(single_quoted_string | unquoted_string); + command_name = +(single_quoted_string | double_quoted_string | unquoted_string); command_arguments = command_name >> *(qi::omit[+qi::ascii::blank] >> argument); - argument = +(single_quoted_string | unquoted_string); + argument = +(single_quoted_string | double_quoted_string | unquoted_string); arguments = argument >> *(qi::omit[+qi::ascii::blank] >> argument); assignment = variable_name >> '=' >> -argument;
これで、バックスラッシュでエスケープされたダブルクォーテーションを内部に含むことができ、エスケープされていないダブルクォーテーションによってのみ閉じられる、ダブルクォート文字列のパースが可能となっている。
ヒアドキュメント
次に、以下のようなヒアドキュメントの入力を受理するようなパーサ規則を書くが、まずは終端文字列を「EOH
」に固定したものから試みる。
cat <<EOH usage: $0 < filename EOH
ちなみに、ヒアドキュメントでタブを除去する「<<-
」のときには真、さもなくば偽を付加するものとする。
--- xsh01.cc 2013-05-14 14:24:45.000000000 +0900 +++ xsh02.cc 2013-05-14 14:24:45.000000000 +0900 @@ -21,14 +21,19 @@ arguments, assignment, assignments, + io_here, nl, nl_list, comment; + qi::rule<Iterator, spirit::utree()> DLESS, MINUS; sh_parser() : sh_parser::base_type(top) { + DLESS = qi::as<std::string>()["<<"]; + MINUS = '-'; + top = *(*nl_list >> ((qi::omit[*qi::ascii::blank] >> - (assignments >> command_arguments | assignments | command_arguments)) % + (assignments >> command_arguments | assignments | command_arguments) >> -io_here) % nl_list) >> *nl_list || nl); variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))]; @@ -48,6 +53,12 @@ assignment = variable_name >> '=' >> -argument; assignments = assignment >> *(!qi::eps | qi::omit[+qi::ascii::blank] >> assignment); + io_here = qi::skip(qi::ascii::blank)[DLESS] >> qi::matches[MINUS] >> + qi::lit("EOH") >> nl >> + (*(qi::as_string[+(qi::char_ - (qi::eol >> qi::lit("EOH") >> qi::eol))]) >> + qi::omit[qi::eol >> qi::lit("EOH")] | + qi::omit[qi::lit("EOH")]); + nl = qi::omit[*comment] >> qi::eol; nl_list = +(qi::omit[*comment] >> qi::eol); comment = qi::as_string[qi::skip(qi::ascii::blank)['#'] >> *(~qi::char_("\r\n"))];
このままでは「EOH
」以外の終端文字列には対応できていない。よって、io_here
にローカル変数 qi::locals<std::string>
を使用可能にして、それを参照するようにする。
--- xsh02.cc 2013-05-14 14:24:45.000000000 +0900 +++ xsh03.cc 2013-05-14 14:24:45.000000000 +0900 @@ -3,14 +3,15 @@ //#define BOOST_SPIRIT_DEBUG #include <boost/spirit/include/support_utree.hpp> #include <boost/spirit/include/qi.hpp> +#include <boost/spirit/include/phoenix.hpp> namespace qi = boost::spirit::qi; namespace spirit = boost::spirit; namespace client { template <typename Iterator> - struct sh_parser : qi::grammar<Iterator, spirit::utree()> { - qi::rule<Iterator, spirit::utree()> top, + struct sh_parser : qi::grammar<Iterator, spirit::utree(), qi::locals<std::string> > { + qi::rule<Iterator, spirit::utree(), qi::locals<std::string> > top, variable_name, single_quoted_string, double_quoted_string, @@ -53,11 +54,11 @@ assignment = variable_name >> '=' >> -argument; assignments = assignment >> *(!qi::eps | qi::omit[+qi::ascii::blank] >> assignment); - io_here = qi::skip(qi::ascii::blank)[DLESS] >> qi::matches[MINUS] >> - qi::lit("EOH") >> nl >> - (*(qi::as_string[+(qi::char_ - (qi::eol >> qi::lit("EOH") >> qi::eol))]) >> - qi::omit[qi::eol >> qi::lit("EOH")] | - qi::omit[qi::lit("EOH")]); + io_here %= qi::skip(qi::ascii::blank)[DLESS] >> qi::matches[MINUS] >> + qi::as_string[(+(qi::char_ - qi::ascii::space))][qi::labels::_a = qi::labels::_1] >> nl >> + (*(qi::as_string[+(qi::char_ - (qi::eol >> qi::string(qi::labels::_a) >> qi::eol))]) >> + qi::omit[qi::eol >> qi::string(qi::labels::_a)] | + qi::omit[qi::string(qi::labels::_a)]); nl = qi::omit[*comment] >> qi::eol; nl_list = +(qi::omit[*comment] >> qi::eol);
これで任意の終端文字列でヒアドキュメントが受理できるようになる。さらに、以下のような変数展開を抑止するヒアドキュメントにも対応しよう。
cat <<'EOF' $1 $2 ... EOF
--- xsh03.cc 2013-05-14 14:24:45.000000000 +0900 +++ xsh04.cc 2013-05-14 14:24:45.000000000 +0900 @@ -26,6 +26,7 @@ nl, nl_list, comment; + qi::rule<Iterator, std::string()> here_end, quoted_here_end, unquoted_here_end; qi::rule<Iterator, spirit::utree()> DLESS, MINUS; sh_parser() : sh_parser::base_type(top) { @@ -54,8 +55,12 @@ assignment = variable_name >> '=' >> -argument; assignments = assignment >> *(!qi::eps | qi::omit[+qi::ascii::blank] >> assignment); + quoted_here_end = '\'' >> qi::as_string[(+(qi::char_ - '\''))][qi::labels::_val = qi::labels::_1] >> '\''; + unquoted_here_end %= qi::as_string[(+(qi::char_ - qi::ascii::space))]; + here_end %= quoted_here_end | unquoted_here_end; + io_here %= qi::skip(qi::ascii::blank)[DLESS] >> qi::matches[MINUS] >> - qi::as_string[(+(qi::char_ - qi::ascii::space))][qi::labels::_a = qi::labels::_1] >> nl >> + here_end[qi::labels::_a = qi::labels::_1] >> nl >> (*(qi::as_string[+(qi::char_ - (qi::eol >> qi::string(qi::labels::_a) >> qi::eol))]) >> qi::omit[qi::eol >> qi::string(qi::labels::_a)] | qi::omit[qi::string(qi::labels::_a)]);
ちなみに規則で「=
」の代わりに「%=
」とすると「_val = _1
」を省略できる。
リダイレクション
次に、以下のようなリダイレクションを受理するパーサ規則を書く。
sort < filename printf '' > filename
--- xsh04.cc 2013-05-14 14:24:45.000000000 +0900 +++ xsh05.cc 2013-05-14 14:24:45.000000000 +0900 @@ -23,19 +23,43 @@ assignment, assignments, io_here, + filename, + io_redirect, + io_file, nl, nl_list, comment; qi::rule<Iterator, std::string()> here_end, quoted_here_end, unquoted_here_end; - qi::rule<Iterator, spirit::utree()> DLESS, MINUS; + qi::rule<Iterator, spirit::utree()> IO_NUMBER; + qi::rule<Iterator, spirit::utree()> AMP, VLINE, SEMI; + qi::rule<Iterator, spirit::utree()> AND_IF, OR_IF, DSEMI; + qi::rule<Iterator, spirit::utree()> LESS, GREAT, DLESS, DGREAT, LESSAND, GREATAND, LESSGREAT, CLOBBER, ANDGREAT, MINUS; sh_parser() : sh_parser::base_type(top) { + IO_NUMBER = +qi::digit; + + AMP = '&'; + VLINE = '|'; + SEMI = ';'; + + AND_IF = qi::as<std::string>()["&&"]; + OR_IF = qi::as<std::string>()["||"]; + DSEMI = qi::as<std::string>()[";;"]; + + LESS = '<'; + GREAT = '>'; DLESS = qi::as<std::string>()["<<"]; + DGREAT = qi::as<std::string>()[">>"]; + LESSAND = qi::as<std::string>()["<&"]; + GREATAND = qi::as<std::string>()[">&"]; + LESSGREAT = qi::as<std::string>()["<>"]; + CLOBBER = qi::as<std::string>()[">|"]; + ANDGREAT = qi::as<std::string>()["&>"]; MINUS = '-'; top = *(*nl_list >> ((qi::omit[*qi::ascii::blank] >> - (assignments >> command_arguments | assignments | command_arguments) >> -io_here) % + (assignments >> command_arguments | assignments | command_arguments) >> *io_redirect) % nl_list) >> *nl_list || nl); variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))]; @@ -65,6 +89,11 @@ qi::omit[qi::eol >> qi::string(qi::labels::_a)] | qi::omit[qi::string(qi::labels::_a)]); + filename = qi::omit[*qi::ascii::blank] >> argument; + + io_redirect = io_file | io_here; + io_file = qi::skip(qi::ascii::blank)[LESS | LESSAND | GREAT | GREATAND | DGREAT | LESSGREAT | CLOBBER] >> filename; + nl = qi::omit[*comment] >> qi::eol; nl_list = +(qi::omit[*comment] >> qi::eol); comment = 
qi::as_string[qi::skip(qi::ascii::blank)['#'] >> *(~qi::char_("\r\n"))];
これでリダイレクションの構文を受理できるようになる。
パイプライン
次に、以下のようなパイプラインを受理できるパーサ規則を書く。
echo | cat | sort | uniq
--- xsh05.cc 2013-05-14 14:24:46.000000000 +0900 +++ xsh06.cc 2013-05-14 14:24:46.000000000 +0900 @@ -24,6 +24,7 @@ assignments, io_here, filename, + pipeline, io_redirect, io_file, nl, @@ -57,10 +58,7 @@ ANDGREAT = qi::as<std::string>()["&>"]; MINUS = '-'; - top = *(*nl_list >> - ((qi::omit[*qi::ascii::blank] >> - (assignments >> command_arguments | assignments | command_arguments) >> *io_redirect) % - nl_list) >> *nl_list || nl); + top = *(*nl >> (pipeline % nl_list) >> *nl || nl); variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))]; single_quoted_string = qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")]; @@ -91,6 +89,10 @@ filename = qi::omit[*qi::ascii::blank] >> argument; + pipeline = (qi::omit[*qi::ascii::blank] >> + (assignments >> command_arguments | assignments | command_arguments) >> *io_redirect) % + (qi::skip(qi::ascii::blank)[VLINE] >> *nl); + io_redirect = io_file | io_here; io_file = qi::skip(qi::ascii::blank)[LESS | LESSAND | GREAT | GREATAND | DGREAT | LESSGREAT | CLOBBER] >> filename;
しかしこれでは、以下のようなヒアドキュメント直後のパイプラインには未対応である。
cat <<EOF | sort : EOF
よって、以下のように pipe_here
規則を io_here
規則に挿入する。
--- xsh06.cc 2013-05-14 14:24:46.000000000 +0900 +++ xsh07.cc 2013-05-14 14:24:46.000000000 +0900 @@ -22,6 +22,7 @@ arguments, assignment, assignments, + pipe_here, io_here, filename, pipeline, @@ -77,12 +78,14 @@ assignment = variable_name >> '=' >> -argument; assignments = assignment >> *(!qi::eps | qi::omit[+qi::ascii::blank] >> assignment); + pipe_here = qi::skip(qi::ascii::blank)[VLINE] >> *pipeline; + quoted_here_end = '\'' >> qi::as_string[(+(qi::char_ - '\''))][qi::labels::_val = qi::labels::_1] >> '\''; unquoted_here_end %= qi::as_string[(+(qi::char_ - qi::ascii::space))]; here_end %= quoted_here_end | unquoted_here_end; io_here %= qi::skip(qi::ascii::blank)[DLESS] >> qi::matches[MINUS] >> - here_end[qi::labels::_a = qi::labels::_1] >> nl >> + here_end[qi::labels::_a = qi::labels::_1] >> -pipe_here >> nl >> (*(qi::as_string[+(qi::char_ - (qi::eol >> qi::string(qi::labels::_a) >> qi::eol))]) >> qi::omit[qi::eol >> qi::string(qi::labels::_a)] | qi::omit[qi::string(qi::labels::_a)]);
これでパイプラインの構文を受理できるようになる。
リスト
次に、以下のようなリストを受理できるようなパーサ規則を書く。
true && echo yes || echo no
--- xsh07.cc 2013-05-14 14:24:46.000000000 +0900 +++ xsh08.cc 2013-05-14 14:24:46.000000000 +0900 @@ -26,6 +26,7 @@ io_here, filename, pipeline, + and_or, io_redirect, io_file, nl, @@ -59,7 +60,7 @@ ANDGREAT = qi::as<std::string>()["&>"]; MINUS = '-'; - top = *(*nl >> (pipeline % nl_list) >> *nl || nl); + top = *(*nl >> (and_or % nl_list) >> *nl || nl); variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))]; single_quoted_string = qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")]; @@ -95,6 +96,7 @@ pipeline = (qi::omit[*qi::ascii::blank] >> (assignments >> command_arguments | assignments | command_arguments) >> *io_redirect) % (qi::skip(qi::ascii::blank)[VLINE] >> *nl); + and_or = pipeline % (qi::skip(qi::ascii::blank)[AND_IF | OR_IF] >> *nl); io_redirect = io_file | io_here; io_file = qi::skip(qi::ascii::blank)[LESS | LESSAND | GREAT | GREATAND | DGREAT | LESSGREAT | CLOBBER] >> filename;
さらに、次のようなリストを受理できるようなパーサ規則を書く。
echo & i=0; LC_ALL=C date
--- xsh08.cc 2013-05-14 14:24:46.000000000 +0900 +++ xsh09.cc 2013-05-14 14:24:46.000000000 +0900 @@ -25,8 +25,11 @@ pipe_here, io_here, filename, + separator_op, + separator, pipeline, and_or, + list, io_redirect, io_file, nl, @@ -60,7 +63,7 @@ ANDGREAT = qi::as<std::string>()["&>"]; MINUS = '-'; - top = *(*nl >> (and_or % nl_list) >> *nl || nl); + top = *(list || nl); variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))]; single_quoted_string = qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")]; @@ -93,10 +96,14 @@ filename = qi::omit[*qi::ascii::blank] >> argument; + separator_op = qi::skip(qi::ascii::blank)[AMP | SEMI]; + separator = (separator_op >> *nl | nl_list); + pipeline = (qi::omit[*qi::ascii::blank] >> (assignments >> command_arguments | assignments | command_arguments) >> *io_redirect) % (qi::skip(qi::ascii::blank)[VLINE] >> *nl); and_or = pipeline % (qi::skip(qi::ascii::blank)[AND_IF | OR_IF] >> *nl); + list = and_or % separator_op; io_redirect = io_file | io_here; io_file = qi::skip(qi::ascii::blank)[LESS | LESSAND | GREAT | GREATAND | DGREAT | LESSGREAT | CLOBBER] >> filename;
これでリスト構文を受理できるようになる。
サブシェル、グループ
次に、以下のようなサブシェルやグループを受理できるようなパーサ規則を書く。
true && (echo yes) || (echo no) true && { echo yes } || { echo no }
また、ついでにリダイレクションにてファイルディスクリプタ番号(IO_NUMBER)に対応させておく。
--- xsh09.cc 2013-05-14 14:24:46.000000000 +0900 +++ xsh10.cc 2013-05-14 14:24:46.000000000 +0900 @@ -30,8 +30,11 @@ pipeline, and_or, list, + term, + compound_list, io_redirect, io_file, + command, nl, nl_list, comment; @@ -40,6 +43,7 @@ qi::rule<Iterator, spirit::utree()> AMP, VLINE, SEMI; qi::rule<Iterator, spirit::utree()> AND_IF, OR_IF, DSEMI; qi::rule<Iterator, spirit::utree()> LESS, GREAT, DLESS, DGREAT, LESSAND, GREATAND, LESSGREAT, CLOBBER, ANDGREAT, MINUS; + qi::rule<Iterator, spirit::utree()> If, Then, Elif, Else, Fi, For, While, Until, Do, Done, Case, Esac, In, Lparen, Rparen, Lbrace, Rbrace, Parens, Bang; sh_parser() : sh_parser::base_type(top) { IO_NUMBER = +qi::digit; @@ -63,7 +67,27 @@ ANDGREAT = qi::as<std::string>()["&>"]; MINUS = '-'; - top = *(list || nl); + If = qi::as<std::string>()["if"]; + Then = qi::as<std::string>()["then"]; + Elif = qi::as<std::string>()["elif"]; + Else = qi::as<std::string>()["else"]; + Fi = qi::as<std::string>()["fi"]; + For = qi::as<std::string>()["for"]; + While = qi::as<std::string>()["while"]; + Until = qi::as<std::string>()["until"]; + Do = qi::as<std::string>()["do"]; + Done = qi::as<std::string>()["done"]; + Case = qi::as<std::string>()["case"]; + Esac = qi::as<std::string>()["esac"]; + In = qi::as<std::string>()["in"]; + Lparen = '('; + Rparen = ')'; + Lbrace = '{'; + Rbrace = '}'; + Parens = qi::as<std::string>()["()"]; + Bang = '!'; + + top = *compound_list; variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))]; single_quoted_string = qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")]; @@ -99,15 +123,23 @@ separator_op = qi::skip(qi::ascii::blank)[AMP | SEMI]; separator = (separator_op >> *nl | nl_list); - pipeline = (qi::omit[*qi::ascii::blank] >> - (assignments >> command_arguments | assignments | command_arguments) >> *io_redirect) % + pipeline = command % (qi::skip(qi::ascii::blank)[VLINE] >> *nl); and_or = pipeline % 
(qi::skip(qi::ascii::blank)[AND_IF | OR_IF] >> *nl); list = and_or % separator_op; + term = and_or % separator; + compound_list = *nl >> term >> -separator; - io_redirect = io_file | io_here; + io_redirect = -IO_NUMBER >> (io_file | io_here); io_file = qi::skip(qi::ascii::blank)[LESS | LESSAND | GREAT | GREATAND | DGREAT | LESSGREAT | CLOBBER] >> filename; + command = qi::omit[*qi::ascii::blank] >> + ( + (assignments >> command_arguments | assignments | command_arguments) | + qi::skip(qi::ascii::space)[Lbrace] >> compound_list >> qi::skip(qi::ascii::space)[Rbrace] | + qi::skip(qi::ascii::space)[Lparen] >> compound_list >> qi::skip(qi::ascii::space)[Rparen] + ) >> *io_redirect; + nl = qi::omit[*comment] >> qi::eol; nl_list = +(qi::omit[*comment] >> qi::eol); comment = qi::as_string[qi::skip(qi::ascii::blank)['#'] >> *(~qi::char_("\r\n"))];
ここまで受理できるようになったので、残りは if
などの制御構文のみとなった。
制御構文
if then elif else fi
,
for in do done
,
while do done
,
until do done
,
case in ) ;; esac
,
function()
の制御構文を受理できるパーサ規則を書く。
--- xsh10.cc 2013-05-14 14:24:46.000000000 +0900 +++ xsh.cc 2013-05-14 14:24:46.000000000 +0900 @@ -24,9 +24,13 @@ assignments, pipe_here, io_here, + name, + word, + wordlist, filename, separator_op, separator, + sequential_sep, pipeline, and_or, list, @@ -34,6 +38,13 @@ compound_list, io_redirect, io_file, + do_group, + for_clause, + case_item, case_item_ns, pattern, + case_clause, + if_clause, + compound_command, + function_definition, command, nl, nl_list, @@ -97,7 +108,8 @@ qi::char_('"')]; unquoted_string = qi::as_string[+~qi::char_("\t\n \"#&'();<=>\\`|")]; - command_name = +(single_quoted_string | double_quoted_string | unquoted_string); + command_name = +(single_quoted_string | double_quoted_string | unquoted_string) - + (/*Bang | */Lbrace | Rbrace/* | Lparen | Rparen*/ | Case | Do | Done | Elif | Else | Esac | Fi | For | If | In | Then | Until | While); command_arguments = command_name >> *(qi::omit[+qi::ascii::blank] >> argument); argument = +(single_quoted_string | double_quoted_string | unquoted_string); @@ -118,12 +130,16 @@ qi::omit[qi::eol >> qi::string(qi::labels::_a)] | qi::omit[qi::string(qi::labels::_a)]); + name = qi::omit[+qi::ascii::blank] >> variable_name; + word = qi::omit[+qi::ascii::blank] >> argument; + wordlist = qi::omit[+qi::ascii::blank] >> arguments; filename = qi::omit[*qi::ascii::blank] >> argument; - separator_op = qi::skip(qi::ascii::blank)[AMP | SEMI]; + separator_op = qi::skip(qi::ascii::blank)[AMP | !DSEMI >> SEMI]; separator = (separator_op >> *nl | nl_list); + sequential_sep = qi::skip(qi::ascii::blank)[/*!DSEMI >> */SEMI] >> *nl | nl_list; - pipeline = command % + pipeline = -Bang >> command % (qi::skip(qi::ascii::blank)[VLINE] >> *nl); and_or = pipeline % (qi::skip(qi::ascii::blank)[AND_IF | OR_IF] >> *nl); list = and_or % separator_op; @@ -133,11 +149,47 @@ io_redirect = -IO_NUMBER >> (io_file | io_here); io_file = qi::skip(qi::ascii::blank)[LESS | LESSAND | GREAT | GREATAND | DGREAT | LESSGREAT | CLOBBER] >> filename; + 
do_group = qi::skip(qi::ascii::space)[Do] >> compound_list >> qi::skip(qi::ascii::space)[Done]; + for_clause = qi::skip(qi::ascii::blank)[For] >> name >> + (SEMI >> *nl | *nl >> -(qi::skip(qi::ascii::blank)[In] >> -wordlist >> -sequential_sep)) >> do_group; + pattern = argument >> *(qi::omit[VLINE] >> argument); + + case_item = !qi::eps | -qi::skip(qi::ascii::blank)['('] >> pattern >> qi::lit(')') >> + (compound_list | *nl) >> qi::skip(qi::ascii::space)[DSEMI] >> *nl; + case_item_ns = !qi::eps | -qi::skip(qi::ascii::blank)['('] >> pattern >> qi::lit(')') >> + -compound_list >> *nl; + + case_clause = qi::skip(qi::ascii::blank)[Case] >> word >> qi::omit[+qi::ascii::blank] >> *nl >> + qi::skip(qi::ascii::space)[In] >> *nl >> + *case_item >> -case_item_ns >> + qi::skip(qi::ascii::space)[Esac]; + + if_clause = qi::skip(qi::ascii::blank)[If] >> compound_list >> + qi::skip(qi::ascii::blank)[Then] >> compound_list >> + *( + qi::skip(qi::ascii::blank)[Elif] >> compound_list >> + qi::skip(qi::ascii::blank)[Then] >> compound_list + ) >> + -( + qi::skip(qi::ascii::blank)[Else] >> compound_list + ) >> + qi::skip(qi::ascii::space)[Fi]; + + compound_command = + qi::skip(qi::ascii::space)[Lbrace] >> compound_list >> qi::skip(qi::ascii::space)[Rbrace] | + qi::skip(qi::ascii::space)[Lparen] >> compound_list >> qi::skip(qi::ascii::space)[Rparen] | + for_clause | + case_clause | + if_clause | + qi::skip(qi::ascii::blank)[While] >> compound_list >> do_group | + qi::skip(qi::ascii::blank)[Until] >> compound_list >> do_group; + function_definition = + variable_name >> *qi::ascii::blank >> Parens >> *nl >> compound_command; command = qi::omit[*qi::ascii::blank] >> ( + function_definition | (assignments >> command_arguments | assignments | command_arguments) | - qi::skip(qi::ascii::space)[Lbrace] >> compound_list >> qi::skip(qi::ascii::space)[Rbrace] | - qi::skip(qi::ascii::space)[Lparen] >> compound_list >> qi::skip(qi::ascii::space)[Rparen] + compound_command ) >> *io_redirect; nl = 
qi::omit[*comment] >> qi::eol;
これで POSIX シェルスクリプト構文を受理できるようになった。
そしてこれは Shell Command Language 記載の BNF を Boost Spirit Qi で書き直したものになっている。最後に、全体像としてすべてのコードを載せておく。ちなみに、!qi::eps
というのが稀に必要となっている箇所があるが、これは属性が重複してしまう現象を防ぐためのものである(これがないと属性が重複してしまう)。
#include <iostream> #include <string> //#define BOOST_SPIRIT_DEBUG #include <boost/spirit/include/support_utree.hpp> #include <boost/spirit/include/qi.hpp> #include <boost/spirit/include/phoenix.hpp> namespace qi = boost::spirit::qi; namespace spirit = boost::spirit; namespace client { template <typename Iterator> struct sh_parser : qi::grammar<Iterator, spirit::utree(), qi::locals<std::string> > { qi::rule<Iterator, spirit::utree(), qi::locals<std::string> > top, variable_name, single_quoted_string, double_quoted_string, unquoted_string, command_name, command_arguments, argument, arguments, assignment, assignments, pipe_here, io_here, name, word, wordlist, filename, separator_op, separator, sequential_sep, pipeline, and_or, list, term, compound_list, io_redirect, io_file, do_group, for_clause, case_item, case_item_ns, pattern, case_clause, if_clause, compound_command, function_definition, command, nl, nl_list, comment; qi::rule<Iterator, std::string()> here_end, quoted_here_end, unquoted_here_end; qi::rule<Iterator, spirit::utree()> IO_NUMBER; qi::rule<Iterator, spirit::utree()> AMP, VLINE, SEMI; qi::rule<Iterator, spirit::utree()> AND_IF, OR_IF, DSEMI; qi::rule<Iterator, spirit::utree()> LESS, GREAT, DLESS, DGREAT, LESSAND, GREATAND, LESSGREAT, CLOBBER, ANDGREAT, MINUS; qi::rule<Iterator, spirit::utree()> If, Then, Elif, Else, Fi, For, While, Until, Do, Done, Case, Esac, In, Lparen, Rparen, Lbrace, Rbrace, Parens, Bang; sh_parser() : sh_parser::base_type(top) { IO_NUMBER = +qi::digit; AMP = '&'; VLINE = '|'; SEMI = ';'; AND_IF = qi::as<std::string>()["&&"]; OR_IF = qi::as<std::string>()["||"]; DSEMI = qi::as<std::string>()[";;"]; LESS = '<'; GREAT = '>'; DLESS = qi::as<std::string>()["<<"]; DGREAT = qi::as<std::string>()[">>"]; LESSAND = qi::as<std::string>()["<&"]; GREATAND = qi::as<std::string>()[">&"]; LESSGREAT = qi::as<std::string>()["<>"]; CLOBBER = qi::as<std::string>()[">|"]; ANDGREAT = qi::as<std::string>()["&>"]; MINUS = '-'; If = 
qi::as<std::string>()["if"]; Then = qi::as<std::string>()["then"]; Elif = qi::as<std::string>()["elif"]; Else = qi::as<std::string>()["else"]; Fi = qi::as<std::string>()["fi"]; For = qi::as<std::string>()["for"]; While = qi::as<std::string>()["while"]; Until = qi::as<std::string>()["until"]; Do = qi::as<std::string>()["do"]; Done = qi::as<std::string>()["done"]; Case = qi::as<std::string>()["case"]; Esac = qi::as<std::string>()["esac"]; In = qi::as<std::string>()["in"]; Lparen = '('; Rparen = ')'; Lbrace = '{'; Rbrace = '}'; Parens = qi::as<std::string>()["()"]; Bang = '!'; top = *compound_list; variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))]; single_quoted_string = qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")]; double_quoted_string = qi::as_string[qi::char_('"') >> *( (qi::lit('\\') >> qi::char_) | (!qi::lit('\\') >> ~qi::lit('"')) ) >> qi::char_('"')]; unquoted_string = qi::as_string[+~qi::char_("\t\n \"#&'();<=>\\`|")]; command_name = +(single_quoted_string | double_quoted_string | unquoted_string) - (/*Bang | */Lbrace | Rbrace/* | Lparen | Rparen*/ | Case | Do | Done | Elif | Else | Esac | Fi | For | If | In | Then | Until | While); command_arguments = command_name >> *(qi::omit[+qi::ascii::blank] >> argument); argument = +(single_quoted_string | double_quoted_string | unquoted_string); arguments = argument >> *(qi::omit[+qi::ascii::blank] >> argument); assignment = variable_name >> '=' >> -argument; assignments = assignment >> *(!qi::eps | qi::omit[+qi::ascii::blank] >> assignment); pipe_here = qi::skip(qi::ascii::blank)[VLINE] >> *pipeline; quoted_here_end = '\'' >> qi::as_string[(+(qi::char_ - '\''))][qi::labels::_val = qi::labels::_1] >> '\''; unquoted_here_end %= qi::as_string[(+(qi::char_ - qi::ascii::space))]; here_end %= quoted_here_end | unquoted_here_end; io_here %= qi::skip(qi::ascii::blank)[DLESS] >> qi::matches[MINUS] >> here_end[qi::labels::_a = qi::labels::_1] >> -pipe_here 
>> nl >> (*(qi::as_string[+(qi::char_ - (qi::eol >> qi::string(qi::labels::_a) >> qi::eol))]) >> qi::omit[qi::eol >> qi::string(qi::labels::_a)] | qi::omit[qi::string(qi::labels::_a)]); name = qi::omit[+qi::ascii::blank] >> variable_name; word = qi::omit[+qi::ascii::blank] >> argument; wordlist = qi::omit[+qi::ascii::blank] >> arguments; filename = qi::omit[*qi::ascii::blank] >> argument; separator_op = qi::skip(qi::ascii::blank)[AMP | !DSEMI >> SEMI]; separator = (separator_op >> *nl | nl_list); sequential_sep = qi::skip(qi::ascii::blank)[/*!DSEMI >> */SEMI] >> *nl | nl_list; pipeline = -Bang >> command % (qi::skip(qi::ascii::blank)[VLINE] >> *nl); and_or = pipeline % (qi::skip(qi::ascii::blank)[AND_IF | OR_IF] >> *nl); list = and_or % separator_op; term = and_or % separator; compound_list = *nl >> term >> -separator; io_redirect = -IO_NUMBER >> (io_file | io_here); io_file = qi::skip(qi::ascii::blank)[LESS | LESSAND | GREAT | GREATAND | DGREAT | LESSGREAT | CLOBBER] >> filename; do_group = qi::skip(qi::ascii::space)[Do] >> compound_list >> qi::skip(qi::ascii::space)[Done]; for_clause = qi::skip(qi::ascii::blank)[For] >> name >> (SEMI >> *nl | *nl >> -(qi::skip(qi::ascii::blank)[In] >> -wordlist >> -sequential_sep)) >> do_group; pattern = argument >> *(qi::omit[VLINE] >> argument); case_item = !qi::eps | -qi::skip(qi::ascii::blank)['('] >> pattern >> qi::lit(')') >> (compound_list | *nl) >> qi::skip(qi::ascii::space)[DSEMI] >> *nl; case_item_ns = !qi::eps | -qi::skip(qi::ascii::blank)['('] >> pattern >> qi::lit(')') >> -compound_list >> *nl; case_clause = qi::skip(qi::ascii::blank)[Case] >> word >> qi::omit[+qi::ascii::blank] >> *nl >> qi::skip(qi::ascii::space)[In] >> *nl >> *case_item >> -case_item_ns >> qi::skip(qi::ascii::space)[Esac]; if_clause = qi::skip(qi::ascii::blank)[If] >> compound_list >> qi::skip(qi::ascii::blank)[Then] >> compound_list >> *( qi::skip(qi::ascii::blank)[Elif] >> compound_list >> qi::skip(qi::ascii::blank)[Then] >> compound_list ) >> 
-( qi::skip(qi::ascii::blank)[Else] >> compound_list ) >> qi::skip(qi::ascii::space)[Fi]; compound_command = qi::skip(qi::ascii::space)[Lbrace] >> compound_list >> qi::skip(qi::ascii::space)[Rbrace] | qi::skip(qi::ascii::space)[Lparen] >> compound_list >> qi::skip(qi::ascii::space)[Rparen] | for_clause | case_clause | if_clause | qi::skip(qi::ascii::blank)[While] >> compound_list >> do_group | qi::skip(qi::ascii::blank)[Until] >> compound_list >> do_group; function_definition = variable_name >> *qi::ascii::blank >> Parens >> *nl >> compound_command; command = qi::omit[*qi::ascii::blank] >> ( function_definition | (assignments >> command_arguments | assignments | command_arguments) | compound_command ) >> *io_redirect; nl = qi::omit[*comment] >> qi::eol; nl_list = +(qi::omit[*comment] >> qi::eol); comment = qi::as_string[qi::skip(qi::ascii::blank)['#'] >> *(~qi::char_("\r\n"))]; BOOST_SPIRIT_DEBUG_NODE(top); } }; } int main() { client::sh_parser<std::string::const_iterator> sh_parser; std::string str; std::cin.unsetf(std::ios::skipws); std::copy(std::istream_iterator<char>(std::cin), std::istream_iterator<char>(), std::back_inserter(str)); { std::string::const_iterator it = str.begin(), end = str.end(); spirit::utree ut; bool r = phrase_parse(it, end, sh_parser, qi::ascii::space/*, qi::skip_flag::dont_postskip*/, ut); std::cout << str << std::endl; if (r && it == end) { std::cout << "succeeded:\t" << ut << std::endl; } else { std::cout << "failed:\t" << std::string(it, end) << std::endl; } } return 0; }