先の Boost Spirit V2+Universal Tree における計算機の例 では一行毎の構文解析であったが、複数行に渡る構文解析の例として、シェルスクリプト構文解析を段階的に実装してみよう。ここでは bash の BNF を最終的に目指した例を示していく。
Spirit V2+Universal Tree の基本形
複数行に対応した Spirit V2+Universal Tree の基本形は以下のようになる。
#include <iostream> #include <string> //#define BOOST_SPIRIT_DEBUG #include <boost/spirit/include/support_utree.hpp> #include <boost/spirit/include/qi.hpp> namespace qi = boost::spirit::qi; namespace spirit = boost::spirit; namespace client { template <typename Iterator> struct a_parser : qi::grammar<Iterator, qi::ascii::space_type, spirit::utree()> { qi::rule<Iterator, qi::ascii::space_type, spirit::utree()> top; a_parser() : a_parser::base_type(top) { top = *~qi::lit('\t'); // ここのパーサ規則を書く BOOST_SPIRIT_DEBUG_NODE(top); } }; } int main() { client::a_parser<std::string::const_iterator> a_parser; std::string str; std::cin.unsetf(std::ios::skipws); std::copy(std::istream_iterator<char>(std::cin), std::istream_iterator<char>(), std::back_inserter(str)); { std::string::const_iterator it = str.begin(), end = str.end(); spirit::utree ut; bool r = phrase_parse(it, end, a_parser, qi::ascii::space/*, qi::skip_flag::dont_postskip*/, ut); if (r && it == end) { std::cout << "succeeded:\t" << ut << std::endl; } else { std::cout << "failed:\t" << std::string(it, end) << std::endl; return 1; } } return 0; }
これを雛形としてシェルスクリプトの構文解析を肉付けしていこう。以降、Unified diff 形式でコードの変遷を表現するものとする。
コメント処理、コマンド行、変数代入、シングルクォート
例えば、以下のようなスクリプトを受理できるようなパーサ規則を書く。
#!/bin/sh i=0 message='hello world'! echo $i $message LC_ALL=C time
--- a_multiline_parser+utree01.cc 2013-05-13 20:16:28.000000000 +0900 +++ xsh_ba00.cc 2013-05-13 20:16:28.000000000 +0900 @@ -9,11 +9,53 @@ namespace client { template <typename Iterator> - struct a_parser : qi::grammar<Iterator, qi::ascii::space_type, spirit::utree()> { + struct sh_parser : qi::grammar<Iterator, qi::ascii::space_type, spirit::utree()> { qi::rule<Iterator, qi::ascii::space_type, spirit::utree()> top; - a_parser() : a_parser::base_type(top) + qi::rule<Iterator, qi::ascii::space_type, spirit::utree()> NEWLINE, + INDENT, + DELIMITER, + variable_name, + single_quoted_string, + unquoted_string, + command_name, + command_arguments, + argument, + arguments, + ASSIGNMENT_WORD, + assignments, + simple_command, + command, + compound_list, + comment; + sh_parser() : sh_parser::base_type(top) { - top = *~qi::lit('\t'); + top = *(compound_list || NEWLINE); + + NEWLINE = qi::omit[-comment] >> qi::no_skip[qi::eol]; + INDENT = qi::no_skip[*qi::ascii::blank]; + DELIMITER = qi::no_skip[+qi::ascii::blank]; + + variable_name = qi::as_string[qi::no_skip[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))]]; + single_quoted_string = qi::no_skip[qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")]]; + unquoted_string = qi::no_skip[qi::as_string[+~qi::char_("\t\n \"#&'();<=>\\`|")]]; + + command_name = +(single_quoted_string | unquoted_string); + command_arguments = command_name >> *(qi::omit[DELIMITER] >> argument); + + argument = +(single_quoted_string | unquoted_string); + arguments = argument >> *(qi::omit[DELIMITER] >> argument); + + ASSIGNMENT_WORD = variable_name >> qi::no_skip['='] >> -argument; + assignments = ASSIGNMENT_WORD >> *(!qi::eps | qi::omit[DELIMITER] >> ASSIGNMENT_WORD); + + simple_command = assignments >> command_arguments | assignments | command_arguments; + command = qi::omit[INDENT] >> simple_command; + + compound_list + = *NEWLINE >> command % +NEWLINE >> -NEWLINE + ; + + comment = qi::no_skip[*qi::ascii::blank >> qi::as_string['#' >> *(~qi::char_("\r\n"))]]; BOOST_SPIRIT_DEBUG_NODE(top); } @@ -21,20 +63,20 @@ } int main() { - client::a_parser<std::string::const_iterator> a_parser; + client::sh_parser<std::string::const_iterator> sh_parser; std::string str; std::cin.unsetf(std::ios::skipws); std::copy(std::istream_iterator<char>(std::cin), std::istream_iterator<char>(), std::back_inserter(str)); { std::string::const_iterator it = str.begin(), end = str.end(); spirit::utree ut; - bool r = phrase_parse(it, end, a_parser, qi::ascii::space/*, qi::skip_flag::dont_postskip*/, ut); + bool r = phrase_parse(it, end, sh_parser, qi::ascii::space/*, qi::skip_flag::dont_postskip*/, ut); + std::cout << str << std::endl; if (r && it == end) { std::cout << "succeeded:\t" << ut << std::endl; } else { std::cout << "failed:\t" << std::string(it, end) << std::endl; - return 1; } } return 0;
これでコメント処理、コマンド行、変数代入、シングルクォートされた任意の文字列を含む文字列や、空白区切りによる文字列のリストのパースが可能となっている。
ダブルクォート
次に以下のような、ダブルクォートされた文字列を受理できるようなパーサ規則を書く。
message="hello \"world\"!" echo "$message"
--- xsh_ba00.cc 2013-05-13 20:16:28.000000000 +0900 +++ xsh_ba01.cc 2013-05-13 20:16:28.000000000 +0900 @@ -16,6 +16,7 @@ DELIMITER, variable_name, single_quoted_string, + double_quoted_string, unquoted_string, command_name, command_arguments, @@ -37,12 +38,16 @@ variable_name = qi::as_string[qi::no_skip[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))]]; single_quoted_string = qi::no_skip[qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")]]; + double_quoted_string = qi::no_skip[qi::as_string[qi::char_('"') >> *( + (qi::lit('\\') >> qi::char_) | (!qi::lit('\\') >> ~qi::lit('"')) + ) >> + qi::char_('"')]]; unquoted_string = qi::no_skip[qi::as_string[+~qi::char_("\t\n \"#&'();<=>\\`|")]]; - command_name = +(single_quoted_string | unquoted_string); + command_name = +(single_quoted_string | double_quoted_string | unquoted_string); command_arguments = command_name >> *(qi::omit[DELIMITER] >> argument); - argument = +(single_quoted_string | unquoted_string); + argument = +(single_quoted_string | double_quoted_string | unquoted_string); arguments = argument >> *(qi::omit[DELIMITER] >> argument); ASSIGNMENT_WORD = variable_name >> qi::no_skip['='] >> -argument;
これでバックスラッシュでエスケープされていないダブルクォーテーションでのみ閉じられたダブルクォート文字列のパースが可能となっている。
ヒアドキュメント
次に、以下のようなヒアドキュメントの入力を受理するようなパーサ規則を書くが、まずは終端文字列を「EOH
」に固定したものから試みる。
cat <<EOH usage: $0 < filename EOH
ちなみに、ヒアドキュメントでタブを除去する「<<-
」のときには真、さもなくば偽を付加するものとする。
--- xsh_ba01.cc 2013-05-13 20:16:28.000000000 +0900 +++ xsh_ba02.cc 2013-05-13 20:16:28.000000000 +0900 @@ -24,12 +24,16 @@ arguments, ASSIGNMENT_WORD, assignments, + io_here, simple_command, command, compound_list, comment; sh_parser() : sh_parser::base_type(top) { +#define DLESS qi::as_string["<<"] +#define MINUS qi::as_string['-'] + top = *(compound_list || NEWLINE); NEWLINE = qi::omit[-comment] >> qi::no_skip[qi::eol]; @@ -53,7 +57,13 @@ ASSIGNMENT_WORD = variable_name >> qi::no_skip['='] >> -argument; assignments = ASSIGNMENT_WORD >> *(!qi::eps | qi::omit[DELIMITER] >> ASSIGNMENT_WORD); - simple_command = assignments >> command_arguments | assignments | command_arguments; + io_here %= DLESS >> qi::matches[qi::no_skip[MINUS]] >> + qi::no_skip[qi::lit("EOH")] >> qi::omit[-comment] >> qi::no_skip[qi::eol] >> + ((qi::as_string[*qi::no_skip[+(qi::char_ - (qi::eol >> qi::lit("EOH") >> qi::eol))]]) >> + qi::omit[qi::no_skip[qi::eol] >> qi::lit("EOH")] | + qi::omit[qi::lit("EOH")]); + + simple_command = (assignments >> command_arguments | assignments | command_arguments) >> *io_here; command = qi::omit[INDENT] >> simple_command; compound_list @@ -63,6 +73,9 @@ comment = qi::no_skip[*qi::ascii::blank >> qi::as_string['#' >> *(~qi::char_("\r\n"))]]; BOOST_SPIRIT_DEBUG_NODE(top); + +#undef DLESS +#undef MINUS } }; }
このままでは「EOH
」以外の終端文字列には対応できていない。よって、io_here
にローカル変数 qi::locals<std::string>
を使用可能にして、それを参照するようにする。
--- xsh_ba02.cc 2013-05-13 20:16:28.000000000 +0900 +++ xsh_ba03.cc 2013-05-13 20:16:28.000000000 +0900 @@ -3,14 +3,16 @@ //#define BOOST_SPIRIT_DEBUG #include <boost/spirit/include/support_utree.hpp> #include <boost/spirit/include/qi.hpp> +#include <boost/spirit/include/phoenix.hpp> namespace qi = boost::spirit::qi; namespace spirit = boost::spirit; namespace client { template <typename Iterator> - struct sh_parser : qi::grammar<Iterator, qi::ascii::space_type, spirit::utree()> { - qi::rule<Iterator, qi::ascii::space_type, spirit::utree()> top; + struct sh_parser : qi::grammar<Iterator, qi::ascii::space_type, spirit::utree(), qi::locals<std::string> > { + qi::rule<Iterator, qi::ascii::space_type, spirit::utree(), qi::locals<std::string> > top, + io_here; qi::rule<Iterator, qi::ascii::space_type, spirit::utree()> NEWLINE, INDENT, DELIMITER, @@ -24,7 +26,6 @@ arguments, ASSIGNMENT_WORD, assignments, - io_here, simple_command, command, compound_list, @@ -58,10 +59,10 @@ assignments = ASSIGNMENT_WORD >> *(!qi::eps | qi::omit[DELIMITER] >> ASSIGNMENT_WORD); io_here %= DLESS >> qi::matches[qi::no_skip[MINUS]] >> - qi::no_skip[qi::lit("EOH")] >> qi::omit[-comment] >> qi::no_skip[qi::eol] >> - ((qi::as_string[*qi::no_skip[+(qi::char_ - (qi::eol >> qi::lit("EOH") >> qi::eol))]]) >> - qi::omit[qi::no_skip[qi::eol] >> qi::lit("EOH")] | - qi::omit[qi::lit("EOH")]); + qi::as_string[qi::no_skip[+(qi::char_ - qi::ascii::space)]][qi::labels::_a = qi::labels::_1] >> qi::omit[-comment] >> qi::no_skip[qi::eol] >> + ((qi::as_string[*qi::no_skip[+(qi::char_ - (qi::eol >> qi::string(qi::labels::_a) >> qi::eol))]]) >> + qi::omit[qi::no_skip[qi::eol] >> qi::string(qi::labels::_a)] | + qi::omit[qi::string(qi::labels::_a)]); simple_command = (assignments >> command_arguments | assignments | command_arguments) >> *io_here; command = qi::omit[INDENT] >> simple_command;
これで任意の終端文字列でヒアドキュメントが受理できるようになる。さらに、以下のような変数展開を抑止するヒアドキュメントにも対応しよう。
cat <<'EOF' $1 $2 ... EOF
--- xsh_ba03.cc 2013-05-13 20:16:28.000000000 +0900 +++ xsh_ba04.cc 2013-05-13 20:16:28.000000000 +0900 @@ -30,6 +30,7 @@ command, compound_list, comment; + qi::rule<Iterator, qi::ascii::space_type, std::string()> quoted_here_end, unquoted_here_end, here_end; sh_parser() : sh_parser::base_type(top) { #define DLESS qi::as_string["<<"] @@ -58,8 +59,12 @@ ASSIGNMENT_WORD = variable_name >> qi::no_skip['='] >> -argument; assignments = ASSIGNMENT_WORD >> *(!qi::eps | qi::omit[DELIMITER] >> ASSIGNMENT_WORD); + quoted_here_end = '\'' >> qi::as_string[qi::no_skip[+(qi::char_ - '\'')]][qi::labels::_val = qi::labels::_1] >> '\''; + unquoted_here_end %= qi::as_string[qi::no_skip[+(qi::char_ - qi::ascii::space)]]; + here_end %= qi::as_string[quoted_here_end | unquoted_here_end]; + io_here %= DLESS >> qi::matches[qi::no_skip[MINUS]] >> - qi::as_string[qi::no_skip[+(qi::char_ - qi::ascii::space)]][qi::labels::_a = qi::labels::_1] >> qi::omit[-comment] >> qi::no_skip[qi::eol] >> + here_end[qi::labels::_a = qi::labels::_1] >> qi::omit[-comment] >> qi::no_skip[qi::eol] >> ((qi::as_string[*qi::no_skip[+(qi::char_ - (qi::eol >> qi::string(qi::labels::_a) >> qi::eol))]]) >> qi::omit[qi::no_skip[qi::eol] >> qi::string(qi::labels::_a)] | qi::omit[qi::string(qi::labels::_a)]);
ちなみに規則で「=
」の代わりに「%=
」とすると「_val = _1
」を省略できる。
リダイレクション
次に、以下のようなリダイレクションを受理するパーサ規則を書く。
sort < filename printf '' > filename
--- xsh_ba04.cc 2013-05-13 20:16:28.000000000 +0900 +++ xsh_ba05.cc 2013-05-13 20:16:28.000000000 +0900 @@ -26,6 +26,9 @@ arguments, ASSIGNMENT_WORD, assignments, + filename, + redirection, + redirection_list, simple_command, command, compound_list, @@ -33,7 +36,25 @@ qi::rule<Iterator, qi::ascii::space_type, std::string()> quoted_here_end, unquoted_here_end, here_end; sh_parser() : sh_parser::base_type(top) { +#define IO_NUMBER qi::as_string[+qi::digit] + +#define AMP qi::as_string['&'] +#define VLINE qi::as_string['|'] +#define SEMI qi::as_string[';'] + +#define AND_IF qi::as_string["&&"] +#define OR_IF qi::as_string["||"] +#define DSEMI qi::as_string[";;"] + +#define LESS qi::as_string['<'] +#define GREAT qi::as_string['>'] #define DLESS qi::as_string["<<"] +#define DGREAT qi::as_string[">>"] +#define LESSAND qi::as_string["<&"] +#define GREATAND qi::as_string[">&"] +#define LESSGREAT qi::as_string["<>"] +#define CLOBBER qi::as_string[">|"] +#define ANDGREAT qi::as_string["&>"] #define MINUS qi::as_string['-'] top = *(compound_list || NEWLINE); @@ -69,7 +90,14 @@ qi::omit[qi::no_skip[qi::eol] >> qi::string(qi::labels::_a)] | qi::omit[qi::string(qi::labels::_a)]); - simple_command = (assignments >> command_arguments | assignments | command_arguments) >> *io_here; + filename = qi::omit[*qi::ascii::blank] >> argument; + + redirection = + (LESS | GREAT | DLESS | DGREAT | LESSAND | GREATAND | LESSGREAT | CLOBBER) >> filename | + io_here | + ANDGREAT >> filename; + redirection_list = +redirection; + simple_command = (assignments >> command_arguments | assignments | command_arguments) >> -redirection_list; command = qi::omit[INDENT] >> simple_command; compound_list @@ -80,7 +108,25 @@ BOOST_SPIRIT_DEBUG_NODE(top); +#undef IO_NUMBER + +#undef AMP +#undef VLINE +#undef SEMI + +#undef AND_IF +#undef OR_IF +#undef DSEMI + +#undef LESS +#undef GREAT #undef DLESS +#undef DGREAT +#undef LESSAND +#undef GREATAND +#undef LESSGREAT +#undef CLOBBER +#undef ANDGREAT #undef MINUS } };
これでリダイレクションの構文を受理できるようになる。
パイプライン
次に、以下のようなパイプラインを受理できるパーサ規則を書く。
echo | cat | sort | uniq
--- xsh_ba05.cc 2013-05-13 20:16:29.000000000 +0900 +++ xsh_ba06.cc 2013-05-13 20:16:29.000000000 +0900 @@ -32,6 +32,7 @@ simple_command, command, compound_list, + pipeline, comment; qi::rule<Iterator, qi::ascii::space_type, std::string()> quoted_here_end, unquoted_here_end, here_end; sh_parser() : sh_parser::base_type(top) @@ -101,8 +102,9 @@ command = qi::omit[INDENT] >> simple_command; compound_list - = *NEWLINE >> command % +NEWLINE >> -NEWLINE + = *NEWLINE >> pipeline % +NEWLINE >> -NEWLINE ; + pipeline = command % (VLINE >> *NEWLINE); comment = qi::no_skip[*qi::ascii::blank >> qi::as_string['#' >> *(~qi::char_("\r\n"))]];
しかしこれでは、以下のようなヒアドキュメント直後のパイプラインには未対応である。
cat <<EOF | sort : EOF
よって、以下のように pipe_here
規則を io_here
規則に挿入する。
--- xsh_ba06.cc 2013-05-13 20:16:29.000000000 +0900 +++ xsh_ba07.cc 2013-05-13 20:16:29.000000000 +0900 @@ -26,6 +26,7 @@ arguments, ASSIGNMENT_WORD, assignments, + pipe_here, filename, redirection, redirection_list, @@ -81,12 +82,14 @@ ASSIGNMENT_WORD = variable_name >> qi::no_skip['='] >> -argument; assignments = ASSIGNMENT_WORD >> *(!qi::eps | qi::omit[DELIMITER] >> ASSIGNMENT_WORD); + pipe_here = VLINE >> *pipeline; + quoted_here_end = '\'' >> qi::as_string[qi::no_skip[+(qi::char_ - '\'')]][qi::labels::_val = qi::labels::_1] >> '\''; unquoted_here_end %= qi::as_string[qi::no_skip[+(qi::char_ - qi::ascii::space)]]; here_end %= qi::as_string[quoted_here_end | unquoted_here_end]; io_here %= DLESS >> qi::matches[qi::no_skip[MINUS]] >> - here_end[qi::labels::_a = qi::labels::_1] >> qi::omit[-comment] >> qi::no_skip[qi::eol] >> + here_end[qi::labels::_a = qi::labels::_1] >> -pipe_here >> qi::omit[-comment] >> qi::no_skip[qi::eol] >> ((qi::as_string[*qi::no_skip[+(qi::char_ - (qi::eol >> qi::string(qi::labels::_a) >> qi::eol))]]) >> qi::omit[qi::no_skip[qi::eol] >> qi::string(qi::labels::_a)] | qi::omit[qi::string(qi::labels::_a)]);
これでパイプラインの構文を受理できるようになる。
リスト
次に、以下のようなリストを受理できるようなパーサ規則を書く。
true && echo yes || echo no
--- xsh_ba07.cc 2013-05-13 20:16:29.000000000 +0900 +++ xsh_ba08.cc 2013-05-13 20:16:29.000000000 +0900 @@ -32,7 +32,10 @@ redirection_list, simple_command, command, + list, compound_list, + list1, + newline_list, pipeline, comment; qi::rule<Iterator, qi::ascii::space_type, std::string()> quoted_here_end, unquoted_here_end, here_end; @@ -103,11 +106,14 @@ redirection_list = +redirection; simple_command = (assignments >> command_arguments | assignments | command_arguments) >> -redirection_list; command = qi::omit[INDENT] >> simple_command; - + list = newline_list >> list1 >> NEWLINE >> newline_list; compound_list - = *NEWLINE >> pipeline % +NEWLINE >> -NEWLINE + = list + | newline_list >> list1 ; - pipeline = command % (VLINE >> *NEWLINE); + list1 = pipeline % ((AND_IF | OR_IF | NEWLINE) >> newline_list); + newline_list = *NEWLINE; + pipeline = command % (VLINE >> newline_list); comment = qi::no_skip[*qi::ascii::blank >> qi::as_string['#' >> *(~qi::char_("\r\n"))]];
さらに、次のようなリストを受理できるようなパーサ規則を書く。
echo & i=0; LC_ALL=C date
--- xsh_ba08.cc 2013-05-13 20:16:29.000000000 +0900 +++ xsh_ba09.cc 2013-05-13 20:16:29.000000000 +0900 @@ -106,12 +106,12 @@ redirection_list = +redirection; simple_command = (assignments >> command_arguments | assignments | command_arguments) >> -redirection_list; command = qi::omit[INDENT] >> simple_command; - list = newline_list >> list1 >> NEWLINE >> newline_list; + list = newline_list >> list1 >> (AMP | SEMI | NEWLINE) >> newline_list; compound_list = list | newline_list >> list1 ; - list1 = pipeline % ((AND_IF | OR_IF | NEWLINE) >> newline_list); + list1 = pipeline % ((AND_IF | OR_IF | AMP | SEMI | NEWLINE) >> newline_list); newline_list = *NEWLINE; pipeline = command % (VLINE >> newline_list);
これでリスト構文を受理できるようになる。
サブシェル、グループ
次に、以下のようなサブシェルやグループを受理できるようなパーサ規則を書く。
true && (echo yes) || (echo no) true && { echo yes } || { echo no }
また、ついでにリダイレクションにてファイルデスクリプタ番号に対応させておく。
--- xsh_ba09.cc 2013-05-13 20:16:29.000000000 +0900 +++ xsh_ba10.cc 2013-05-13 20:16:29.000000000 +0900 @@ -27,15 +27,23 @@ ASSIGNMENT_WORD, assignments, pipe_here, + number, filename, redirection, redirection_list, simple_command, command, + shell_command, + subshell, + group_command, list, compound_list, + list0, list1, + list_terminator, newline_list, + simple_list1, + simple_list, pipeline, comment; qi::rule<Iterator, qi::ascii::space_type, std::string()> quoted_here_end, unquoted_here_end, here_end; @@ -62,7 +70,29 @@ #define ANDGREAT qi::as_string["&>"] #define MINUS qi::as_string['-'] - top = *(compound_list || NEWLINE); +#define If qi::as_string["if"] +#define Then qi::as_string["then"] +#define Elif qi::as_string["elif"] +#define Else qi::as_string["else"] +#define Fi qi::as_string["fi"] +#define For qi::as_string["for"] +#define While qi::as_string["while"] +#define Until qi::as_string["until"] +#define Do qi::as_string["do"] +#define Done qi::as_string["done"] +#define Select qi::as_string["select"] +#define Case qi::as_string["case"] +#define Esac qi::as_string["esac"] +#define In qi::as_string["in"] +#define Function qi::as_string["function"] +#define Lparen qi::as_string['('] +#define Rparen qi::as_string[')'] +#define Lbrace qi::as_string['{'] +#define Rbrace qi::as_string['}'] +#define Parens qi::as_string["()"] +#define Bang qi::as_string['!'] + + top = *compound_list; NEWLINE = qi::omit[-comment] >> qi::no_skip[qi::eol]; INDENT = qi::no_skip[*qi::ascii::blank]; @@ -97,22 +127,35 @@ qi::omit[qi::no_skip[qi::eol] >> qi::string(qi::labels::_a)] | qi::omit[qi::string(qi::labels::_a)]); + number = IO_NUMBER; filename = qi::omit[*qi::ascii::blank] >> argument; redirection = - (LESS | GREAT | DLESS | DGREAT | LESSAND | GREATAND | LESSGREAT | CLOBBER) >> filename | - io_here | + -number >> (LESS | GREAT | DLESS | DGREAT | LESSAND | GREATAND | LESSGREAT | CLOBBER) >> filename | + -number >> (LESSAND | GREATAND) >> (number | MINUS) | + -number >> io_here | ANDGREAT >> filename; redirection_list = +redirection; simple_command = (assignments >> command_arguments | assignments | command_arguments) >> -redirection_list; - command = qi::omit[INDENT] >> simple_command; - list = newline_list >> list1 >> (AMP | SEMI | NEWLINE) >> newline_list; + command = qi::omit[INDENT] >> (shell_command >> -redirection_list | simple_command); + shell_command + = subshell + | group_command + ; + subshell = Lparen >> compound_list >> Rparen; + group_command = Lbrace >> list >> Rbrace; + list = newline_list >> list0; compound_list = list | newline_list >> list1 ; + list0 = list1 >> (AMP | SEMI | NEWLINE) >> newline_list; list1 = pipeline % ((AND_IF | OR_IF | AMP | SEMI | NEWLINE) >> newline_list); + list_terminator = NEWLINE | SEMI; newline_list = *NEWLINE; + simple_list = simple_list1 >> -(AMP | SEMI); + simple_list1 + = pipeline % ((AND_IF | OR_IF) >> newline_list | (AMP | SEMI)); pipeline = command % (VLINE >> newline_list); comment = qi::no_skip[*qi::ascii::blank >> qi::as_string['#' >> *(~qi::char_("\r\n"))]]; @@ -139,6 +182,28 @@ #undef CLOBBER #undef ANDGREAT #undef MINUS + +#undef If +#undef Then +#undef Elif +#undef Else +#undef Fi +#undef For +#undef While +#undef Until +#undef Do +#undef Done +#undef Select +#undef Case +#undef Esac +#undef In +#undef Function +#undef Lparen +#undef Rparen +#undef Lbrace +#undef Rbrace +#undef Parens +#undef Bang } }; }
ここまで受理できるようになったので、残りは if
などの制御構文のみとなった。
制御構文
if then elif else fi
,
for in do done
,
while do done
,
until do done
,
select do done
,
case in ) ;; esac
,
[function] f()
の制御構文を受理できるパーサ規則を書く。
--- xsh_ba10.cc 2013-05-13 20:16:30.000000000 +0900 +++ xsh_ba11.cc 2013-05-13 20:16:30.000000000 +0900 @@ -16,6 +16,7 @@ qi::rule<Iterator, qi::ascii::space_type, spirit::utree()> NEWLINE, INDENT, DELIMITER, + RESERVED_WORDS, variable_name, single_quoted_string, double_quoted_string, @@ -28,14 +29,28 @@ assignments, pipe_here, number, + name, + word, + word_list, filename, redirection, redirection_list, + simple_command_element, simple_command, command, shell_command, + for_command, + select_command, + case_command, + function_def, subshell, + if_command, group_command, + elif_clause, + case_clause_sequence, + case_clause, + pattern, + pattern_list, list, compound_list, list0, @@ -44,6 +59,7 @@ newline_list, simple_list1, simple_list, + pipeline_command, pipeline, comment; qi::rule<Iterator, qi::ascii::space_type, std::string()> quoted_here_end, unquoted_here_end, here_end; @@ -97,6 +113,7 @@ NEWLINE = qi::omit[-comment] >> qi::no_skip[qi::eol]; INDENT = qi::no_skip[*qi::ascii::blank]; DELIMITER = qi::no_skip[+qi::ascii::blank]; + RESERVED_WORDS = /*Bang | */Lbrace | Rbrace/* | Lparen | Rparen*/ | Case | Do | Done | Elif | Else | Esac | Fi | For | If | In | Then | Until | While; variable_name = qi::as_string[qi::no_skip[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))]]; single_quoted_string = qi::no_skip[qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")]]; @@ -106,7 +123,7 @@ qi::char_('"')]]; unquoted_string = qi::no_skip[qi::as_string[+~qi::char_("\t\n \"#&'();<=>\\`|")]]; - command_name = +(single_quoted_string | double_quoted_string | unquoted_string); + command_name = +(single_quoted_string | double_quoted_string | unquoted_string) - RESERVED_WORDS; command_arguments = command_name >> *(qi::omit[DELIMITER] >> argument); argument = +(single_quoted_string | double_quoted_string | unquoted_string); @@ -128,6 +145,9 @@ qi::omit[qi::string(qi::labels::_a)]); number = IO_NUMBER; + name = qi::omit[DELIMITER] >> variable_name; + word = qi::omit[DELIMITER] >> argument; + word_list = qi::omit[DELIMITER] >> arguments; filename = qi::omit[*qi::ascii::blank] >> argument; redirection = @@ -135,27 +155,59 @@ -number >> (LESSAND | GREATAND) >> (number | MINUS) | -number >> io_here | ANDGREAT >> filename; + simple_command_element + = qi::omit[DELIMITER] >> ASSIGNMENT_WORD + | redirection + | qi::omit[DELIMITER] >> argument + ; redirection_list = +redirection; - simple_command = (assignments >> command_arguments | assignments | command_arguments) >> -redirection_list; + simple_command = (ASSIGNMENT_WORD | redirection | command_name) >> *simple_command_element; command = qi::omit[INDENT] >> (shell_command >> -redirection_list | simple_command); shell_command - = subshell + = for_command + | case_command + | (While | Until) >> compound_list >> Do >> compound_list >> Done + | select_command + | if_command + | subshell | group_command + | function_def ; + for_command + = For >> name >> (SEMI >> newline_list + | newline_list >> -(In >> -word_list >> list_terminator >> newline_list) + ) >> (Do | Lbrace) >> compound_list >> (Done | Rbrace); + select_command + = Select >> name >> (SEMI >> newline_list + | newline_list >> -(In >> -word_list >> list_terminator >> newline_list) + ) >> (Do | Lbrace) >> list >> (Done | Rbrace); + case_command + = Case >> word >> newline_list >> In >> (case_clause | -case_clause_sequence >> newline_list) >> Esac; + function_def = (-(Function >> qi::omit[DELIMITER]) >> variable_name >> Parens | Function >> name) >> + newline_list >> (group_command | subshell); subshell = Lparen >> compound_list >> Rparen; + if_command + = If >> compound_list >> Then >> compound_list >> -(Else >> compound_list) >> Fi + | If >> compound_list >> Then >> compound_list >> elif_clause >> Fi; group_command = Lbrace >> list >> Rbrace; + elif_clause = *(Elif >> compound_list >> Then >> compound_list) >> -(Else >> compound_list); + case_clause = -case_clause_sequence >> pattern_list; + pattern_list = newline_list >> qi::omit[INDENT] >> qi::omit[-Lparen] >> pattern >> qi::omit[Rparen] >> (compound_list | newline_list); + case_clause_sequence = +(!qi::eps | pattern_list >> DSEMI); + pattern = argument % VLINE; list = newline_list >> list0; compound_list = list | newline_list >> list1 ; - list0 = list1 >> (AMP | SEMI | NEWLINE) >> newline_list; - list1 = pipeline % ((AND_IF | OR_IF | AMP | SEMI | NEWLINE) >> newline_list); + list0 = list1 >> (AMP | (!DSEMI >> SEMI) | NEWLINE) >> newline_list; + list1 = pipeline_command % ((AND_IF | OR_IF | AMP | SEMI | NEWLINE) >> newline_list); list_terminator = NEWLINE | SEMI; newline_list = *NEWLINE; simple_list = simple_list1 >> -(AMP | SEMI); simple_list1 - = pipeline % ((AND_IF | OR_IF) >> newline_list | (AMP | SEMI)); + = pipeline_command % ((AND_IF | OR_IF) >> newline_list | (AMP | SEMI)); + pipeline_command = -Bang >> pipeline; pipeline = command % (VLINE >> newline_list); comment = qi::no_skip[*qi::ascii::blank >> qi::as_string['#' >> *(~qi::char_("\r\n"))]];
これで bash シェルスクリプト構文を受理できるようになった。
そしてこれは Learning the bash Shell, Second Edition, BNF for bash 記載の BNF を Boost Spirit Qi で書き直したものになっている。最後に、全体像としてオリジナルの BNF も添えたコードを載せておく。ちなみに、!qi::eps
というのが稀に必要となっている箇所があるが、これがないと属性が重複してしまう現象を防いでいる。
#include <iostream> #include <string> //#define BOOST_SPIRIT_DEBUG #include <boost/spirit/include/support_utree.hpp> #include <boost/spirit/include/qi.hpp> #include <boost/spirit/include/phoenix.hpp> namespace qi = boost::spirit::qi; namespace spirit = boost::spirit; namespace client { template <typename Iterator> struct sh_parser : qi::grammar<Iterator, qi::ascii::space_type, spirit::utree(), qi::locals<std::string> > { qi::rule<Iterator, qi::ascii::space_type, spirit::utree(), qi::locals<std::string> > top, io_here; qi::rule<Iterator, qi::ascii::space_type, spirit::utree()> NEWLINE, INDENT, DELIMITER, RESERVED_WORDS, variable_name, single_quoted_string, double_quoted_string, unquoted_string, command_name, command_arguments, argument, arguments, ASSIGNMENT_WORD, assignments, pipe_here, number, name, word, word_list, filename, redirection, redirection_list, simple_command_element, simple_command, command, shell_command, for_command, select_command, case_command, function_def, subshell, if_command, group_command, elif_clause, case_clause_sequence, case_clause, pattern, pattern_list, list, compound_list, list0, list1, list_terminator, newline_list, simple_list1, simple_list, pipeline_command, pipeline, comment; qi::rule<Iterator, qi::ascii::space_type, std::string()> quoted_here_end, unquoted_here_end, here_end; sh_parser() : sh_parser::base_type(top) { #define IO_NUMBER qi::as_string[+qi::digit] #define AMP qi::as_string['&'] #define VLINE qi::as_string['|'] #define SEMI qi::as_string[';'] #define AND_IF qi::as_string["&&"] #define OR_IF qi::as_string["||"] #define DSEMI qi::as_string[";;"] #define LESS qi::as_string['<'] #define GREAT qi::as_string['>'] #define DLESS qi::as_string["<<"] #define DGREAT qi::as_string[">>"] #define LESSAND qi::as_string["<&"] #define GREATAND qi::as_string[">&"] #define LESSGREAT qi::as_string["<>"] #define CLOBBER qi::as_string[">|"] #define ANDGREAT qi::as_string["&>"] #define MINUS qi::as_string['-'] #define If qi::as_string["if"] #define Then qi::as_string["then"] #define Elif qi::as_string["elif"] #define Else qi::as_string["else"] #define Fi qi::as_string["fi"] #define For qi::as_string["for"] #define While qi::as_string["while"] #define Until qi::as_string["until"] #define Do qi::as_string["do"] #define Done qi::as_string["done"] #define Select qi::as_string["select"] #define Case qi::as_string["case"] #define Esac qi::as_string["esac"] #define In qi::as_string["in"] #define Function qi::as_string["function"] #define Lparen qi::as_string['('] #define Rparen qi::as_string[')'] #define Lbrace qi::as_string['{'] #define Rbrace qi::as_string['}'] #define Parens qi::as_string["()"] #define Bang qi::as_string['!'] top = *compound_list; NEWLINE = qi::omit[-comment] >> qi::no_skip[qi::eol]; INDENT = qi::no_skip[*qi::ascii::blank]; DELIMITER = qi::no_skip[+qi::ascii::blank]; RESERVED_WORDS = /*Bang | */Lbrace | Rbrace/* | Lparen | Rparen*/ | Case | Do | Done | Elif | Else | Esac | Fi | For | If | In | Then | Until | While; variable_name = qi::as_string[qi::no_skip[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))]]; single_quoted_string = qi::no_skip[qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")]]; double_quoted_string = qi::no_skip[qi::as_string[qi::char_('"') >> *( (qi::lit('\\') >> qi::char_) | (!qi::lit('\\') >> ~qi::lit('"')) ) >> qi::char_('"')]]; unquoted_string = qi::no_skip[qi::as_string[+~qi::char_("\t\n \"#&'();<=>\\`|")]]; command_name = +(single_quoted_string | double_quoted_string | unquoted_string) - RESERVED_WORDS; command_arguments = command_name >> *(qi::omit[DELIMITER] >> argument); argument = +(single_quoted_string | double_quoted_string | unquoted_string); arguments = argument >> *(qi::omit[DELIMITER] >> argument); ASSIGNMENT_WORD = variable_name >> qi::no_skip['='] >> -argument; assignments = ASSIGNMENT_WORD >> *(!qi::eps | qi::omit[DELIMITER] >> ASSIGNMENT_WORD); pipe_here = VLINE >> *pipeline; quoted_here_end = '\'' >> qi::as_string[qi::no_skip[+(qi::char_ - '\'')]][qi::labels::_val = qi::labels::_1] >> '\''; unquoted_here_end %= qi::as_string[qi::no_skip[+(qi::char_ - qi::ascii::space)]]; here_end %= qi::as_string[quoted_here_end | unquoted_here_end]; io_here %= DLESS >> qi::matches[qi::no_skip[MINUS]] >> here_end[qi::labels::_a = qi::labels::_1] >> -pipe_here >> qi::omit[-comment] >> qi::no_skip[qi::eol] >> ((qi::as_string[*qi::no_skip[+(qi::char_ - (qi::eol >> qi::string(qi::labels::_a) >> qi::eol))]]) >> qi::omit[qi::no_skip[qi::eol] >> qi::string(qi::labels::_a)] | qi::omit[qi::string(qi::labels::_a)]); number = IO_NUMBER; name = qi::omit[DELIMITER] >> variable_name; word = qi::omit[DELIMITER] >> argument; word_list = qi::omit[DELIMITER] >> arguments; filename = qi::omit[*qi::ascii::blank] >> argument; #if 0 /* !use EBNF-like qi */ redirection = LESS >> filename | number >> LESS >> filename | GREAT >> filename | number >> GREAT >> filename | DGREAT >> filename | number >> DGREAT >> filename | LESSAND >> number | number >> LESSAND >> number | GREATAND >> number | number >> GREATAND >> number | LESSAND >> filename | number >> LESSAND >> filename | GREATAND >> filename | number >> GREATAND >> filename | LESSAND >> MINUS | number >> LESSAND >> MINUS | GREATAND >> MINUS | number >> GREATAND >> MINUS | LESSGREAT >> filename | number >> LESSGREAT >> filename | CLOBBER >> filename | number >> CLOBBER >> filename | io_here | number >> io_here | ANDGREAT >> filename ; #else redirection = -number >> (LESS | GREAT | DLESS | DGREAT | LESSAND | GREATAND | LESSGREAT | CLOBBER) >> filename | -number >> (LESSAND | GREATAND) >> (number | MINUS) | -number >> io_here | ANDGREAT >> filename; #endif #if 0 /* use recursive qi */ simple_command_element = command_name | ASSIGNMENT_WORD | redirection ; redirection_list = redirection | redirection_list >> redirection ; simple_command = simple_command_element | simple_command >> simple_command_element ; #else simple_command_element = qi::omit[DELIMITER] >> ASSIGNMENT_WORD | redirection | qi::omit[DELIMITER] >> argument ; redirection_list = +redirection; simple_command = (ASSIGNMENT_WORD | redirection | command_name) >> *simple_command_element; #endif #if 0 /* !use EBNF-like qi */ command = qi::omit[INDENT] >> shell_command >> redirection_list | qi::omit[INDENT] >> shell_command | qi::omit[INDENT] >> simple_command ; #else command = qi::omit[INDENT] >> (shell_command >> -redirection_list | simple_command); #endif #if 0 /* !use EBNF-like qi */ shell_command = for_command | case_command | While >> compound_list >> Do >> compound_list >> Done | Until >> compound_list >> Do >> compound_list >> Done | select_command | if_command | subshell | group_command | function_def ; #else shell_command = for_command | case_command | (While | Until) >> compound_list >> Do >> compound_list >> Done | select_command | if_command | subshell | group_command | function_def ; #endif #if 0 /* !use EBNF-like qi */ for_command = For >> name >> newline_list >> Do >> compound_list >> Done | For >> name >> newline_list >> Lbrace >> compound_list >> Rbrace | For >> name >> SEMI >> newline_list >> Do >> compound_list >> Done | For >> name >> SEMI >> newline_list >> Lbrace >> compound_list >> Rbrace | For >> name >> newline_list >> In >> list_terminator >> newline_list >> Do >> compound_list >> Done | For >> name >> newline_list >> In >> list_terminator >> newline_list >> Lbrace >> compound_list >> Rbrace | For >> name >> newline_list >> In >> word_list >> list_terminator >> newline_list >> Do >> compound_list >> Done | For >> name >> newline_list >> In >> word_list >> list_terminator >> newline_list >> Lbrace >> compound_list >> Rbrace ; #else for_command = For >> name >> (SEMI >> newline_list | newline_list >> -(In >> -word_list >> list_terminator >> newline_list) ) >> (Do | Lbrace) >> compound_list >> (Done | Rbrace); #endif #if 0 /* !use EBNF-like qi */ select_command = Select >> name >> newline_list >> Do >> list >> Done | Select >> name >> newline_list >> Lbrace >> list >> Rbrace | Select >> name >> SEMI >> newline_list >> Do >> list >> Done | Select >> name >> SEMI >> newline_list >> Lbrace >> list >> Rbrace | Select >> name >> newline_list >> In >> word_list >> list_terminator >> newline_list >> Do >> list >> Done | Select >> name >> newline_list >> In >> word_list >> list_terminator >> newline_list >> Lbrace >> list >> Rbrace ; #else select_command = Select >> name >> (SEMI >> newline_list | newline_list >> -(In >> -word_list >> list_terminator >> newline_list) ) >> (Do | Lbrace) >> list >> (Done | Rbrace); #endif #if 0 /* !use EBNF-like qi */ case_command = Case >> word >> newline_list >> In >> case_clause >> Esac | Case >> word >> newline_list >> In >> case_clause_sequence >> newline_list >> Esac | Case >> word >> newline_list >> In >> newline_list >> Esac ; #else case_command = Case >> word >> newline_list >> In >> (case_clause | -case_clause_sequence >> newline_list) >> Esac; #endif #if 0 /* !use EBNF-like qi */ function_def = variable_name >> Parens >> newline_list >> (group_command | subshell) | Function >> name >> Parens >> newline_list >> (group_command | subshell) | Function >> name >> newline_list >> (group_command | subshell) ; #else function_def = (-(Function >> qi::omit[DELIMITER]) >> variable_name >> Parens | Function >> name) >> newline_list >> (group_command | subshell); #endif subshell = Lparen >> compound_list >> Rparen; #if 0 /* !use EBNF-like qi */ if_command = If >> compound_list >> Then >> compound_list >> Else >> compound_list >> Fi | If >> compound_list >> Then >> compound_list >> elif_clause >> Fi | If >> compound_list >> Then >> compound_list >> Fi ; #else if_command = If >> compound_list >> Then >> compound_list >> -(Else >> compound_list) >> Fi | If >> compound_list >> Then >> compound_list >> elif_clause >> Fi; #endif group_command = Lbrace >> list >> Rbrace; #if 0 /* use recursive qi */ elif_clause = Elif >> compound_list >> Then >> compound_list >> Else >> compound_list | Elif >> compound_list >> Then >> compound_list >> elif_clause | Elif >> compound_list >> Then >> compound_list ; #else elif_clause = *(Elif >> compound_list >> Then >> compound_list) >> -(Else >> compound_list); #endif #if 0 /* !use EBNF-like qi */ case_clause = case_clause_sequence >> pattern_list | pattern_list ; pattern_list = newline_list >> qi::omit[INDENT] >> pattern >> qi::omit[Rparen] >> compound_list | newline_list >> qi::omit[INDENT] >> pattern >> qi::omit[Rparen] >> newline_list | newline_list >> qi::omit[INDENT] >> qi::omit[Lparen] >> pattern >> qi::omit[Rparen] >> compound_list | newline_list >> qi::omit[INDENT] >> qi::omit[Lparen] >> pattern >> qi::omit[Rparen] >> newline_list ; #else case_clause = -case_clause_sequence >> pattern_list; pattern_list = newline_list >> qi::omit[INDENT] >> qi::omit[-Lparen] >> pattern >> qi::omit[Rparen] >> (compound_list | newline_list); #endif #if 0 /* use recursive qi */ case_clause_sequence = pattern_list >> DSEMI | case_clause_sequence >> pattern_list >> DSEMI ; pattern = word | pattern >> VLINE >> word ; #else case_clause_sequence = +(!qi::eps | pattern_list >> DSEMI); pattern = argument % VLINE; #endif list = newline_list >> list0; compound_list = list | newline_list >> list1 ; #if 0 /* !use EBNF-like qi */ list0 = list1 >> AMP >> newline_list | list1 >> (!DSEMI >> SEMI) >> newline_list | list1 >> NEWLINE >> newline_list ; #if 0 /* use recursive qi */ list1 = list1 >> AND_IF >> newline_list >> list1 | list1 >> OR_IF >> newline_list >> list1 | list1 >> AMP >> newline_list >> list1 | list1 >> SEMI >> newline_list >> list1 | list1 >> NEWLINE >> newline_list >> list1 | pipeline_command ; #else list1 = pipeline_command % ((AND_IF | OR_IF | AMP | SEMI | NEWLINE) >> newline_list); #endif list_terminator = NEWLINE | SEMI ; #else list0 = list1 >> (AMP | (!DSEMI >> SEMI) | NEWLINE) >> newline_list; #if 0 /* use recursive qi */ list1 = list1 >> (AND_IF | OR_IF | AMP | SEMI | NEWLINE) >> newline_list >> list1 | pipeline_command ; #else list1 = pipeline_command % ((AND_IF | OR_IF | AMP | SEMI | NEWLINE) >> newline_list); #endif list_terminator = NEWLINE | SEMI; #endif #if 0 /* use recursive qi */ newline_list = qi::eps | newline_list >> NEWLINE ; #else newline_list = *NEWLINE; #endif #if 0 /* !use EBNF-like qi */ simple_list = simple_list1 | simple_list1 >> AMP | simple_list1 >> SEMI ; simple_list1 = simple_list1 >> AND_IF >> newline_list >> simple_list1 | simple_list1 >> OR_IF >> newline_list >> simple_list1 | simple_list1 >> AMP >> simple_list1 | simple_list1 >> SEMI >> simple_list1 | pipeline_command ; pipeline_command = pipeline | Bang >> pipeline ; #else simple_list = simple_list1 >> -(AMP | SEMI); #if 0 /* use recursive qi */ simple_list1 = simple_list1 >> ((AND_IF | OR_IF) >> newline_list | (AMP | SEMI)) >> simple_list1 | pipeline_command ; #else simple_list1 = pipeline_command % ((AND_IF | OR_IF) >> newline_list | (AMP | SEMI)); #endif pipeline_command = -Bang >> pipeline; #endif #if 0 /* use recursive qi */ pipeline = pipeline >> VLINE >> newline_list >> pipeline | command ; #else pipeline = command % (VLINE >> newline_list); #endif comment = qi::no_skip[*qi::ascii::blank >> qi::as_string['#' >> *(~qi::char_("\r\n"))]]; BOOST_SPIRIT_DEBUG_NODE(top); #undef IO_NUMBER #undef AMP #undef VLINE #undef SEMI #undef AND_IF #undef OR_IF #undef DSEMI #undef LESS #undef GREAT #undef DLESS #undef DGREAT #undef LESSAND #undef GREATAND #undef LESSGREAT #undef CLOBBER #undef ANDGREAT #undef MINUS #undef If #undef Then #undef Elif #undef Else #undef Fi #undef For #undef While #undef Until #undef Do #undef Done #undef Select #undef Case #undef Esac #undef In #undef Function #undef Lparen #undef Rparen #undef Lbrace #undef Rbrace #undef Parens #undef Bang } }; } int main() { client::sh_parser<std::string::const_iterator> sh_parser; std::string str; std::cin.unsetf(std::ios::skipws); std::copy(std::istream_iterator<char>(std::cin), std::istream_iterator<char>(), std::back_inserter(str)); { std::string::const_iterator it = str.begin(), end = str.end(); spirit::utree ut; bool r = phrase_parse(it, end, sh_parser, qi::ascii::space/*, qi::skip_flag::dont_postskip*/, ut); std::cout << str << std::endl; if (r && it == end) { std::cout << "succeeded:\t" << ut << std::endl; } else { std::cout << "failed:\t" << std::string(it, end) << std::endl; } } return 0; }