以前の Boost Spirit V2+Universal Tree における計算機の例 では一行毎の構文解析であったが、複数行に渡る構文解析の例として、シェルスクリプト構文解析を段階的に実装してみよう。但し、先の Boost Spirit V2+Universal Tree における bash スクリプトパーサの例 では pre-skipping を前提としたが、ここでは明示的に skipping を指定しつつ、今度は POSIX shell の BNF を最終的に目指した例を示していく。
Spirit V2+Universal Tree の基本形
複数行に対応した Spirit V2+Universal Tree の基本形で pre-skipping を前提としないものは以下のようになる。
#include <iostream>
#include <string>
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/support_utree.hpp>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
namespace spirit = boost::spirit;
namespace client {
    // Skeleton grammar: a single start rule producing a spirit::utree.
    // No skipper type is declared, so the rule itself never skips input.
    template <typename Iterator>
    struct a_parser : qi::grammar<Iterator, spirit::utree()> {
        typedef qi::rule<Iterator, spirit::utree()> rule_type;

        a_parser() : a_parser::base_type(top)
        {
            // Placeholder body (any run of non-tab characters);
            // write the real parser rules here.
            top = *~qi::lit('\t');

            BOOST_SPIRIT_DEBUG_NODE(top);
        }

        rule_type top;  // start rule handed to base_type above
    };
}
int main()
{
client::a_parser<std::string::const_iterator> a_parser;
std::string str;
std::cin.unsetf(std::ios::skipws);
std::copy(std::istream_iterator<char>(std::cin), std::istream_iterator<char>(), std::back_inserter(str));
{
std::string::const_iterator it = str.begin(), end = str.end();
spirit::utree ut;
bool r = phrase_parse(it, end, a_parser, qi::ascii::space/*, qi::skip_flag::dont_postskip*/, ut);
if (r && it == end) {
std::cout << "succeeded:\t" << ut << std::endl;
}
else {
std::cout << "failed:\t" << std::string(it, end) << std::endl;
return 1;
}
}
return 0;
}
これを雛形としてシェルスクリプトの構文解析を肉付けしていこう。以降、Unified diff 形式でコードの変遷を表現するものとする。
コメント処理、コマンド行、変数代入、シングルクォート
例えば、以下のようなスクリプトを受理できるようなパーサ規則を書く。
#!/bin/sh
i=0
message='hello world'!
echo $i $message
LC_ALL=C time
--- a_multiline_parser+utree00.cc 2013-05-14 14:24:45.000000000 +0900
+++ xsh00.cc 2013-05-14 14:24:45.000000000 +0900
@@ -9,11 +9,43 @@
namespace client {
template <typename Iterator>
- struct a_parser : qi::grammar<Iterator, spirit::utree()> {
- qi::rule<Iterator, spirit::utree()> top;
- a_parser() : a_parser::base_type(top)
+ struct sh_parser : qi::grammar<Iterator, spirit::utree()> {
+ qi::rule<Iterator, spirit::utree()> top,
+ variable_name,
+ single_quoted_string,
+ unquoted_string,
+ command_name,
+ command_arguments,
+ argument,
+ arguments,
+ assignment,
+ assignments,
+ nl,
+ nl_list,
+ comment;
+ sh_parser() : sh_parser::base_type(top)
{
- top = *~qi::lit('\t');
+ top = *(*nl_list >>
+ ((qi::omit[*qi::ascii::blank] >>
+ (assignments >> command_arguments | assignments | command_arguments)) %
+ nl_list) >> *nl_list || nl);
+
+ variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))];
+ single_quoted_string = qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")];
+ unquoted_string = qi::as_string[+~qi::char_("\t\n \"#&'();<=>\\`|")];
+
+ command_name = +(single_quoted_string | unquoted_string);
+ command_arguments = command_name >> *(qi::omit[+qi::ascii::blank] >> argument);
+
+ argument = +(single_quoted_string | unquoted_string);
+ arguments = argument >> *(qi::omit[+qi::ascii::blank] >> argument);
+
+ assignment = variable_name >> '=' >> -argument;
+ assignments = assignment >> *(!qi::eps | qi::omit[+qi::ascii::blank] >> assignment);
+
+ nl = qi::omit[*comment] >> qi::eol;
+ nl_list = +(qi::omit[*comment] >> qi::eol);
+ comment = qi::as_string[qi::skip(qi::ascii::blank)['#'] >> *(~qi::char_("\r\n"))];
BOOST_SPIRIT_DEBUG_NODE(top);
}
@@ -21,20 +53,20 @@
}
int main()
{
- client::a_parser<std::string::const_iterator> a_parser;
+ client::sh_parser<std::string::const_iterator> sh_parser;
std::string str;
std::cin.unsetf(std::ios::skipws);
std::copy(std::istream_iterator<char>(std::cin), std::istream_iterator<char>(), std::back_inserter(str));
{
std::string::const_iterator it = str.begin(), end = str.end();
spirit::utree ut;
- bool r = phrase_parse(it, end, a_parser, qi::ascii::space/*, qi::skip_flag::dont_postskip*/, ut);
+ bool r = phrase_parse(it, end, sh_parser, qi::ascii::space/*, qi::skip_flag::dont_postskip*/, ut);
+ std::cout << str << std::endl;
if (r && it == end) {
std::cout << "succeeded:\t" << ut << std::endl;
}
else {
std::cout << "failed:\t" << std::string(it, end) << std::endl;
- return 1;
}
}
return 0;
これでコメント処理、コマンド行、変数代入、シングルクォートされた任意の文字列を含む文字列や、空白区切りによる文字列のリストのパースが可能となっている。
ダブルクォート
次に以下のような、ダブルクォートされた文字列を受理できるようなパーサ規則を書く。
message="hello \"world\"!"
echo "$message"
--- xsh00.cc 2013-05-14 14:24:45.000000000 +0900
+++ xsh01.cc 2013-05-14 14:24:45.000000000 +0900
@@ -13,6 +13,7 @@
qi::rule<Iterator, spirit::utree()> top,
variable_name,
single_quoted_string,
+ double_quoted_string,
unquoted_string,
command_name,
command_arguments,
@@ -32,12 +33,16 @@
variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))];
single_quoted_string = qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")];
+ double_quoted_string = qi::as_string[qi::char_('"') >> *(
+ (qi::lit('\\') >> qi::char_) | (!qi::lit('\\') >> ~qi::lit('"'))
+ ) >>
+ qi::char_('"')];
unquoted_string = qi::as_string[+~qi::char_("\t\n \"#&'();<=>\\`|")];
- command_name = +(single_quoted_string | unquoted_string);
+ command_name = +(single_quoted_string | double_quoted_string | unquoted_string);
command_arguments = command_name >> *(qi::omit[+qi::ascii::blank] >> argument);
- argument = +(single_quoted_string | unquoted_string);
+ argument = +(single_quoted_string | double_quoted_string | unquoted_string);
arguments = argument >> *(qi::omit[+qi::ascii::blank] >> argument);
assignment = variable_name >> '=' >> -argument;
これでバックスラッシュでエスケープされていないダブルクォーテーションでのみ閉じられたダブルクォート文字列のパースが可能となっている。
ヒアドキュメント
次に、以下のようなヒアドキュメントの入力を受理するようなパーサ規則を書くが、まずは終端文字列を「EOH」に固定したものから試みる。
cat <<EOH
usage:
$0 < filename
EOH
ちなみに、ヒアドキュメントでタブを除去する「<<-」のときには真、さもなくば偽を付加するものとする。
--- xsh01.cc 2013-05-14 14:24:45.000000000 +0900
+++ xsh02.cc 2013-05-14 14:24:45.000000000 +0900
@@ -21,14 +21,19 @@
arguments,
assignment,
assignments,
+ io_here,
nl,
nl_list,
comment;
+ qi::rule<Iterator, spirit::utree()> DLESS, MINUS;
sh_parser() : sh_parser::base_type(top)
{
+ DLESS = qi::as<std::string>()["<<"];
+ MINUS = '-';
+
top = *(*nl_list >>
((qi::omit[*qi::ascii::blank] >>
- (assignments >> command_arguments | assignments | command_arguments)) %
+ (assignments >> command_arguments | assignments | command_arguments) >> -io_here) %
nl_list) >> *nl_list || nl);
variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))];
@@ -48,6 +53,12 @@
assignment = variable_name >> '=' >> -argument;
assignments = assignment >> *(!qi::eps | qi::omit[+qi::ascii::blank] >> assignment);
+ io_here = qi::skip(qi::ascii::blank)[DLESS] >> qi::matches[MINUS] >>
+ qi::lit("EOH") >> nl >>
+ (*(qi::as_string[+(qi::char_ - (qi::eol >> qi::lit("EOH") >> qi::eol))]) >>
+ qi::omit[qi::eol >> qi::lit("EOH")] |
+ qi::omit[qi::lit("EOH")]);
+
nl = qi::omit[*comment] >> qi::eol;
nl_list = +(qi::omit[*comment] >> qi::eol);
comment = qi::as_string[qi::skip(qi::ascii::blank)['#'] >> *(~qi::char_("\r\n"))];
このままでは「EOH」以外の終端文字列には対応できていない。よって、io_here にローカル変数 qi::locals<std::string> を使用可能にして、それを参照するようにする。
--- xsh02.cc 2013-05-14 14:24:45.000000000 +0900
+++ xsh03.cc 2013-05-14 14:24:45.000000000 +0900
@@ -3,14 +3,15 @@
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/support_utree.hpp>
#include <boost/spirit/include/qi.hpp>
+#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace spirit = boost::spirit;
namespace client {
template <typename Iterator>
- struct sh_parser : qi::grammar<Iterator, spirit::utree()> {
- qi::rule<Iterator, spirit::utree()> top,
+ struct sh_parser : qi::grammar<Iterator, spirit::utree(), qi::locals<std::string> > {
+ qi::rule<Iterator, spirit::utree(), qi::locals<std::string> > top,
variable_name,
single_quoted_string,
double_quoted_string,
@@ -53,11 +54,11 @@
assignment = variable_name >> '=' >> -argument;
assignments = assignment >> *(!qi::eps | qi::omit[+qi::ascii::blank] >> assignment);
- io_here = qi::skip(qi::ascii::blank)[DLESS] >> qi::matches[MINUS] >>
- qi::lit("EOH") >> nl >>
- (*(qi::as_string[+(qi::char_ - (qi::eol >> qi::lit("EOH") >> qi::eol))]) >>
- qi::omit[qi::eol >> qi::lit("EOH")] |
- qi::omit[qi::lit("EOH")]);
+ io_here %= qi::skip(qi::ascii::blank)[DLESS] >> qi::matches[MINUS] >>
+ qi::as_string[(+(qi::char_ - qi::ascii::space))][qi::labels::_a = qi::labels::_1] >> nl >>
+ (*(qi::as_string[+(qi::char_ - (qi::eol >> qi::string(qi::labels::_a) >> qi::eol))]) >>
+ qi::omit[qi::eol >> qi::string(qi::labels::_a)] |
+ qi::omit[qi::string(qi::labels::_a)]);
nl = qi::omit[*comment] >> qi::eol;
nl_list = +(qi::omit[*comment] >> qi::eol);
これで任意の終端文字列でヒアドキュメントが受理できるようになる。さらに、以下のような変数展開を抑止するヒアドキュメントにも対応しよう。
cat <<'EOF'
$1 $2 ...
EOF
--- xsh03.cc 2013-05-14 14:24:45.000000000 +0900
+++ xsh04.cc 2013-05-14 14:24:45.000000000 +0900
@@ -26,6 +26,7 @@
nl,
nl_list,
comment;
+ qi::rule<Iterator, std::string()> here_end, quoted_here_end, unquoted_here_end;
qi::rule<Iterator, spirit::utree()> DLESS, MINUS;
sh_parser() : sh_parser::base_type(top)
{
@@ -54,8 +55,12 @@
assignment = variable_name >> '=' >> -argument;
assignments = assignment >> *(!qi::eps | qi::omit[+qi::ascii::blank] >> assignment);
+ quoted_here_end = '\'' >> qi::as_string[(+(qi::char_ - '\''))][qi::labels::_val = qi::labels::_1] >> '\'';
+ unquoted_here_end %= qi::as_string[(+(qi::char_ - qi::ascii::space))];
+ here_end %= quoted_here_end | unquoted_here_end;
+
io_here %= qi::skip(qi::ascii::blank)[DLESS] >> qi::matches[MINUS] >>
- qi::as_string[(+(qi::char_ - qi::ascii::space))][qi::labels::_a = qi::labels::_1] >> nl >>
+ here_end[qi::labels::_a = qi::labels::_1] >> nl >>
(*(qi::as_string[+(qi::char_ - (qi::eol >> qi::string(qi::labels::_a) >> qi::eol))]) >>
qi::omit[qi::eol >> qi::string(qi::labels::_a)] |
qi::omit[qi::string(qi::labels::_a)]);
ちなみに規則で「=」の代わりに「%=」とすると「_val = _1」を省略できる。
リダイレクション
次に、以下のようなリダイレクションを受理するパーサ規則を書く。
sort < filename
printf '' > filename
--- xsh04.cc 2013-05-14 14:24:45.000000000 +0900
+++ xsh05.cc 2013-05-14 14:24:45.000000000 +0900
@@ -23,19 +23,43 @@
assignment,
assignments,
io_here,
+ filename,
+ io_redirect,
+ io_file,
nl,
nl_list,
comment;
qi::rule<Iterator, std::string()> here_end, quoted_here_end, unquoted_here_end;
- qi::rule<Iterator, spirit::utree()> DLESS, MINUS;
+ qi::rule<Iterator, spirit::utree()> IO_NUMBER;
+ qi::rule<Iterator, spirit::utree()> AMP, VLINE, SEMI;
+ qi::rule<Iterator, spirit::utree()> AND_IF, OR_IF, DSEMI;
+ qi::rule<Iterator, spirit::utree()> LESS, GREAT, DLESS, DGREAT, LESSAND, GREATAND, LESSGREAT, CLOBBER, ANDGREAT, MINUS;
sh_parser() : sh_parser::base_type(top)
{
+ IO_NUMBER = +qi::digit;
+
+ AMP = '&';
+ VLINE = '|';
+ SEMI = ';';
+
+ AND_IF = qi::as<std::string>()["&&"];
+ OR_IF = qi::as<std::string>()["||"];
+ DSEMI = qi::as<std::string>()[";;"];
+
+ LESS = '<';
+ GREAT = '>';
DLESS = qi::as<std::string>()["<<"];
+ DGREAT = qi::as<std::string>()[">>"];
+ LESSAND = qi::as<std::string>()["<&"];
+ GREATAND = qi::as<std::string>()[">&"];
+ LESSGREAT = qi::as<std::string>()["<>"];
+ CLOBBER = qi::as<std::string>()[">|"];
+ ANDGREAT = qi::as<std::string>()["&>"];
MINUS = '-';
top = *(*nl_list >>
((qi::omit[*qi::ascii::blank] >>
- (assignments >> command_arguments | assignments | command_arguments) >> -io_here) %
+ (assignments >> command_arguments | assignments | command_arguments) >> *io_redirect) %
nl_list) >> *nl_list || nl);
variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))];
@@ -65,6 +89,11 @@
qi::omit[qi::eol >> qi::string(qi::labels::_a)] |
qi::omit[qi::string(qi::labels::_a)]);
+ filename = qi::omit[*qi::ascii::blank] >> argument;
+
+ io_redirect = io_file | io_here;
+ io_file = qi::skip(qi::ascii::blank)[LESS | LESSAND | GREAT | GREATAND | DGREAT | LESSGREAT | CLOBBER] >> filename;
+
nl = qi::omit[*comment] >> qi::eol;
nl_list = +(qi::omit[*comment] >> qi::eol);
comment = qi::as_string[qi::skip(qi::ascii::blank)['#'] >> *(~qi::char_("\r\n"))];
これでリダイレクションの構文を受理できるようになる。
パイプライン
次に、以下のようなパイプラインを受理できるパーサ規則を書く。
echo | cat | sort | uniq
--- xsh05.cc 2013-05-14 14:24:46.000000000 +0900
+++ xsh06.cc 2013-05-14 14:24:46.000000000 +0900
@@ -24,6 +24,7 @@
assignments,
io_here,
filename,
+ pipeline,
io_redirect,
io_file,
nl,
@@ -57,10 +58,7 @@
ANDGREAT = qi::as<std::string>()["&>"];
MINUS = '-';
- top = *(*nl_list >>
- ((qi::omit[*qi::ascii::blank] >>
- (assignments >> command_arguments | assignments | command_arguments) >> *io_redirect) %
- nl_list) >> *nl_list || nl);
+ top = *(*nl >> (pipeline % nl_list) >> *nl || nl);
variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))];
single_quoted_string = qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")];
@@ -91,6 +89,10 @@
filename = qi::omit[*qi::ascii::blank] >> argument;
+ pipeline = (qi::omit[*qi::ascii::blank] >>
+ (assignments >> command_arguments | assignments | command_arguments) >> *io_redirect) %
+ (qi::skip(qi::ascii::blank)[VLINE] >> *nl);
+
io_redirect = io_file | io_here;
io_file = qi::skip(qi::ascii::blank)[LESS | LESSAND | GREAT | GREATAND | DGREAT | LESSGREAT | CLOBBER] >> filename;
しかしこれでは、以下のようなヒアドキュメント直後のパイプラインには未対応である。
cat <<EOF | sort
:
EOF
よって、以下のように pipe_here 規則を io_here 規則に挿入する。
--- xsh06.cc 2013-05-14 14:24:46.000000000 +0900
+++ xsh07.cc 2013-05-14 14:24:46.000000000 +0900
@@ -22,6 +22,7 @@
arguments,
assignment,
assignments,
+ pipe_here,
io_here,
filename,
pipeline,
@@ -77,12 +78,14 @@
assignment = variable_name >> '=' >> -argument;
assignments = assignment >> *(!qi::eps | qi::omit[+qi::ascii::blank] >> assignment);
+ pipe_here = qi::skip(qi::ascii::blank)[VLINE] >> *pipeline;
+
quoted_here_end = '\'' >> qi::as_string[(+(qi::char_ - '\''))][qi::labels::_val = qi::labels::_1] >> '\'';
unquoted_here_end %= qi::as_string[(+(qi::char_ - qi::ascii::space))];
here_end %= quoted_here_end | unquoted_here_end;
io_here %= qi::skip(qi::ascii::blank)[DLESS] >> qi::matches[MINUS] >>
- here_end[qi::labels::_a = qi::labels::_1] >> nl >>
+ here_end[qi::labels::_a = qi::labels::_1] >> -pipe_here >> nl >>
(*(qi::as_string[+(qi::char_ - (qi::eol >> qi::string(qi::labels::_a) >> qi::eol))]) >>
qi::omit[qi::eol >> qi::string(qi::labels::_a)] |
qi::omit[qi::string(qi::labels::_a)]);
これでパイプラインの構文を受理できるようになる。
リスト
次に、以下のようなリストを受理できるようなパーサ規則を書く。
true && echo yes || echo no
--- xsh07.cc 2013-05-14 14:24:46.000000000 +0900
+++ xsh08.cc 2013-05-14 14:24:46.000000000 +0900
@@ -26,6 +26,7 @@
io_here,
filename,
pipeline,
+ and_or,
io_redirect,
io_file,
nl,
@@ -59,7 +60,7 @@
ANDGREAT = qi::as<std::string>()["&>"];
MINUS = '-';
- top = *(*nl >> (pipeline % nl_list) >> *nl || nl);
+ top = *(*nl >> (and_or % nl_list) >> *nl || nl);
variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))];
single_quoted_string = qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")];
@@ -95,6 +96,7 @@
pipeline = (qi::omit[*qi::ascii::blank] >>
(assignments >> command_arguments | assignments | command_arguments) >> *io_redirect) %
(qi::skip(qi::ascii::blank)[VLINE] >> *nl);
+ and_or = pipeline % (qi::skip(qi::ascii::blank)[AND_IF | OR_IF] >> *nl);
io_redirect = io_file | io_here;
io_file = qi::skip(qi::ascii::blank)[LESS | LESSAND | GREAT | GREATAND | DGREAT | LESSGREAT | CLOBBER] >> filename;
さらに、次のようなリストを受理できるようなパーサ規則を書く。
echo & i=0; LC_ALL=C date
--- xsh08.cc 2013-05-14 14:24:46.000000000 +0900
+++ xsh09.cc 2013-05-14 14:24:46.000000000 +0900
@@ -25,8 +25,11 @@
pipe_here,
io_here,
filename,
+ separator_op,
+ separator,
pipeline,
and_or,
+ list,
io_redirect,
io_file,
nl,
@@ -60,7 +63,7 @@
ANDGREAT = qi::as<std::string>()["&>"];
MINUS = '-';
- top = *(*nl >> (and_or % nl_list) >> *nl || nl);
+ top = *(list || nl);
variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))];
single_quoted_string = qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")];
@@ -93,10 +96,14 @@
filename = qi::omit[*qi::ascii::blank] >> argument;
+ separator_op = qi::skip(qi::ascii::blank)[AMP | SEMI];
+ separator = (separator_op >> *nl | nl_list);
+
pipeline = (qi::omit[*qi::ascii::blank] >>
(assignments >> command_arguments | assignments | command_arguments) >> *io_redirect) %
(qi::skip(qi::ascii::blank)[VLINE] >> *nl);
and_or = pipeline % (qi::skip(qi::ascii::blank)[AND_IF | OR_IF] >> *nl);
+ list = and_or % separator_op;
io_redirect = io_file | io_here;
io_file = qi::skip(qi::ascii::blank)[LESS | LESSAND | GREAT | GREATAND | DGREAT | LESSGREAT | CLOBBER] >> filename;
これでリスト構文を受理できるようになる。
サブシェル、グループ
次に、以下のようなサブシェルやグループを受理できるようなパーサ規則を書く。
true && (echo yes) || (echo no)
true && {
echo yes
} || {
echo no
}
また、ついでにリダイレクションにてファイルデスクリプタ番号に対応させておく。
--- xsh09.cc 2013-05-14 14:24:46.000000000 +0900
+++ xsh10.cc 2013-05-14 14:24:46.000000000 +0900
@@ -30,8 +30,11 @@
pipeline,
and_or,
list,
+ term,
+ compound_list,
io_redirect,
io_file,
+ command,
nl,
nl_list,
comment;
@@ -40,6 +43,7 @@
qi::rule<Iterator, spirit::utree()> AMP, VLINE, SEMI;
qi::rule<Iterator, spirit::utree()> AND_IF, OR_IF, DSEMI;
qi::rule<Iterator, spirit::utree()> LESS, GREAT, DLESS, DGREAT, LESSAND, GREATAND, LESSGREAT, CLOBBER, ANDGREAT, MINUS;
+ qi::rule<Iterator, spirit::utree()> If, Then, Elif, Else, Fi, For, While, Until, Do, Done, Case, Esac, In, Lparen, Rparen, Lbrace, Rbrace, Parens, Bang;
sh_parser() : sh_parser::base_type(top)
{
IO_NUMBER = +qi::digit;
@@ -63,7 +67,27 @@
ANDGREAT = qi::as<std::string>()["&>"];
MINUS = '-';
- top = *(list || nl);
+ If = qi::as<std::string>()["if"];
+ Then = qi::as<std::string>()["then"];
+ Elif = qi::as<std::string>()["elif"];
+ Else = qi::as<std::string>()["else"];
+ Fi = qi::as<std::string>()["fi"];
+ For = qi::as<std::string>()["for"];
+ While = qi::as<std::string>()["while"];
+ Until = qi::as<std::string>()["until"];
+ Do = qi::as<std::string>()["do"];
+ Done = qi::as<std::string>()["done"];
+ Case = qi::as<std::string>()["case"];
+ Esac = qi::as<std::string>()["esac"];
+ In = qi::as<std::string>()["in"];
+ Lparen = '(';
+ Rparen = ')';
+ Lbrace = '{';
+ Rbrace = '}';
+ Parens = qi::as<std::string>()["()"];
+ Bang = '!';
+
+ top = *compound_list;
variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))];
single_quoted_string = qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")];
@@ -99,15 +123,23 @@
separator_op = qi::skip(qi::ascii::blank)[AMP | SEMI];
separator = (separator_op >> *nl | nl_list);
- pipeline = (qi::omit[*qi::ascii::blank] >>
- (assignments >> command_arguments | assignments | command_arguments) >> *io_redirect) %
+ pipeline = command %
(qi::skip(qi::ascii::blank)[VLINE] >> *nl);
and_or = pipeline % (qi::skip(qi::ascii::blank)[AND_IF | OR_IF] >> *nl);
list = and_or % separator_op;
+ term = and_or % separator;
+ compound_list = *nl >> term >> -separator;
- io_redirect = io_file | io_here;
+ io_redirect = -IO_NUMBER >> (io_file | io_here);
io_file = qi::skip(qi::ascii::blank)[LESS | LESSAND | GREAT | GREATAND | DGREAT | LESSGREAT | CLOBBER] >> filename;
+ command = qi::omit[*qi::ascii::blank] >>
+ (
+ (assignments >> command_arguments | assignments | command_arguments) |
+ qi::skip(qi::ascii::space)[Lbrace] >> compound_list >> qi::skip(qi::ascii::space)[Rbrace] |
+ qi::skip(qi::ascii::space)[Lparen] >> compound_list >> qi::skip(qi::ascii::space)[Rparen]
+ ) >> *io_redirect;
+
nl = qi::omit[*comment] >> qi::eol;
nl_list = +(qi::omit[*comment] >> qi::eol);
comment = qi::as_string[qi::skip(qi::ascii::blank)['#'] >> *(~qi::char_("\r\n"))];
ここまで受理できるようになったので、残りは if などの制御構文のみとなった。
制御構文
if then elif else fi,
for in do done,
while do done,
until do done,
case in ) ;; esac,
function()
の制御構文を受理できるパーサ規則を書く。
--- xsh10.cc 2013-05-14 14:24:46.000000000 +0900
+++ xsh.cc 2013-05-14 14:24:46.000000000 +0900
@@ -24,9 +24,13 @@
assignments,
pipe_here,
io_here,
+ name,
+ word,
+ wordlist,
filename,
separator_op,
separator,
+ sequential_sep,
pipeline,
and_or,
list,
@@ -34,6 +38,13 @@
compound_list,
io_redirect,
io_file,
+ do_group,
+ for_clause,
+ case_item, case_item_ns, pattern,
+ case_clause,
+ if_clause,
+ compound_command,
+ function_definition,
command,
nl,
nl_list,
@@ -97,7 +108,8 @@
qi::char_('"')];
unquoted_string = qi::as_string[+~qi::char_("\t\n \"#&'();<=>\\`|")];
- command_name = +(single_quoted_string | double_quoted_string | unquoted_string);
+ command_name = +(single_quoted_string | double_quoted_string | unquoted_string) -
+ (/*Bang | */Lbrace | Rbrace/* | Lparen | Rparen*/ | Case | Do | Done | Elif | Else | Esac | Fi | For | If | In | Then | Until | While);
command_arguments = command_name >> *(qi::omit[+qi::ascii::blank] >> argument);
argument = +(single_quoted_string | double_quoted_string | unquoted_string);
@@ -118,12 +130,16 @@
qi::omit[qi::eol >> qi::string(qi::labels::_a)] |
qi::omit[qi::string(qi::labels::_a)]);
+ name = qi::omit[+qi::ascii::blank] >> variable_name;
+ word = qi::omit[+qi::ascii::blank] >> argument;
+ wordlist = qi::omit[+qi::ascii::blank] >> arguments;
filename = qi::omit[*qi::ascii::blank] >> argument;
- separator_op = qi::skip(qi::ascii::blank)[AMP | SEMI];
+ separator_op = qi::skip(qi::ascii::blank)[AMP | !DSEMI >> SEMI];
separator = (separator_op >> *nl | nl_list);
+ sequential_sep = qi::skip(qi::ascii::blank)[/*!DSEMI >> */SEMI] >> *nl | nl_list;
- pipeline = command %
+ pipeline = -Bang >> command %
(qi::skip(qi::ascii::blank)[VLINE] >> *nl);
and_or = pipeline % (qi::skip(qi::ascii::blank)[AND_IF | OR_IF] >> *nl);
list = and_or % separator_op;
@@ -133,11 +149,47 @@
io_redirect = -IO_NUMBER >> (io_file | io_here);
io_file = qi::skip(qi::ascii::blank)[LESS | LESSAND | GREAT | GREATAND | DGREAT | LESSGREAT | CLOBBER] >> filename;
+ do_group = qi::skip(qi::ascii::space)[Do] >> compound_list >> qi::skip(qi::ascii::space)[Done];
+ for_clause = qi::skip(qi::ascii::blank)[For] >> name >>
+ (SEMI >> *nl | *nl >> -(qi::skip(qi::ascii::blank)[In] >> -wordlist >> -sequential_sep)) >> do_group;
+ pattern = argument >> *(qi::omit[VLINE] >> argument);
+
+ case_item = !qi::eps | -qi::skip(qi::ascii::blank)['('] >> pattern >> qi::lit(')') >>
+ (compound_list | *nl) >> qi::skip(qi::ascii::space)[DSEMI] >> *nl;
+ case_item_ns = !qi::eps | -qi::skip(qi::ascii::blank)['('] >> pattern >> qi::lit(')') >>
+ -compound_list >> *nl;
+
+ case_clause = qi::skip(qi::ascii::blank)[Case] >> word >> qi::omit[+qi::ascii::blank] >> *nl >>
+ qi::skip(qi::ascii::space)[In] >> *nl >>
+ *case_item >> -case_item_ns >>
+ qi::skip(qi::ascii::space)[Esac];
+
+ if_clause = qi::skip(qi::ascii::blank)[If] >> compound_list >>
+ qi::skip(qi::ascii::blank)[Then] >> compound_list >>
+ *(
+ qi::skip(qi::ascii::blank)[Elif] >> compound_list >>
+ qi::skip(qi::ascii::blank)[Then] >> compound_list
+ ) >>
+ -(
+ qi::skip(qi::ascii::blank)[Else] >> compound_list
+ ) >>
+ qi::skip(qi::ascii::space)[Fi];
+
+ compound_command =
+ qi::skip(qi::ascii::space)[Lbrace] >> compound_list >> qi::skip(qi::ascii::space)[Rbrace] |
+ qi::skip(qi::ascii::space)[Lparen] >> compound_list >> qi::skip(qi::ascii::space)[Rparen] |
+ for_clause |
+ case_clause |
+ if_clause |
+ qi::skip(qi::ascii::blank)[While] >> compound_list >> do_group |
+ qi::skip(qi::ascii::blank)[Until] >> compound_list >> do_group;
+ function_definition =
+ variable_name >> *qi::ascii::blank >> Parens >> *nl >> compound_command;
command = qi::omit[*qi::ascii::blank] >>
(
+ function_definition |
(assignments >> command_arguments | assignments | command_arguments) |
- qi::skip(qi::ascii::space)[Lbrace] >> compound_list >> qi::skip(qi::ascii::space)[Rbrace] |
- qi::skip(qi::ascii::space)[Lparen] >> compound_list >> qi::skip(qi::ascii::space)[Rparen]
+ compound_command
) >> *io_redirect;
nl = qi::omit[*comment] >> qi::eol;
これで POSIX シェルスクリプト構文を受理できるようになった。
そしてこれは Shell Command Language 記載の BNF を Boost Spirit Qi で書き直したものになっている。最後に、全体像としてすべてのコードを載せておく。ちなみに、!qi::eps を稀に必要としている箇所があるが、これは、それがないと属性が重複してしまう現象を防ぐためのものである。
#include <iostream>
#include <string>
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/support_utree.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace qi = boost::spirit::qi;
namespace spirit = boost::spirit;
namespace client {
    /// Boost.Spirit Qi grammar for POSIX shell script syntax that produces a
    /// spirit::utree.
    ///
    /// None of the rules declares a skipper type; blanks and newlines are
    /// consumed explicitly with qi::omit[...] and qi::skip(...)[...].
    /// Every utree rule shares one qi::locals<std::string> slot (_a); only
    /// io_here uses it, to remember the here-document delimiter.
    template <typename Iterator>
    struct sh_parser : qi::grammar<Iterator, spirit::utree(), qi::locals<std::string> > {
        qi::rule<Iterator, spirit::utree(), qi::locals<std::string> > top,
            variable_name,
            single_quoted_string,
            double_quoted_string,
            unquoted_string,
            command_name,
            command_arguments,
            argument,
            arguments,
            assignment,
            assignments,
            pipe_here,
            io_here,
            name,
            word,
            wordlist,
            filename,
            separator_op,
            separator,
            sequential_sep,
            pipeline,
            and_or,
            list,
            term,
            compound_list,
            io_redirect,
            io_file,
            do_group,
            for_clause,
            case_item, case_item_ns, pattern,
            case_clause,
            if_clause,
            compound_command,
            function_definition,
            command,
            nl,
            nl_list,
            comment;
        // Here-document delimiters are plain strings so they can be stored in _a.
        qi::rule<Iterator, std::string()> here_end, quoted_here_end, unquoted_here_end;
        // Token rules (operators and reserved words).
        qi::rule<Iterator, spirit::utree()> IO_NUMBER;
        qi::rule<Iterator, spirit::utree()> AMP, VLINE, SEMI;
        qi::rule<Iterator, spirit::utree()> AND_IF, OR_IF, DSEMI;
        qi::rule<Iterator, spirit::utree()> LESS, GREAT, DLESS, DGREAT, LESSAND, GREATAND, LESSGREAT, CLOBBER, ANDGREAT, MINUS;
        qi::rule<Iterator, spirit::utree()> If, Then, Elif, Else, Fi, For, While, Until, Do, Done, Case, Esac, In, Lparen, Rparen, Lbrace, Rbrace, Parens, Bang;

        sh_parser() : sh_parser::base_type(top)
        {
            // ---- tokens -------------------------------------------------
            IO_NUMBER = +qi::digit;

            AMP = '&';
            VLINE = '|';
            SEMI = ';';

            AND_IF = qi::as<std::string>()["&&"];
            OR_IF = qi::as<std::string>()["||"];
            DSEMI = qi::as<std::string>()[";;"];

            LESS = '<';
            GREAT = '>';
            DLESS = qi::as<std::string>()["<<"];
            DGREAT = qi::as<std::string>()[">>"];
            LESSAND = qi::as<std::string>()["<&"];
            GREATAND = qi::as<std::string>()[">&"];
            LESSGREAT = qi::as<std::string>()["<>"];
            CLOBBER = qi::as<std::string>()[">|"];
            ANDGREAT = qi::as<std::string>()["&>"];
            MINUS = '-';

            // ---- reserved words ------------------------------------------
            If = qi::as<std::string>()["if"];
            Then = qi::as<std::string>()["then"];
            Elif = qi::as<std::string>()["elif"];
            Else = qi::as<std::string>()["else"];
            Fi = qi::as<std::string>()["fi"];
            For = qi::as<std::string>()["for"];
            While = qi::as<std::string>()["while"];
            Until = qi::as<std::string>()["until"];
            Do = qi::as<std::string>()["do"];
            Done = qi::as<std::string>()["done"];
            Case = qi::as<std::string>()["case"];
            Esac = qi::as<std::string>()["esac"];
            In = qi::as<std::string>()["in"];
            Lparen = '(';
            Rparen = ')';
            Lbrace = '{';
            Rbrace = '}';
            Parens = qi::as<std::string>()["()"];
            Bang = '!';

            // ---- start rule ----------------------------------------------
            top = *compound_list;

            // ---- words ---------------------------------------------------
            variable_name = qi::as_string[(qi::ascii::alpha | qi::char_('_')) >> *(qi::alnum | qi::char_('_'))];
            // The surrounding quotes are kept as part of the string.
            single_quoted_string = qi::as_string[qi::char_("'") >> *~qi::char_("'") >> qi::char_("'")];
            // A backslash escapes the following character; only an
            // unescaped double quote closes the string.
            double_quoted_string = qi::as_string[qi::char_('"') >> *(
                                        (qi::lit('\\') >> qi::char_) | (!qi::lit('\\') >> ~qi::lit('"'))
                                    ) >>
                                    qi::char_('"')];
            unquoted_string = qi::as_string[+~qi::char_("\t\n \"#&'();<=>\\`|")];

            // A reserved word disqualifies a command name only when it is the
            // whole word. Qi's difference operator tries its right-hand side
            // first, so subtracting the bare keywords would reject every name
            // that merely starts with one ("find", "install", "docker").
            // The trailing not-predicate requires that no further word part
            // follows the keyword. `Done` is listed before `Do` because the
            // alternative does not backtrack once `Do` has matched the
            // prefix of "done".
            command_name = +(single_quoted_string | double_quoted_string | unquoted_string) -
                ((/*Bang | */Lbrace | Rbrace/* | Lparen | Rparen*/ | Case | Done | Do | Elif | Else | Esac | Fi | For | If | In | Then | Until | While) >>
                 !(single_quoted_string | double_quoted_string | unquoted_string));
            command_arguments = command_name >> *(qi::omit[+qi::ascii::blank] >> argument);

            argument = +(single_quoted_string | double_quoted_string | unquoted_string);
            arguments = argument >> *(qi::omit[+qi::ascii::blank] >> argument);

            assignment = variable_name >> '=' >> -argument;
            // `!qi::eps` never matches; per the accompanying article it is
            // there to avoid duplicated attributes.
            assignments = assignment >> *(!qi::eps | qi::omit[+qi::ascii::blank] >> assignment);

            // ---- here-documents ------------------------------------------
            // Pipeline written on the same line as the here-document operator.
            pipe_here = qi::skip(qi::ascii::blank)[VLINE] >> *pipeline;

            quoted_here_end = '\'' >> qi::as_string[(+(qi::char_ - '\''))][qi::labels::_val = qi::labels::_1] >> '\'';
            unquoted_here_end %= qi::as_string[(+(qi::char_ - qi::ascii::space))];
            here_end %= quoted_here_end | unquoted_here_end;

            // qi::matches[MINUS] records whether the operator was "<<-".
            // The delimiter is saved in _a; the body is everything up to a
            // line consisting of that delimiter, or empty when the delimiter
            // follows immediately.
            io_here %= qi::skip(qi::ascii::blank)[DLESS] >> qi::matches[MINUS] >>
                       here_end[qi::labels::_a = qi::labels::_1] >> -pipe_here >> nl >>
                       (*(qi::as_string[+(qi::char_ - (qi::eol >> qi::string(qi::labels::_a) >> qi::eol))]) >>
                        qi::omit[qi::eol >> qi::string(qi::labels::_a)] |
                        qi::omit[qi::string(qi::labels::_a)]);

            // ---- blank-prefixed words ------------------------------------
            name = qi::omit[+qi::ascii::blank] >> variable_name;
            word = qi::omit[+qi::ascii::blank] >> argument;
            wordlist = qi::omit[+qi::ascii::blank] >> arguments;
            filename = qi::omit[*qi::ascii::blank] >> argument;

            // ---- separators ----------------------------------------------
            // `!DSEMI` keeps a lone ';' from matching the first half of ";;".
            separator_op = qi::skip(qi::ascii::blank)[AMP | !DSEMI >> SEMI];
            separator = (separator_op >> *nl | nl_list);
            sequential_sep = qi::skip(qi::ascii::blank)[/*!DSEMI >> */SEMI] >> *nl | nl_list;

            // ---- pipelines and lists -------------------------------------
            pipeline = -Bang >> command %
                       (qi::skip(qi::ascii::blank)[VLINE] >> *nl);
            and_or = pipeline % (qi::skip(qi::ascii::blank)[AND_IF | OR_IF] >> *nl);
            list = and_or % separator_op;
            term = and_or % separator;
            compound_list = *nl >> term >> -separator;

            // ---- redirections --------------------------------------------
            io_redirect = -IO_NUMBER >> (io_file | io_here);
            io_file = qi::skip(qi::ascii::blank)[LESS | LESSAND | GREAT | GREATAND | DGREAT | LESSGREAT | CLOBBER] >> filename;

            // ---- compound commands ---------------------------------------
            do_group = qi::skip(qi::ascii::space)[Do] >> compound_list >> qi::skip(qi::ascii::space)[Done];

            for_clause = qi::skip(qi::ascii::blank)[For] >> name >>
                         (SEMI >> *nl | *nl >> -(qi::skip(qi::ascii::blank)[In] >> -wordlist >> -sequential_sep)) >> do_group;

            pattern = argument >> *(qi::omit[VLINE] >> argument);

            // case_item ends with ";;"; case_item_ns is the final item
            // without one.
            case_item = !qi::eps | -qi::skip(qi::ascii::blank)['('] >> pattern >> qi::lit(')') >>
                        (compound_list | *nl) >> qi::skip(qi::ascii::space)[DSEMI] >> *nl;
            case_item_ns = !qi::eps | -qi::skip(qi::ascii::blank)['('] >> pattern >> qi::lit(')') >>
                           -compound_list >> *nl;

            case_clause = qi::skip(qi::ascii::blank)[Case] >> word >> qi::omit[+qi::ascii::blank] >> *nl >>
                          qi::skip(qi::ascii::space)[In] >> *nl >>
                          *case_item >> -case_item_ns >>
                          qi::skip(qi::ascii::space)[Esac];

            if_clause = qi::skip(qi::ascii::blank)[If] >> compound_list >>
                        qi::skip(qi::ascii::blank)[Then] >> compound_list >>
                        *(
                            qi::skip(qi::ascii::blank)[Elif] >> compound_list >>
                            qi::skip(qi::ascii::blank)[Then] >> compound_list
                        ) >>
                        -(
                            qi::skip(qi::ascii::blank)[Else] >> compound_list
                        ) >>
                        qi::skip(qi::ascii::space)[Fi];

            compound_command =
                qi::skip(qi::ascii::space)[Lbrace] >> compound_list >> qi::skip(qi::ascii::space)[Rbrace] |
                qi::skip(qi::ascii::space)[Lparen] >> compound_list >> qi::skip(qi::ascii::space)[Rparen] |
                for_clause |
                case_clause |
                if_clause |
                qi::skip(qi::ascii::blank)[While] >> compound_list >> do_group |
                qi::skip(qi::ascii::blank)[Until] >> compound_list >> do_group;

            function_definition =
                variable_name >> *qi::ascii::blank >> Parens >> *nl >> compound_command;

            // Simple commands are tried before compound commands, which is
            // why command_name has to refuse reserved words.
            command = qi::omit[*qi::ascii::blank] >>
                      (
                          function_definition |
                          (assignments >> command_arguments | assignments | command_arguments) |
                          compound_command
                      ) >> *io_redirect;

            // ---- newlines and comments -----------------------------------
            nl = qi::omit[*comment] >> qi::eol;
            nl_list = +(qi::omit[*comment] >> qi::eol);
            comment = qi::as_string[qi::skip(qi::ascii::blank)['#'] >> *(~qi::char_("\r\n"))];

            BOOST_SPIRIT_DEBUG_NODE(top);
        }
    };
}
int main()
{
client::sh_parser<std::string::const_iterator> sh_parser;
std::string str;
std::cin.unsetf(std::ios::skipws);
std::copy(std::istream_iterator<char>(std::cin), std::istream_iterator<char>(), std::back_inserter(str));
{
std::string::const_iterator it = str.begin(), end = str.end();
spirit::utree ut;
bool r = phrase_parse(it, end, sh_parser, qi::ascii::space/*, qi::skip_flag::dont_postskip*/, ut);
std::cout << str << std::endl;
if (r && it == end) {
std::cout << "succeeded:\t" << ut << std::endl;
}
else {
std::cout << "failed:\t" << std::string(it, end) << std::endl;
}
}
return 0;
}