#include <cstdio>
#include <iostream>
#include <iomanip>
#include <string>
#include <vector>
#include <functional>
// Provides lexical-analysis and conversion tools
#include <boost/spirit/include/lex.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/lex_static_lexertl.hpp>
#include <boost/spirit/include/lex_generate_static_lexertl.hpp>
// Can be combined with Qi
#include <boost/spirit/include/qi.hpp>
// Provides lambda-like expression support
#include <boost/spirit/include/phoenix.hpp>
// Generated static lexer header (produced by test_spirit_lex_static_gencode)
#include "spirit_lex_static_test.hpp"
// Shorthand aliases used throughout this file.
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
// Sample input fed to every demo below. Note the "\r\n" breaks: only '\n'
// is tokenized as end-of-line, so the '\r' falls through to the
// catch-all single-character rule.
static std::string g_strLexAnaContent = "Hello World!\r\n I'm OWenT\r\nMy blog is http://www.owent.net";
// Token ids shared by the lexer definitions below.
// NOTE: the original wrote "static enum token_ids" — a storage-class
// specifier is ill-formed on a type definition with no declarator, so the
// "static" keyword has been removed.
enum token_ids
{
    ID_OWENT = 1000,                               // case-insensitive "owent" match
    ID_EOL,                                        // end of line ('\n')
    ID_CHAR,                                       // any other single character
    IDANY = boost::spirit::lex::min_token_id + 1,  // catch-all id for the static-lexer demo
};
// Custom functor example
static void test_spirit_lex_custom_functor();
// Phoenix semantic-action example
void test_spirit_lex_phoenix_functor();
// Mixed Lex + Qi example
void test_spirit_lex_qi_functor();
// Static lexer example (consumes the header generated by the function below)
void test_spirit_lex_static();
// NOTE(review): only the first declaration above is "static" (internal
// linkage) while its siblings are external — presumably unintentional; confirm.
void test_spirit_lex_static_gencode(); // static lexer code generator
// Entry point: prints the sample input, then runs every Spirit.Lex demo in
// this file in order.
// Cleanup: the original opened with "using namespace boost::spirit;" and
// "using boost::spirit::ascii::space;", neither of which is referenced in
// this function body — both removed.
void test_spirit_lex() {
    // ============================================================
    // ============ Spirit.Lex component, lexical analyzer ========
    // ============================================================
    // === Flex-like lexer generator supporting dynamic and static analysis ===
    std::cout<< "Spirit.Lex => Line "<< std::setw(4)<< __LINE__<< ": Lex::Parse content "<< std::endl<< g_strLexAnaContent<< std::endl;
    // Custom functor example
    test_spirit_lex_custom_functor();
    // Phoenix semantic-action example
    test_spirit_lex_phoenix_functor();
    // Mixed Lex + Qi example
    test_spirit_lex_qi_functor();
    // Static lexer example.
    // Step 1: generate the static DFA source via lex::lexertl::generate_static_dfa.
    test_spirit_lex_static_gencode();
    // Step 2: compile against the code generated in step 1 (comment out the
    // call below for the very first build, otherwise compilation fails
    // because the generated header does not exist yet).
    test_spirit_lex_static();
}
// ========== Custom functor ==========
// Token table for the custom-functor demo. Three rules are registered one
// at a time; registration order defines matching priority.
template <typename Lexer>
struct word_count_tokens1 : lex::lexer<Lexer>
{
    word_count_tokens1()
    {
        this->self.add("(?i:owent)", ID_OWENT); // case-insensitive "owent" counter
        this->self.add("\n", ID_EOL);           // start of a new line
        this->self.add(".", ID_CHAR);           // any character other than newline
    }
};
// Token-handling functor passed to lex::tokenize: bumps the bound counters
// for "owent" matches and line breaks, ignores everything else.
struct counter1
{
    int& owent_count;
    int& line_count;
    counter1(int& _o, int& _l) : owent_count(_o), line_count(_l) {}
    // Invoked once per recognized token.
    template <typename Token>
    bool operator()(Token const& t) const
    {
        if (ID_OWENT == t.id()) {
            ++owent_count;
        } else if (ID_EOL == t.id()) {
            ++line_count;
        }
        // ID_CHAR (and any other id) is deliberately ignored.
        return true; // keep tokenizing
    }
};
void test_spirit_lex_custom_functor()
{
int o = 0, l = 1;
word_count_tokens1<lex::lexertl::lexer<> > functor;
const char* begin = g_strLexAnaContent.c_str();
const char* end = begin + g_strLexAnaContent.size();
bool res = lex::tokenize(begin, end, functor, counter1(o, l));
std::cout<< "Spirit.Lex => Line "<< std::setw(4)<< __LINE__<< ": Lex::tokenize ("<< (res?"true": "false")<< ")"<< std::endl;
std::cout<< "Spirit.Lex => \t\t "<< "Lines: "<< l<< ", OWenTs: "<< o<< std::endl;
}
// ========== Using Phoenix ==========
// Lexer whose semantic actions are Phoenix expressions attached directly to
// the token definitions, so no external functor or grammar is needed; the
// caller reads the public counters o and l afterwards.
template <typename Lexer>
struct word_count_tokens2 : lex::lexer<Lexer>
{
    word_count_tokens2()
        : o(0), l(1)            // zero matches, line count starts at 1
        , owent("(?i:owent)")   // case-insensitive "owent"
        , eol("\n")             // line break
        , any(".")              // any other single character
    {
        // Alternative order defines matching priority; each attached
        // Phoenix action runs on every match of its token.
        this->self
            = owent [++ boost::phoenix::ref(o)]
            | eol [++ boost::phoenix::ref(l)]
            | any
            ;
    }
    int o, l;                          // counters read by the caller
    lex::token_def<> owent, eol, any;  // attribute-less token definitions
};
void test_spirit_lex_phoenix_functor()
{
typedef lex::lexertl::token<std::string::iterator, lex::omit, boost::mpl::false_> token_type;
typedef lex::lexertl::actor_lexer<token_type> lexer_type;
word_count_tokens2<lexer_type> word_count_lexer;
std::string::iterator begin = g_strLexAnaContent.begin();
std::string::iterator end = g_strLexAnaContent.end();
lexer_type::iterator_type iter = word_count_lexer.begin(begin, end);
using lex::lexertl::token_is_valid;
while(iter != word_count_lexer.end() && token_is_valid(*iter))
++ iter;
std::cout<< "Spirit.Lex => Line "<< std::setw(4)<< __LINE__<< ": Lex::token_is_valid ("<< (iter == word_count_lexer.end()?"true": "false")<< ")"<< std::endl;
std::cout<< "Spirit.Lex => \t\t "<< "Lines: "<< word_count_lexer.l<< ", OWenTs: "<< word_count_lexer.o<< std::endl;
}
// ========== Mixing with Qi ==========
// Lexer for the Lex+Qi demo: "owent" carries a std::string attribute so the
// grammar can reference the token_def member directly.
template <typename Lexer>
struct word_count_tokens3 : lex::lexer<Lexer>
{
    word_count_tokens3()
    {
        // Register a named pattern, then bind the token definition to it.
        this->self.add_pattern("OWENT", "(?i:owent)");
        owent = "{OWENT}";
        // Registration order defines matching priority.
        this->self.add(owent);        // id assigned automatically by Spirit
        this->self.add('\n');         // id is the character code of '\n'
        this->self.add(".", ID_CHAR); // explicit id for the catch-all rule
    }
    lex::token_def<std::string> owent;
};
// Qi grammar consuming the token stream produced by word_count_tokens3.
template <typename Iterator>
struct word_count_grammar3 : qi::grammar<Iterator>
{
    template <typename TokenDef>
    word_count_grammar3(TokenDef const& tok)
        : word_count_grammar3::base_type(start)
        , o(0), l(1)  // zero matches, line count starts at 1
    {
        // Alternatives are tried in order (PEG semantics); do not reorder.
        start = *( tok.owent [++ boost::phoenix::ref(o)] // token_def member defined inside the lexer
                 | qi::lit('\n') [++ boost::phoenix::ref(l)] // same usage as plain Qi
                 | qi::token(ID_CHAR) // match a token by its explicit id
                 )
            ;
    }
    int o, l;                  // counters read by the caller after parsing
    qi::rule<Iterator> start;  // grammar entry rule
};
// Demo 3: tokenize with the dynamic lexer and parse the token stream with a
// Qi grammar in one call.
void test_spirit_lex_qi_functor()
{
    typedef lex::lexertl::token<std::string::iterator, boost::mpl::vector<std::string> > token_type;
    typedef lex::lexertl::lexer<token_type> lexer_type;
    typedef word_count_tokens3<lexer_type>::iterator_type iterator_type;
    word_count_tokens3<lexer_type> word_count;        // lexer
    word_count_grammar3<iterator_type> g(word_count); // grammar
    // FIX: lex::tokenize_and_parse takes its first iterator by non-const
    // lvalue reference (it advances it to the stop position), so the
    // temporary returned by begin() cannot bind — use a named iterator.
    std::string::iterator first = g_strLexAnaContent.begin();
    bool res = lex::tokenize_and_parse(first, g_strLexAnaContent.end(), word_count, g);
    std::cout<< "Spirit.Lex => Line "<< std::setw(4)<< __LINE__<< ": Lex::tokenize_and_parse ("<< (res?"true": "false")<< ")"<< std::endl;
    std::cout<< "Spirit.Lex => \t\t "<< "Lines: "<< g.l<< ", OWenTs: "<< g.o<< std::endl;
}
// ========== Static lexer generator ==========
// Lexer for the static-lexer demo; identical shape to word_count_tokens3
// except the catch-all rule uses IDANY (>= min_token_id).
template <typename Lexer>
struct word_count_tokens4 : lex::lexer<Lexer>
{
    word_count_tokens4()
    {
        // Register a named pattern, then bind the token definition to it.
        this->self.add_pattern("OWENT", "(?i:owent)");
        owent = "{OWENT}";
        // Registration order defines matching priority.
        this->self.add(owent);      // id assigned automatically by Spirit
        this->self.add('\n');       // id is the character code of '\n'
        this->self.add(".", IDANY); // explicit id for the catch-all rule
    }
    lex::token_def<std::string> owent;
};
// Qi grammar consuming the token stream produced by word_count_tokens4
// (mirror of word_count_grammar3, but matching IDANY instead of ID_CHAR).
template <typename Iterator>
struct word_count_grammar4 : qi::grammar<Iterator>
{
    template <typename TokenDef>
    word_count_grammar4(TokenDef const& tok)
        : word_count_grammar4::base_type(start)
        , o(0), l(1)  // zero matches, line count starts at 1
    {
        // Alternatives are tried in order (PEG semantics); do not reorder.
        start = *( tok.owent [++ boost::phoenix::ref(o)] // token_def member defined inside the lexer
                 | qi::lit('\n') [++ boost::phoenix::ref(l)] // same usage as plain Qi
                 | qi::token(IDANY) // match a token by its explicit id
                 )
            ;
    }
    int o, l;                  // counters read by the caller after parsing
    qi::rule<Iterator> start;  // grammar entry rule
};
// Demo 4: same as demo 3, but using the pre-generated static DFA instead of
// building the lexer tables at run time.
void test_spirit_lex_static()
{
    typedef lex::lexertl::token<std::string::iterator, boost::mpl::vector<std::string> > token_type;
    typedef lex::lexertl::static_lexer<token_type, lex::lexertl::static_::lexer_owent> lexer_type; // [only difference from demo 3]
    typedef word_count_tokens4<lexer_type>::iterator_type iterator_type;
    word_count_tokens4<lexer_type> word_count;        // lexer
    word_count_grammar4<iterator_type> g(word_count); // grammar
    // FIX: tokenize_and_parse takes its first iterator by non-const lvalue
    // reference (it advances it), so the temporary returned by begin()
    // cannot bind — use a named iterator.
    std::string::iterator first = g_strLexAnaContent.begin();
    bool res = lex::tokenize_and_parse(first, g_strLexAnaContent.end(), word_count, g);
    std::cout<< "Spirit.Lex => Line "<< std::setw(4)<< __LINE__<< ": Lex::tokenize_and_parse ("<< (res?"true": "false")<< ")"<< std::endl;
    std::cout<< "Spirit.Lex => \t\t "<< "Lines: "<< g.l<< ", OWenTs: "<< g.o<< std::endl;
}
#include <fstream>
// Builds the lexer with the dynamic engine and dumps its DFA as C++ source;
// test_spirit_lex_static() is compiled against the generated header.
// FIX: the original ignored both a failed file open and the bool result of
// generate_static_dfa — report both instead of failing silently.
void test_spirit_lex_static_gencode()
{
    word_count_tokens4<lex::lexertl::lexer<> > word_count;
    std::ofstream out("spirit_lex_static_test.hpp");
    if (!out.is_open()) {
        std::cerr << "Spirit.Lex => cannot open spirit_lex_static_test.hpp for writing" << std::endl;
        return;
    }
    if (!lex::lexertl::generate_static_dfa(word_count, out, "owent")) {
        std::cerr << "Spirit.Lex => generate_static_dfa failed" << std::endl;
    }
}