flex和bison是常用的词法分析和语法分析工具。flex可以按照指定的规则将源文本识别为单词(token), bison可以将这些识别出来的单词进行结构化处理。下面通过一个bind9 config解析程序进行说明。
词法分析和语法分析也是编译过程中的两个阶段: 编译器通过词法分析、语法分析、语义分析(前端工作)完成对输入源代码的识别, 通过中间语言和优化实现代码的可扩展与优化(中端工作), 最后生成目标码(一般称为后端工作), 从而实现把源代码变成目标代码的过程。
使用flex和bison可以方便地解析配置文件: 先把字符串分解为token, 再将这些token解析为配置文件对应的结构, 供之后查询和修改。
仅简单分析一下包含zone block的bind9配置
识别token
bind9.l 标识出要识别的关键字, 也可以过滤掉一些注释和空白字符
%{
#include <stdio.h>
#include <stdlib.h>
#include "y.tab.h"
#include "tree.h"
%}
/*
 * Scanner rules for the bind9 zone configuration.
 *
 * yylineno is enabled so that the line number printed by the parser's error
 * handler is actually maintained by the scanner.
 */
%option yylineno
%%
[ \t\r\n]+ { /* skip whitespace */ }
"//"[^\n]* { /* skip a single-line comment; the newline itself is consumed as whitespace, so a comment on the last line without a trailing newline is skipped too */ }
"/*"([^*]|\*+[^*/])*\*+"/" { /* skip a block comment; it may span several lines and ends at the first closing marker */ }
"{" { return LBRACE; }
"}" { return RBRACE; }
";" { return SEMICOLON; }
"=" { return EQUALS; }
"zone" { return ZONE; }
[a-zA-Z0-9\._/\-]+ { yylval.value = strdup(yytext); return NAME; /* heap copy of the matched text */ }
\"[^"]*\" { yylval.value = strdup(yytext); return STRING; /* copy keeps the surrounding quotes */ }
%%
/*
 * End-of-input hook used by the scanner.
 *
 * Always returns 1, which tells the scanner there is no further input to
 * switch to.
 */
int yywrap()
{
    const int no_more_input = 1;

    return no_more_input;
}
结构解析
将关键组合放入自定义结构体中, 这里使用树形结构, 子节点以链表方式组织。
%{
#include <stdio.h>
#include <stdlib.h>
#include "tree.h"
/* Head of the parsed tree; assigned by the config_file rule. */
struct TreeNode *root = NULL;

/* Scanner entry point and scanner state, defined outside this file. */
extern int yylex();
extern char* yytext;
extern int yylineno;

/* Error reporter; its definition follows the grammar. */
void yyerror( const char *s);
%}
/* Semantic value: the text of a token, or a tree node built by a rule. */
%union {
    char *value;
    struct TreeNode *node;
}

/* Tokens that carry no semantic value. */
%token ZONE LBRACE RBRACE SEMICOLON EQUALS

/* Tokens whose matched text is passed to the parser. */
%token <value> NAME STRING

/* Every nonterminal produces a tree node (NULL for an empty list). */
%type <node> config_file statements statement zone_block zone_options zone_option

%start config_file
%%
/* The whole file is a (possibly empty) statement list; its head becomes the
   tree root. */
config_file:
    statements
    {
        root = $1;
    }
    ;

/* Left-recursive list: each new statement is linked after the last sibling,
   so nodes stay in source order. */
statements:
    /* empty */
    {
        $$ = NULL;
    }
    | statements statement SEMICOLON
    {
        struct TreeNode *tail = $1;

        if (tail == NULL) {
            $$ = $2;
        } else {
            for (; tail->next_sibling != NULL; tail = tail->next_sibling) {
                /* walk to the last sibling */
            }
            tail->next_sibling = $2;
            $$ = $1;
        }
    }
    ;

/* Only zone blocks are recognised as statements. */
statement:
    zone_block
    {
        $$ = $1;
    }
    ;

/* zone "name" { options } : the zone node owns the option list as children. */
zone_block:
    ZONE STRING LBRACE zone_options RBRACE
    {
        struct TreeNode *zone = createNode("zone", $2);

        addChild(zone, $4);
        $$ = zone;
    }
    ;

/* Same list construction as for statements, applied to zone options. */
zone_options:
    /* empty */
    {
        $$ = NULL;
    }
    | zone_options zone_option SEMICOLON
    {
        struct TreeNode *tail = $1;

        if (tail == NULL) {
            $$ = $2;
        } else {
            for (; tail->next_sibling != NULL; tail = tail->next_sibling) {
                /* walk to the last sibling */
            }
            tail->next_sibling = $2;
            $$ = $1;
        }
    }
    ;

/* An option is a key followed by either a bare word or a quoted string. */
zone_option:
    NAME NAME
    {
        $$ = createNode($1, $2);
    }
    | NAME STRING
    {
        $$ = createNode($1, $2);
    }
    ;
%%
void
yyerror( const char *s)
{
fprintf(stderr,"error: %s on line %d\n", s, yylineno);
}
/*
 * Allocate a tree node for a key/value pair.
 *
 * The key and value pointers are stored as given; the strings are not copied,
 * so they must remain valid for as long as the node is in use. The new node
 * starts with no parent, no children and no siblings.
 *
 * Returns the new node, or NULL when the allocation fails.
 */
struct TreeNode* createNode(char* key, char* value) {
    struct TreeNode *node = malloc(sizeof *node);

    if (node == NULL) {
        /* Out of memory: report it to the caller instead of writing through NULL. */
        return NULL;
    }
    node->key = key;
    node->value = value;
    node->parent = NULL;
    node->first_child = NULL;
    node->next_sibling = NULL;
    return node;
}
/*
 * Attach a node, together with every sibling chained after it, to a parent.
 *
 * child may be the head of a sibling list (as built by the grammar's list
 * rules); every node in that list gets its parent pointer set. The list is
 * appended after the parent's existing children, if any.
 *
 * A NULL parent or a NULL child (for example a zone block without options)
 * is a no-op.
 *
 * Returns child.
 */
struct TreeNode* addChild(struct TreeNode* parent, struct TreeNode* child) {
    struct TreeNode *node;

    if (parent == NULL || child == NULL) {
        return child;
    }
    /* All nodes of the chain become children, so all of them get the parent. */
    for (node = child; node != NULL; node = node->next_sibling) {
        node->parent = parent;
    }
    if (parent->first_child == NULL) {
        parent->first_child = child;
    } else {
        setChild(parent->first_child, child);
    }
    return child;
}
/*
 * Append child at the end of a sibling chain.
 *
 * Despite its name, parent is used as the first node of the chain: the chain
 * is followed through next_sibling and child is linked after the last node.
 * parent must not be NULL.
 */
void setChild(struct TreeNode* parent, struct TreeNode* child) {
    struct TreeNode *last = parent;

    for (; last->next_sibling != NULL; last = last->next_sibling) {
        /* advance to the final sibling */
    }
    last->next_sibling = child;
}
/*
 * Print a node, its following siblings and all their descendants to stdout.
 *
 * Each node is written on its own line as key, followed by [value] when the
 * value is not NULL, and is preceded by one space per depth level. Children
 * are printed right after their parent at depth + 1.
 *
 * node:  first node to print; NULL prints nothing.
 * depth: indentation level of node and its siblings.
 */
void printTree(struct TreeNode* node, int depth) {
    struct TreeNode *cur;
    int col;

    for (cur = node; cur != NULL; cur = cur->next_sibling) {
        for (col = 0; col < depth; col++) {
            printf(" ");
        }
        printf("%s", cur->key);
        if (cur->value != NULL) {
            printf("[%s]", cur->value);
        }
        printf("\n");
        printTree(cur->first_child, depth + 1);
    }
}
extern FILE *yyin;

/*
 * Parse the configuration file named by argv[1] and print the resulting tree.
 *
 * Returns 0 on success and 1 when the argument is missing, the file cannot
 * be opened, or parsing fails.
 */
int main(int argc, char *argv[]) {
    FILE *input_file;
    int status;

    if (argc < 2) {
        fprintf(stderr, "Usage: %s filename\n", argv[0]);
        return 1;
    }
    input_file = fopen(argv[1], "r");
    if (input_file == NULL) {
        fprintf(stderr, "Error: Cannot open file %s\n", argv[1]);
        return 1;
    }
    yyin = input_file;
    status = yyparse();
    /* The scanner is not used again, so the input can be released here. */
    fclose(input_file);
    if (status != 0) {
        /* A non-zero result means the parse failed; do not report success. */
        return 1;
    }
    printTree(root, 0);
    return 0;
}
编译
# Build the bind9 config parser from the flex scanner and the bison grammar.
.PHONY: clean

# tree.h is included by both generated sources, so a change must trigger a rebuild.
parser: lex.yy.c y.tab.c tree.h
	gcc lex.yy.c y.tab.c -o parser

lex.yy.c: bind9.l
	flex bind9.l

# This step also writes y.tab.h, which the scanner includes.
y.tab.c: bind9.y
	bison --defines=y.tab.h -o y.tab.c bind9.y

# -f keeps clean from failing when some outputs were never built.
clean:
	rm -f lex.yy.c y.tab.c y.tab.h parser
验证
$ make
flex bind9.l
bison --defines=y.tab.h -o y.tab.c bind9.y
gcc lex.yy.c y.tab.c -o parser
$ cat named.conf
zone "." {
type hint;
file "root.hints";
};
zone "0.0.127.in-addr.arpa" {
type master;
file "zone/127.0.0";
};
zone "land-5.com" {
type master;
file "zone/land-5.com";
};
zone "177.6.206.in-addr.arpa" {
type master;
file "zone/206.6.177";
};
$ ./parser named.conf
zone["."]
type[hint]
file["root.hints"]
zone["0.0.127.in-addr.arpa"]
type[master]
file["zone/127.0.0"]
zone["land-5.com"]
type[master]
file["zone/land-5.com"]
zone["177.6.206.in-addr.arpa"]
type[master]
file["zone/206.6.177"]
其他
-
Multiple flex/bison parsers 问题
如果项目中多组解析库同时使用, 可以通过增加编译选项或者配置的方式区分不同的解析库, 我觉得放到编译条件中比较好, 增加编译选项后
makefile
# Build the parser with bind9-prefixed scanner and parser symbols.
.PHONY: clean

# tree.h is included by both generated sources, so a change must trigger a rebuild.
parser: lex.bind9.c bind9.tab.c tree.h
	gcc lex.bind9.c bind9.tab.c -o parser

# -Pbind9 renames the scanner's yy* symbols and writes lex.bind9.c.
lex.bind9.c: bind9.l
	flex -Pbind9 bind9.l

# Without -o the parser is written to bind9.tab.c; the header stays y.tab.h.
bind9.tab.c: bind9.y
	bison --name-prefix=bind9 --defines=y.tab.h bind9.y

# Remove exactly what the rules above generate; -f tolerates missing files.
clean:
	rm -f lex.bind9.c bind9.tab.c y.tab.h parser
bind9.l和bind9.y需要进行相应修改
bind9.l
%{
#include <stdio.h>
#include <stdlib.h>
#include "y.tab.h"
#include "tree.h"
%}
/*
 * Scanner rules for the bind9 zone configuration (built with -Pbind9).
 *
 * yylineno is enabled so that the scanner keeps a current line number that
 * error reporting can use.
 */
%option yylineno
%%
[ \t\r\n]+ { /* skip whitespace */ }
"//"[^\n]* { /* skip a single-line comment; the newline itself is consumed as whitespace, so a comment on the last line without a trailing newline is skipped too */ }
"/*"([^*]|\*+[^*/])*\*+"/" { /* skip a block comment; it may span several lines and ends at the first closing marker */ }
"{" { return LBRACE; }
"}" { return RBRACE; }
";" { return SEMICOLON; }
"=" { return EQUALS; }
"zone" { return ZONE; }
[a-zA-Z0-9\._/\-]+ { bind9lval.value = strdup(yytext); return NAME; /* heap copy of the matched text */ }
\"[^"]*\" { bind9lval.value = strdup(yytext); return STRING; /* copy keeps the surrounding quotes */ }
%%
/*
 * End-of-input hook used by the scanner.
 *
 * Always returns 1, which tells the scanner there is no further input to
 * switch to.
 */
int yywrap()
{
    const int no_more_input = 1;

    return no_more_input;
}
bind9.y
%{
#define YYPARSER
#include <stdio.h>
#include <stdlib.h>
#include "tree.h"
extern int yylex();
/*
 * The scanner is generated with "flex -Pbind9", so its globals are named
 * bind9lineno and bind9text. Map the unprefixed names onto them so that the
 * code below reads the scanner's own state instead of a separate, always-zero
 * variable defined in this file.
 */
extern int bind9lineno;
#define yylineno bind9lineno
extern char *bind9text;
#define yytext bind9text
void yyerror( const char *s);
/* Head of the parsed tree; assigned by the config_file rule. */
struct TreeNode *root = NULL;
extern FILE *bind9in;
%}
/* Semantic value: the text of a token, or a tree node built by a rule. */
%union {
    char *value;
    struct TreeNode *node;
}

/* Tokens that carry no semantic value. */
%token ZONE LBRACE RBRACE SEMICOLON EQUALS

/* Tokens whose matched text is passed to the parser. */
%token <value> NAME STRING

/* Every nonterminal produces a tree node (NULL for an empty list). */
%type <node> config_file statements statement zone_block zone_options zone_option

%start config_file
%%
/* The whole file is a (possibly empty) statement list; its head becomes the
   tree root. */
config_file:
    statements
    {
        root = $1;
    }
    ;

/* Left-recursive list: each new statement is linked after the last sibling,
   so nodes stay in source order. */
statements:
    /* empty */
    {
        $$ = NULL;
    }
    | statements statement SEMICOLON
    {
        struct TreeNode *tail = $1;

        if (tail == NULL) {
            $$ = $2;
        } else {
            for (; tail->next_sibling != NULL; tail = tail->next_sibling) {
                /* walk to the last sibling */
            }
            tail->next_sibling = $2;
            $$ = $1;
        }
    }
    ;

/* Only zone blocks are recognised as statements. */
statement:
    zone_block
    {
        $$ = $1;
    }
    ;

/* zone "name" { options } : the zone node owns the option list as children. */
zone_block:
    ZONE STRING LBRACE zone_options RBRACE
    {
        struct TreeNode *zone = createNode("zone", $2);

        addChild(zone, $4);
        $$ = zone;
    }
    ;

/* Same list construction as for statements, applied to zone options. */
zone_options:
    /* empty */
    {
        $$ = NULL;
    }
    | zone_options zone_option SEMICOLON
    {
        struct TreeNode *tail = $1;

        if (tail == NULL) {
            $$ = $2;
        } else {
            for (; tail->next_sibling != NULL; tail = tail->next_sibling) {
                /* walk to the last sibling */
            }
            tail->next_sibling = $2;
            $$ = $1;
        }
    }
    ;

/* An option is a key followed by either a bare word or a quoted string. */
zone_option:
    NAME NAME
    {
        $$ = createNode($1, $2);
    }
    | NAME STRING
    {
        $$ = createNode($1, $2);
    }
    ;
%%
void
yyerror( const char *s)
{
fprintf(stderr,"error: %s on line %d\n", s, yylineno);
}
/*
 * Allocate a tree node for a key/value pair.
 *
 * The key and value pointers are stored as given; the strings are not copied,
 * so they must remain valid for as long as the node is in use. The new node
 * starts with no parent, no children and no siblings.
 *
 * Returns the new node, or NULL when the allocation fails.
 */
struct TreeNode* createNode(char* key, char* value) {
    struct TreeNode *node = malloc(sizeof *node);

    if (node == NULL) {
        /* Out of memory: report it to the caller instead of writing through NULL. */
        return NULL;
    }
    node->key = key;
    node->value = value;
    node->parent = NULL;
    node->first_child = NULL;
    node->next_sibling = NULL;
    return node;
}
/*
 * Attach a node, together with every sibling chained after it, to a parent.
 *
 * child may be the head of a sibling list (as built by the grammar's list
 * rules); every node in that list gets its parent pointer set. The list is
 * appended after the parent's existing children, if any.
 *
 * A NULL parent or a NULL child (for example a zone block without options)
 * is a no-op.
 *
 * Returns child.
 */
struct TreeNode* addChild(struct TreeNode* parent, struct TreeNode* child) {
    struct TreeNode *node;

    if (parent == NULL || child == NULL) {
        return child;
    }
    /* All nodes of the chain become children, so all of them get the parent. */
    for (node = child; node != NULL; node = node->next_sibling) {
        node->parent = parent;
    }
    if (parent->first_child == NULL) {
        parent->first_child = child;
    } else {
        setChild(parent->first_child, child);
    }
    return child;
}
/*
 * Append child at the end of a sibling chain.
 *
 * Despite its name, parent is used as the first node of the chain: the chain
 * is followed through next_sibling and child is linked after the last node.
 * parent must not be NULL.
 */
void setChild(struct TreeNode* parent, struct TreeNode* child) {
    struct TreeNode *last = parent;

    for (; last->next_sibling != NULL; last = last->next_sibling) {
        /* advance to the final sibling */
    }
    last->next_sibling = child;
}
/*
 * Print a node, its following siblings and all their descendants to stdout.
 *
 * Each node is written on its own line as key, followed by [value] when the
 * value is not NULL, and is preceded by one space per depth level. Children
 * are printed right after their parent at depth + 1.
 *
 * node:  first node to print; NULL prints nothing.
 * depth: indentation level of node and its siblings.
 */
void printTree(struct TreeNode* node, int depth) {
    struct TreeNode *cur;
    int col;

    for (cur = node; cur != NULL; cur = cur->next_sibling) {
        for (col = 0; col < depth; col++) {
            printf(" ");
        }
        printf("%s", cur->key);
        if (cur->value != NULL) {
            printf("[%s]", cur->value);
        }
        printf("\n");
        printTree(cur->first_child, depth + 1);
    }
}
/*
 * Parse the configuration file named by argv[1] and print the resulting tree.
 *
 * Returns 0 on success and 1 when the argument is missing, the file cannot
 * be opened, or parsing fails.
 */
int main(int argc, char *argv[]) {
    FILE *input_file;
    int status;

    if (argc < 2) {
        fprintf(stderr, "Usage: %s filename\n", argv[0]);
        return 1;
    }
    input_file = fopen(argv[1], "r");
    if (input_file == NULL) {
        fprintf(stderr, "Error: Cannot open file %s\n", argv[1]);
        return 1;
    }
    printf("read ok\n");
    bind9in = input_file;
    status = yyparse();
    /* The scanner is not used again, so the input can be released here. */
    fclose(input_file);
    if (status != 0) {
        /* A non-zero result means the parse failed; do not report success. */
        return 1;
    }
    printTree(root, 0);
    return 0;
}
自动生成的代码lex.bind9.c中会增加以下宏定义:
/* Renames of the scanner's yy* symbols to their bind9-prefixed names. */
#define yy_create_buffer bind9_create_buffer
#define yy_delete_buffer bind9_delete_buffer
#define yy_scan_buffer bind9_scan_buffer
#define yy_scan_string bind9_scan_string
#define yy_scan_bytes bind9_scan_bytes
#define yy_init_buffer bind9_init_buffer
#define yy_flush_buffer bind9_flush_buffer
#define yy_load_buffer_state bind9_load_buffer_state
#define yy_switch_to_buffer bind9_switch_to_buffer
#define yypush_buffer_state bind9push_buffer_state
#define yypop_buffer_state bind9pop_buffer_state
#define yyensure_buffer_stack bind9ensure_buffer_stack
#define yy_flex_debug bind9_flex_debug
#define yyin bind9in
#define yyleng bind9leng
#define yylex bind9lex
#define yylineno bind9lineno
#define yyout bind9out
#define yyrestart bind9restart
#define yytext bind9text
#define yywrap bind9wrap
#define yyalloc bind9alloc
#define yyrealloc bind9realloc
#define yyfree bind9free
参考及引用
-
https://stackoverflow.com/questions/1634704/multiple-flex-bison-parsers
-
图片from 洪村村


Comments are closed.