flex和bison是常用的词法分析和语法分析工具：flex可以将源文本按指定的规则识别为单词(token)，bison可以将这些识别出来的单词进行结构化处理。下面通过一个bind9 config解析程序进行说明。
词法分析和语法分析是编译过程中的两个阶段。编译器前端通过词法分析、语法分析、语义分析完成对输入源代码的识别；中端通过中间语言和各种优化手段实现代码的可扩展与优化；最后生成目标代码，一般称为后端工作。这样就完成了把源代码变成目标代码的过程。
使用flex和bison可以方便地解析配置文件：先把字符串分解为token，再将这些token解析到与配置文件对应的结构中，供之后查询和修改。
仅简单分析一下包含zone block的bind9配置
识别token
bind9.l 标识出要识别的关键字，同时过滤掉注释和空白字符
%{
/*
 * bind9.l - flex scanner for a small subset of the BIND9 named.conf syntax.
 *
 * Produces the punctuation tokens, the ZONE keyword, and NAME / STRING
 * tokens. The text of NAME and STRING is copied onto the heap and passed to
 * the parser through yylval.value (STRING keeps its surrounding quotes).
 * Whitespace and comments are discarded.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "y.tab.h"
#include "tree.h"

/*
 * Duplicate the current token text. Aborts on allocation failure so the
 * parser never receives a NULL semantic value.
 */
static char *dup_token(const char *text)
{
    char *copy = strdup(text);

    if (copy == NULL) {
        fprintf(stderr, "error: out of memory while scanning\n");
        exit(EXIT_FAILURE);
    }
    return copy;
}
%}

%option yylineno
%option noinput nounput

%%

[ \t\r\n]+                      { /* ignore whitespace */ }
"//"[^\n]*                      { /* single-line comment; also matches on a last line without '\n' */ }
"#"[^\n]*                       { /* shell-style single-line comment */ }
"/*"([^*]|\*+[^*/])*\*+"/"      { /* block comment: may span lines, stops at the first terminator */ }

"{"                             { return LBRACE; }
"}"                             { return RBRACE; }
";"                             { return SEMICOLON; }
"="                             { return EQUALS; }
"zone"                          { return ZONE; }

[a-zA-Z0-9\._/\-]+              { yylval.value = dup_token(yytext); return NAME; }
\"[^"]*\"                       { yylval.value = dup_token(yytext); return STRING; }

.                               { /* report on stderr instead of echoing to stdout via the default rule */
                                  fprintf(stderr, "warning: ignoring unexpected character '%s' on line %d\n",
                                          yytext, yylineno); }

%%

/* Returning 1 tells the scanner there is no further input after EOF. */
int yywrap(void)
{
    return 1;
}
结构解析
将关键组合放入自定义结构体中，这里使用树形结构，子节点用链表保存。
%{
/*
 * bind9.y - bison grammar for the zone blocks of a BIND9 named.conf.
 *
 * The parse result is a tree of struct TreeNode stored in `root`: one node
 * per zone statement (linked through next_sibling), each holding its options
 * as a child list. createNode/addChild/setChild/printTree are defined in the
 * epilogue below; their prototypes are presumably provided by tree.h, since
 * the grammar actions call them before the definitions appear.
 */
#include <stdio.h>
#include <stdlib.h>
#include "tree.h"

extern int yylex(void);
extern int yylineno;
extern char *yytext;
extern FILE *yyin;

void yyerror(const char *s);

struct TreeNode *root = NULL;

/*
 * Append `node` to the end of the sibling list starting at `list`.
 * Returns the head of the resulting list (which is `node` when the list was
 * empty). Shared by the `statements` and `zone_options` rules.
 */
static struct TreeNode *appendSibling(struct TreeNode *list, struct TreeNode *node)
{
    struct TreeNode *tail = list;

    if (list == NULL) {
        return node;
    }
    while (tail->next_sibling != NULL) {
        tail = tail->next_sibling;
    }
    tail->next_sibling = node;
    return list;
}
%}

%union {
    struct TreeNode *node;
    char *value;
}

%token ZONE
%token LBRACE RBRACE SEMICOLON EQUALS
%type <node> config_file statements statement zone_option zone_options zone_block
%token <value> NAME STRING

%start config_file

%%

config_file
    : statements
        { root = $1; }
    ;

statements
    : /* empty */
        { $$ = NULL; }
    | statements statement SEMICOLON
        { $$ = appendSibling($1, $2); }
    ;

statement
    : zone_block
        { $$ = $1; }
    ;

zone_block
    : ZONE STRING LBRACE zone_options RBRACE
        {
            /* The key is a string literal here, while option nodes carry
             * keys that the lexer allocated with strdup. */
            $$ = createNode("zone", $2);
            /* $4 is NULL for an empty body such as: zone "x" { }; */
            addChild($$, $4);
        }
    ;

zone_options
    : /* empty */
        { $$ = NULL; }
    | zone_options zone_option SEMICOLON
        { $$ = appendSibling($1, $2); }
    ;

zone_option
    : NAME NAME
        { $$ = createNode($1, $2); }
    | NAME STRING
        { $$ = createNode($1, $2); }
    ;

%%

/* Called by the generated parser on a syntax error. */
void yyerror(const char *s)
{
    fprintf(stderr, "error: %s on line %d\n", s, yylineno);
}

/*
 * Allocate an unlinked node that stores the given key and value pointers
 * (the strings are not copied). Exits the program if allocation fails.
 */
struct TreeNode* createNode(char* key, char* value)
{
    struct TreeNode *node = malloc(sizeof *node);

    if (node == NULL) {
        fprintf(stderr, "error: out of memory\n");
        exit(EXIT_FAILURE);
    }
    node->key = key;
    node->value = value;
    node->parent = NULL;
    node->first_child = NULL;
    node->next_sibling = NULL;
    return node;
}

/*
 * Attach `child` (and every node already chained after it through
 * next_sibling) to the end of `parent`'s child list.
 * A NULL parent or child is a no-op. Returns `child`.
 */
struct TreeNode* addChild(struct TreeNode* parent, struct TreeNode* child)
{
    struct TreeNode *n;

    if (parent == NULL || child == NULL) {
        return child;
    }
    /* `child` may be the head of a whole list: give every member its parent. */
    for (n = child; n != NULL; n = n->next_sibling) {
        n->parent = parent;
    }
    if (parent->first_child == NULL) {
        parent->first_child = child;
    } else {
        setChild(parent->first_child, child);
    }
    return child;
}

/*
 * Append `child` after the last node of the sibling chain that starts at
 * `parent` (despite the name, `parent` is the first sibling of that chain).
 */
void setChild(struct TreeNode* parent, struct TreeNode* child)
{
    while (parent->next_sibling != NULL) {
        parent = parent->next_sibling;
    }
    parent->next_sibling = child;
}

/*
 * Print `node`, its descendants and its following siblings, one node per
 * line as key[value], indented by one space per depth level.
 */
void printTree(struct TreeNode* node, int depth)
{
    /* Iterate over siblings; recurse only into children. */
    for (; node != NULL; node = node->next_sibling) {
        for (int i = 0; i < depth; i++) {
            printf(" ");
        }
        printf("%s", node->key);
        if (node->value != NULL) {
            printf("[%s]", node->value);
        }
        printf("\n");
        printTree(node->first_child, depth + 1);
    }
}

/* Parse the file named on the command line and print the resulting tree. */
int main(int argc, char *argv[])
{
    if (argc < 2) {
        fprintf(stderr, "Usage: %s filename\n", argv[0]);
        return 1;
    }

    FILE *input_file = fopen(argv[1], "r");
    if (input_file == NULL) {
        fprintf(stderr, "Error: Cannot open file %s\n", argv[1]);
        return 1;
    }

    yyin = input_file;
    int status = yyparse();
    fclose(input_file);

    if (status != 0) {
        /* yyerror has already reported the location. */
        return 1;
    }

    printTree(root, 0);
    return 0;
}
编译
# Build the bind9 config parser from the flex scanner and the bison grammar.
.PHONY: clean

parser: lex.yy.c y.tab.c tree.h
	gcc lex.yy.c y.tab.c -o parser

lex.yy.c: bind9.l
	flex bind9.l

# bison also writes y.tab.h, the token header included by bind9.l.
y.tab.c: bind9.y
	bison --defines=y.tab.h -o y.tab.c bind9.y

# -f: do not fail when some of the generated files are already gone.
clean:
	rm -f lex.yy.c y.tab.c y.tab.h parser
验证
$ make
flex bind9.l
bison --defines=y.tab.h -o y.tab.c bind9.y
gcc lex.yy.c y.tab.c -o parser
$ cat named.conf
zone "." {
    type hint;
    file "root.hints";
};
zone "0.0.127.in-addr.arpa" {
    type master;
    file "zone/127.0.0";
};
zone "land-5.com" {
    type master;
    file "zone/land-5.com";
};
zone "177.6.206.in-addr.arpa" {
    type master;
    file "zone/206.6.177";
};
$ ./parser named.conf
zone["."]
 type[hint]
 file["root.hints"]
zone["0.0.127.in-addr.arpa"]
 type[master]
 file["zone/127.0.0"]
zone["land-5.com"]
 type[master]
 file["zone/land-5.com"]
zone["177.6.206.in-addr.arpa"]
 type[master]
 file["zone/206.6.177"]
其他
-
Multiple flex/bison parsers 问题
如果项目中同时使用多组解析库，可以通过增加编译选项或者配置的方式区分不同的解析库。我觉得放到编译选项中比较好，增加编译选项后各文件如下。
makefile
# Build the parser with every flex/bison symbol prefixed by "bind9", so that
# it can be linked together with other flex/bison parsers.
.PHONY: clean

parser: lex.bind9.c bind9.tab.c tree.h
	gcc lex.bind9.c bind9.tab.c -o parser

# -Pbind9 renames the scanner symbols (yylex -> bind9lex, ...) and makes
# lex.bind9.c the default output file.
lex.bind9.c: bind9.l
	flex -Pbind9 bind9.l

# Also writes y.tab.h, the token header included by bind9.l.
bind9.tab.c: bind9.y
	bison --name-prefix=bind9 --defines=y.tab.h -o bind9.tab.c bind9.y

# Remove exactly what the rules above generate (the header is y.tab.h).
clean:
	rm -f lex.bind9.c bind9.tab.c y.tab.h parser
bind9.l和bind9.y需要进行相应修改
bind9.l
%{
/*
 * bind9.l - flex scanner for a small subset of the BIND9 named.conf syntax,
 * meant to be generated with `flex -Pbind9`.
 *
 * With that prefix the semantic value variable exported by the parser is
 * bind9lval, so it is referenced by that name below. The text of NAME and
 * STRING tokens is copied onto the heap (STRING keeps its quotes).
 * Whitespace and comments are discarded.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "y.tab.h"
#include "tree.h"

/*
 * Duplicate the current token text. Aborts on allocation failure so the
 * parser never receives a NULL semantic value.
 */
static char *dup_token(const char *text)
{
    char *copy = strdup(text);

    if (copy == NULL) {
        fprintf(stderr, "error: out of memory while scanning\n");
        exit(EXIT_FAILURE);
    }
    return copy;
}
%}

%option yylineno
%option noinput nounput

%%

[ \t\r\n]+                      { /* ignore whitespace */ }
"//"[^\n]*                      { /* single-line comment; also matches on a last line without '\n' */ }
"#"[^\n]*                       { /* shell-style single-line comment */ }
"/*"([^*]|\*+[^*/])*\*+"/"      { /* block comment: may span lines, stops at the first terminator */ }

"{"                             { return LBRACE; }
"}"                             { return RBRACE; }
";"                             { return SEMICOLON; }
"="                             { return EQUALS; }
"zone"                          { return ZONE; }

[a-zA-Z0-9\._/\-]+              { bind9lval.value = dup_token(yytext); return NAME; }
\"[^"]*\"                       { bind9lval.value = dup_token(yytext); return STRING; }

.                               { /* report on stderr instead of echoing to stdout via the default rule */
                                  fprintf(stderr, "warning: ignoring unexpected character '%s' on line %d\n",
                                          yytext, yylineno); }

%%

/* Returning 1 tells the scanner there is no further input after EOF. */
int yywrap(void)
{
    return 1;
}
bind9.y
%{
/*
 * bind9.y - bison grammar for the zone blocks of a BIND9 named.conf, built
 * with `bison --name-prefix=bind9` next to a scanner from `flex -Pbind9`.
 *
 * The scanner's globals therefore carry the bind9 prefix (bind9in,
 * bind9lineno, bind9text) and are declared under those names here.
 * The parse result is a tree of struct TreeNode stored in `root`.
 * createNode/addChild/setChild/printTree are defined in the epilogue below;
 * their prototypes are presumably provided by tree.h, since the grammar
 * actions call them before the definitions appear.
 */
#define YYPARSER
#include <stdio.h>
#include <stdlib.h>
#include "tree.h"

extern int yylex(void);
/* Line counter owned by the scanner; a local `int yylineno` would be a
 * separate variable that nothing ever updates. */
extern int bind9lineno;
extern char *bind9text;
extern FILE *bind9in;

void yyerror(const char *s);

struct TreeNode *root = NULL;

/*
 * Append `node` to the end of the sibling list starting at `list`.
 * Returns the head of the resulting list (which is `node` when the list was
 * empty). Shared by the `statements` and `zone_options` rules.
 */
static struct TreeNode *appendSibling(struct TreeNode *list, struct TreeNode *node)
{
    struct TreeNode *tail = list;

    if (list == NULL) {
        return node;
    }
    while (tail->next_sibling != NULL) {
        tail = tail->next_sibling;
    }
    tail->next_sibling = node;
    return list;
}
%}

%union {
    struct TreeNode *node;
    char *value;
}

%token ZONE
%token LBRACE RBRACE SEMICOLON EQUALS
%type <node> config_file statements statement zone_option zone_options zone_block
%token <value> NAME STRING

%start config_file

%%

config_file
    : statements
        { root = $1; }
    ;

statements
    : /* empty */
        { $$ = NULL; }
    | statements statement SEMICOLON
        { $$ = appendSibling($1, $2); }
    ;

statement
    : zone_block
        { $$ = $1; }
    ;

zone_block
    : ZONE STRING LBRACE zone_options RBRACE
        {
            /* The key is a string literal here, while option nodes carry
             * keys that the lexer allocated with strdup. */
            $$ = createNode("zone", $2);
            /* $4 is NULL for an empty body such as: zone "x" { }; */
            addChild($$, $4);
        }
    ;

zone_options
    : /* empty */
        { $$ = NULL; }
    | zone_options zone_option SEMICOLON
        { $$ = appendSibling($1, $2); }
    ;

zone_option
    : NAME NAME
        { $$ = createNode($1, $2); }
    | NAME STRING
        { $$ = createNode($1, $2); }
    ;

%%

/* Called by the generated parser on a syntax error. */
void yyerror(const char *s)
{
    fprintf(stderr, "error: %s on line %d\n", s, bind9lineno);
}

/*
 * Allocate an unlinked node that stores the given key and value pointers
 * (the strings are not copied). Exits the program if allocation fails.
 */
struct TreeNode* createNode(char* key, char* value)
{
    struct TreeNode *node = malloc(sizeof *node);

    if (node == NULL) {
        fprintf(stderr, "error: out of memory\n");
        exit(EXIT_FAILURE);
    }
    node->key = key;
    node->value = value;
    node->parent = NULL;
    node->first_child = NULL;
    node->next_sibling = NULL;
    return node;
}

/*
 * Attach `child` (and every node already chained after it through
 * next_sibling) to the end of `parent`'s child list.
 * A NULL parent or child is a no-op. Returns `child`.
 */
struct TreeNode* addChild(struct TreeNode* parent, struct TreeNode* child)
{
    struct TreeNode *n;

    if (parent == NULL || child == NULL) {
        return child;
    }
    /* `child` may be the head of a whole list: give every member its parent. */
    for (n = child; n != NULL; n = n->next_sibling) {
        n->parent = parent;
    }
    if (parent->first_child == NULL) {
        parent->first_child = child;
    } else {
        setChild(parent->first_child, child);
    }
    return child;
}

/*
 * Append `child` after the last node of the sibling chain that starts at
 * `parent` (despite the name, `parent` is the first sibling of that chain).
 */
void setChild(struct TreeNode* parent, struct TreeNode* child)
{
    while (parent->next_sibling != NULL) {
        parent = parent->next_sibling;
    }
    parent->next_sibling = child;
}

/*
 * Print `node`, its descendants and its following siblings, one node per
 * line as key[value], indented by one space per depth level.
 */
void printTree(struct TreeNode* node, int depth)
{
    /* Iterate over siblings; recurse only into children. */
    for (; node != NULL; node = node->next_sibling) {
        for (int i = 0; i < depth; i++) {
            printf(" ");
        }
        printf("%s", node->key);
        if (node->value != NULL) {
            printf("[%s]", node->value);
        }
        printf("\n");
        printTree(node->first_child, depth + 1);
    }
}

/* Parse the file named on the command line and print the resulting tree. */
int main(int argc, char *argv[])
{
    if (argc < 2) {
        fprintf(stderr, "Usage: %s filename\n", argv[0]);
        return 1;
    }

    FILE *input_file = fopen(argv[1], "r");
    if (input_file == NULL) {
        fprintf(stderr, "Error: Cannot open file %s\n", argv[1]);
        return 1;
    }
    printf("read ok\n");

    bind9in = input_file;
    int status = yyparse();
    fclose(input_file);

    if (status != 0) {
        /* yyerror has already reported the location. */
        return 1;
    }

    printTree(root, 0);
    return 0;
}
自动生成的代码lex.bind9.c中会增加以下宏定义：
/*
 * Excerpt from the generated lex.bind9.c: each macro maps a default
 * yy-prefixed scanner name to its bind9-prefixed counterpart.
 */
#define yy_create_buffer bind9_create_buffer
#define yy_delete_buffer bind9_delete_buffer
#define yy_scan_buffer bind9_scan_buffer
#define yy_scan_string bind9_scan_string
#define yy_scan_bytes bind9_scan_bytes
#define yy_init_buffer bind9_init_buffer
#define yy_flush_buffer bind9_flush_buffer
#define yy_load_buffer_state bind9_load_buffer_state
#define yy_switch_to_buffer bind9_switch_to_buffer
#define yypush_buffer_state bind9push_buffer_state
#define yypop_buffer_state bind9pop_buffer_state
#define yyensure_buffer_stack bind9ensure_buffer_stack
#define yy_flex_debug bind9_flex_debug
#define yyin bind9in
#define yyleng bind9leng
#define yylex bind9lex
#define yylineno bind9lineno
#define yyout bind9out
#define yyrestart bind9restart
#define yytext bind9text
#define yywrap bind9wrap
#define yyalloc bind9alloc
#define yyrealloc bind9realloc
#define yyfree bind9free
参考及引用
-
https://stackoverflow.com/questions/1634704/multiple-flex-bison-parsers
-
图片from 洪村村
Comments are closed.