flex和bison是常用的词法分析和语法分析工具, flex可以将源文本以指定个规则识别为单词, bison可以将这些识别出来的单词进行结构化处理。下面可以通过一个bind9 config 解析程序进行说明。

词法分析,语法分析编译过程也是两个过程,通过词法分析, 语法分析, 语义分析前端工作完成输入源代码的识别, 通过中间语言,优化方式完成实现代码可扩展和优化(中端工作), 最终通过生成目标码, 一般也成为后端工作, 实现把源代码变成目标代码的过程。

使用flex和bison可以方便的解析配置文件。就是把字符串分解为token,再将这些token解析到配置文件对应的结构。为之后提供查询修改。

 

仅简单分析一下包含zone block的bind9配置

识别token

bind9.l 标识出要是别的关键字, 也可以过滤掉一些注释和空白字符

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
%{
#include <stdio.h>
#include <stdlib.h>
#include "y.tab.h"
#include "tree.h"
%}

%%

[ \t\r\n]+                  /* ignore whitespace*/

"//"(.)*"\n"                /* ignore single-line comments */
"/*"(.)*"*/"                /* ignore multi-line comments */

"{"                         { return LBRACE; }
"}"                         { return RBRACE; }
";"                         { return SEMICOLON; }
"="                         { return EQUALS; }

"zone"                      { return ZONE; }
[a-zA-Z0-9\._/\-]+           { yylval.value = strdup(yytext); return NAME; }
\"[^"]*\"                   { yylval.value = strdup(yytext); return STRING; }

%%

int yywrap() {
    return 1;
}

 

结构解析

将关键组合放入自定义结构体中, 这里使用树形接口,子节点为链表。

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
%{
#include <stdio.h>
#include <stdlib.h>
#include "tree.h"
extern int yylex();
extern int yylineno;
extern char* yytext;
void yyerror( const char *s);
struct TreeNode *root = NULL;
%}

%union {
    struct TreeNode *node;
    char *value;
}

%token ZONE
%token LBRACE RBRACE SEMICOLON EQUALS
%type <node> config_file statements statement zone_option zone_options zone_block
%token <value> NAME STRING

%start config_file

%%

config_file:
    statements
    {
        root = $1;
    }
    ;

statements:
    {
        $$ = NULL;
    }
    | statements statement SEMICOLON
    {
        if ($1 == NULL) {
            $$ = $2;
        } else {
            struct TreeNode *ptr = $1;
            while (ptr->next_sibling != NULL) {
                ptr = ptr->next_sibling;
            }
            ptr->next_sibling = $2;
            $$ = $1;
        }
    }
    ;

statement:
    zone_block
    {
       $$ = $1;
    }
    ;

zone_block:
    ZONE STRING LBRACE zone_options RBRACE {
        $$ = createNode("zone", $2);
        addChild($$, $4);
    }
    ;
zone_options:
    {
        $$ = NULL;
    }
    | zone_options zone_option  SEMICOLON
    {
        if ($1 == NULL) {
            $$ = $2;
        } else {
            struct TreeNode *ptr = $1;
            while (ptr->next_sibling != NULL) {
                ptr = ptr->next_sibling;
            }
            ptr->next_sibling = $2;
            $$ = $1;
        }
    }
    ;
zone_option:
    NAME NAME {
        $$ = createNode($1, $2);
    }
    |
    NAME STRING  {
        $$ = createNode($1, $2);
    }
    ;

%%

void
yyerror( const char *s)
{
  fprintf(stderr,"error: %s on line %d\n", s, yylineno);
}

struct TreeNode* createNode(char* key, char* value) {
    struct TreeNode* node = (struct TreeNode*)malloc(sizeof(struct TreeNode));
    node->key = key;
    node->value = value;
    node->parent = NULL;
    node->first_child = NULL;
    node->next_sibling = NULL;
    return node;
}

struct TreeNode* addChild(struct TreeNode* parent, struct TreeNode* child) {
    child->parent = parent;
    if (parent->first_child == NULL) {
        parent->first_child = child;
    } else {
        setChild(parent->first_child, child);
    }
    return child;
}

void setChild(struct TreeNode* parent, struct TreeNode* child) {
    while (parent->next_sibling != NULL) {
        parent = parent->next_sibling;
    }
    parent->next_sibling = child;
}

void printTree(struct TreeNode* node, int depth) {
    if (node == NULL) {
        return;
    }
    int i = 0;
    while (i < depth) {
        printf("  ");
        i++;
    }
    printf("%s", node->key);
    if (node->value != NULL) {
        printf("[%s]", node->value);
    }
    printf("\n");
    printTree(node->first_child, depth + 1);
    printTree(node->next_sibling, depth);
}

extern FILE *yyin;

int main(int argc, char *argv[]) {

  FILE *input_file;
  if (argc < 2) {
    fprintf(stderr, "Usage: %s filename\n", argv[0]);
    return 1;
  }
  input_file = fopen(argv[1], "r");
  if (input_file == NULL) {
    fprintf(stderr, "Error: Cannot open file %s\n", argv[1]);
    return 1;
  }
  yyin = input_file;
  yyparse();

  printTree(root, 0);

  return 0;
}

 

编译

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
parser: lex.yy.c y.tab.c
        gcc lex.yy.c y.tab.c  -o parser

lex.yy.c: bind9.l
        flex bind9.l

y.tab.c: bind9.y
        bison  --defines=y.tab.h -o  y.tab.c bind9.y

clean:
        rm lex.yy.c y.tab.c y.tab.h parser

 

验证

 

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
$ make
flex bind9.l
yacc -d bind9.y
gcc lex.yy.c y.tab.c  -o parser
$ cat named.conf
zone "." {
        type hint;
        file "root.hints";
};
zone "0.0.127.in-addr.arpa" {
        type master;
        file "zone/127.0.0";
};

zone "land-5.com" {
        type master;
        file "zone/land-5.com";
};

zone "177.6.206.in-addr.arpa" {
        type master;
        file "zone/206.6.177";
};
$ ./parser named.conf
zone["."]
  type[hint]
  file["root.hints"]
zone["0.0.127.in-addr.arpa"]
  type[master]
  file["zone/127.0.0"]
zone["land-5.com"]
  type[master]
  file["zone/land-5.com"]
zone["177.6.206.in-addr.arpa"]
  type[master]
  file["zone/206.6.177"]

 

其他

  1. Multiple flex/bison parsers 问题

如果项目中多组解析库同时使用, 可以通过增加编译选项或者配置的方式区分不同的解析库, 我觉得放到编译条件中比较好, 增加编译选项后

makefile

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
parser: lex.bind9.c bind9.tab.c
        gcc lex.bind9.c bind9.tab.c  -o parser

lex.bind9.c: bind9.l
        flex -Pbind9 bind9.l

bind9.tab.c: bind9.y
        bison --name-prefix=bind9 --defines=y.tab.h  bind9.y

clean:
        rm lex.bind9.c bind9.tab.c parser parsebind9.h

 

bind9.l和bind9.y需要进行响应修改

bind9.l

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
%{
#include <stdio.h>
#include <stdlib.h>
#include "y.tab.h"
#include "tree.h"
%}

%%

[ \t\r\n]+                  /* ignore whitespace*/

"//"(.)*"\n"                /* ignore single-line comments */
"/*"(.)*"*/"                /* ignore multi-line comments */

"{"                         { return LBRACE; }
"}"                         { return RBRACE; }
";"                         { return SEMICOLON; }
"="                         { return EQUALS; }

"zone"                      { return ZONE; }
[a-zA-Z0-9\._/\-]+          { bind9lval.value = strdup(yytext); return NAME; }
\"[^"]*\"                   { bind9lval.value = strdup(yytext); return STRING; }

%%

int yywrap() {
    return 1;
}

 

bind9.y

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
%{
#define YYPARSER
#include <stdio.h>
#include <stdlib.h>
#include "tree.h"
extern int yylex();
int yylineno;
extern char* yytext;
void yyerror( const char *s);
struct TreeNode *root = NULL;
extern FILE *bind9in;
%}

%union {
    struct TreeNode *node;
    char *value;
}

%token ZONE
%token LBRACE RBRACE SEMICOLON EQUALS
%type <node> config_file statements statement zone_option zone_options zone_block
%token <value> NAME STRING

%start config_file

%%

config_file:
    statements
    {
        root = $1;
    }
    ;

statements:
    {
        $$ = NULL;
    }
    | statements statement SEMICOLON
    {
        if ($1 == NULL) {
            $$ = $2;
        } else {
            struct TreeNode *ptr = $1;
            while (ptr->next_sibling != NULL) {
                ptr = ptr->next_sibling;
            }
            ptr->next_sibling = $2;
            $$ = $1;
        }
    }
    ;

statement:
    zone_block
    {
       $$ = $1;
    }
    ;

zone_block:
    ZONE STRING LBRACE zone_options RBRACE {
        $$ = createNode("zone", $2);
        addChild($$, $4);
    }
    ;
zone_options:
    {
        $$ = NULL;
    }
    | zone_options zone_option  SEMICOLON
    {
        if ($1 == NULL) {
            $$ = $2;
        } else {
            struct TreeNode *ptr = $1;
            while (ptr->next_sibling != NULL) {
                ptr = ptr->next_sibling;
            }
            ptr->next_sibling = $2;
            $$ = $1;
        }
    }
    ;
zone_option:
    NAME NAME {
        $$ = createNode($1, $2);
    }
    |
    NAME STRING  {
        $$ = createNode($1, $2);
    }
    ;

%%

void
yyerror( const char *s)
{
  fprintf(stderr,"error: %s on line %d\n", s, yylineno);
}

struct TreeNode* createNode(char* key, char* value) {
    struct TreeNode* node = (struct TreeNode*)malloc(sizeof(struct TreeNode));
    node->key = key;
    node->value = value;
    node->parent = NULL;
    node->first_child = NULL;
    node->next_sibling = NULL;
    return node;
}

struct TreeNode* addChild(struct TreeNode* parent, struct TreeNode* child) {
    child->parent = parent;
    if (parent->first_child == NULL) {
        parent->first_child = child;
    } else {
        setChild(parent->first_child, child);
    }
    return child;
}

void setChild(struct TreeNode* parent, struct TreeNode* child) {
    while (parent->next_sibling != NULL) {
        parent = parent->next_sibling;
    }
    parent->next_sibling = child;
}

void printTree(struct TreeNode* node, int depth) {
    if (node == NULL) {
        return;
    }
    int i = 0;
    while (i < depth) {
        printf("  ");
        i++;
    }
    printf("%s", node->key);
    if (node->value != NULL) {
        printf("[%s]", node->value);
    }
    printf("\n");
    printTree(node->first_child, depth + 1);
    printTree(node->next_sibling, depth);
}

int main(int argc, char *argv[]) {

  FILE *input_file;
  if (argc < 2) {
    fprintf(stderr, "Usage: %s filename\n", argv[0]);
    return 1;
  }
  input_file = fopen(argv[1], "r");
  if (input_file == NULL) {
    fprintf(stderr, "Error: Cannot open file %s\n", argv[1]);
    return 1;
  }
  printf("read ok\n");
  bind9in = input_file;
  yyparse();

  printTree(root, 0);

  return 0;
}

 

 

自动生成代码lex.bind9.c 增加一下宏定义,

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#define yy_create_buffer bind9_create_buffer
#define yy_delete_buffer bind9_delete_buffer
#define yy_scan_buffer bind9_scan_buffer
#define yy_scan_string bind9_scan_string
#define yy_scan_bytes bind9_scan_bytes
#define yy_init_buffer bind9_init_buffer
#define yy_flush_buffer bind9_flush_buffer
#define yy_load_buffer_state bind9_load_buffer_state
#define yy_switch_to_buffer bind9_switch_to_buffer
#define yypush_buffer_state bind9push_buffer_state
#define yypop_buffer_state bind9pop_buffer_state
#define yyensure_buffer_stack bind9ensure_buffer_stack
#define yy_flex_debug bind9_flex_debug
#define yyin bind9in
#define yyleng bind9leng
#define yylex bind9lex
#define yylineno bind9lineno
#define yyout bind9out
#define yyrestart bind9restart
#define yytext bind9text
#define yywrap bind9wrap
#define yyalloc bind9alloc
#define yyrealloc bind9realloc
#define yyfree bind9free

 

 

参考及引用

  1. https://stackoverflow.com/questions/1634704/multiple-flex-bison-parsers

  2. 图片from 洪村村