用 flex和bison 解析配置文件

flex和bison是常用的词法分析和语法分析工具, flex可以将源文本以指定个规则识别为单词, bison可以将这些识别出来的单词进行结构化处理。下面可以通过一个bind9 config 解析程序进行说明。

词法分析,语法分析编译过程也是两个过程,通过词法分析, 语法分析, 语义分析前端工作完成输入源代码的识别, 通过中间语言,优化方式完成实现代码可扩展和优化(中端工作), 最终通过生成目标码, 一般也成为后端工作, 实现把源代码变成目标代码的过程。

使用flex和bison可以方便的解析配置文件。就是把字符串分解为token,再将这些token解析到配置文件对应的结构。为之后提供查询修改。

 

仅简单分析一下包含zone block的bind9配置

识别token

bind9.l 标识出要是别的关键字, 也可以过滤掉一些注释和空白字符

%{
#include <stdio.h>
#include <stdlib.h>
#include "y.tab.h"
#include "tree.h"
%}

%%

[ \t\r\n]+                  /* ignore whitespace*/

"//"(.)*"\n"                /* ignore single-line comments */
"/*"(.)*"*/"                /* ignore multi-line comments */

"{"                         { return LBRACE; }
"}"                         { return RBRACE; }
";"                         { return SEMICOLON; }
"="                         { return EQUALS; }

"zone"                      { return ZONE; }
[a-zA-Z0-9\._/\-]+           { yylval.value = strdup(yytext); return NAME; }
\"[^"]*\"                   { yylval.value = strdup(yytext); return STRING; }


%%

int yywrap() {
    return 1;
}

 

结构解析

将关键组合放入自定义结构体中, 这里使用树形接口,子节点为链表。

%{
#include <stdio.h>
#include <stdlib.h>
#include "tree.h"
extern int yylex();
extern int yylineno;
extern char* yytext;
void yyerror( const char *s);
struct TreeNode *root = NULL;
%}

%union {
    struct TreeNode *node;
    char *value;
}

%token ZONE
%token LBRACE RBRACE SEMICOLON EQUALS
%type <node> config_file statements statement zone_option zone_options zone_block
%token <value> NAME STRING

%start config_file

%%

config_file:
    statements
    {
        root = $1;
    }
    ;

statements:
    {
        $$ = NULL;
    }
    | statements statement SEMICOLON
    {
        if ($1 == NULL) {
            $$ = $2;
        } else {
            struct TreeNode *ptr = $1;
            while (ptr->next_sibling != NULL) {
                ptr = ptr->next_sibling;
            }
            ptr->next_sibling = $2;
            $$ = $1;
        }
    }
    ;

statement:
    zone_block
    {
       $$ = $1;
    }
    ;

zone_block:
    ZONE STRING LBRACE zone_options RBRACE {
        $$ = createNode("zone", $2);
        addChild($$, $4);
    }
    ;
zone_options:
    {
        $$ = NULL;
    }
    | zone_options zone_option  SEMICOLON
    {
        if ($1 == NULL) {
            $$ = $2;
        } else {
            struct TreeNode *ptr = $1;
            while (ptr->next_sibling != NULL) {
                ptr = ptr->next_sibling;
            }
            ptr->next_sibling = $2;
            $$ = $1;
        }
    }
    ;
zone_option:
    NAME NAME {
        $$ = createNode($1, $2);
    }
    |
    NAME STRING  {
        $$ = createNode($1, $2);
    }
    ;



%%

void
yyerror( const char *s)
{
  fprintf(stderr,"error: %s on line %d\n", s, yylineno);
}

struct TreeNode* createNode(char* key, char* value) {
    struct TreeNode* node = (struct TreeNode*)malloc(sizeof(struct TreeNode));
    node->key = key;
    node->value = value;
    node->parent = NULL;
    node->first_child = NULL;
    node->next_sibling = NULL;
    return node;
}

struct TreeNode* addChild(struct TreeNode* parent, struct TreeNode* child) {
    child->parent = parent;
    if (parent->first_child == NULL) {
        parent->first_child = child;
    } else {
        setChild(parent->first_child, child);
    }
    return child;
}

void setChild(struct TreeNode* parent, struct TreeNode* child) {
    while (parent->next_sibling != NULL) {
        parent = parent->next_sibling;
    }
    parent->next_sibling = child;
}

void printTree(struct TreeNode* node, int depth) {
    if (node == NULL) {
        return;
    }
    int i = 0;
    while (i < depth) {
        printf("  ");
        i++;
    }
    printf("%s", node->key);
    if (node->value != NULL) {
        printf("[%s]", node->value);
    }
    printf("\n");
    printTree(node->first_child, depth + 1);
    printTree(node->next_sibling, depth);
}

extern FILE *yyin;

int main(int argc, char *argv[]) {

  FILE *input_file;
  if (argc < 2) {
    fprintf(stderr, "Usage: %s filename\n", argv[0]);
    return 1;
  }
  input_file = fopen(argv[1], "r");
  if (input_file == NULL) {
    fprintf(stderr, "Error: Cannot open file %s\n", argv[1]);
    return 1;
  }
  yyin = input_file;
  yyparse();

  printTree(root, 0);

  return 0;
}

 

编译

parser: lex.yy.c y.tab.c
        gcc lex.yy.c y.tab.c  -o parser

lex.yy.c: bind9.l
        flex bind9.l

y.tab.c: bind9.y
        bison  --defines=y.tab.h -o  y.tab.c bind9.y

clean:
        rm lex.yy.c y.tab.c y.tab.h parser

 

验证

 

$ make
flex bind9.l
yacc -d bind9.y
gcc lex.yy.c y.tab.c  -o parser
$ cat named.conf
zone "." {
        type hint;
        file "root.hints";
};
zone "0.0.127.in-addr.arpa" {
        type master;
        file "zone/127.0.0";
};

zone "land-5.com" {
        type master;
        file "zone/land-5.com";
};

zone "177.6.206.in-addr.arpa" {
        type master;
        file "zone/206.6.177";
};
$ ./parser named.conf
zone["."]
  type[hint]
  file["root.hints"]
zone["0.0.127.in-addr.arpa"]
  type[master]
  file["zone/127.0.0"]
zone["land-5.com"]
  type[master]
  file["zone/land-5.com"]
zone["177.6.206.in-addr.arpa"]
  type[master]
  file["zone/206.6.177"]

 

其他

  1. Multiple flex/bison parsers 问题

如果项目中多组解析库同时使用, 可以通过增加编译选项或者配置的方式区分不同的解析库, 我觉得放到编译条件中比较好, 增加编译选项后

makefile

parser: lex.bind9.c bind9.tab.c
        gcc lex.bind9.c bind9.tab.c  -o parser

lex.bind9.c: bind9.l
        flex -Pbind9 bind9.l

bind9.tab.c: bind9.y
        bison --name-prefix=bind9 --defines=y.tab.h  bind9.y

clean:
        rm lex.bind9.c bind9.tab.c parser parsebind9.h

 

bind9.l和bind9.y需要进行响应修改

bind9.l

%{
#include <stdio.h>
#include <stdlib.h>
#include "y.tab.h"
#include "tree.h"
%}


%%

[ \t\r\n]+                  /* ignore whitespace*/

"//"(.)*"\n"                /* ignore single-line comments */
"/*"(.)*"*/"                /* ignore multi-line comments */

"{"                         { return LBRACE; }
"}"                         { return RBRACE; }
";"                         { return SEMICOLON; }
"="                         { return EQUALS; }

"zone"                      { return ZONE; }
[a-zA-Z0-9\._/\-]+          { bind9lval.value = strdup(yytext); return NAME; }
\"[^"]*\"                   { bind9lval.value = strdup(yytext); return STRING; }


%%

int yywrap() {
    return 1;
}

 

bind9.y

%{
#define YYPARSER
#include <stdio.h>
#include <stdlib.h>
#include "tree.h"
extern int yylex();
int yylineno;
extern char* yytext;
void yyerror( const char *s);
struct TreeNode *root = NULL;
extern FILE *bind9in;
%}

%union {
    struct TreeNode *node;
    char *value;
}

%token ZONE
%token LBRACE RBRACE SEMICOLON EQUALS
%type <node> config_file statements statement zone_option zone_options zone_block
%token <value> NAME STRING

%start config_file

%%

config_file:
    statements
    {
        root = $1;
    }
    ;

statements:
    {
        $$ = NULL;
    }
    | statements statement SEMICOLON
    {
        if ($1 == NULL) {
            $$ = $2;
        } else {
            struct TreeNode *ptr = $1;
            while (ptr->next_sibling != NULL) {
                ptr = ptr->next_sibling;
            }
            ptr->next_sibling = $2;
            $$ = $1;
        }
    }
    ;

statement:
    zone_block
    {
       $$ = $1;
    }
    ;

zone_block:
    ZONE STRING LBRACE zone_options RBRACE {
        $$ = createNode("zone", $2);
        addChild($$, $4);
    }
    ;
zone_options:
    {
        $$ = NULL;
    }
    | zone_options zone_option  SEMICOLON
    {
        if ($1 == NULL) {
            $$ = $2;
        } else {
            struct TreeNode *ptr = $1;
            while (ptr->next_sibling != NULL) {
                ptr = ptr->next_sibling;
            }
            ptr->next_sibling = $2;
            $$ = $1;
        }
    }
    ;
zone_option:
    NAME NAME {
        $$ = createNode($1, $2);
    }
    |
    NAME STRING  {
        $$ = createNode($1, $2);
    }
    ;



%%

void
yyerror( const char *s)
{
  fprintf(stderr,"error: %s on line %d\n", s, yylineno);
}

struct TreeNode* createNode(char* key, char* value) {
    struct TreeNode* node = (struct TreeNode*)malloc(sizeof(struct TreeNode));
    node->key = key;
    node->value = value;
    node->parent = NULL;
    node->first_child = NULL;
    node->next_sibling = NULL;
    return node;
}

struct TreeNode* addChild(struct TreeNode* parent, struct TreeNode* child) {
    child->parent = parent;
    if (parent->first_child == NULL) {
        parent->first_child = child;
    } else {
        setChild(parent->first_child, child);
    }
    return child;
}

void setChild(struct TreeNode* parent, struct TreeNode* child) {
    while (parent->next_sibling != NULL) {
        parent = parent->next_sibling;
    }
    parent->next_sibling = child;
}

void printTree(struct TreeNode* node, int depth) {
    if (node == NULL) {
        return;
    }
    int i = 0;
    while (i < depth) {
        printf("  ");
        i++;
    }
    printf("%s", node->key);
    if (node->value != NULL) {
        printf("[%s]", node->value);
    }
    printf("\n");
    printTree(node->first_child, depth + 1);
    printTree(node->next_sibling, depth);
}


int main(int argc, char *argv[]) {

  FILE *input_file;
  if (argc < 2) {
    fprintf(stderr, "Usage: %s filename\n", argv[0]);
    return 1;
  }
  input_file = fopen(argv[1], "r");
  if (input_file == NULL) {
    fprintf(stderr, "Error: Cannot open file %s\n", argv[1]);
    return 1;
  }
  printf("read ok\n");
  bind9in = input_file;
  yyparse();

  printTree(root, 0);

  return 0;
}

 

 

自动生成代码lex.bind9.c 增加一下宏定义,

#define yy_create_buffer bind9_create_buffer
#define yy_delete_buffer bind9_delete_buffer
#define yy_scan_buffer bind9_scan_buffer
#define yy_scan_string bind9_scan_string
#define yy_scan_bytes bind9_scan_bytes
#define yy_init_buffer bind9_init_buffer
#define yy_flush_buffer bind9_flush_buffer
#define yy_load_buffer_state bind9_load_buffer_state
#define yy_switch_to_buffer bind9_switch_to_buffer
#define yypush_buffer_state bind9push_buffer_state
#define yypop_buffer_state bind9pop_buffer_state
#define yyensure_buffer_stack bind9ensure_buffer_stack
#define yy_flex_debug bind9_flex_debug
#define yyin bind9in
#define yyleng bind9leng
#define yylex bind9lex
#define yylineno bind9lineno
#define yyout bind9out
#define yyrestart bind9restart
#define yytext bind9text
#define yywrap bind9wrap
#define yyalloc bind9alloc
#define yyrealloc bind9realloc
#define yyfree bind9free

 

 

参考及引用

  1. https://stackoverflow.com/questions/1634704/multiple-flex-bison-parsers

  2. 图片from 洪村村

 

 

Comments are closed.