common/parser.c - lbreakout2 (HEAD)

Tree @HEAD (Download .tar.gz)

parser.c @HEAD — raw · history · blame

/***************************************************************************
                          parser.c  -  description
                             -------------------
    begin                : Sat Mar 9 2002
    copyright            : (C) 2001 by Michael Speck
    email                : kulkanie@gmx.net
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

#include <stdlib.h>
#include <string.h>
#include "parser.h"
#include "tools.h"

/*
====================================================================
Error string.
====================================================================
*/
/* Scratch buffer for the innermost failure reason; it is later wrapped
   into parser_error by the functions that report to the caller. */
static char parser_sub_error[1024];
/* Final message; this is the text parser_get_error() hands out. */
static char parser_error[1024];

/*
====================================================================
This buffer is used to fully load resource files when the 
compact format is used.
====================================================================
*/
/* Capacity of cbuffer in bytes; parser_read_file() keeps one byte free
   for the terminating \0. */
enum { CBUFFER_SIZE = 131072 }; /* 128 KB */
/* Text of the compact-format file currently being parsed. */
static char cbuffer[CBUFFER_SIZE];
/* Start of the next unread line; parser_read_file() resets it to the
   beginning of cbuffer and parser_get_next_line() advances it. */
static char* cbuffer_pos = 0; /* position in cbuffer */

/*
====================================================================
As we need constant strings sometimes we have to define a maximum
length for tokens.
====================================================================
*/
/* Size in bytes of the token buffers used by the tokenizer, including
   the terminating \0. */
enum { PARSER_MAX_TOKEN_LENGTH = 1024 };

/*
====================================================================
Locals
====================================================================
*/

/*
====================================================================
Macro to shorten the fread call for a single character.
====================================================================
*/
/* Reads one character from 'file' into the lvalue 'c'. Evaluates to the
   fread() result: 1 if a character was read, 0 on end of file or error
   (in which case 'c' is left untouched). Arguments and the expansion are
   parenthesized so the macro is safe inside larger expressions. */
#define FILE_READCHAR( file, c ) ( fread( &(c), sizeof( char ), 1, (file) ) )

/*
====================================================================
Find next newline in cbuffer and replace it with \0 and return the 
pointer to the current line.
====================================================================
*/
static char* parser_get_next_line()
{
    char *start = cbuffer_pos;
    char *eol;

    /* an empty remainder means every line has been handed out already */
    if ( *start == 0 )
        return 0;

    eol = strchr( start, 10 );
    if ( eol != 0 ) {
        /* cut the line off at its newline and continue behind it */
        *eol = 0;
        cbuffer_pos = eol + 1;
    }
    else {
        /* last line without newline: park the cursor on the final \0 */
        cbuffer_pos = start + strlen( start );
    }
    return start;
}

/*
====================================================================
Set parse error string: "file:line: error"
====================================================================
*/
static void parser_set_parse_error( char *fname, FILE *file, char *error )
{
    /*
     * fname : name used as message prefix
     * file  : stream whose current position marks where parsing stopped
     * error : reason text appended to the message
     *
     * The line number is found by rewinding and counting the newlines
     * in front of the current position. The message is written to
     * parser_error and truncated if it does not fit.
     */
    long end = ftell( file ); /* ftell() returns long; -1 on failure */
    long pos;
    int line_count = 1;
    int c;

    if ( end > 0 && fseek( file, 0, SEEK_SET ) == 0 ) {
        for ( pos = 0; pos < end; pos++ ) {
            /* stop early if the stream ends or fails so that no stale
               character is counted */
            if ( ( c = fgetc( file ) ) == EOF )
                break;
            if ( c == 10 )
                line_count++;
        }
    }
    /* bounded: fname and error together may exceed the buffer */
    snprintf( parser_error, sizeof( parser_error ), "%s: %i: %s",
              fname, line_count, error );
}

/*
====================================================================
Check if the given character occurs in the symbol list.
If the first symbol is ' ' it is used as wildcard for all
white-spaces.
====================================================================
*/
static int is_symbol( int c, char *symbols )
{
    const char *cur;

    /* a leading ' ' turns the list into "any character code <= 32" */
    if ( symbols[0] == ' ' && c <= 32 )
        return 1;

    /* otherwise look for an exact match; the terminating \0 is never
       compared */
    for ( cur = symbols; *cur != 0; cur++ )
        if ( c == *cur )
            return 1;
    return 0;
}

/*
====================================================================
Move file position forward until reading in the given character.
If stop is ' ' whitespaces will be ignored.
====================================================================
*/
static void file_skip( FILE *file, char stop )
{
    char ch = 0;
    int skipping;

    for ( ;; ) {
        FILE_READCHAR( file, ch );
        /* nothing to step back over once the end of file is hit */
        if ( feof( file ) )
            return;
        /* ' ' means "skip everything with a character code <= 32";
           any other stop means "skip until that character shows up".
           NOTE(review): where char is signed, bytes >= 0x80 compare
           as negative and are therefore skipped as well. */
        if ( stop == ' ' )
            skipping = ( ch <= 32 );
        else
            skipping = ( ch != stop );
        if ( !skipping )
            break;
    }
    /* put the stopping character back so the caller reads it next */
    fseek( file, -1, SEEK_CUR );
}

/*
====================================================================
Read the next token from the current file position where symbols is a
list of characters used to break up the tokens. The symbols
themselves are returned as tokens. If ' ' occurs in the symbol list
it will be ignored and whitespaces are removed automatically.
The token does not exceed PARSER_MAX_TOKEN_LENGTH.
Enclosing ".." are kept at the token. Use file_compare_token()
to test its contents.
Returns False on EoF or if the token exceeds the length limit.
====================================================================
*/
static int file_read_token_intern( FILE *file, char *symbols, char *token )
{
    int len = 0;
    char ch;

    /* start with an empty token and step over leading whitespace */
    token[0] = 0;
    file_skip( file, ' ' );
    FILE_READCHAR( file, ch );
    if ( feof( file ) ) {
        strcpy( parser_sub_error, "unexpected end of file" );
        return 0;
    }

    /* quoted string: copy everything up to the closing quote and keep
       both quotes in the token */
    if ( ch == '"' ) {
        token[len++] = '"';
        for ( ;; ) {
            FILE_READCHAR( file, ch );
            if ( feof( file ) || ch == '"' )
                break;
            token[len++] = ch;
            if ( len == PARSER_MAX_TOKEN_LENGTH - 2 ) {
                /* leave room for the closing quote and the \0 */
                token[len++] = '"';
                token[len] = 0;
                strcpy( parser_sub_error, "token exceeds limit" );
                return 0;
            }
        }
        token[len++] = '"';
        token[len] = 0;
        if ( feof( file ) ) {
            /* the file ended inside the string: report an empty token */
            strcpy( parser_sub_error, "unexpected end of file" );
            token[0] = 0;
            return 0;
        }
        return 1;
    }

    /* a breakpoint symbol is a token of its own */
    if ( is_symbol( ch, symbols ) ) {
        token[0] = ch;
        token[1] = 0;
        return 1;
    }

    /* plain token: the first character is already known to be neither
       a symbol nor past the end of file */
    do {
        token[len++] = ch;
        if ( len == PARSER_MAX_TOKEN_LENGTH - 1 ) {
            token[len] = 0;
            strcpy( parser_sub_error, "token exceeds limit" );
            return 0;
        }
        FILE_READCHAR( file, ch );
    } while ( !is_symbol( ch, symbols ) && !feof( file ) );
    token[len] = 0;

    /* the terminating symbol belongs to the next token: push it back */
    if ( !feof( file ) )
        fseek( file, -1, SEEK_CUR );
    return 1;
}

/*
====================================================================
Skip all tokens until one begins with character 'stop'. This
token is also ignored.
====================================================================
*/
static void file_skip_section( FILE *file, char stop )
{
    char token[PARSER_MAX_TOKEN_LENGTH];

    /* consume tokens until one starts with 'stop' or the file ends;
       the result of the read itself is deliberately not inspected */
    for ( ;; ) {
        file_read_token_intern( file, PARSER_SYMBOLS, token );
        if ( feof( file ) || token[0] == stop )
            break;
    }
}

/*
====================================================================
Read next token and skip comments enclosed in tokens
skip[0], skip[1] (if skip is not NULL).
Return 0 if EoF.
====================================================================
*/
static int file_read_token( FILE *file, char *symbols, char *skip, char *token )
{
    for ( ;; ) {
        /* a failed read (end of file, over-long token) ends the search */
        if ( !file_read_token_intern( file, symbols, token ) )
            return 0;
        /* anything that does not open a comment is the wanted token */
        if ( skip == 0 || token[0] != skip[0] )
            return 1;
        /* comment opener: drop everything up to the closing token */
        file_skip_section( file, skip[1] );
    }
}

/*
====================================================================
Remove quotes if any and return result as newly allocated string.
====================================================================
*/
static char* parser_remove_quotes( char *string )
{
    /*
     * Returns a newly allocated copy of 'string' (caller frees it), or
     * 0 if the allocation fails. If the string starts with '"' that
     * quote is dropped, and a closing '"' is dropped too when present.
     *
     * A lone '"' used to make "strlen - 2" wrap around and overrun the
     * allocation; it now yields an empty string. A string that opens
     * with a quote but has no closing one keeps its last character.
     */
    size_t len = strlen( string );
    const char *start = string;
    char *result;

    if ( len > 0 && start[0] == '"' ) {
        start++;
        len--;
        if ( len > 0 && start[len - 1] == '"' )
            len--;
    }
    result = malloc( len + 1 );
    if ( result == 0 )
        return 0;
    memcpy( result, start, len );
    result[len] = 0;
    return result;
}

/*
====================================================================
Proceed in the given string until it ends or non-whitespace occurs
and return the new position.
====================================================================
*/
static char* string_ignore_whitespace( char *string )
{
    char *cur = string;

    /* everything with a character code <= 32 counts as whitespace; the
       terminating \0 stops the walk */
    while ( *cur != 0 && *cur <= 32 )
        cur++;
    return cur;
}

/*
====================================================================
This function searches file from the current position for the next
pdata entry.
====================================================================
*/
static PData* parser_parse_file( FILE *file )
{
    /*
     * Reads one entry starting at the current file position:
     *   name PARSER_SET value
     *   name PARSER_SET PARSER_LIST_BEGIN value ... PARSER_LIST_END
     *   name PARSER_GROUP_BEGIN entry ... PARSER_GROUP_END
     * Returns a newly allocated PData (release it with parser_free())
     * or 0 on failure, in which case parser_sub_error holds the reason.
     * Error messages are written with snprintf() because a token can be
     * almost as long as the error buffer itself.
     */
    char token[PARSER_MAX_TOKEN_LENGTH];
    PData *pd = 0, *sub = 0;

    /* get name */
    if ( !file_read_token( file, PARSER_SYMBOLS, PARSER_SKIP_SYMBOLS, token ) )
        return 0;
    if ( is_symbol( token[0], PARSER_SYMBOLS ) ) {
        snprintf( parser_sub_error, sizeof( parser_sub_error ),
                  "parse error before '%s'", token );
        return 0;
    }
    pd = calloc( 1, sizeof( PData ) );
    if ( pd == 0 ) {
        snprintf( parser_sub_error, sizeof( parser_sub_error ), "out of memory" );
        return 0;
    }
    pd->name = parser_remove_quotes( token );
    /* check type */
    if ( !file_read_token( file, PARSER_SYMBOLS, PARSER_SKIP_SYMBOLS, token ) )
        goto failure;
    switch ( token[0] ) {
        case PARSER_SET:
            /* assign single value or list */
            pd->values = list_create( LIST_AUTO_DELETE, LIST_NO_CALLBACK );
            if ( !file_read_token( file, PARSER_SYMBOLS, PARSER_SKIP_SYMBOLS, token ) )
                goto failure;
            if ( token[0] != PARSER_LIST_BEGIN ) {
                /* single value: it must not be a breakpoint symbol */
                if ( is_symbol( token[0], PARSER_SYMBOLS ) ) {
                    snprintf( parser_sub_error, sizeof( parser_sub_error ),
                              "parse error before '%s'", token );
                    goto failure;
                }
                list_add( pd->values, parser_remove_quotes( token ) );
            }
            else {
                /* list: collect values until PARSER_LIST_END */
                if ( !file_read_token( file, PARSER_SYMBOLS, PARSER_SKIP_SYMBOLS, token ) )
                    goto failure;
                while ( token[0] != PARSER_LIST_END ) {
                    if ( is_symbol( token[0], PARSER_SYMBOLS ) ) {
                        snprintf( parser_sub_error, sizeof( parser_sub_error ),
                                  "parse error before '%s'", token );
                        goto failure;
                    }
                    list_add( pd->values, parser_remove_quotes( token ) );
                    if ( !file_read_token( file, PARSER_SYMBOLS, PARSER_SKIP_SYMBOLS, token ) )
                        goto failure;
                }
            }
            break;
        case PARSER_GROUP_BEGIN:
            /* check all entries until PARSER_GROUP_END */
            pd->entries = list_create( LIST_NO_AUTO_DELETE, LIST_NO_CALLBACK );
            while ( 1 ) {
                if ( !file_read_token( file, PARSER_SYMBOLS, PARSER_SKIP_SYMBOLS, token ) )
                    goto failure;
                if ( token[0] == PARSER_GROUP_END )
                    break;
                /* the token was only a look-ahead: step back over it so
                   the recursive call reads it as the entry's name. The
                   cast avoids negating the unsigned strlen() result. */
                fseek( file, -(long)strlen( token ), SEEK_CUR );
                sub = parser_parse_file( file );
                if ( sub )
                    list_add( pd->entries, sub );
                else
                    goto failure;
            }
            break;
        default:
            snprintf( parser_sub_error, sizeof( parser_sub_error ),
                      "parse error before '%s'", token );
            goto failure;
    }
    return pd;
failure:
    /* releases the name and whatever values/entries were collected */
    parser_free( &pd );
    return 0;
}

/*
====================================================================
Publics
====================================================================
*/

/*
====================================================================
This function splits a string into tokens using the characters
found in symbols as breakpoints. If the first symbol is ' ' all
whitespaces are used as breakpoints though NOT added as a token 
(thus removed from string).
====================================================================
*/
List* parser_split_string( char *string, char *symbols )
{
    List *tokens = list_create( LIST_AUTO_DELETE, LIST_NO_CALLBACK );
    char *piece = 0;
    int len;

    while ( *string != 0 ) {
        /* with ' ' as first symbol whitespace only separates tokens */
        if ( symbols[0] == ' ' )
            string = string_ignore_whitespace( string );
        if ( *string == 0 )
            break;

        if ( *string == '"' ) {
            /* quoted part: everything up to the closing quote (or the
               end of the string) becomes one token without the quotes */
            string++;
            for ( len = 0; string[len] != 0 && string[len] != '"'; len++ )
                ;
            piece = calloc( len + 1, sizeof( char ) );
            memcpy( piece, string, len );
            piece[len] = 0;
            list_add( tokens, piece );
            string += len;
            if ( *string != 0 )
                string++; /* step over the closing quote */
            continue;
        }

        /* plain token: runs up to the next symbol, quote or the end */
        len = 0;
        while ( string[len] != 0 && string[len] != '"' && !is_symbol( string[len], symbols ) )
            len++;
        /* nothing collected means the first character is a breakpoint
           symbol itself, which becomes a one-character token */
        if ( len == 0 )
            len = 1;
        piece = calloc( len + 1, sizeof( char ) );
        memcpy( piece, string, len );
        piece[len] = 0;
        list_add( tokens, piece );
        string += len;
    }
    return tokens;
}
/*
====================================================================
This is the light version of parser_split_string which checks for
just one character and does not add these glue characters to the
list. It's about 2% faster. Wow.
====================================================================
*/
List *parser_explode_string( char *string, char c )
{
    /*
     * Splits 'string' at every occurrence of 'c' and returns the
     * non-empty pieces as newly allocated strings in a new list; the
     * separator itself is never added.
     *
     * Each piece is allocated at its exact length. The former fixed
     * 64 byte scratch buffer cut every piece but the last one down to
     * 63 characters.
     */
    List *list = list_create( LIST_AUTO_DELETE, LIST_NO_CALLBACK );
    char *sep = 0;
    char *piece = 0;
    size_t len;

    while ( string[0] != 0 && ( sep = strchr( string, c ) ) != 0 ) {
        len = (size_t)( sep - string );
        if ( len > 0 ) {
            piece = malloc( len + 1 );
            /* a piece that cannot be allocated is left out */
            if ( piece != 0 ) {
                memcpy( piece, string, len );
                piece[len] = 0;
                list_add( list, piece );
            }
        }
        string = sep + 1;
    }
    /* whatever follows the last separator */
    if ( string[0] != 0 )
        list_add( list, strdup( string ) );
    return list;
}

/*
====================================================================
This function reads in a whole file and converts it into a
PData tree struct. If an error occurs NULL is returned and 
parser_error is set.
====================================================================
*/
/* Full-format reader: parses entry after entry from 'file' and appends
   each to top->entries. Returns 1 when the end of the file is reached
   cleanly and 0 as soon as an entry fails to parse (parser_sub_error
   then holds the reason). */
static int parser_read_file_full( FILE *file, PData *top )
{
    PData *sub = 0;
    char token[1024]; /* same size as the tokenizer's token limit */

    while ( !feof( file ) ) {
        if ( ( sub = parser_parse_file( file ) ) == 0 )
            return 0;
        list_add( top->entries, sub );
        /* look ahead past comments and whitespace to see whether
           another entry follows */
        if ( !file_read_token( file, PARSER_SYMBOLS, PARSER_SKIP_SYMBOLS, token ) ) {
            /* an empty token means the file simply ended; a non-empty
               one is a real tokenizer error */
            if ( token[0] != 0 )
                return 0;
            break;
        }
        /* undo the look-ahead; the cast avoids negating the unsigned
           strlen() result */
        fseek( file, -(long)strlen( token ), SEEK_CUR );
    }
    return 1;
}
/*
====================================================================
Compact-format reader. Consumes lines from cbuffer and appends what
it finds to section->entries:
  '<name'  opens a subsection that is read recursively
  '>'      closes the current section
  other    a line "name<0xBB>value<0xB0>value..." holding a value list
Returns 1 when the section is closed or the buffer is used up and
0 on a malformed line or failed allocation (parser_sub_error is set).
====================================================================
*/
static int parser_read_file_compact( PData *section )
{
    /* section is the parent pdata that needs some
       entries */
    PData *pd = 0;
    char *line, *cur;
    while ( ( line = parser_get_next_line() ) ) {
        switch ( line[0] ) {
            case '>':
                /* this section is finished */
                return 1;
            case '<':
                /* add a whole subsection */
                pd = calloc( 1, sizeof( PData ) );
                if ( pd == 0 ) {
                    sprintf( parser_sub_error, "out of memory" );
                    return 0;
                }
                pd->name = strdup( line + 1 );
                pd->entries = list_create( LIST_NO_AUTO_DELETE, LIST_NO_CALLBACK );
                /* attach before descending: if the subsection fails,
                   the partial subtree is still reachable from the
                   parent and gets freed together with it */
                list_add( section->entries, pd );
                /* a broken subsection makes the whole read fail instead
                   of being silently continued in the parent */
                if ( !parser_read_file_compact( pd ) )
                    return 0;
                break;
            default:
                /* read values as subsection */
                /* check name first so nothing is allocated (and leaked)
                   for a malformed line. '\xBB' is the byte the former
                   non-ASCII character constant reduces to once strchr()
                   converts it to char; the escape keeps the match
                   independent of the source file's encoding.
                   NOTE(review): assumes compact files store the
                   separators as the single bytes 0xBB and 0xB0 --
                   confirm against the code that writes them. */
                if ( ( cur = strchr( line, '\xBB' ) ) == 0 ) {
                    sprintf( parser_sub_error, "parse error: use '»' for assignment or '<' for section" );
                    return 0;
                }
                pd = calloc( 1, sizeof( PData ) );
                if ( pd == 0 ) {
                    sprintf( parser_sub_error, "out of memory" );
                    return 0;
                }
                /* split the line in place into name and value part */
                cur[0] = 0; cur++;
                pd->name = strdup( line );
                /* get values */
                pd->values = parser_explode_string( cur, '\xB0' );
                /* add to section */
                list_add( section->entries, pd );
                break;
        }
    }
    return 1;
}
/*
====================================================================
Read file 'fname' into a new PData tree named 'tree_name'. A file
starting with '@' is parsed in the compact format (after loading it
into cbuffer), any other file in the full format.
Returns the tree (release it with parser_free()) or 0 on failure, in
which case parser_error holds the message.
====================================================================
*/
PData* parser_read_file( char *tree_name, char *fname )
{
    long size;
    size_t got;
    char magic = 0;
    FILE *file = 0;
    PData *top = 0;
    /* open file */
    if ( ( file = fopen( fname, "r" ) ) == 0 ) {
        snprintf( parser_error, sizeof( parser_error ), "%s: file not found", fname );
        return 0;
    }
    /* create top level pdata */
    top = calloc( 1, sizeof( PData ) );
    if ( top == 0 ) {
        snprintf( parser_error, sizeof( parser_error ), "%s: out of memory", fname );
        fclose( file );
        return 0;
    }
    top->name = strdup( tree_name );
    top->entries = list_create( LIST_NO_AUTO_DELETE, LIST_NO_CALLBACK );
    /* parse */
    FILE_READCHAR( file, magic );
    if ( magic == '@' ) {
        /* the contents start behind the first two characters */
        fseek( file, 0, SEEK_END ); size = ftell( file ) - 2;
        /* a file shorter than two bytes (or a failed ftell) must not
           produce a negative size: it was used as index into cbuffer */
        if ( size < 0 )
            size = 0;
        if ( size >= CBUFFER_SIZE ) {
            fprintf( stderr, "%s: file's too big to fit the compact buffer (128KB)\n", fname );
            size = CBUFFER_SIZE - 1;
        }
        fseek( file, 2, SEEK_SET );
        /* element size 1 makes fread() return the number of bytes it
           actually stored; terminate there rather than at the
           requested size, which may not have been reached */
        got = fread( cbuffer, 1, (size_t)size, file );
        cbuffer[got] = 0;
        /* set indicator to beginning of text */
        cbuffer_pos = cbuffer;
        /* parse cbuffer */
        if ( !parser_read_file_compact( top ) ) {
            parser_set_parse_error( fname, file, parser_sub_error );
            goto failure;
        }
    }
    else {
        /* full format: the first character is part of the data */
        fseek( file, 0, SEEK_SET );
        if ( !parser_read_file_full( file, top ) ) {
            parser_set_parse_error( fname, file, parser_sub_error );
            goto failure;
        }
    }
    /* finalize */
    fclose( file );
    return top;
failure:
    fclose( file );
    parser_free( &top );
    return 0;
}

/*
====================================================================
This function frees a PData tree struct.
====================================================================
*/
void parser_free( PData **pdata )
{
    PData *node = *pdata;
    PData *child = 0;

    /* nothing to do for an already cleared pointer */
    if ( node == 0 )
        return;

    free( node->name );
    if ( node->values )
        list_delete( node->values );
    if ( node->entries ) {
        /* release every subtree first, then the list that held them */
        list_reset( node->entries );
        while ( ( child = list_next( node->entries ) ) != 0 )
            parser_free( &child );
        list_delete( node->entries );
    }
    free( node );
    /* clear the caller's pointer so it cannot be used again */
    *pdata = 0;
}

/*
====================================================================
Functions to access a PData tree. 
'name' is the path within tree 'pd' where subtrees are separated 
by '/' (e.g.: name = 'config/graphics/animations')
parser_get_pdata   : get pdata entry associated with 'name'
parser_get_entries : get list of subtrees (PData structs) in 'name'
parser_get_values  : get value list of 'name'
parser_get_value   : get a single value from value list of 'name'
parser_get_int     : get first value of 'name' converted to integer
parser_get_double  : get first value of 'name' converted to double
parser_get_string  : get first value of 'name' _duplicated_
If an error occurs result is set NULL, False is returned and
parser_error is set.
====================================================================
*/
int parser_get_pdata  ( PData *pd, char *name, PData  **result )
{
    /*
     * Walks down from 'pd' along the '/'-separated components of
     * 'name'. On success *result is the subtree reached (pd itself if
     * 'name' has no components) and 1 is returned. On failure *result
     * is 0, parser_error describes the component that could not be
     * resolved and 0 is returned. Messages are written with snprintf()
     * since tree and path names have no length limit.
     */
    int i, found;
    PData *pd_next = pd;
    PData *entry = 0;
    char *sub = 0;
    List *path = parser_explode_string( name, '/' );
    for ( i = 0, list_reset( path ); i < path->count; i++ ) {
        sub = list_next( path );
        if ( !pd_next->entries ) {
            snprintf( parser_sub_error, sizeof( parser_sub_error ),
                      "%s: no subtrees", pd_next->name );
            goto failure;
        }
        /* look for the child whose name equals this path component */
        list_reset( pd_next->entries ); found = 0;
        while ( ( entry = list_next( pd_next->entries ) ) )
            if ( strcmp( entry->name, sub ) == 0 ) {
                pd_next = entry;
                found = 1;
                break;
            }
        if ( !found ) {
            snprintf( parser_sub_error, sizeof( parser_sub_error ),
                      "%s: subtree '%s' not found", pd_next->name, sub );
            goto failure;
        }
    }
    list_delete( path );
    *result = pd_next;
    return 1;
failure:
    snprintf( parser_error, sizeof( parser_error ),
              "parser_get_pdata: %s/%s: %s", pd->name, name, parser_sub_error );
    list_delete( path );
    *result = 0;
    return 0;
}
/* Get the list of subtrees of 'name' below 'pd'. On success *result
   points at the tree's own list (not a copy) and 1 is returned. If the
   path cannot be resolved or has no subtrees, *result is 0,
   parser_error is set and 0 is returned. */
int parser_get_entries( PData *pd, char *name, List   **result )
{
    PData *entry;
    *result = 0;
    if ( !parser_get_pdata( pd, name, &entry ) ) {
        /* prefix the lower-level message; bounded because the prefix
           makes the text longer than what is already in parser_error */
        snprintf( parser_sub_error, sizeof( parser_sub_error ),
                  "parser_get_entries:\n %s", parser_error );
        snprintf( parser_error, sizeof( parser_error ), "%s", parser_sub_error );
        return 0;
    }
    if ( !entry->entries || entry->entries->count == 0 ) {
        snprintf( parser_error, sizeof( parser_error ),
                  "parser_get_entries: %s/%s: no subtrees", pd->name, name );
        return 0;
    }
    *result = entry->entries;
    return 1;
}
/* Get the value list of 'name' below 'pd'. On success *result points
   at the tree's own list (not a copy) and 1 is returned. If the path
   cannot be resolved or has no values, *result is 0, parser_error is
   set and 0 is returned. */
int parser_get_values ( PData *pd, char *name, List   **result )
{
    PData *entry;
    *result = 0;
    if ( !parser_get_pdata( pd, name, &entry ) ) {
        /* prefix the lower-level message; bounded because the prefix
           makes the text longer than what is already in parser_error */
        snprintf( parser_sub_error, sizeof( parser_sub_error ),
                  "parser_get_values:\n %s", parser_error );
        snprintf( parser_error, sizeof( parser_error ), "%s", parser_sub_error );
        return 0;
    }
    if ( !entry->values || entry->values->count == 0 ) {
        snprintf( parser_error, sizeof( parser_error ),
                  "parser_get_values: %s/%s: no values", pd->name, name );
        return 0;
    }
    *result = entry->values;
    return 1;
}
/* Get value number 'index' (counted from 0) of 'name' below 'pd'. On
   success *result points at the string stored in the tree (not a copy)
   and 1 is returned. On failure, including an index outside the list,
   *result is 0, parser_error is set and 0 is returned. */
int parser_get_value  ( PData *pd, char *name, char   **result, int index )
{
    List *values;
    *result = 0;
    if ( !parser_get_values( pd, name, &values ) ) {
        /* prefix the lower-level message; bounded because the prefix
           makes the text longer than what is already in parser_error */
        snprintf( parser_sub_error, sizeof( parser_sub_error ),
                  "parser_get_value:\n %s", parser_error );
        snprintf( parser_error, sizeof( parser_error ), "%s", parser_sub_error );
        return 0;
    }
    /* negative indices are rejected as well instead of being passed on
       to list_get() */
    if ( index < 0 || index >= values->count ) {
        snprintf( parser_error, sizeof( parser_error ),
                  "parser_get_value: %s/%s: index %i out of range (%i elements)",
                  pd->name, name, index, values->count );
        return 0;
    }
    *result = list_get( values, index );
    return 1;
}
/* Get the first value of 'name' below 'pd' converted to int. Returns 1
   on success; on failure parser_error is set, *result is left
   untouched and 0 is returned. Text that is not a number converts
   to 0. */
int parser_get_int    ( PData *pd, char *name, int     *result )
{
    char *value;
    if ( !parser_get_value( pd, name, &value, 0 ) ) {
        /* prefix the lower-level message; bounded because the prefix
           makes the text longer than what is already in parser_error */
        snprintf( parser_sub_error, sizeof( parser_sub_error ),
                  "parser_get_int:\n %s", parser_error );
        snprintf( parser_error, sizeof( parser_error ), "%s", parser_sub_error );
        return 0;
    }
    /* strtol() gives the same result as atoi() for representable input
       but has defined behavior when the number is out of range */
    *result = (int)strtol( value, 0, 10 );
    return 1;
}
/* Get the first value of 'name' below 'pd' converted to double.
   Returns 1 on success; on failure parser_error is set, *result is
   left untouched and 0 is returned. */
int parser_get_double ( PData *pd, char *name, double  *result )
{
    char *value;
    if ( !parser_get_value( pd, name, &value, 0 ) ) {
        /* prefix the lower-level message; bounded because the prefix
           makes the text longer than what is already in parser_error */
        snprintf( parser_sub_error, sizeof( parser_sub_error ),
                  "parser_get_double:\n %s", parser_error );
        snprintf( parser_error, sizeof( parser_error ), "%s", parser_sub_error );
        return 0;
    }
    *result = strtod( value, 0 );
    return 1;
}
/* Get a duplicate of the first value of 'name' below 'pd'. On success
   *result is a newly allocated copy the caller has to free and 1 is
   returned. On failure *result is 0, parser_error is set and 0 is
   returned. */
int parser_get_string ( PData *pd, char *name, char   **result )
{
    char *value;
    *result = 0;
    if ( !parser_get_value( pd, name, &value, 0 ) ) {
        /* prefix the lower-level message; bounded because the prefix
           makes the text longer than what is already in parser_error */
        snprintf( parser_sub_error, sizeof( parser_sub_error ),
                  "parser_get_string:\n %s", parser_error );
        snprintf( parser_error, sizeof( parser_error ), "%s", parser_sub_error );
        return 0;
    }
    *result = strdup( value );
    return 1;
}

/*
====================================================================
If an error occurred you can query the reason with this function.
====================================================================
*/
char* parser_get_error( void )
{
    /* hands out the static message buffer itself, not a copy; the
       parser functions that report errors overwrite its contents */
    char *message = parser_error;
    return message;
}