This is an archival dump of old wiki content --- see scipy.org for current material.
Please see http://scipy-cookbook.readthedocs.org/

Attachment 'ConfigNumParser_v0.1.py'

Download

   1 #! /usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 
   4 """
   5 This module contains several convenient parsers and functions to easily parse
   6 formatted text into values usable with Python or NumPy functions.
   7 
   8 The main function is parseConfigFile, which reads a text file
   9 structured in sections and converts it into a python object with the same
  10 structure as the file but where information is automatically converted into
  11 python or numpy objects 
  12 (see http://www.scipy.org/Cookbook/Reading_Custom_Text_Files_with_Pyparsing)
  13 
  14 
  15 == Parser definitions ==
  16 
  17  * number  : match string representing integers or floats and return the
  18              corresponding python object  
  19 
  20  * pyValue : match string representing number, None, True, False, NaN or
  21              quoted strings and return the corresponding python object
  22 
  23 
  24 == Parser generators == 
  25 
  26  * variableParser : create a parser to match variable names and return a cleaned version of it
  27 
  28  * paramParser    : create a parser to match a set of parameter definitions
  29 
  30  * tableColParser : create a parser to match a table, defined column by column
  31 
  32  * tableRowParser : create a parser to match a table, defined row by row
  33 
  34  * matrixParser   : create a parser to match a matrix
  35 
  36 See the corresponding docstring for more information and parseConfigFile for an
  37 example of utilisation.
  38 """
  39 from pyparsing import *
  40 from numpy     import array, NAN
  41 from re        import VERBOSE
  42 
__version__ = '0.1'
# Public API of the module: the names exported by a star-import.
# The helpers convertNumber and test_pattern are deliberately not listed.
__all__     = '''number pyValue matrixParser tableRowParser
                 tableColParser paramParser variableParser
                 parseConfigFile
              '''.split()
  48 
  49 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
  50 # Utility functions and definitions
  51 
  52 def variableParser(escapedChars, baseChars=alphanums):
  53     """ Return a parser matching any characters in baseChars separated by
  54     characters defined in escapedChars. Thoses characters are replaced with '_'
  55 
  56     The '_' character is therefore automatically in escapedChars.
  57     """
  58     escapeDef = Word(escapedChars + '_').setParseAction(replaceWith('_'))
  59     whitespaceChars = ''.join( x for x in ' \t\r' if not x in escapedChars )
  60     escapeDef = escapeDef.setWhitespaceChars(whitespaceChars)
  61     return Combine(Word(baseChars) + Optional(OneOrMore(escapeDef + Word(baseChars))))
  62 
  63 def convertNumber(t):
  64     """Convert a string matching a number to a python number"""
  65     if t.float1 or t.float2 or t.float3 : return [float(t[0])]
  66     else                                : return [int(t[0])  ]
  67 
# number : match any number and return the associated python value.
#          The named groups float1 (fractional part after digits), float2
#          (number starting with a dot) and float3 (exponent) are read by
#          convertNumber to choose between int and float.
number = Regex(r"""
        [+-]?                           # optional sign
         (
            (?:\d+(?P<float1>\.\d*)?)   # match 2 or 2.02
          |                             # or
            (?P<float2>\.\d+)           # match .02
         )
         (?P<float3>[Ee][+-]?\d+)?      # optional exponent
        """, flags=VERBOSE
        )
# replace the matched text with the int or float it represents
number.setParseAction(convertNumber)
  80 
# Alternatives accepted as a python value: a number, the keywords True,
# False, NAN (any case) and None replaced with the matching python/numpy
# object, and quoted strings. The list is handed to MatchFirst below, so the
# order is significant: the triple-quoted forms come before the
# single-character quotes.
pyValue_list = [ number                                                        , 
                 Keyword('True').setParseAction(replaceWith(True))             ,
                 Keyword('False').setParseAction(replaceWith(False))           ,
                 Keyword('NAN', caseless=True).setParseAction(replaceWith(NAN)),
                 Keyword('None').setParseAction(replaceWith(None))             ,
                 QuotedString('"""', multiline=True)                           , 
                 QuotedString("'''", multiline=True)                           , 
                 QuotedString('"')                                             , 
                 QuotedString("'")                                             , 
               ]

# Common patterns
# EOL     : end of line, removed from the results
EOL         = LineEnd().suppress()
# pyValue : any alternative of pyValue_list; the whitespace set given to each
#           alternative is ' ', tab and carriage return (no newline)
pyValue     = MatchFirst( e.setWhitespaceChars(' \t\r') for e in pyValue_list)
# unitDef : a unit written between parentheses; the parentheses are removed
unitDef     = Suppress('(') + Word(alphanums + '^*/-._') + Suppress(')')
# keyName : lowercased variable name whose words may be separated by spaces
#           or any of '_-./' (see variableParser)
keyName     = variableParser(' _-./').setParseAction(downcaseTokens)
# keyNameWithoutSpace : same as keyName, but a space is not a separator
keyNameWithoutSpace = variableParser('_-./').setParseAction(downcaseTokens)
  98 
  99 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
 100 # Parameter Definition
 101 
 102 def paramParser(comment='#'):
 103     """ Create a pattern matching any definition of parameters with the form
 104 
 105         variable_name (unit) = value            (unit is optional)
 106 
 107     Variable names can have spaces in them or any characters in '_-./' but
 108     theses characters are replaced with '_' and the resulting variable name
 109     will be cast to lowercase.
 110 
 111     Value can be any standard python value (int, number, None, False, True, NaN
 112     or quoted strings) or a raw string, which can be multiline if additional
 113     lines start with a whitespace. 
 114 
 115     Return a Dict element to allow accessing data using the varible name as a key.
 116 
 117     This Dict has two special fields :
 118         names_ : the list of column names found 
 119         units_ : a dict in the form {key : unit}
 120     """
 121 
 122     def formatBloc(t):
 123         """ Format the result to have a list of (key, values) easily usable with Dict
 124 
 125         Add two fields :
 126             names_ : the list of column names found 
 127             units_ : a dict in the form {key : unit}
 128         """
 129         rows = []
 130 
 131         # store units and names
 132         units = {}
 133         names = [] 
 134 
 135         for row in t :
 136             rows.append(ParseResults([ row.name, row.value ]))
 137             names.append(row.name)
 138             if row.unit : units[row.name] = row.unit[0]
 139 
 140         rows.append( ParseResults([ 'names_', names ]))
 141         rows.append( ParseResults([ 'unit_',  units]))
 142 
 143         return rows
 144 
 145     # rawValue can be multiline but theses lines should start with a Whitespace
 146     rawLine    = CharsNotIn(comment + '\n') + (lineEnd | Suppress(comment+restOfLine))
 147     rawValue   = Combine( rawLine + ZeroOrMore(White(' \t').suppress()+ NotAny('[') + rawLine)) 
 148     rawValue.setParseAction(lambda t: [x.strip() for x in t])
 149 
 150     valueDef   = pyValue | rawValue
 151     paramDef  = keyName('name') + Optional(unitDef)('unit') + Suppress("="+empty) + valueDef('value')
 152     paramBloc = OneOrMore( Group(paramDef)).setParseAction(formatBloc)
 153     
 154     return Dict(paramBloc)
 155 
 156 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
 157 # Table described in columns Definition
 158 
 159 def tableColParser():
 160     """ Define a pattern matching a table described in columns according to this schema : 
 161             Name_1       Name_2     ...      Name_n
 162             (unit_1)    (unit_2)    ...     (unit_n)
 163             value_11    value_21    ...     value_n1
 164               ...         ...       ...       ...
 165 
 166     Names can't contains any whitespaces.
 167     Units are mandatory.
 168 
 169     Value can be any standard python value (int, number, None, False, True, NaN
 170     or quoted strings) or a raw string which can't contains spaces or '['.
 171 
 172     Return a Dict element to allow accessing data using the column name as a key.
 173 
 174     This Dict has two special fields :
 175         names_ : the list of column names found 
 176         units_ : a dict in the form {key : unit}
 177     """
 178 
 179     def formatBloc(t):
 180         """ Format the result to have a list of (key, values) easily usable
 181         with Dict and transform data into array
 182 
 183         Add two fields :
 184             names_ : the list of column names found 
 185             units_ : a dict in the form {key : unit}
 186         """
 187         columns = []
 188 
 189         # store names and units names 
 190         names = t.header
 191         units   = {}
 192 
 193         transposedData = zip(*t.data)
 194         for header, unit, data in zip(t.header, t.unit, transposedData):
 195             units[header] = unit
 196             columns.append(ParseResults([header, array(data)]))
 197 
 198         columns.append(ParseResults(['names_', names]))
 199         columns.append(ParseResults(['unit_'   , units  ]))
 200 
 201         return columns
 202 
 203     def defineColNumber(t):
 204         """ Define unitLine and tabValueLine to match the same number of row than
 205         in header"""
 206         nbcols = len(t.header)
 207         unitLine     << Group( unitDef*nbcols + EOL)
 208         tabValueLine << Group( tabValueDef*nbcols + EOL)
 209 
 210     tabValueDef  = pyValue | CharsNotIn('[ \t\r\n').setWhitespaceChars(" \t")
 211     firstLine    = Group(OneOrMore(keyNameWithoutSpace)+EOL)
 212     unitLine     = Forward()
 213     tabValueLine = Forward()
 214 
 215     tableCol = (   firstLine('header').setParseAction(defineColNumber)
 216                     + unitLine('unit')
 217                     + Group(OneOrMore(tabValueLine))('data')
 218                   ).setParseAction(formatBloc)
 219 
 220     return Dict(tableCol)
 221 
 222 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
 223 # Matrix parser
 224 def matrixParser(pattern=pyValue):
 225     """ Return a pattern matching a matrix containing only element matching pattern"""
 226 
 227     def formatBloc(t):
 228         'return an array object'
 229         return [array(t.asList())]
 230 
 231     def defineColNumber(t):
 232         """ define matrixLine to match the same number of col than t has """
 233         nbcols = len(t[0])
 234         matrixLine << Group( pattern*nbcols + EOL)
 235 
 236     firstLine  = Group( OneOrMore(pattern) + EOL).setParseAction(defineColNumber)
 237     matrixLine = Forward()
 238     matrixDef  = (firstLine + OneOrMore(matrixLine)).setParseAction(formatBloc)
 239 
 240     return matrixDef
 241 
 242 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
 243 def tableRowParser():
 244     """ Define a pattern matching a table described in row following the schema :
 245     
 246         Name_1 (unit)   value_11     value_12   ... value_1n
 247         Name_2 (unit)   value_21     value_22   ... value_2n
 248          ...     ...        ...      ...        ...     ...
 249 
 250     Units are optional.
 251     Name can contains spaces if theyt are followed by an unit, otherwise, they can't.
 252 
 253     Value can be any standard python value (int, number, None, False, True, NaN
 254     or quoted strings) or a raw string which can't contains spaces or '['.
 255 
 256     Return a Dict element to allow accessing data using the column name as a key.
 257 
 258     This Dict has two special fields :
 259         names_ : the list of row names found 
 260         units_ : a dict in the form {key : unit}
 261     """
 262 
 263     def formatBloc(t):
 264         """ Format the result to have a list of (key, values) easily usable with Dict
 265         and transform values into array
 266 
 267         Add two fields :
 268             names_ : the list of row names found 
 269             units_ : a dict in the form {key : unit}
 270         """
 271         rows = []
 272 
 273         # store units and names
 274         units = {}
 275         names = [] 
 276 
 277         for row in t :
 278             rows.append(ParseResults([ row.header, array(tuple(row.value)) ]))
 279             names.append(row.header)
 280             if row.unit : units[row.header] = row.unit[0]
 281 
 282         rows.append( ParseResults([ 'names_', names ]))
 283         rows.append( ParseResults([ 'unit_',  units]))
 284 
 285         return rows
 286 
 287     def defineColNumber(t):
 288         """ Define unitLine and tabValueLine to match the same number of columns than
 289         the first line had"""
 290         nbcols = len(t[0].value)
 291         tabValueLine << Group(rowHeader + Group(tabValueDef*nbcols)('value') + EOL)
 292 
 293     # Table described in rows
 294     tabValueDef  = pyValue | CharsNotIn('[ \t\r\n').setWhitespaceChars(" \t")
 295     rowHeader    = (keyName("header") + unitDef('unit')) | keyNameWithoutSpace('header') 
 296     firstLine    = Group(rowHeader + Group(OneOrMore(tabValueDef))('value') + EOL).setParseAction(defineColNumber)
 297     tabValueLine = Forward()
 298 
 299     tableRowDef = (firstLine + OneOrMore(tabValueLine)).setParseAction(formatBloc)
 300     
 301     return Dict(tableRowDef)
 302 
 303 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
 304 
 305 def test_pattern(pattern, fname='data.txt'):
 306     """ A simple function to test a ParserElement"""
 307     for r, s, t, in pattern.scanString(file(fname).read()):
 308         print 'found : ', r
 309 
 310 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
 311 # section parser
 312 def parseConfigFile(fname):
 313     """ Parse a file structured in section according to the schema
 314             [ section Name ]
 315                 <bloc> 
 316                 ....
 317 
 318     where <bloc> can be any text matching one of elements created by :
 319 
 320      * paramParser    <=> a set of variable definitions 
 321      * tableColParser <=> a table defined column by column
 322      * tableRowParser <=> a table defined row by row
 323      * matrixParser   <=> a Matrix containg only python values or NaN
 324 
 325     Any text after the character # is considered as a comment and is ignored
 326 
 327     Return a Dict element to allow accessing bloc using the section name as a key.
 328     """
 329 
 330     # Creation of the parser
 331     sectionDef = Suppress('[') + keyName + Suppress(']')
 332 
 333     parser = Dict( OneOrMore( 
 334                 Group( 
 335                     sectionDef + (
 336                       paramParser()
 337                     | tableColParser()
 338                     | tableRowParser()
 339                     | matrixParser()
 340                 )
 341     )   )   )
 342 
 343     parser.ignore('#' + restOfLine)
 344 
 345     # parse file
 346     try : 
 347        return parser.parseFile(fname, parseAll=True) 
 348 
 349     except ParseException, pe:
 350         # complete the error message
 351         msg  = "ERROR during parsing of %s,  line %d:" % (fname, pe.lineno)
 352         msg += '\n' + '-'*40 + '\n'
 353         msg += pe.line + '\n'
 354         msg += ' '*(pe.col-1) + '^\n'
 355         msg += '-'*40 + '\n' + pe.msg
 356         pe.msg = msg
 357         raise
 358 
 359 if __name__ == '__main__' :
 360     
 361     from sys    import argv
 362     from pprint import pprint
 363 
 364     if len(argv) < 2 : fname = 'data.txt'
 365     else             : fname = argv[1]
 366 
 367     data = parseConfigFile(fname)
 368     pprint(data.asList())
 369 
 370 # vim: set et sts=4 sw=4:

New Attachment

File to upload
Rename to
Overwrite existing attachment of same name

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.