This is an archival dump of old wiki content; it is no longer maintained.
Please see the current project documentation for up-to-date material.

Attachment ''


   1 ## MetaArray
   2 ## Luke Campagnola, 2008
   3 ##
   4 ## Class for storing n-dimensional data sets with per-axis meta information
   5 ## Free for any kind of use.
   8 from numpy import ndarray, array, empty, fromstring, arange
   9 import types, copy
  12 def axis(name=None, cols=None, values=None, units=None):
  13   """Convenience function for generating axis descriptions when defining MetaArrays
  15   Example:
  16     MetaArray([...], info=[
  17       axis('Time', values=[0.0, 0.1, 0.2, 0.3], units='s'), 
  18       axis('Signal', cols=[('V0', 'V', 'Voltage0'), ('V1', 'V', 'Voltage1'), ('I0', 'A', 'Current0')])
  19     ])
  20   """
  21   ax = {}
  22   cNameOrder = ['name', 'units', 'title']
  23   if name is not None:
  24     ax['name'] = name
  25   if values is not None:
  26     ax['values'] = values
  27   if units is not None:
  28     ax['units'] = units
  29   if cols is not None:
  30     ax['cols'] = []
  31     for c in cols:
  32       if type(c) != types.ListType and type(c) != types.TupleType:
  33         c = [c]
  34       col = {}
  35       for i in range(0,len(c)):
  36         col[cNameOrder[i]] = c[i]
  37       ax['cols'].append(col)
  38   return ax
  41 class MetaArray(ndarray):
  42   """N-dimensional array with meta data such as axis titles, units, and column names.
  44   May be initialized with a file name, a tuple representing the dimensions of the array,
  45   or any arguments that could be passed on to numpy.array()
  47   The info argument sets the metadata for the entire array. It is composed of a list
  48   of axis descriptions where each axis may have a name, title, units, and a list of column 
  49   descriptions. An additional dict at the end of the axis list may specify parameters
  50   that apply to values in the entire array.
  52   For example:
  53     A 2D array of altitude values for a topographical map might look like
  54       info=[
  55         {'name': 'lat', 'title': 'Lattitude'}, 
  56         {'name': 'lon', 'title': 'Longitude'}, 
  57         {'title': 'Altitude', 'units': 'm'}
  58       ]
  59     In this case, every value in the array represents the altitude in feet at the lat, lon
  60     position represented by the array index. All of the following return the 
  61     value at lat=10, lon=5:
  62       array[10, 5]
  63       array['lon':5, 'lat':10]
  64       array['lat':10][5]
  65     Now suppose we want to combine this data with another array of equal dimensions that
  66     represents the average rainfall for each location. We could easily store these as two 
  67     separate arrays or combine them into a 3D array with this description:
  68       info=[
  69         {'name': 'vals', 'cols': [
  70           {'name': 'altitude', 'units': 'm'}, 
  71           {'name': 'rainfall', 'units': 'cm/year'}
  72         ]},
  73         {'name': 'lat', 'title': 'Lattitude'}, 
  74         {'name': 'lon', 'title': 'Longitude'}
  75       ]
  76     We can now access the altitude values with array[0] or array['altitude'], and the
  77     rainfall values with array[1] or array['rainfall']. All of the following return
  78     the rainfall value at lat=10, lon=5:
  79       array[1, 10, 5]
  80       array['lon':5, 'lat':10, 'val': 'rainfall']
  81       array['rainfall', 'lon':5, 'lat':10]
  82     Notice that in the second example, there is no need for an extra (4th) axis description
  83     since the actual values are described (name and units) in the column info for the first axis.
  84   """
  86   def __new__(subtype, data=None, file=None, info=None, dtype=None, copy=False):
  87     if data is not None:
  88       if type(data) is types.TupleType:
  89         subarr = empty(data, dtype=dtype)
  90       else:
  91         subarr = array(data, dtype=dtype, copy=copy)
  92       subarr = subarr.view(subtype)
  94       if info is not None:
  95         try:
  96           info = list(info)
  97         except:
  98           raise Exception("Info must be a list of axis specifications")
  99         if len(info) < subarr.ndim+1:
 100           info.extend([{}]*(subarr.ndim+1-len(info)))
 101         elif len(info) > subarr.ndim+1:
 102           raise Exception("Info parameter must be list of length ndim+1 or less.")
 103         for i in range(0,len(info)):
 104           if type(info[i]) != types.DictType:
 105             if info[i] is None:
 106               info[i] = {}
 107             else:
 108               raise Exception("Axis specification must be Dict or None")
 109           if info[i].has_key('values'):
 110             if type(info[i]['values']) is types.ListType:
 111               info[i]['values'] = array(info[i]['values'])
 112             elif type(info[i]['values']) is not ndarray:
 113               raise Exception("Axis values must be specified as list or ndarray")
 114         subarr._info = info
 115       elif hasattr(data, '_info'):
 116         subarr._info = data._info
 118     elif file is not None:
 119       fd = open(file, 'r')
 120       meta = ''
 121       while True:
 122         line = fd.readline().strip()
 123         if line == '':
 124           break
 125         meta += line
 126       meta = eval(meta)
 128       ## read in axis values
 129       for ax in meta['info']:
 130         if ax.has_key('values_len'):
 131           ax['values'] = fromstring(['values_len']), dtype=ax['values_type'])
 132           del ax['values_len']
 133           del ax['values_type']
 135       subarr = fromstring(, dtype=meta['type'])
 136       subarr = subarr.view(subtype)
 137       subarr.shape = meta['shape']
 138       subarr._info = meta['info']
 140     # Finally, we must return the newly created object:
 141     return subarr
 144   def __array_finalize__(self,obj):
 145     # We use the getattr method to set a default if 'obj' doesn't have the 'info' attribute
 146     self._info = getattr(obj, 'info', [{}]*(obj.ndim+1))
 147     self._infoOwned = False  ## Do not make changes to _info until it is copied at least once
 149     # We could have checked first whether self._info was already defined:
 150     #if not hasattr(self, 'info'):
 151     #    self._info = getattr(obj, 'info', {})
 154   def __getitem__(self, ind):
 155     nInd = self._interpretIndexes(ind)
 156     a = ndarray.__getitem__(self, nInd)
 157     if type(a) == type(self):  ## generate new info array
 158       a._info = []
 159       for i in range(0, len(nInd)):   ## iterate over all axes
 160         if type(nInd[i]) == types.SliceType or type(nInd[i]) == types.ListType:  ## If the axis is sliced, keep the info but chop if necessary
 161           a._info.append(self._axisSlice(i, nInd[i]))
 162       a._info.append(self._info[-1])   ## Tack on extra data
 163     return a
 166   def __setitem__(self, ind, val):
 167     nInd = self._interpretIndexes(ind)
 168     return ndarray.__setitem__(self, nInd, val)
 170   def axisValues(self, axis):
 171     """Return the list of values for an axis"""
 172     ax = self._interpretAxis(axis)
 173     if self._info[ax].has_key('values'):
 174       return self._info[ax]['values']
 175     else:
 176       raise Exception('Array axis %s (%d) has no associated values.' % (str(axis), ax))
 178   def xvals(self, axis):
 179     """Synonym for axisValues()"""
 180     return self.axisValues(axis)
 182   def axisUnits(self, axis):
 183     """Return the units for axis"""
 184     ax = self._info[self._interpretAxis(axis)]
 185     if ax.has_key('units'):
 186       return ax['units']
 188   def columnUnits(self, axis, column):
 189     """Return the units for column in axis"""
 190     ax = self._info[self._interpretAxis(axis)]
 191     if ax.has_key('cols'):
 192       for c in ax['cols']:
 193         if c['name'] == column:
 194           return c['units']
 195       raise Exception("Axis %s has no column named %s" % (str(axis), str(column)))
 196     else:
 197       raise Exception("Axis %s has no column definitions" % str(axis))
 199   def rowsort(self, axis, key=0):
 200     """Return this object with all records sorted along axis using key as the index to the values to compare. Does not yet modify meta info."""
 201     ## make sure _info is copied locally before modifying it!
 203     keyList = self[key]
 204     order = keyList.argsort()
 205     if type(axis) == types.IntType:
 206       ind = [slice(None)]*axis
 207       ind.append(order)
 208     elif type(axis) == types.StringType:
 209       ind = (slice(axis, order),)
 210     return self[tuple(ind)]
 212   def append(self, val, axis):
 213     """Return this object with val appended along axis. Does not yet combine meta info."""
 214     ## make sure _info is copied locally before modifying it!
 216     s = list(self.shape)
 217     axis = self._interpretAxis(axis)
 218     s[axis] += 1
 219     n = MetaArray(tuple(s), info=self._info, dtype=self.dtype)
 220     ind = [slice(None)]*self.ndim
 221     ind[axis] = slice(None,-1)
 222     n[tuple(ind)] = self
 223     ind[axis] = -1
 224     n[tuple(ind)] = val
 225     return n
 227   def extend(self, val, axis):
 228     """Return the concatenation along axis of this object and val. Does not yet combine meta info."""
 229     ## make sure _info is copied locally before modifying it!
 231     axis = self._interpretAxis(axis)
 232     return MetaArray(concatenate(self, val, axis), info=self._info)
 234   def infoCopy(self):
 235     """Return a deep copy of the axis meta info for this object"""
 236     return copy.deepcopy(self._info)
 239   def write(self, fileName):
 240     """Write this object to a file. The object can be restored by calling MetaArray(file=fileName)"""
 242     meta = { 'shape': self.shape, 'type': str(self.dtype), 'info': self.infoCopy()}
 243     axstrs = []
 244     for ax in meta['info']:
 245       if ax.has_key('values'):
 246         axstrs.append(ax['values'].tostring())
 247         ax['values_len'] = len(axstrs[-1])
 248         ax['values_type'] = str(ax['values'].dtype)
 249         del ax['values']
 250     fd = open(fileName, 'w')
 251     fd.write(str(meta) + '\n\n')
 252     for ax in axstrs:
 253       fd.write(ax)
 254     fd.write(self.tostring())
 255     fd.close()
 257   def _interpretIndexes(self, ind):
 258     if type(ind) != types.TupleType:
 259       ind = (ind,)
 260     nInd = [slice(None)]*self.ndim
 261     numOk = True  ## Named indices not started yet; numbered sill ok
 262     for i in range(0,len(ind)):
 263       (axis, index, isNamed) = self._interpretIndex(ind[i], i, numOk)
 264       nInd[axis] = index
 265       if isNamed:
 266         numOk = False
 267     return tuple(nInd)
 269   def _interpretAxis(self, axis):
 270     if type(axis) == types.StringType:
 271       return self._getAxis(axis)
 272     else:
 273       return axis
 275   def _interpretIndex(self, ind, pos, numOk):
 276     if type(ind) == types.StringType:
 277       if not numOk:
 278         raise Exception("string and integer indexes may not follow named indexes")
 279       return (pos, self._getIndex(pos, ind), False)
 280     elif type(ind) == types.SliceType:
 281       if type(ind.start) == types.StringType or type(ind.stop) == types.StringType:  ## Not an actual slice!
 282         axis = self._interpretAxis(ind.start)
 283         #if type(ind.start) == types.StringType:
 284           #axis = self._getAxis(ind.start)
 285         #else:
 286           #axis = ind.start
 287         if type(ind.stop) == types.StringType:
 288           index = self._getIndex(axis, ind.stop)
 289         else:
 290           index = ind.stop
 291         return (axis, index, True)
 292       else:
 293         return (pos, ind, False)
 294     elif type(ind) == types.ListType:
 295       indList = [self._interpretIndex(i, pos, numOk)[1] for i in ind]
 296       return (pos, indList, False)
 297     else:
 298       if not numOk:
 299         raise Exception("string and integer indexes may not follow named indexes")
 300       return (pos, ind, False)
 302   def _getAxis(self, name):
 303     for i in range(0, len(self._info)):
 304       axis = self._info[i]
 305       if axis.has_key('name') and axis['name'] == name:
 306         return i
 307     raise Exception("No axis named %s.\n  info=%s" % (name, self._info))
 309   def _getIndex(self, axis, name):
 310     ax = self._info[axis]
 311     if ax is not None and ax.has_key('cols'):
 312       for i in range(0, len(ax['cols'])):
 313         if ax['cols'][i].has_key('name') and ax['cols'][i]['name'] == name:
 314           return i
 315     raise Exception("Axis %d has no column named %s.\n  info=%s" % (axis, name, self._info))
 317   def _axisCopy(self, i):
 318     return copy.deepcopy(self._info[i])
 320   def _axisSlice(self, i, cols):
 321     if self._info[i].has_key('cols') or self._info[i].has_key('values'):
 322       ax = self._axisCopy(i)
 323       if type(cols) == types.SliceType:
 324         if ax.has_key('cols'):
 325           ax['cols'] = ax['cols'][cols]
 326         if ax.has_key('values'):
 327           ax['values'] = ax['values'][cols]
 328       if type(cols) == types.ListType:
 329         if ax.has_key('cols'):
 330           ax['cols'] = [ax['cols'][i] for i in cols]
 331         if ax.has_key('values'):
 332           ax['values'] = [ax['values'][i] for i in cols]
 333     else:
 334       ax = self._info[i]
 335     return ax
 337   def __repr__(self):
 338     return "%s\n  axis info: %s" % (ndarray.__repr__(self), str(self._info))
 340   def __str__(self):
 341     return self.__repr__()

New Attachment

File to upload
Rename to
Overwrite existing attachment of same name

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.