This is an archival dump of old wiki content --- see scipy.org for current material.
Please see http://scipy-cookbook.readthedocs.org/

Attachment 'dbase.0.2.py'

Download

   1 from scipy import c_, arange
   2 from scipy.io import read_array
   3 from numpy.random import randn
   4 from pylab import plot, show, figure
   5 import pickle, csv, os
   6 
   7 class dbase:
   8 	"""
   9 	A simple data-frame, that reads and write csv/pickle files with variable names.
  10 	Columns in the data can be accessed using x.get('a','c') where 'a' and 'c' are
  11 	variable names.
  12 	"""
  13 	def __init__(self,f):
  14 		"""
  15 		Initializing the dbase class. Loading file f.
  16 		"""
  17 		self.load(f)
  18 		self.DBname = os.getcwd() + '/' + f
  19 
  20 	def load(self,fname):
  21 		"""
  22 		Loading data from a csv or a pickle file of the dbase class
  23 		"""
  24 		fext = self.__ext(fname)
  25 		f = open(fname,'r')
  26 		if fext == 'csv':
  27 			self.varnm = self.__vardic(f.readline().split(','))
  28 			self.data = read_array(f, separator=',', lines=(0,-1))
  29 		elif fext == 'pickle':
  30 			a = pickle.load(f)
  31 			self.varnm = a.varnm
  32 			self.data = a.data
  33 		else:
  34 			raise 'This class only works on csv and pickle files'
  35 		f.close()
  36 
  37 	def dump(self,fname):
  38 		"""
  39 		Dumping the instance of the class into a csv or pickle file
  40 		"""
  41 		fext = self.__ext(fname)
  42 		f = open(fname,'w')
  43 		if fext == 'csv':
  44 			writer = csv.writer(f)
  45 			writer.writerow(self.__sort_keys())
  46 			writer.writerows(self.data)
  47 		elif fext == 'pickle':
  48 			pickle.dump(self,f)
  49 		else:
  50 			raise 'This class only outputs csv or pickle files'
  51 		f.close()
  52 
  53 	def get(self,*var):
  54 		"""
  55 		Selecting a column based on variable labels. Assumes data are in columns.
  56 		"""
  57 
  58 		a = self.data[:,self.varnm[var[0]]]				# getting the data for the 1st element in self.data
  59 
  60 		for i in var[1:]:						
  61 			a = c_[a,self.data[:,self.varnm[i]]]		# concatenate column-wise, along last axis
  62 	
  63 		return a
  64 
  65 	def addvar(self,a,v):
  66 		"""
  67 		Adding columns of data
  68 		"""
  69 		self.data = c_[self.data,a]			# concatenation the data at end
  70 
  71 		j = max(self.varnm.values()) + 1	# starting index past max index
  72 		if isinstance(v,str): v = [v]
  73 		for i in v:						
  74 			self.varnm[i] = j
  75 			j += 1
  76 
  77 	def delvar(self,*v):
  78 		"""
  79 		Deleting columns of data
  80 		"""
  81 		# removing the variables listed 
  82 		for i in v:						
  83 			del self.varnm[i]
  84 
  85 		# index list for the remaining variables
  86 		index = self.varnm.values()
  87 		index.sort()
  88 
  89 		# selecting the remain columns
  90 		self.data = self.data[:,index]
  91 
  92 		# updating the index number 
  93 		self.varnm = self.__vardic(self.__sort_keys(range(len(index))))
  94 
  95 	def info(self,axis=0):
  96 		"""
  97 		Printing descriptive statistics on selected variables
  98 		"""
  99 		nobs = self.data.shape[axis]
 100 		nvar = len(self.varnm.keys())
 101 		min = self.data.min(axis)
 102 		max = self.data.max(axis)
 103 		mean = self.data.mean(axis)
 104 		std = self.data.std(axis)
 105 		vars = self.__sort_keys()
 106 		
 107 		print '\n=========================================================='
 108 		print '================== Database information =================='
 109 		print '==========================================================\n'
 110 
 111 		print '''file:			%s''' % b.DBname
 112 		print '''# obs:			%s''' % nobs
 113 		print '''# variables:	%s\n''' % nvar
 114 
 115 		print 'var			min			max			mean		std.dev'
 116 		print '=========================================================='
 117 		
 118 		for i in range(nvar):
 119 			print '''%s			%-5.2f		%-5.2f		%-5.2f		%-5.2f''' % tuple([vars[i],min[i],max[i],mean[i],std[i]]) 
 120 
 121 	def dataplot(self,var):
 122 		"""
 123 		Plotting the data with variable names
 124 		"""
 125 		a = self.get(var)
 126 
 127 		# plot a single column
 128 		title = "Plot of series " + var
 129 		ax = figure().add_axes([.1,.1,.8,.8])
 130 		ax.plot(a); 
 131 		ax.set_title(title)
 132 		show()
 133 
 134 	def __vardic(self,L):
 135 		"""
 136 		Making a dictionary with variable names and indices
 137 		"""
 138 		dic = {}; j = 0
 139 
 140 		# reading only the 1st line in the file and extracting variables names
 141 		# names are linked in the dictionary to their, and the data's, index
 142 		# making sure to strip leading and trailing white space
 143 		for i in L:
 144 			dic[i.strip()] = j
 145 			j += 1
 146 	
 147 		return dic
 148 
 149 	def __ext(self,fname):
 150 		"""
 151 		Finding the file extension of the filename passed to dbase
 152 		"""
 153 		return fname.split('.')[-1].strip()
 154 
 155 	def __sort_keys(self,v = []):
 156 		"""
 157 		Sorting the keys in the variable name dictionary so they are in the correct order
 158 		"""
 159 		k = self.varnm.keys()
 160 		if v == []: v = self.varnm.values()
 161 
 162 		return [k[i] for i in v]
 163 
 164 ########################
 165 ### Testing the class
 166 ########################
 167 
 168 if __name__ == '__main__':
 169 
 170 	# creating simulated data and variable labels
 171 	varnm = ['a','b','c']			# variable labels
 172 	data =	randn(5,3)				# the data array
 173 
 174 	# saving simulated data to a csv file
 175 	f = open('data.csv','w')
 176 	writer = csv.writer(f)
 177 	writer.writerow(varnm)
 178 	writer.writerows(data)
 179 	f.close()
 180 
 181 	# loading the data from the csv file and dumping the dbase class instance to a pickle file
 182 	a = dbase("data.csv")
 183 	a.dump("data.pickle")
 184 
 185 	# loading the object from the pickle file
 186 	print "\nLoading the dbase object from a pickle file\n"
 187 
 188 	b = dbase("data.pickle")
 189 
 190 	print "Data from dbase class\n", b.data
 191 	print "\nVariable names from dbase class\n", b.varnm
 192 	print "\nTwo columns selected using variable names\n", b.get('a','c')
 193 	print "\nSaving data and variable names to a different csv file\n", b.dump("data_dump.csv")
 194 
 195 	# making the database bigger
 196 	xtra1 = b.get('a') * b.get('b')
 197 	xtra2 = b.get('a') * b.get('c')
 198 	xtra = c_[xtra1,xtra2]
 199 	xtra_varnm = ('x1','x2')
 200 
 201 	b.addvar(xtra,xtra_varnm)
 202 	print "\nTwo columns added\n", b.data
 203 	print "\nTwo variable names added\n", b.varnm
 204 
 205 	# making the database smaller
 206 	b.delvar('a','x2')
 207 	print "\nTwo columns deleted\n", b.data
 208 	print "\nTwo variable names deleted\n", b.varnm
 209 
 210 	# getting the name of the file you are working on
 211 	print "\nWorking on file: " + b.DBname
 212 
 213 	# descriptive information on the database, or selected variables in the databse
 214 	b.info()
 215 
 216 	# plotting a series
 217 	b.dataplot('b')

New Attachment

File to upload
Rename to
Overwrite existing attachment of same name

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.