Attachment 'dbase.0.2.py'
Download
Toggle line numbers
1 from scipy import c_, arange
2 from scipy.io import read_array
3 from numpy.random import randn
4 from pylab import plot, show, figure
5 import pickle, csv, os
6
class dbase:
    """
    A simple data-frame that reads and writes csv/pickle files with variable names.
    Columns in the data can be accessed using x.get('a','c') where 'a' and 'c' are
    variable names.

    Instance attributes:
        varnm  -- dictionary mapping each variable name to its column index in data
        data   -- the data array, one column per variable
        DBname -- path of the file the instance was created from
    """

    def __init__(self, f):
        """
        Initializing the dbase class. Loading file f.

        f is the name of a csv or pickle file. DBname records f joined onto the
        current working directory (an absolute f is kept as it is).
        """
        self.load(f)
        # os.path.join leaves an absolute path alone, unlike plain string concatenation
        self.DBname = os.path.join(os.getcwd(), f)

    def load(self, fname):
        """
        Loading data from a csv or a pickle file of the dbase class

        For a csv file the first line holds the comma separated variable names and
        the remaining lines hold the data. For a pickle file the varnm and data
        attributes are copied from the unpickled object.

        Raises ValueError when the file extension is neither csv nor pickle.
        """
        fext = self.__ext(fname)
        # checking the extension before touching the file system
        if fext not in ('csv', 'pickle'):
            raise ValueError('This class only works on csv and pickle files')

        if fext == 'csv':
            f = open(fname, 'r')
            try:
                # the header line is consumed here, the rest of the file goes to read_array
                self.varnm = self.__vardic(f.readline().split(','))
                self.data = read_array(f, separator=',', lines=(0,-1))
            finally:
                f.close()
        else:
            # pickles are binary data
            f = open(fname, 'rb')
            try:
                # NOTE: unpickling can execute arbitrary code, only load trusted files
                a = pickle.load(f)
            finally:
                f.close()
            self.varnm = a.varnm
            self.data = a.data

    def dump(self, fname):
        """
        Dumping the instance of the class into a csv or pickle file

        The csv file gets one header row with the variable names in column order,
        followed by the rows of data. The pickle file holds the whole instance.

        Raises ValueError when the file extension is neither csv nor pickle; the
        target file is not created or truncated in that case.
        """
        fext = self.__ext(fname)
        if fext not in ('csv', 'pickle'):
            raise ValueError('This class only outputs csv or pickle files')

        if fext == 'csv':
            f = open(fname, 'w')
            try:
                writer = csv.writer(f)
                writer.writerow(self.__sort_keys())
                writer.writerows(self.data)
            finally:
                f.close()
        else:
            f = open(fname, 'wb')
            try:
                pickle.dump(self, f)
            finally:
                f.close()

    def get(self, *var):
        """
        Selecting a column based on variable labels. Assumes data are in columns.

        With one name the column is returned as self.data[:, index]; with several
        names the columns are returned side by side in the order requested.

        Raises IndexError when no name is given and KeyError for an unknown name.
        """
        if not var:
            raise IndexError('get() needs at least one variable name')

        columns = [self.varnm[name] for name in var]
        if len(columns) == 1:
            return self.data[:, columns[0]]

        # indexing with a list of column numbers puts the columns side by side
        return self.data[:, columns]

    def addvar(self, a, v):
        """
        Adding columns of data

        a holds the new column(s) and v is a single name or a sequence of names,
        one per new column. The new columns are appended after the existing ones.

        Raises ValueError when a name is repeated or already in use, or when the
        number of names differs from the number of columns added. Nothing is
        modified in that case.
        """
        if isinstance(v, str):
            names = [v]
        else:
            names = list(v)

        taken = [name for name in names if name in self.varnm]
        if taken or len(set(names)) != len(names):
            raise ValueError('variable names must be new and unique: %s' % names)

        first = self.data.shape[1]           # index of the first appended column
        widened = c_[self.data, a]           # concatenating the data at the end
        added = widened.shape[1] - first
        if added != len(names):
            raise ValueError('%d column(s) added but %d name(s) given'
                             % (added, len(names)))

        self.data = widened
        for offset, name in enumerate(names):
            self.varnm[name] = first + offset

    def delvar(self, *v):
        """
        Deleting columns of data

        Removes the named variables and their columns, then renumbers the
        remaining variables 0, 1, 2, ... in their original column order.

        Raises KeyError for an unknown name; nothing is modified in that case.
        """
        unknown = [name for name in v if name not in self.varnm]
        if unknown:
            raise KeyError(unknown[0])

        # removing the variables listed (a repeated name is removed once)
        for name in set(v):
            del self.varnm[name]

        # remaining names in column order and the columns they point at
        names = self.__sort_keys()
        index = [self.varnm[name] for name in names]

        # selecting the remaining columns
        self.data = self.data[:, index]

        # updating the index numbers to match the reduced array
        self.varnm = dict((name, j) for j, name in enumerate(names))

    def info(self, axis=0):
        """
        Printing descriptive statistics on selected variables

        Prints the file name, the number of observations and variables, and the
        min, max, mean and standard deviation taken along axis.
        """
        nobs = self.data.shape[axis]
        nvar = len(self.varnm)
        lowest = self.data.min(axis)
        highest = self.data.max(axis)
        average = self.data.mean(axis)
        spread = self.data.std(axis)
        names = self.__sort_keys()

        print('\n==========================================================')
        print('================== Database information ==================')
        print('==========================================================\n')

        print('''file: %s''' % self.DBname)
        print('''# obs: %s''' % nobs)
        print('''# variables: %s\n''' % nvar)

        print('var min max mean std.dev')
        print('==========================================================')

        for i, name in enumerate(names):
            print('''%s %-5.2f %-5.2f %-5.2f %-5.2f'''
                  % (name, lowest[i], highest[i], average[i], spread[i]))

    def dataplot(self, var):
        """
        Plotting the data with variable names

        var is the name of the single series to plot.
        """
        a = self.get(var)

        # plot a single column
        title = "Plot of series " + var
        ax = figure().add_axes([.1,.1,.8,.8])
        ax.plot(a)
        ax.set_title(title)
        show()

    def __vardic(self, L):
        """
        Making a dictionary with variable names and indices

        L is a sequence of names; each name is stripped of leading and trailing
        white space and linked to its position in L, which is also the index of
        its column in the data.
        """
        return dict((name.strip(), j) for j, name in enumerate(L))

    def __ext(self, fname):
        """
        Finding the file extension of the filename passed to dbase

        Returns the text after the last '.', stripped and lower-cased.
        """
        return fname.rsplit('.', 1)[-1].strip().lower()

    def __sort_keys(self, v=None):
        """
        Sorting the keys in the variable name dictionary so they are in the correct order

        Returns the variable names ordered by their column index. When v is given
        it is a sequence of positions selecting from that ordered list.
        """
        ordered = sorted(self.varnm, key=self.varnm.get)
        if not v:
            return ordered

        return [ordered[i] for i in v]
163
164 ########################
165 ### Testing the class
166 ########################
167
if __name__ == '__main__':

    # creating simulated data and variable labels
    varnm = ['a','b','c']       # variable labels
    data = randn(5,3)           # the data array

    # saving simulated data to a csv file, closing it even if writing fails
    f = open('data.csv','w')
    try:
        writer = csv.writer(f)
        writer.writerow(varnm)
        writer.writerows(data)
    finally:
        f.close()

    # loading the data from the csv file and dumping the dbase class instance to a pickle file
    a = dbase("data.csv")
    a.dump("data.pickle")

    # loading the object from the pickle file
    print("\nLoading the dbase object from a pickle file\n")

    b = dbase("data.pickle")

    # each print gets one pre-formatted string so the output is the same
    # whether print is a statement or a function
    print("Data from dbase class\n%s" % (b.data,))
    print("\nVariable names from dbase class\n%s" % (b.varnm,))
    print("\nTwo columns selected using variable names\n%s" % (b.get('a','c'),))

    # dump() returns nothing, so it is called on its own instead of being printed
    print("\nSaving data and variable names to a different csv file\n")
    b.dump("data_dump.csv")

    # making the database bigger
    xtra1 = b.get('a') * b.get('b')
    xtra2 = b.get('a') * b.get('c')
    xtra = c_[xtra1,xtra2]
    xtra_varnm = ('x1','x2')

    b.addvar(xtra,xtra_varnm)
    print("\nTwo columns added\n%s" % (b.data,))
    print("\nTwo variable names added\n%s" % (b.varnm,))

    # making the database smaller
    b.delvar('a','x2')
    print("\nTwo columns deleted\n%s" % (b.data,))
    print("\nTwo variable names deleted\n%s" % (b.varnm,))

    # getting the name of the file you are working on
    print("\nWorking on file: " + b.DBname)

    # descriptive information on the database, or selected variables in the database
    b.info()

    # plotting a series
    b.dataplot('b')
New Attachment
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
- [get | view] (2007-01-14 23:35:05, 4.5 KB) [[attachment:data.0.3.csv]]
- [get | view] (2007-01-07 18:13:31, 4.0 KB) [[attachment:data.csv]]
- [get | view] (2007-01-07 18:00:31, 16.1 KB) [[attachment:dbase.0.1.py]]
- [get | view] (2007-01-14 22:55:13, 5.6 KB) [[attachment:dbase.0.2.py]]
- [get | view] (2007-01-14 23:31:35, 18.1 KB) [[attachment:dbase.0.3.py]]
- [get | view] (2007-01-14 23:51:16, 18.4 KB) [[attachment:dbase.0.4.py]]
- [get | view] (2007-01-14 23:54:23, 18.4 KB) [[attachment:dbase.0.5.py]]
- [get | view] (2007-01-15 06:33:06, 18.4 KB) [[attachment:dbase.0.6.py]]
- [get | view] (2007-01-19 05:53:14, 19.1 KB) [[attachment:dbase.0.7.py]]
- [get | view] (2007-01-07 07:50:10, 15.8 KB) [[attachment:dbase.py]]
- [get | view] (2007-01-07 07:52:21, 7.6 KB) [[attachment:dbase.pydoc]]
- [get | view] (2007-01-07 18:01:44, 7.9 KB) [[attachment:dbase_pydoc.0.1.txt]]
- [get | view] (2007-01-14 22:56:04, 8.2 KB) [[attachment:dbase_pydoc.0.2.txt]]
- [get | view] (2007-01-07 18:02:50, 27.7 KB) [[attachment:ex_plot.0.1.png]]
- [get | view] (2007-01-07 08:01:04, 87.7 KB) [[attachment:ex_plot.png]]
- [get | view] (2007-01-07 08:03:54, 28.2 KB) [[attachment:ex_plot1.png]]
- [get | view] (2007-01-07 07:57:21, 895.9 KB) [[attachment:example_plot.png]]
- [get | view] (2007-01-07 07:51:51, 34.3 KB) [[attachment:pydoc]]