Attachment 'ols.0.1.py'
from __future__ import division
from numpy import c_, ones, dot, diff, log, pi, sqrt, square, diagonal
from numpy.random import randn, seed
from scipy import stats
from scipy.linalg import inv
import time

class ols:
    """
    Author: Vincent Nijs (+ ?)

    Email: v-nijs at kellogg.northwestern.edu

    Last Modified: Mon Jan 15 17:56:17 CST 2007

    Dependencies: See import statements at the top of this file

    Doc: Class for multivariate regression using OLS

    For usage examples of the other class methods, see the tests at the
    bottom of this file. To see the class in action, simply run this file
    with 'python ols.py'. This will generate some simulated data and run
    various analyses. If you have rpy installed, the same model will also
    be estimated in R for confirmation.

    Input:
        y = dependent variable
        y_varnm = string with the variable label for y
        x = independent variables; note that a constant is added by default
        x_varnm = string or list of variable labels for the independent variables

    Output:
        The class does not return any values; summary() prints the results.
        All other measures can be accessed as attributes:

        Step 1: Create an OLS instance by passing data to the class

            m = ols(y, x, y_varnm='y', x_varnm=['x1','x2','x3','x4'])

        Step 2: Get specific metrics

            To print the coefficients:
                >>> print m.b
            To print the coefficient p-values:
                >>> print m.p
    """

    def __init__(self, y, x, y_varnm='y', x_varnm=''):
        """
        Initializing the ols class.
        """
        self.y = y
        # prepend a column of ones so that b[0] is the intercept
        self.x = c_[ones(x.shape[0]), x]
        self.y_varnm = y_varnm
        if not isinstance(x_varnm, list):
            # wrap a single label in a list; calling list() on a string
            # would split it into individual characters
            self.x_varnm = ['const'] + [x_varnm]
        else:
            self.x_varnm = ['const'] + x_varnm

        # Estimate model using OLS
        self.estimate()

    def estimate(self):

        # estimate coefficients and basic stats
        self.inv_xx = inv(dot(self.x.T, self.x))          # (X'X)^-1
        xy = dot(self.x.T, self.y)                        # X'y
        self.b = dot(self.inv_xx, xy)                     # coefficients: b = (X'X)^-1 X'y

        self.nobs = self.y.shape[0]                       # number of observations
        self.ncoef = self.x.shape[1]                      # number of coefficients
        self.df_e = self.nobs - self.ncoef                # degrees of freedom, error
        self.df_r = self.ncoef - 1                        # degrees of freedom, regression

        self.e = self.y - dot(self.x, self.b)             # residuals
        self.sse = dot(self.e, self.e) / self.df_e        # estimated error variance (e'e / df_e)
        self.se = sqrt(diagonal(self.sse * self.inv_xx))  # coef. standard errors
        self.t = self.b / self.se                         # coef. t-statistics
        self.p = (1 - stats.t.cdf(abs(self.t), self.df_e)) * 2  # two-tailed coef. p-values

        self.R2 = 1 - self.e.var() / self.y.var()         # model R-squared
        self.R2adj = 1 - (1 - self.R2) * ((self.nobs - 1) / (self.nobs - self.ncoef))  # adjusted R-squared

        self.F = (self.R2 / self.df_r) / ((1 - self.R2) / self.df_e)  # model F-statistic
        self.Fpv = 1 - stats.f.cdf(self.F, self.df_r, self.df_e)      # F-statistic p-value

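    # A minimal cross-check of the coefficient estimates, assuming numpy is
    # available (as imported above): numpy.linalg.lstsq solves the same
    # least-squares problem without forming (X'X)^-1 explicitly, e.g.
    #
    #   from numpy.linalg import lstsq
    #   b_check = lstsq(m.x, m.y)[0]   # should match m.b up to rounding
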
    def dw(self):
        """
        Calculates the Durbin-Watson statistic
        """
        de = diff(self.e, 1)                      # first differences of the residuals
        dw = dot(de, de) / dot(self.e, self.e)    # values near 2 suggest no first-order autocorrelation

        return dw

    def omni(self):
        """
        Omnibus (D'Agostino-Pearson) test for normality of the residuals
        """
        return stats.normaltest(self.e)   # returns (test statistic, p-value)

    def JB(self):
        """
        Calculate residual skewness and kurtosis, and do the JB test for normality
        """

        # Calculate residual skewness and kurtosis
        skew = stats.skew(self.e)
        kurtosis = 3 + stats.kurtosis(self.e)   # stats.kurtosis returns excess kurtosis

        # Calculate the Jarque-Bera test for normality; JB is asymptotically
        # chi-squared with 2 degrees of freedom under the null of normality
        JB = (self.nobs / 6) * (square(skew) + (1 / 4) * square(kurtosis - 3))
        JBpv = 1 - stats.chi2.cdf(JB, 2)

        return JB, JBpv, kurtosis, skew

    def ll(self):
        """
        Calculate model log-likelihood and two information criteria
        """

        # Model log-likelihood, AIC, and BIC criterion values
        ll = -(self.nobs / 2) * (1 + log(2 * pi)) - (self.nobs / 2) * log(dot(self.e, self.e) / self.nobs)
        aic = -2 * ll / self.nobs + (2 * self.ncoef / self.nobs)
        bic = -2 * ll / self.nobs + (self.ncoef * log(self.nobs)) / self.nobs

        return ll, aic, bic

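    # Note: ll above is the concentrated Gaussian log-likelihood,
    #   ll = -(n/2) * (1 + log(2*pi) + log(e'e / n)),
    # and aic/bic are reported per observation (divided by nobs); multiply
    # by nobs to recover the usual unnormalized criteria.
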
    def summary(self):
        """
        Printing model output to screen
        """

        # local time & date
        t = time.localtime()

        # extra stats
        ll, aic, bic = self.ll()
        # note the return order of JB(): statistic, p-value, kurtosis, skew
        JB, JBpv, kurtosis, skew = self.JB()
        omni, omnipv = self.omni()

        # printing output to screen
        print '\n=============================================================================='
        print "Dependent Variable: " + self.y_varnm
        print "Method: Least Squares"
        print "Date: ", time.strftime("%a, %d %b %Y", t)
        print "Time: ", time.strftime("%H:%M:%S", t)
        print '# obs:       %5.0f' % self.nobs
        print '# variables: %5.0f' % self.ncoef
        print '=============================================================================='
        print 'variable     coefficient     std. Error      t-statistic     prob.'
        print '=============================================================================='
        for i in range(len(self.x_varnm)):
            print '''% -5s          % -5.6f     % -5.6f     % -5.6f     % -5.6f''' % tuple([self.x_varnm[i], self.b[i], self.se[i], self.t[i], self.p[i]])
        print '=============================================================================='
        print 'Model stats                          Residual stats'
        print '=============================================================================='
        print 'R-squared            % -5.6f         Durbin-Watson stat  % -5.6f' % tuple([self.R2, self.dw()])
        print 'Adjusted R-squared   % -5.6f         Omnibus stat        % -5.6f' % tuple([self.R2adj, omni])
        print 'F-statistic          % -5.6f         Prob(Omnibus stat)  % -5.6f' % tuple([self.F, omnipv])
        print 'Prob (F-statistic)   % -5.6f         JB stat             % -5.6f' % tuple([self.Fpv, JB])
        print 'Log likelihood       % -5.6f         Prob(JB)            % -5.6f' % tuple([ll, JBpv])
        print 'AIC criterion        % -5.6f         Skew                % -5.6f' % tuple([aic, skew])
        print 'BIC criterion        % -5.6f         Kurtosis            % -5.6f' % tuple([bic, kurtosis])
        print '=============================================================================='

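# The diagnostic methods can also be called directly on a fitted instance; a
# short sketch, assuming m is the model estimated in the test block below:
#
#   print m.dw()    # Durbin-Watson statistic
#   print m.JB()    # (JB stat, p-value, kurtosis, skew)
#   print m.ll()    # (log-likelihood, AIC, BIC)
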
if __name__ == '__main__':

    ##########################
    ### testing the ols class
    ##########################

    # creating simulated data and variable labels
    seed(1)
    data = randn(100, 5)   # the data array

    # intercept is added by default
    m = ols(data[:, 0], data[:, 1:], y_varnm='y', x_varnm=['x1', 'x2', 'x3', 'x4'])
    m.summary()

    # if you have rpy installed, use it to check the results
    have_rpy = False
    try:
        # import first, so the banner is only printed once on success
        import rpy
        have_rpy = True
        print "\n"
        print "=" * 30
        print "Validating OLS results in R"
        print "=" * 30
    except ImportError:
        print "\n"
        print "=" * 30
        print "Validating OLS results in R"
        print "=" * 30
        print "rpy is not installed"
        print "=" * 30

    if have_rpy:
        y = data[:, 0]
        x1 = data[:, 1]
        x2 = data[:, 2]
        x3 = data[:, 3]
        x4 = data[:, 4]
        rpy.set_default_mode(rpy.NO_CONVERSION)
        linear_model = rpy.r.lm(rpy.r("y ~ x1 + x2 + x3 + x4"), data=rpy.r.data_frame(x1=x1, x2=x2, x3=x3, x4=x4, y=y))
        rpy.set_default_mode(rpy.BASIC_CONVERSION)
        print linear_model.as_py()['coefficients']
        summary = rpy.r.summary(linear_model)
        print summary
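
# If you have rpy2 (the successor to the old rpy package) instead, a roughly
# equivalent cross-check might look like this untested sketch; the rpy2
# object names below are assumptions, not part of this module:
#
#   import rpy2.robjects as ro
#   for name, col in zip(['y', 'x1', 'x2', 'x3', 'x4'], data.T):
#       ro.globalenv[name] = ro.FloatVector(col)
#   fit = ro.r('lm(y ~ x1 + x2 + x3 + x4)')
#   print ro.r('summary')(fit)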