This is an archival dump of old wiki content --- see scipy.org for current material.
Please see https://scipy-cookbook.readthedocs.io/

Attachment 'ols.0.2.py'

Download

   1 from __future__ import division
   2 from scipy import c_, ones, dot, stats, diff
   3 from scipy.linalg import inv, solve, det
   4 from numpy import log, pi, sqrt, square, diagonal
   5 from numpy.random import randn, seed
   6 import time
   7 
   8 class ols:
   9     """
  10     Author: Vincent Nijs (+ ?)
  11 
  12     Email: v-nijs at kellogg.northwestern.edu
  13 
  14     Last Modified: Mon Jan 15 17:56:17 CST 2007
  15     
  16     Dependencies: See import statement at the top of this file
  17 
  18     Doc: Class for multi-variate regression using OLS
  19 
  20     For usage examples of other class methods see the class tests at the bottom of this file. To see the class in action
  21     simply run this file using 'python ols.py'. This will generate some simulated data and run various analyses. If you have rpy installed
  22     the same model will also be estimated by R for confirmation.
  23 
  24     Input:
  25         y = dependent variable
  26         y_varnm = string with the variable label for y
  27         x = independent variables, note that a constant is added by default
  28         x_varnm = string or list of variable labels for the independent variables
  29     
  30     Output:
  31         There are no values returned by the class. Summary provides printed output.
  32         All other measures can be accessed as follows:
  33 
  34         Step 1: Create an OLS instance by passing data to the class
  35 
  36             m = ols(y,x,y_varnm = 'y',x_varnm = ['x1','x2','x3','x4'])
  37 
  38         Step 2: Get specific metrics
  39 
  40             To print the coefficients: 
  41                 >>> print m.b
  42             To print the coefficients p-values: 
  43                 >>> print m.p
  44     
  45     """
  46 
  47     def __init__(self,y,x,y_varnm = 'y',x_varnm = ''):
  48         """
  49         Initializing the ols class. 
  50         """
  51         self.y = y
  52         self.x = c_[ones(x.shape[0]),x]
  53         self.y_varnm = y_varnm
  54         if not isinstance(x_varnm,list): 
  55             self.x_varnm = ['const'] + list(x_varnm)
  56         else:
  57             self.x_varnm = ['const'] + x_varnm
  58 
  59         # Estimate model using OLS
  60         self.estimate()
  61 
  62     def estimate(self):
  63 
  64         # estimating coefficients, and basic stats
  65         self.inv_xx = inv(dot(self.x.T,self.x))
  66         xy = dot(self.x.T,self.y)
  67         self.b = dot(self.inv_xx,xy)                    # estimate coefficients
  68 
  69         self.nobs = self.y.shape[0]                     # number of observations
  70         self.ncoef = self.x.shape[1]                    # number of coef.
  71         self.df_e = self.nobs - self.ncoef              # degrees of freedom, error 
  72         self.df_r = self.ncoef - 1                      # degrees of freedom, regression 
  73 
  74         self.e = self.y - dot(self.x,self.b)            # residuals
  75         self.sse = dot(self.e,self.e)/self.df_e         # SSE
  76         self.se = sqrt(diagonal(self.sse*self.inv_xx))  # coef. standard errors
  77         self.t = self.b / self.se                       # coef. t-statistics
  78         self.p = (1-stats.t.cdf(abs(self.t), self.df_e)) * 2    # coef. p-values
  79 
  80         self.R2 = 1 - self.e.var()/self.y.var()         # model R-squared
  81         self.R2adj = 1-(1-self.R2)*((self.nobs-1)/(self.nobs-self.ncoef))   # adjusted R-square
  82 
  83         self.F = (self.R2/self.df_r) / ((1-self.R2)/self.df_e)  # model F-statistic
  84         self.Fpv = 1-stats.f.cdf(self.F, self.df_r, self.df_e)  # F-statistic p-value
  85 
  86     def dw(self):
  87         """
  88         Calculates the Durbin-Waston statistic
  89         """
  90         de = diff(self.e,1)
  91         dw = dot(de,de) / dot(self.e,self.e);
  92 
  93         return dw
  94 
  95     def omni(self):
  96         """
  97         Omnibus test for normality
  98         """
  99         return stats.normaltest(self.e) 
 100     
 101     def JB(self):
 102         """
 103         Calculate residual skewness, kurtosis, and do the JB test for normality
 104         """
 105 
 106         # Calculate residual skewness and kurtosis
 107         skew = stats.skew(self.e) 
 108         kurtosis = 3 + stats.kurtosis(self.e) 
 109         
 110         # Calculate the Jarque-Bera test for normality
 111         JB = (self.nobs/6) * (square(skew) + (1/4)*square(kurtosis-3))
 112         JBpv = 1-stats.chi2.cdf(JB,2);
 113 
 114         return JB, JBpv, skew, kurtosis
 115 
 116     def ll(self):
 117         """
 118         Calculate model log-likelihood and two information criteria
 119         """
 120         
 121         # Model log-likelihood, AIC, and BIC criterion values 
 122         ll = -(self.nobs*1/2)*(1+log(2*pi)) - (self.nobs/2)*log(dot(self.e,self.e)/self.nobs)
 123         aic = -2*ll/self.nobs + (2*self.ncoef/self.nobs)
 124         bic = -2*ll/self.nobs + (self.ncoef*log(self.nobs))/self.nobs
 125 
 126         return ll, aic, bic
 127     
 128     def summary(self):
 129         """
 130         Printing model output to screen
 131         """
 132 
 133         # local time & date
 134         t = time.localtime()
 135 
 136         # extra stats
 137         ll, aic, bic = self.ll()
 138         JB, JBpv, skew, kurtosis = self.JB()
 139         omni, omnipv = self.omni()
 140 
 141         # printing output to screen
 142         print '\n=============================================================================='
 143         print "Dependent Variable: " + self.y_varnm
 144         print "Method: Least Squares"
 145         print "Date: ", time.strftime("%a, %d %b %Y",t)
 146         print "Time: ", time.strftime("%H:%M:%S",t)
 147         print '# obs:               %5.0f' % self.nobs
 148         print '# variables:     %5.0f' % self.ncoef 
 149         print '=============================================================================='
 150         print 'variable     coefficient     std. Error      t-statistic     prob.'
 151         print '=============================================================================='
 152         for i in range(len(self.x_varnm)):
 153             print '''% -5s          % -5.6f     % -5.6f     % -5.6f     % -5.6f''' % tuple([self.x_varnm[i],self.b[i],self.se[i],self.t[i],self.p[i]]) 
 154         print '=============================================================================='
 155         print 'Models stats                         Residual stats'
 156         print '=============================================================================='
 157         print 'R-squared            % -5.6f         Durbin-Watson stat  % -5.6f' % tuple([self.R2, self.dw()])
 158         print 'Adjusted R-squared   % -5.6f         Omnibus stat        % -5.6f' % tuple([self.R2adj, omni])
 159         print 'F-statistic          % -5.6f         Prob(Omnibus stat)  % -5.6f' % tuple([self.F, omnipv])
 160         print 'Prob (F-statistic)   % -5.6f			JB stat             % -5.6f' % tuple([self.Fpv, JB])
 161         print 'Log likelihood       % -5.6f			Prob(JB)            % -5.6f' % tuple([ll, JBpv])
 162         print 'AIC criterion        % -5.6f         Skew                % -5.6f' % tuple([aic, skew])
 163         print 'BIC criterion        % -5.6f         Kurtosis            % -5.6f' % tuple([bic, kurtosis])
 164         print '=============================================================================='
 165 
 166 if __name__ == '__main__':
 167 
 168 	##########################
 169 	### testing the ols class
 170 	##########################
 171 
 172 	# creating simulated data and variable labels
 173 	seed(1)
 174 	data =  randn(100,5)            # the data array
 175 
 176 	# intercept is added, by default
 177 	m = ols(data[:,0],data[:,1:],y_varnm = 'y',x_varnm = ['x1','x2','x3','x4'])
 178 	m.summary()
 179 
 180 	# if you have rpy installed, use it to test the results
 181 	have_rpy =  False
 182 	try:
 183 	    print "\n"
 184 	    print "="*30
 185 	    print "Validating OLS results in R"
 186 	    print "="*30
 187 
 188 	    import rpy
 189 	    have_rpy = True
 190 	except ImportError:
 191 	    print "\n"
 192 	    print "="*30
 193 	    print "Validating OLS-class results in R"
 194 	    print "="*30
 195 	    print "rpy is not installed"
 196 	    print "="*30
 197 
 198 	if have_rpy:
 199 	    y = data[:,0]
 200 	    x1 = data[:,1]
 201 	    x2 = data[:,2]
 202 	    x3 = data[:,3]
 203 	    x4 = data[:,4]
 204 	    rpy.set_default_mode(rpy.NO_CONVERSION)
 205 	    linear_model = rpy.r.lm(rpy.r("y ~ x1 + x2 + x3 + x4"), data = rpy.r.data_frame(x1=x1,x2=x2,x3=x3,x4=x4,y=y))
 206 	    rpy.set_default_mode(rpy.BASIC_CONVERSION)
 207 	    print linear_model.as_py()['coefficients']
 208 	    summary = rpy.r.summary(linear_model)
 209 	    print summary

New Attachment

File to upload
Rename to
Overwrite existing attachment of same name

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.