This is an archival dump of old wiki content --- see scipy.org for current material.
Please see https://scipy-cookbook.readthedocs.io/

Attachment 'ols.0.1.py'

Download

   1 from __future__ import division
   2 from scipy import c_, ones, dot, stats, diff
   3 from scipy.linalg import inv, solve, det
   4 from numpy import log, pi, sqrt, square, diagonal
   5 from numpy.random import randn, seed
   6 import time
   7 
   8 class ols:
   9 	"""
  10 	Author: Vincent Nijs (+ ?)
  11 
  12 	Email: v-nijs at kellogg.northwestern.edu
  13 
  14 	Last Modified: Mon Jan 15 17:56:17 CST 2007
  15 	
  16 	Dependencies: See import statement at the top of this file
  17 
  18 	Doc: Class for multi-variate regression using OLS
  19 
  20 	For usage examples of other class methods see the class tests at the bottom of this file. To see the class in action
  21 	simply run this file using 'python ols.py'. This will generate some simulated data and run various analyses. If you have rpy installed
  22 	the same model will also be estimated by R for confirmation.
  23 
  24 	Input:
  25 		y = dependent variable
  26 		y_varnm = string with the variable label for y
  27 		x = independent variables, note that a constant is added by default
  28 		x_varnm = string or list of variable labels for the independent variables
  29 	
  30 	Output:
  31 		There are no values returned by the class. Summary provides printed output.
  32 		All other measures can be accessed as follows:
  33 
  34 		Step 1: Create an OLS instance by passing data to the class
  35 
  36 			m = ols(y,x,y_varnm = 'y',x_varnm = ['x1','x2','x3','x4'])
  37 
  38 		Step 2: Get specific metrics
  39 
  40 			To print the coefficients: 
  41 				>>> print m.b
  42 			To print the coefficients p-values: 
  43 				>>> print m.p
  44 	
  45 	"""
  46 
  47 	def __init__(self,y,x,y_varnm = 'y',x_varnm = ''):
  48 		"""
  49 		Initializing the ols class. 
  50 		"""
  51 		self.y = y
  52 		self.x = c_[ones(x.shape[0]),x]
  53 		self.y_varnm = y_varnm
  54 		if not isinstance(x_varnm,list): 
  55 			self.x_varnm = ['const'] + list(x_varnm)
  56 		else:
  57 			self.x_varnm = ['const'] + x_varnm
  58 
  59 		# Estimate model using OLS
  60 		self.estimate()
  61 
  62 	def estimate(self):
  63 
  64 		# estimating coefficients, and basic stats
  65 		self.inv_xx = inv(dot(self.x.T,self.x))
  66 		xy = dot(self.x.T,self.y)
  67 		self.b = dot(self.inv_xx,xy)					# estimate coefficients
  68 
  69 		self.nobs = self.y.shape[0]						# number of observations
  70 		self.ncoef = self.x.shape[1]					# number of coef.
  71 		self.df_e = self.nobs - self.ncoef				# degrees of freedom, error 
  72 		self.df_r = self.ncoef - 1						# degrees of freedom, regression 
  73 
  74 		self.e = self.y - dot(self.x,self.b)			# residuals
  75 		self.sse = dot(self.e,self.e)/self.df_e			# SSE
  76 		self.se = sqrt(diagonal(self.sse*self.inv_xx))	# coef. standard errors
  77 		self.t = self.b / self.se						# coef. t-statistics
  78 		self.p = (1-stats.t.cdf(abs(self.t), self.df_e)) * 2	# coef. p-values
  79 
  80 		self.R2 = 1 - self.e.var()/self.y.var()			# model R-squared
  81 		self.R2adj = 1-(1-self.R2)*((self.nobs-1)/(self.nobs-self.ncoef))	# adjusted R-square
  82 
  83 		self.F = (self.R2/self.df_r) / ((1-self.R2)/self.df_e)	# model F-statistic
  84 		self.Fpv = 1-stats.f.cdf(self.F, self.df_r, self.df_e)	# F-statistic p-value
  85 
  86 	def dw(self):
  87 		"""
  88 		Calculates the Durbin-Waston statistic
  89 		"""
  90 		de = diff(self.e,1)
  91 		dw = dot(de,de) / dot(self.e,self.e);
  92 
  93 		return dw
  94 
  95 	def omni(self):
  96 		"""
  97 		Omnibus test for normality
  98 		"""
  99 		return stats.normaltest(self.e) 
 100 	
 101 	def JB(self):
 102 		"""
 103 		Calculate residual skewness, kurtosis, and do the JB test for normality
 104 		"""
 105 
 106 		# Calculate residual skewness and kurtosis
 107 		skew = stats.skew(self.e) 
 108 		kurtosis = 3 + stats.kurtosis(self.e) 
 109 		
 110 		# Calculate the Jarque-Bera test for normality
 111 		JB = (self.nobs/6) * (square(skew) + (1/4)*square(kurtosis-3))
 112 		JBpv = 1-stats.chi2.cdf(JB,2);
 113 
 114 		return JB, JBpv, kurtosis, skew
 115 
 116 	def ll(self):
 117 		"""
 118 		Calculate model log-likelihood and two information criteria
 119 		"""
 120 		
 121 		# Model log-likelihood, AIC, and BIC criterion values 
 122 		ll = -(self.nobs*1/2)*(1+log(2*pi)) - (self.nobs/2)*log(dot(self.e,self.e)/self.nobs)
 123 		aic = -2*ll/self.nobs + (2*self.ncoef/self.nobs)
 124 		bic = -2*ll/self.nobs + (self.ncoef*log(self.nobs))/self.nobs
 125 
 126 		return ll, aic, bic
 127 	
 128 	def summary(self):
 129 		"""
 130 		Printing model output to screen
 131 		"""
 132 
 133 		# local time & date
 134 		t = time.localtime()
 135 
 136 		# extra stats
 137 		ll, aic, bic = self.ll()
 138 		JB, JBpv, skew, kurtosis = self.JB()
 139 		omni, omnipv = self.omni()
 140 
 141 		# printing output to screen
 142 		print '\n=============================================================================='
 143 		print "Dependent Variable: " + self.y_varnm
 144 		print "Method: Least Squares"
 145 		print "Date: ", time.strftime("%a, %d %b %Y",t)
 146 		print "Time: ", time.strftime("%H:%M:%S",t)
 147 		print '# obs:				%5.0f' % self.nobs
 148 		print '# variables:		%5.0f' % self.ncoef 
 149 		print '=============================================================================='
 150 		print 'variable		coefficient		std. Error		t-statistic		prob.'
 151 		print '=============================================================================='
 152 		for i in range(len(self.x_varnm)):
 153 			print '''% -5s			% -5.6f		% -5.6f		% -5.6f		% -5.6f''' % tuple([self.x_varnm[i],self.b[i],self.se[i],self.t[i],self.p[i]]) 
 154 		print '=============================================================================='
 155 		print 'Models stats							Residual stats'
 156 		print '=============================================================================='
 157 		print 'R-squared			% -5.6f			Durbin-Watson stat	% -5.6f' % tuple([self.R2, self.dw()])
 158 		print 'Adjusted R-squared	% -5.6f			Omnibus stat		% -5.6f' % tuple([self.R2adj, omni])
 159 		print 'F-statistic			% -5.6f			Prob(Omnibus stat)	% -5.6f' % tuple([self.F, omnipv])
 160 		print 'Prob (F-statistic)	% -5.6f			JB stat				% -5.6f' % tuple([self.Fpv, JB])
 161 		print 'Log likelihood		% -5.6f			Prob(JB)			% -5.6f' % tuple([ll, JBpv])
 162 		print 'AIC criterion		% -5.6f			Skew				% -5.6f' % tuple([aic, skew])
 163 		print 'BIC criterion		% -5.6f			Kurtosis			% -5.6f' % tuple([bic, kurtosis])
 164 		print '=============================================================================='
 165 
 166 if __name__ == '__main__':
 167 
 168 	##########################
 169 	### testing the ols class
 170 	##########################
 171 
 172 	# creating simulated data and variable labels
 173 	seed(1)
 174 	data =	randn(100,5)			# the data array
 175 
 176 	# intercept is added, by default
 177 	m = ols(data[:,0],data[:,1:],y_varnm = 'y',x_varnm = ['x1','x2','x3','x4'])
 178 	m.summary()
 179 
 180 	# if you have rpy installed, use it to test the results
 181 	have_rpy =	False
 182 	try:
 183 		print "\n"
 184 		print "="*30
 185 		print "Validating OLS results in R"
 186 		print "="*30
 187 
 188 		import rpy
 189 		have_rpy = True
 190 	except ImportError:
 191 		print "\n"
 192 		print "="*30
 193 		print "Validating OLS-class results in R"
 194 		print "="*30
 195 		print "rpy is not installed"
 196 		print "="*30
 197 
 198 	if have_rpy:
 199 		y = data[:,0]
 200 		x1 = data[:,1]
 201 		x2 = data[:,2]
 202 		x3 = data[:,3]
 203 		x4 = data[:,4]
 204 		rpy.set_default_mode(rpy.NO_CONVERSION)
 205 		linear_model = rpy.r.lm(rpy.r("y ~ x1 + x2 + x3 + x4"), data = rpy.r.data_frame(x1=x1,x2=x2,x3=x3,x4=x4,y=y))
 206 		rpy.set_default_mode(rpy.BASIC_CONVERSION)
 207 		print linear_model.as_py()['coefficients']
 208 		summary = rpy.r.summary(linear_model)
 209 		print summary

New Attachment

File to upload
Rename to
Overwrite existing attachment of same name

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.