-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfromClass-regression.py
More file actions
90 lines (57 loc) · 1.98 KB
/
fromClass-regression.py
File metadata and controls
90 lines (57 loc) · 1.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
'''
Author
Date
'''
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Ordinary least-squares fit: solves the normal equations for the weight vector
def standRegres(xArr, yArr):
    """Fit ordinary least-squares weights by solving the normal equations.

    Solves ``(X.T * X) * ws = X.T * y`` for ``ws``.

    Args:
        xArr: 2-D array-like of shape (n_samples, n_features). Include a
            column of ones if an intercept term is wanted.
        yArr: Targets, either as an (n_samples, k) array-like or as a flat
            sequence of n_samples values (treated as a single column).

    Returns:
        An ``np.matrix`` of shape (n_features, k) holding the weights, or
        ``None`` (after printing a message) when ``X.T * X`` is singular.
    """
    # np.mat was removed in NumPy 2.0; np.asmatrix performs the same conversion.
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr)

    # A flat y is converted to a 1 x n row; turn it into the n x 1 column
    # that the product X.T * y requires.
    if yMat.shape[0] != xMat.shape[0] and yMat.shape[1] == xMat.shape[0]:
        yMat = yMat.T

    xTx = xMat.T * xMat

    # Rank test rather than `det == 0.0`: round-off can leave the determinant
    # of a singular matrix slightly away from exactly zero.
    if np.linalg.matrix_rank(xTx) < xTx.shape[0]:
        print("This matrix is singular, cannot do inverse")
        return None

    # Solve the linear system directly instead of forming the explicit inverse;
    # wrap the result so callers keep receiving an np.matrix.
    ws = np.asmatrix(np.linalg.solve(xTx, xMat.T * yMat))
    return ws
# Residual sum of squares (RSS) between observed and predicted values
def rssError(yArr, yHatArr):  # both arguments are expected to be arrays
    """Return the residual sum of squares between observations and predictions.

    Args:
        yArr: Array of observed values.
        yHatArr: Array of predicted values, compatible in shape with ``yArr``.

    Returns:
        The sum of the squared differences ``yArr - yHatArr``.
    """
    residuals = yArr - yHatArr
    squared = residuals ** 2
    return squared.sum()
# Total-sum-of-squares term used as the denominator of R-squared
def tssError(yArr, yHatArr):  # yArr needs to be an array
    """Return the total sum of squares of the observed values.

    The total sum of squares measures the spread of the observations around
    their own mean, so it is computed from ``yArr`` alone.

    Args:
        yArr: Array of observed values.
        yHatArr: Array of predicted values. Unused; kept so the signature
            stays parallel to ``rssError`` and existing callers keep working.

    Returns:
        The sum of squared deviations of ``yArr`` from ``yArr.mean()``.
    """
    # Centre on the mean of the observations, not of the predictions: the two
    # differ whenever predictions are biased (e.g. on held-out data).
    centred = yArr - yArr.mean()
    return (centred ** 2).sum()
# Coefficient of determination (R-squared): 1 - RSS / TSS
def Rsquare(yArr, yHatArr):  # both arguments are expected to be arrays
    """Return the coefficient of determination for a set of predictions.

    Args:
        yArr: Array of observed values.
        yHatArr: Array of predicted values.

    Returns:
        ``1 - rssError(yArr, yHatArr) / tssError(yArr, yHatArr)``.
    """
    unexplained = rssError(yArr, yHatArr)
    total = tssError(yArr, yHatArr)
    ratio = unexplained / total
    return 1 - ratio
# Load the Palmer Penguins dataset directly from GitHub (requires network access).
penguins = pd.read_csv("https://raw.githubusercontent.com/allisonhorst/palmerpenguins/master/inst/extdata/penguins.csv",delimiter=",")

# Choose two variables from the dataset.
# '...' is a placeholder: replace each one with a real column name before
# running. They are defined once here so every later selection stays in sync.
FEATURE_COL = '...'  # predictor (x) column
TARGET_COL = '...'   # response (y) column

# Keep only complete rows, then add the constant column X0 = 1 so the fitted
# weights include an intercept term.
lmPenguins = penguins[[FEATURE_COL, TARGET_COL]].dropna(axis=0).assign(X0=1)

# Shuffle dataframe using sample function
lmPenguins = lmPenguins.sample(frac=1)

# Split the shuffled rows 75% / 25% into training and test sets.
ratio = 0.75
total_rows = lmPenguins.shape[0]
train_size = int(total_rows*ratio)
train = lmPenguins[0:train_size]
test = lmPenguins[train_size:]

X_train = train[['X0', FEATURE_COL]].to_numpy()
y_train = train[[TARGET_COL]].to_numpy()
X_test = test[['X0', FEATURE_COL]].to_numpy()
y_test = test[[TARGET_COL]].to_numpy()

# Fit on the training data only.
beta = standRegres(X_train, y_train)
if beta is None:
    # standRegres has already printed why; nothing below can run without weights.
    raise SystemExit("Regression could not be fitted (singular design matrix).")

# beta is an np.matrix, so `*` below is a matrix product.
y_hat = X_train * beta
r2_train = Rsquare(y_train[:,0], y_hat[:,0].flatten().A1)
print(f"Training R^2: {r2_train:.4f}")

y_test_hat = X_test * beta
r2_test = Rsquare(y_test[:,0], y_test_hat[:,0].flatten().A1)
print(f"Test R^2: {r2_test:.4f}")

# Code to display a graph of the training data and the computed regression line
# You do not need to do anything with the following code.
# I provide it so you can see the line you computed
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(X_train[:, 1], y_train[:, 0], s=2, c='red')
ax.plot(X_train[:, 1], y_hat[:,0])
plt.title("'Predictions' Using Training Data (i.e. Error In)")
plt.show()