# Import necessary libraries
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt
import seaborn as sns
# Seed NumPy's global RNG so the samples below are reproducible
# (random.seed would seed Python's random module, not np.random)
np.random.seed(42)
X = np.random.uniform(0,1,20)
print(X)
# Check that all values are within the given interval:
print(np.all(X >= 0))
print(np.all(X < 1))
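# A minimal side sketch (illustrative only, not used below): newer NumPy code
# usually draws samples through the Generator API instead of the legacy global
# state. X_alt is a hypothetical name introduced just for this example.
rng = np.random.default_rng(42)
X_alt = rng.uniform(0, 1, 20)
print(np.all((X_alt >= 0) & (X_alt < 1)))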
# Display the histogram of the samples, along with the probability density function:
# (normed= was removed from matplotlib; density= is the current argument)
count, bins, ignored = plt.hist(X, 15, density=True)
plt.plot(bins, np.ones_like(bins), linewidth=2, color='r')
plt.show()
np.random.seed(42)
mu, sigma = 0, 1 # mean and standard deviation
N = np.random.normal(mu, sigma, 20)
print(N)
# Display the histogram of the samples, along with the probability density function:
count, bins, ignored = plt.hist(N, 30, density=True)
plt.plot(bins, 1/(sigma * np.sqrt(2 * np.pi)) * np.exp(-(bins - mu)**2 / (2 * sigma**2)),
         linewidth=2, color='r')
plt.show()
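# The same density curve can come from scipy.stats.norm (imported at the top);
# a minimal equivalent sketch, not part of the original figure:
plt.hist(N, 30, density=True)
plt.plot(bins, norm.pdf(bins, loc=mu, scale=sigma), linewidth=2, color='g')
plt.show()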
y=np.sin(2*np.pi*X)+N
print(y)
print(X)
plt.scatter(X,y)
plt.xlabel('X')
plt.ylabel('y=sin(2*pi*X)+N')
plt.show()
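# For visual reference (an illustrative aside), the noiseless target curve
# sin(2*pi*x) can be overlaid on the noisy sample:
xs = np.linspace(0, 1, 200)
plt.scatter(X, y, label='noisy sample')
plt.plot(xs, np.sin(2 * np.pi * xs), color='g', label='sin(2*pi*x)')
plt.legend()
plt.show()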
# Showing X and y values in a data frame
import pandas as pd
df = pd.DataFrame({'x':X, 'y':y})
print(df)
# Divide the data into training and test sets (10 pairs each)
df_train = df[0:10]
print(df_train)
X_train = df_train.iloc[:, 0].values
print(X_train)
y_train = df_train.iloc[:, 1].values
print(y_train)
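# A minimal alternative sketch: scikit-learn's train_test_split produces the
# same kind of split (shuffled, unlike the positional slice above). The names
# X_tr/X_te/y_tr/y_te are hypothetical and unused below.
from sklearn.model_selection import train_test_split
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=10, random_state=42)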
from sklearn.linear_model import LinearRegression
# transforming the data to include another axis
x = X_train[:, np.newaxis]
y = y_train[:, np.newaxis]
model = LinearRegression()
model.fit(x, y)
y_pred = model.predict(x)
plt.scatter(x, y, s=10)
plt.plot(x, y_pred, color='r')
plt.show()
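# The fitted parameters can be inspected directly (illustrative aside):
print("slope:", model.coef_.ravel(), "intercept:", model.intercept_)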
import operator
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures
np.random.seed(0)
polynomial_features = PolynomialFeatures(degree=0)
x_poly = polynomial_features.fit_transform(x)
model0 = LinearRegression()
model0.fit(x_poly, y)
y_poly_pred = model0.predict(x_poly)
rmse = np.sqrt(mean_squared_error(y,y_poly_pred))
r2 = r2_score(y,y_poly_pred)
print("RMSE:",rmse)
print("R2:",r2)
print('weights:')
print(model0.coef_)
plt.scatter(x, y, s=10)
# sort the values of x before the line plot (separate names keep x in its original order)
sort_axis = operator.itemgetter(0)
sorted_zip = sorted(zip(x, y_poly_pred), key=sort_axis)
x_sorted, y_pred_sorted = zip(*sorted_zip)
plt.plot(x_sorted, y_pred_sorted, color='m')
plt.title('Polynomial order M=0')
plt.xlabel('X')
plt.ylabel('y=sin(2*pi*X)+N')
plt.show()
polynomial_features = PolynomialFeatures(degree=1)
x_poly = polynomial_features.fit_transform(x)
model1 = LinearRegression()
model1.fit(x_poly, y)
y_poly_pred = model1.predict(x_poly)
rmse = np.sqrt(mean_squared_error(y,y_poly_pred))
r2 = r2_score(y,y_poly_pred)
print("RMSE:",rmse)
print("R2:",r2)
print(model1.coef_)
plt.scatter(x, y, s=10)
# sort the values of x before the line plot
sort_axis = operator.itemgetter(0)
sorted_zip = sorted(zip(x, y_poly_pred), key=sort_axis)
x_sorted, y_pred_sorted = zip(*sorted_zip)
plt.plot(x_sorted, y_pred_sorted, color='m')
plt.title('Polynomial order M=1')
plt.xlabel('X')
plt.ylabel('y=sin(2*pi*X)+N')
plt.show()
polynomial_features = PolynomialFeatures(degree=3)
x_poly = polynomial_features.fit_transform(x)
model3 = LinearRegression()
model3.fit(x_poly, y)
y_poly_pred = model3.predict(x_poly)
rmse = np.sqrt(mean_squared_error(y,y_poly_pred))
r2 = r2_score(y,y_poly_pred)
print("RMSE:",rmse)
print("R2:",r2)
print(model3.coef_)
plt.scatter(x, y, s=10)
# sort the values of x before the line plot
sort_axis = operator.itemgetter(0)
sorted_zip = sorted(zip(x, y_poly_pred), key=sort_axis)
x_sorted, y_pred_sorted = zip(*sorted_zip)
plt.plot(x_sorted, y_pred_sorted, color='m')
plt.title('Polynomial order M=3')
plt.xlabel('X')
plt.ylabel('y=sin(2*pi*X)+N')
plt.show()
polynomial_features = PolynomialFeatures(degree=9)
x_poly = polynomial_features.fit_transform(x)
model9 = LinearRegression()
model9.fit(x_poly, y)
y_poly_pred = model9.predict(x_poly)
rmse = np.sqrt(mean_squared_error(y,y_poly_pred))
r2 = r2_score(y,y_poly_pred)
print("RMSE:",rmse)
print("R2:",r2)
print(model9.coef_)
plt.scatter(x, y, s=10)
# sort the values of x before the line plot
sort_axis = operator.itemgetter(0)
sorted_zip = sorted(zip(x, y_poly_pred), key=sort_axis)
x_sorted, y_pred_sorted = zip(*sorted_zip)
plt.plot(x_sorted, y_pred_sorted, color='m')
plt.title('Polynomial order M=9')
plt.xlabel('X')
plt.ylabel('y=sin(2*pi*X)+N')
plt.show()
# Print the coefficients for M=0,1,3,9
print(model0.coef_)
print(model1.coef_)
print(model3.coef_)
print(model9.coef_)
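# The four fits above repeat one pattern; a minimal helper sketch using
# scikit-learn's Pipeline (illustrative only, fit_poly is a hypothetical name):
from sklearn.pipeline import make_pipeline

def fit_poly(degree, x, y):
    # Chain the feature expansion and the linear fit into a single estimator
    pipe = make_pipeline(PolynomialFeatures(degree=degree), LinearRegression())
    pipe.fit(x, y)
    return pipe

# e.g. fit_poly(3, x, y).predict(x) reproduces the M=3 predictions above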
# Initialise the weight table (values taken from the coefficient printouts above;
# blank entries are NaN)
data = {'M=0': [0] + [np.nan] * 9,
        'M=1': [0, -0.39012766] + [np.nan] * 8,
        'M=3': [0, -0.72768993, 2.11420232, -1.98634909] + [np.nan] * 6,
        'M=9': [0, -32.42123625, 630.43839794, -4652.54557797, 16038.92928046,
                -26616.73893602, 16020.44924781, 9291.87084083, -16637.65286273,
                5956.51875661]}
print(data)
# Create a pandas DataFrame of the weights.
df_w = pd.DataFrame(data, index=['w0', 'w1', 'w2', 'w3', 'w4', 'w5', 'w6', 'w7', 'w8', 'w9'])
# print the data
print(df_w)
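# Illustrative alternative sketch: the same table can be assembled directly from
# the fitted models (coef_ excludes the intercept, matching the zeros above).
models = {'M=0': model0, 'M=1': model1, 'M=3': model3, 'M=9': model9}
coef_table = pd.DataFrame({k: pd.Series(m.coef_.ravel()) for k, m in models.items()})
coef_table.index = ['w' + str(i) for i in range(len(coef_table))]
print(coef_table)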
# Compute the training error for each order M
train_rmse = []
for m in range(0, 10):
    polynomial_features = PolynomialFeatures(degree=m)
    x_poly = polynomial_features.fit_transform(x)
    model_m = LinearRegression()
    model_m.fit(x_poly, y)
    y_poly_pred = model_m.predict(x_poly)
    rmse = np.sqrt(mean_squared_error(y, y_poly_pred))
    train_rmse.append(rmse)
    print("M =", m, "train RMSE:", rmse)
# Now prepare the test set (the remaining 10 pairs)
df_test = df[10:20]
print(df_test)
Xt = df_test.iloc[:, 0].values
print(Xt)
yt = df_test.iloc[:, 1].values
print(yt)
# transforming the test data to include another axis
x_test = Xt[:, np.newaxis]
y_test = yt[:, np.newaxis]
# Compute the test error: evaluate each order's train-fitted model on the test set
test_rmse = []
for m in range(0, 10):
    polynomial_features = PolynomialFeatures(degree=m)
    x_poly_train = polynomial_features.fit_transform(x)
    x_poly_test = polynomial_features.transform(x_test)
    model_m = LinearRegression()
    model_m.fit(x_poly_train, y)
    y_test_pred = model_m.predict(x_poly_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))
    test_rmse.append(rmse)
    print("M =", m, "test RMSE:", rmse)
# initialise the recorded RMSE values
data1 = {'M': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
         'Train.RMSE': [1.2717577098315476, 1.2516610787428275, 1.2236087750763132,
                        0.9725937278786453, 0.9714749747422678, 0.8162029262456223,
                        0.48995687937025406, 0.2453508474627798, 0.2096583868768381,
                        2.8577523264996737e-07],
         'Test.RMSE': [1.2533118179082696, 8.457231124323202e-16, 5.091044805060287e-16,
                       1.3001412665426294e-15, 1.464484751053976e-15, 2.2056710172663047e-15,
                       2.3138030711294517e-14, 3.673626791684377e-14, 1.0823941433404824e-13,
                       3.2837446224769394e-14]}
print(data1)
# Create a pandas DataFrame.
df1 = pd.DataFrame(data1)
# print the data
print(df1)
# Plot train vs test error
plt.plot( 'M', 'Train.RMSE', data=df1, marker='o', markerfacecolor='red', markersize=12, color='skyblue', linewidth=4)
plt.plot( 'M', 'Test.RMSE', data=df1, marker='o',markerfacecolor='blue',markersize=12, color='olive', linewidth=4)
plt.legend()
plt.ylabel('RMSE')
plt.xlabel('M')
plt.ylim((0,2))
plt.xlim((0,9))
plt.show()
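# Follow-up sketch (illustrative only): read the order with the lowest recorded
# test RMSE straight off the table.
best_M = df1.loc[df1['Test.RMSE'].idxmin(), 'M']
print("M with lowest recorded test RMSE:", best_M)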
np.random.seed(42)
X2 = np.random.uniform(0,1,120)
print(X2)
mu, sigma = 0, 1 # mean and standard deviation
N = np.random.normal(mu, sigma, 120)
print(N)
y2=np.sin(2*np.pi*X2)+N
print(y2)
print(X2)
# Fit a 9th-order model on the larger sample and draw the fit
# transforming the data to include another axis
x2 = X2[:, np.newaxis]
y2 = y2[:, np.newaxis]
polynomial_features = PolynomialFeatures(degree=9)
x2_poly = polynomial_features.fit_transform(x2)
model = LinearRegression()
model.fit(x2_poly, y2)
y2_poly_pred = model.predict(x2_poly)
rmse = np.sqrt(mean_squared_error(y2,y2_poly_pred))
r2 = r2_score(y2,y2_poly_pred)
print("RMSE:",rmse)
print("R2:",r2)
plt.scatter(x2, y2, s=10)
# sort the values of x before the line plot
sort_axis = operator.itemgetter(0)
sorted_zip = sorted(zip(x2, y2_poly_pred), key=sort_axis)
x2_sorted, y2_pred_sorted = zip(*sorted_zip)
plt.plot(x2_sorted, y2_pred_sorted, color='m')
plt.title('Polynomial order M=9 (120 samples)')
plt.xlabel('X')
plt.ylabel('y=sin(2*pi*X)+N')
plt.show()
import math
from sklearn.linear_model import Ridge
# Degree-9 feature map shared by the ridge helpers below
polynomial_features = PolynomialFeatures(degree=9)

def regularizeRidge(alpha):
    # Negative inputs are interpreted as ln(lambda) and exponentiated
    if alpha < 0:
        alpha = math.exp(alpha)
    else:
        print("alpha = ", alpha)
        if alpha != 0:
            print("ln(alpha) = ", math.log(alpha))
    x_transformed = polynomial_features.fit_transform(X2.reshape(120, 1))
    poly_linear_model = Ridge(alpha=alpha)
    poly_linear_model.fit(x_transformed, y2)
    return poly_linear_model

def chartRidge(alpha):
    model = regularizeRidge(alpha)
    xx = np.linspace(0, 1, 120)
    x_transformed = polynomial_features.fit_transform(xx.reshape(120, 1))
    yy = model.predict(x_transformed)
    plt.plot(xx, yy, label=alpha)
    plt.scatter(X_train, y_train)
    plt.scatter(Xt, yt, c='r')
    plt.legend()
    plt.show()
# When lambda=0, the chart is below
chartRidge(0)
# When lambda=0.10, the chart is below
chartRidge(0.1)
# When lambda=0.01, the chart is below
chartRidge(0.01)
# When lambda=0.001, the chart is below
chartRidge(0.001)
# When lambda=0.0001, the chart is below
chartRidge(0.0001)
# When lambda=10, the chart is below
chartRidge(10)
# When lambda=100, the chart is below
chartRidge(100)
# When lambda=1000, the chart is below
chartRidge(1000)
# When lambda=10000, the chart is below
chartRidge(10000)
import tensorflow as tf

def getMse(Y, yy):
    # Mean squared error via TensorFlow; flatten both inputs so a (n, 1)
    # prediction does not broadcast against a (n,) target
    standard = tf.square(np.ravel(Y) - np.ravel(yy))
    mse = tf.reduce_mean(standard)
    return mse.numpy()
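# A NumPy-only equivalent sketch (illustrative), avoiding the TensorFlow
# dependency; getMseNumpy is a hypothetical name:
def getMseNumpy(Y, yy):
    return float(np.mean((np.ravel(Y) - np.ravel(yy)) ** 2))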
train_error_ridge = np.zeros(30)
test_error_ridge = np.zeros(30)

def getErrorRidge(i: int, model):  # record train/test error for ln(lambda) = i
    xx_transformed_test = polynomial_features.fit_transform(Xt.reshape(Xt.shape[0], 1))
    xx_transformed_train = polynomial_features.fit_transform(X_train.reshape(X_train.shape[0], 1))
    yy_test = model.predict(xx_transformed_test)
    yy_train = model.predict(xx_transformed_train)
    # i runs from -30 to -1, so negative indexing fills positions 0..29 in order
    test_error_ridge[i] = getMse(yt, yy_test)
    train_error_ridge[i] = getMse(y_train, yy_train)

xx = list(range(-30, 0))
for i in xx:
    model = regularizeRidge(i)
    getErrorRidge(i, model)
plt.plot(xx, test_error_ridge, label="$test-error$", c='y')
plt.plot(xx, train_error_ridge, label="$train-error$", c='r')
plt.xlabel('ln(lambda)')
plt.ylabel('Error')
plt.legend()
plt.show()
# Now let's find the best lambda
best_lambda = 0
for i in range(-30, 0):
    if test_error_ridge[i + 30] == test_error_ridge.min():
        best_lambda = i
print("best ln(lambda) based on my analysis = ", best_lambda)
best_lambda_0 = math.exp(best_lambda)
print("best lambda = ", best_lambda_0)
print("In conclusion, the model with input ln(lambda) = ", best_lambda,
      ", lambda = ", best_lambda_0, " will provide the best test performance.")