Orhan Yavuz

Data Scientist & Software Engineer

NeuralNetwork

import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import confusion_matrix
# Shared one-hot encoder; it is used further down to turn the integer class
# labels into one-hot rows for the training and test targets.
encoder=OneHotEncoder()

Split Into Testing And Training

The first step is to split the data into testing and training sets.

We will work with the famous Iris dataset, loaded from sklearn.

# Load the Iris predictors and integer class labels as NumPy arrays.
iris = datasets.load_iris()
data = np.array(iris['data'])
target = np.array(iris['target'])

# Hold out 33% of the observations for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.33, random_state=10)

# One-hot encode the labels. The encoder is fitted on the training labels only
# and then reused (transform, not fit_transform) for the test labels, so both
# sets share the same column-to-class mapping even if a class were missing
# from the test split. toarray() converts the encoder output to a dense array.
y_train = encoder.fit_transform(y_train.reshape(-1, 1)).toarray()
y_test = encoder.transform(y_test.reshape(-1, 1)).toarray()

Scale the Predictors

Because the sigmoid activation saturates for large inputs, we scale the predictors so that their values lie between 0 and 1.
We can do this by scaling column-wise: each feature column is divided by its maximum value, for both sets.

def scale_col(c, col_max=None):
    """Divide ``c`` by ``col_max``.

    When ``col_max`` is omitted, ``c`` is divided by ``np.max(c)``, which
    matches the original one-argument behaviour for a single column.
    """
    return c / (np.max(c) if col_max is None else col_max)


# The per-column maxima are taken from the training predictors only and reused
# for the test predictors, so both sets end up on the same scale. Scaling the
# test set by its own maxima would feed the network differently scaled inputs
# than the ones it was trained on.
train_col_max = np.max(X_train, axis=0)

X_test = scale_col(X_test, train_col_max)
X_train = scale_col(X_train, train_col_max)

Activation Function

Our activation function is sigmoid.

def sigmoid(x):
    """Return the logistic function 1 / (1 + e^(-x)), evaluated element-wise."""
    denominator = 1.0 + np.exp(-x)
    return 1.0 / denominator

Forward Propagation

First Hidden Layer

# Weight matrix and bias vector of the first layer (4 inputs -> 3 units),
# drawn uniformly at random.
W1=np.random.rand(4,3)
B1=np.random.rand(3,1)
# Column of ones with one row per training observation; J@B1.T repeats the
# bias row for every observation. The row count is inferred (-1) instead of
# being hard-coded to 100, so a different split size still works.
J=np.ones(len(X_train)).reshape(-1,1)
Z2=(X_train@W1)+(J@B1.T)
# Activation of the first layer
A2=sigmoid(Z2)

Output Layer

# Weight matrix and bias vector of the second layer (3 units -> 3 outputs),
# drawn uniformly at random.
W2=np.random.rand(3,3)
B2=np.random.rand(3,1)
Z3=(A2@W2)+(J@B2.T)
# The prediction is the activation of this layer's own pre-activation Z3
# (the original applied the sigmoid to Z2, which ignored W2 and B2 entirely).
y_hat=sigmoid(Z3)

Back Propagation

def cost(y,yh):
    """Return half the sum of squared differences between ``y`` and ``yh``."""
    residual = y - yh
    squared_error = np.sum(residual**2)
    return 0.5*squared_error

def sigmoidprime(x):
    """Return the derivative of the sigmoid at ``x``: s(x) * (1 - s(x))."""
    s = sigmoid(x)
    return s*(1-s)

# Error signal at the output: derivative of the squared-error cost with
# respect to Z3.
delta3=(-(y_train-y_hat))*sigmoidprime(Z3)
# Error signal propagated back through W2 to the first layer's pre-activation.
delta2=(delta3@W2.T)*sigmoidprime(Z2)
# Gradients with respect to the weights.
djdw2=A2.T@delta3
djdw1=X_train.T@delta2
# Gradients with respect to the biases: sum the error signal over all
# observations. They are reshaped to column vectors so they match the (3,1)
# shape of B1/B2; a flat (3,) gradient would broadcast B - gradient to (3,3).
djdb2=delta3.sum(axis=0).reshape(-1,1)
djdb1=delta2.sum(axis=0).reshape(-1,1)

Training

# Size of the training set (number of rows in X_train)
n = X_train.shape[0]
# How many gradient-descent iterations to run
N = 10_000
# Step size that multiplies every gradient in the update step
gamma = 0.15

# Fresh uniform-random starting parameters, drawn in the same order as
# before: first-layer weights and biases, then second-layer weights and biases.
W1, B1 = np.random.rand(4, 3), np.random.rand(3, 1)
W2, B2 = np.random.rand(3, 3), np.random.rand(3, 1)

# Column of ones with one row per training observation, used to repeat the
# bias row for every observation. It is rebuilt here from n so the loop does
# not depend on whatever J an earlier (or later) cell left behind.
J=np.ones((n,1))

#Start Training!
# costs records the cost before each update, one entry per iteration.
costs=[]
for _ in range(N):
    # Forward pass: predicted y values with the current weights.
    Z2=(X_train@W1)+(J@(B1.T))
    A2=sigmoid(Z2)
    Z3=(A2@W2)+(J@B2.T)
    y_hat=sigmoid(Z3)
    costs.append(cost(y_train,y_hat))
    # Backward pass: gradient of the cost with the current weights.
    delta3=(-(y_train-y_hat))*sigmoidprime(Z3)
    delta2=(delta3@W2.T)*sigmoidprime(Z2)
    djdw2=A2.T@delta3
    # Bias gradients: sum over observations, as a column vector like B1/B2.
    djdb2=delta3.sum(axis=0).reshape(-1,1)
    djdw1=X_train.T@delta2
    djdb1=delta2.sum(axis=0).reshape(-1,1)
    # Gradient-descent step: move each parameter against its gradient,
    # scaled by gamma.
    W1=W1-djdw1*gamma
    W2=W2-djdw2*gamma
    B1=B1-djdb1*gamma
    B2=B2-djdb2*gamma

Testing our Model

# Column of ones with one row per test observation (repeats the bias row).
J=np.ones(len(X_test)).reshape(-1,1)
# Forward pass through the trained network on the test predictors.
Z2=(X_test@W1)+(J@(B1.T))
A2=sigmoid(Z2)
Z3=(A2@W2)+(J@B2.T)
y_hat=sigmoid(Z3)
# Round to two decimals, then mark every output above 0.5 as 1 and the rest as 0.
# The builtin int replaces the np.int alias, which was removed in NumPy 1.24
# and raises AttributeError there.
y_hat=np.array((np.round(y_hat,decimals=2)>0.5),dtype=int)
y_hat[:5]

array([[0, 1, 0],
       [0, 0, 1],
       [1, 0, 0],
       [0, 1, 0],
       [1, 0, 0]])

Confusion Matrix

# argmax along each row turns the one-hot / thresholded rows back into class
# indices; the true labels are passed first, the predictions second.
confusion_matrix(np.argmax(y_test, axis=1), np.argmax(y_hat, axis=1))

array([[15,  0,  0],
       [ 0, 19,  0],
       [ 0,  2, 14]], dtype=int64)