Compare commits


No commits in common. "fc7f02d62ce60b6a17116e682ae4c5f928276b06" and "c09dc9f7ec2fa3876fc97621b6b956af5f080507" have entirely different histories.

8 changed files with 38 additions and 60236 deletions

.gitignore vendored

@@ -1,7 +1,6 @@
# ---> Python
# Byte-compiled / optimized / DLL files
-*__pycache__/
-*.jukit/
+__pycache__/
*.py[cod]
*$py.class

README.md

@@ -1,3 +1,3 @@
# neural-networks-from-scratch
-Implementing neural networks with only numpy (Well, cupy which is numpy for GPUs)
+Implementing neural networks with only numpy

File diff suppressed because it is too large

src/layer.py Normal file

@@ -0,0 +1,36 @@
import numpy as np
from abc import ABC, abstractmethod


class Layer(ABC):
    @abstractmethod
    def forward(self, x: np.ndarray) -> np.ndarray:
        """
        x: the layer inputs.
        Implementations cache any intermediate values they need
        for the backward pass during this call, so call forward
        right before computing gradients.
        """

    @property
    @abstractmethod
    def parameters(self) -> tuple[np.ndarray, ...]:
        """
        Returns the layer's parameters.
        The ordering is left to each subclass's convenience.
        """

    @parameters.setter
    @abstractmethod
    def parameters(self, parameters: tuple[np.ndarray, ...]) -> None:
        """
        Writes new values to the parameters property.
        """

    @abstractmethod
    def d_output_wrt_parameters(self, inputs: np.ndarray) -> tuple[np.ndarray, ...]:
        pass

    @abstractmethod
    def d_output_wrt_inputs(self) -> np.ndarray:
        pass

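To make the intended use of this new interface concrete, here is a minimal sketch of a subclass. It is not part of either commit: the class name, the import path, and the choice of returning empty tuples for a parameter-free layer are all assumptions about how the abstract API above is meant to be implemented.

import numpy as np
from layer import Layer  # hypothetical import; assumes src/layer.py is importable as "layer"

class ReLUExample(Layer):
    __cached_inputs: np.ndarray

    def forward(self, x: np.ndarray) -> np.ndarray:
        # Cache the inputs so the derivative can be computed later.
        self.__cached_inputs = x
        return np.maximum(0, x)

    @property
    def parameters(self) -> tuple[np.ndarray, ...]:
        # No trainable parameters.
        return ()

    @parameters.setter
    def parameters(self, parameters: tuple[np.ndarray, ...]) -> None:
        # Nothing to write for a parameter-free layer.
        pass

    def d_output_wrt_parameters(self, inputs: np.ndarray) -> tuple[np.ndarray, ...]:
        # No parameters, hence no parameter gradients.
        return ()

    def d_output_wrt_inputs(self) -> np.ndarray:
        # Elementwise ReLU derivative: 1 where the cached input was positive, else 0.
        return (self.__cached_inputs > 0).astype(float)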
layers.py

@@ -1,116 +0,0 @@
import cupy as cp
from abc import ABC, abstractmethod


class Layer(ABC):
    @abstractmethod
    def forward(self, inputs: cp.ndarray) -> cp.ndarray:
        pass

    @property
    @abstractmethod
    def parameters(self) -> list[cp.ndarray] | None:
        pass

    @abstractmethod
    def get_d_loss_wrt_parameters(self, d_loss_wrt_outputs: cp.ndarray) -> cp.ndarray | None:
        pass

    @abstractmethod
    def get_d_loss_wrt_inputs(self, d_loss_wrt_outputs: cp.ndarray) -> cp.ndarray:
        pass

    @abstractmethod
    def backward(self, d_loss_wrt_outputs: cp.ndarray, lr: float = .001) -> cp.ndarray:
        pass

    def __call__(self, inputs: cp.ndarray) -> cp.ndarray:
        return self.forward(inputs)


class Dense(Layer):
    __weights: cp.ndarray
    __biases: cp.ndarray
    __cached_inputs: cp.ndarray
    __cached_outputs: cp.ndarray

    def __init__(self, input_size: cp.ndarray, output_size: cp.ndarray) -> None:
        self.__weights = (cp.random.randn(input_size, output_size)) * cp.sqrt(1 / (output_size + input_size))
        self.__biases = cp.zeros(output_size)

    def forward(self, inputs: cp.ndarray) -> cp.ndarray:
        self.__cached_inputs = inputs
        self.__cached_outputs = cp.dot(inputs, self.__weights) + self.__biases
        return self.__cached_outputs

    def get_d_loss_wrt_inputs(self, d_loss_wrt_outputs: cp.ndarray) -> cp.ndarray:
        return cp.dot(d_loss_wrt_outputs, self.__weights.T)

    @property
    def parameters(self) -> list[cp.ndarray]:
        return [self.__weights, self.__biases]

    def get_d_loss_wrt_parameters(self, d_loss_wrt_outputs: cp.ndarray) -> cp.ndarray:
        d_loss_wrt_weights = cp.dot(self.__cached_inputs.T, d_loss_wrt_outputs)
        d_loss_wrt_biases = cp.sum(d_loss_wrt_outputs)
        return [d_loss_wrt_weights, d_loss_wrt_biases]

    def backward(self, d_loss_wrt_outputs: cp.ndarray, lr: float = 0.001) -> cp.ndarray:
        d_loss_wrt_outputs /= d_loss_wrt_outputs.shape[1]
        d_loss_wrt_inputs = self.get_d_loss_wrt_inputs(d_loss_wrt_outputs)
        d_loss_wrt_weights, d_loss_wrt_biases = self.get_d_loss_wrt_parameters(d_loss_wrt_outputs)
        self.__biases -= lr * d_loss_wrt_biases
        self.__weights -= lr * d_loss_wrt_weights
        return d_loss_wrt_inputs


class ReLU(Layer):
    __cached_inputs: cp.ndarray

    def forward(self, inputs: cp.ndarray) -> cp.ndarray:
        self.__cached_inputs = inputs
        return cp.maximum(0, inputs)

    @property
    def parameters(self) -> list[cp.ndarray] | None:
        pass

    def get_d_loss_wrt_parameters(self, d_loss_wrt_outputs: cp.ndarray) -> cp.ndarray | None:
        pass

    def get_d_loss_wrt_inputs(self, d_loss_wrt_outputs: cp.ndarray) -> cp.ndarray:
        return (self.__cached_inputs > 0) * d_loss_wrt_outputs

    def backward(self, d_loss_wrt_outputs: cp.ndarray, lr: float = 0.01) -> cp.ndarray:
        return self.get_d_loss_wrt_inputs(d_loss_wrt_outputs)


class Softmax(Layer):
    __cached_outputs: cp.ndarray

    def forward(self, inputs: cp.ndarray) -> cp.ndarray:
        self.__cached_outputs = cp.exp(inputs) / (cp.sum(cp.exp(inputs), axis=1).reshape(inputs.shape[0], 1) + 0.001)
        return self.__cached_outputs

    @property
    def parameters(self) -> list[cp.ndarray] | None:
        pass

    def get_d_loss_wrt_parameters(self, d_loss_wrt_outputs: cp.ndarray) -> cp.ndarray | None:
        pass

    def get_d_loss_wrt_inputs(self, d_loss_wrt_outputs: cp.ndarray) -> cp.ndarray:
        return d_loss_wrt_outputs

    def backward(self, d_loss_wrt_outputs: cp.ndarray, lr: float = 0.01) -> cp.ndarray:
        #time.sleep(1)
        #out = self.__cached_outputs
        #jacobian = out.reshape(out.shape[0], 1, out.shape[1]).repeat(2, axis=1)
        #eye = cp.eye(out.shape[1]).reshape((1, out.shape[1], out.shape[1])).repeat(jacobian.shape[0], axis=0)
        #jacobian *= eye
        #jacobian -= cp.dot(out.T, out)
        #d_loss_wrt_outputs =\
        #    cp.matmul(jacobian, d_loss_wrt_outputs.reshape(out.shape[0], out.shape[1], 1)).reshape((out.shape[0], out.shape[1]))
        #return d_loss_wrt_outputs
        return self.get_d_loss_wrt_inputs(d_loss_wrt_outputs)

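A side note on the removed Softmax: it exponentiates the raw inputs and pads the denominator with 0.001. The usual numerically stable formulation subtracts each row's maximum before exponentiating, which avoids overflow and makes the fudge constant unnecessary. A small numpy sketch of that variant, illustrative only and not code from either commit:

import numpy as np

def stable_softmax(inputs: np.ndarray) -> np.ndarray:
    # Shifting by the row maximum does not change the result mathematically,
    # but it keeps np.exp from overflowing on large logits.
    shifted = inputs - np.max(inputs, axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=1, keepdims=True)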
loss.py

@@ -1,13 +0,0 @@
import cupy as cp
from abc import ABC, abstractmethod


class Loss(ABC):
    @abstractmethod
    def d_loss_wrt_inputs(self, outputs: cp.ndarray, targets: cp.ndarray) -> cp.ndarray:
        pass


class CategoricalCrossEntropy(Loss):
    def d_loss_wrt_inputs(self, outputs: cp.ndarray, targets: cp.ndarray) -> cp.ndarray:
        return outputs - targets

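The removed loss class only defines the gradient, and outputs - targets is the gradient of cross-entropy with respect to the pre-softmax inputs, i.e. it treats softmax and cross-entropy as fused (which is why Softmax.backward above passes gradients through unchanged). For reference, a hedged numpy sketch of the loss value itself, assuming outputs are softmax probabilities and targets are one-hot rows:

import numpy as np

def categorical_cross_entropy(outputs: np.ndarray, targets: np.ndarray) -> float:
    # Mean negative log-likelihood of the target class; the epsilon guards against log(0).
    eps = 1e-12
    per_sample = -np.sum(targets * np.log(outputs + eps), axis=1)
    return float(np.mean(per_sample))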

@@ -1 +0,0 @@
import cupy as cp
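This removed module contained only the cupy import. Since the README change above drops the remark that cupy is "numpy for GPUs", a brief illustration of why the two are near drop-in replacements may help. The snippet below is a plain numpy example; the cupy variant noted in the comment is an assumption that a CUDA GPU and the cupy package are available:

import numpy as np   # with cupy installed, "import cupy as np" runs the same code on the GPU

x = np.random.randn(4, 3)
w = np.random.randn(3, 2)
out = np.dot(x, w)   # identical API call in numpy and cupy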


@@ -1,103 +0,0 @@
import time
from layers import Dense, ReLU, Softmax
from loss import CategoricalCrossEntropy
import cupy as cp
import pandas as pd
from tqdm import tqdm

#layers = [
#    Dense(2, 128),
#    ReLU(),
#    Dense(128, 128),
#    ReLU(),
#    Dense(128, 2),
#    Softmax()
#]
##layers = [Dense(2, 2)]
#
#
#def forward(x):
#    for layer in layers:
#        x = layer.forward(x)
#    return x
#
#
#SEQUENCE_LENGTH = 10000
#x1 = cp.random.random_integers(0, 1, SEQUENCE_LENGTH)
#x2 = cp.random.random_integers(0, 1, SEQUENCE_LENGTH)
#x = cp.array([x1, x2]).T
#y = cp.logical_xor(x1, x2)
#yt = cp.array([y == 0, y == 1]).T * 1.0
#
#out = forward(x)
#print(cp.sum(cp.argmax(out, axis=1) == y) * 100 / len(y))
#
#loss_function = CategoricalCrossEntropy()
#
#for i in range(100):
#    #print(f"Epoch{i + 1}")
#    output = forward(x)
#    d_loss = loss_function.d_loss_wrt_inputs(output, yt)
#    for layer in layers[::-1]:
#        d_loss = layer.backward(d_loss)
#
#out = forward(x)
#print(cp.sum(cp.argmax(out, axis=1) == y) * 100 / len(y))

training_data_frame = pd.read_csv("./mnist_train.csv")
training_data_frame.head()

training_data = cp.array(training_data_frame)
y_train = training_data[:, 0]
x_train: cp.ndarray = training_data[:, 1:]
x_train = x_train.astype(float)
x_train /= cp.argmax(x_train)

del training_data
del training_data_frame

y_train_one_hot = cp.zeros(
    (y_train.size, y_train.max().item() + 1)
)
y_train_one_hot[cp.arange(y_train.size), y_train] = 1

layers = [
    Dense(784, 128),
    ReLU(),
    Dense(128, 128),
    ReLU(),
    Dense(128, 10),
    Softmax()
]


def forward(x):
    for layer in layers:
        x = layer.forward(x)
    return x


out = forward(x_train)
print(cp.sum(cp.argmax(out, axis=1) == y_train) * 100 / len(y_train))

loss_function = CategoricalCrossEntropy()

for i in range(99999):
    output = forward(x_train)
    time.sleep(1)
    print(f"Epoch {i}:")
    print(cp.sum(cp.argmax(output, axis=1) == y_train) * 100 / len(y_train))
    d_loss = loss_function.d_loss_wrt_inputs(output, y_train_one_hot)
    for layer in layers[::-1]:
        d_loss = layer.backward(d_loss, lr=0.0001)

out = forward(x_train)
print(cp.sum(cp.argmax(out, axis=1) == y_train) * 100 / len(y_train))
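Two idioms from the removed training script deserve a compact illustration: the fancy-indexing one-hot encoding of the labels and the argmax-based accuracy check. The numpy sketch below is illustrative only and uses toy data rather than MNIST:

import numpy as np

y = np.array([2, 0, 1, 2])                      # toy labels standing in for the MNIST label column

one_hot = np.zeros((y.size, y.max() + 1))       # one row per sample, one column per class
one_hot[np.arange(y.size), y] = 1               # fancy indexing sets the target class to 1

predictions = np.array([[0.1, 0.2, 0.7],
                        [0.8, 0.1, 0.1],
                        [0.2, 0.5, 0.3],
                        [0.3, 0.3, 0.4]])
accuracy = np.sum(np.argmax(predictions, axis=1) == y) * 100 / len(y)
print(accuracy)                                 # 100.0 on this toy example

One detail worth flagging: the script normalizes with x_train /= cp.argmax(x_train), which divides by the index of the largest pixel rather than its value; dividing by x_train.max() (or the known pixel maximum 255.0) is presumably what was intended.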