Compare commits

...

10 Commits

Author SHA1 Message Date
fc7f02d62c Finished 2024-08-26 18:19:33 +05:30
a5de0ca7da Implement softmax layer 2024-05-09 13:21:34 +05:30
9b4f3073f3 Implement SDG optimizer 2024-05-09 13:01:12 +05:30
98b3b4e18a Changed parameter type from tuple to list in layer.py 2024-05-09 13:00:54 +05:30
3fe69b3869 Clean names in loss.py 2024-05-09 12:31:03 +05:30
b6cd28db7c Implement CrossEntropyLoss 2024-05-09 12:29:35 +05:30
db5761cf4e Implement loss 2024-05-09 12:15:20 +05:30
40601130e0 Implement Sequential Network 2024-05-09 11:55:18 +05:30
06c981253c Implement ReLU layer 2024-05-08 18:49:39 +05:30
2188edacab Impliment Dense layer class 2024-05-08 16:39:36 +05:30
8 changed files with 60236 additions and 38 deletions

3
.gitignore vendored

@@ -1,6 +1,7 @@
 # ---> Python
 # Byte-compiled / optimized / DLL files
-__pycache__/
+*__pycache__/
+*.jukit/
 *.py[cod]
 *$py.class

README.md

@@ -1,3 +1,3 @@
 # neural-networks-from-scratch
-Implementing neural networks with only numpy
+Implementing neural networks with only numpy (Well, cupy which is numpy for GPUs)

60000
mnist_train.csv Normal file

File diff suppressed because it is too large
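The suppressed file is assumed to be the usual MNIST CSV export: 60,000 rows, each holding a digit label in the first column followed by 784 pixel intensities, which is how src/test.py below slices it. A quick sanity check under that assumption (not part of this diff):

# Minimal sketch to confirm the assumed CSV layout.
import pandas as pd

df = pd.read_csv("./mnist_train.csv")
print(df.shape)              # expected (60000, 785): label column + 28*28 pixel columns
print(df.iloc[0, 0])         # first column is assumed to hold the digit label (0-9)
print(df.iloc[0, 1:].max())  # pixel intensities are expected to lie in 0-255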

layer.py (deleted)

@@ -1,36 +0,0 @@
import numpy as np
from abc import ABC, abstractmethod


class Layer(ABC):
    @abstractmethod
    def forward(self, x: np.ndarray) -> np.ndarray:
        """
        x = inputs
        if should_cache = True,
        additional caching will be done.
        Set this to true and then call forward right before calling backward
        """

    @property
    @abstractmethod
    def parameters(self) -> tuple[np.ndarray, ...]:
        """
        Returns the different parameters.
        The order is defined as per the sub class's convenience
        """

    @parameters.setter
    @abstractmethod
    def parameters(self, parameters: tuple[np.ndarray, ...]) -> None:
        """
        Write to parameters property
        """

    @abstractmethod
    def d_output_wrt_parameters(self, inputs: np.ndarray) -> tuple[np.ndarray, ...]:
        pass

    @abstractmethod
    def d_output_wrt_inputs(self) -> np.ndarray:
        pass

116
src/layers.py Normal file

@@ -0,0 +1,116 @@
import cupy as cp
from abc import ABC, abstractmethod


class Layer(ABC):
    @abstractmethod
    def forward(self, inputs: cp.ndarray) -> cp.ndarray:
        pass

    @property
    @abstractmethod
    def parameters(self) -> list[cp.ndarray] | None:
        pass

    @abstractmethod
    def get_d_loss_wrt_parameters(self, d_loss_wrt_outputs: cp.ndarray) -> list[cp.ndarray] | None:
        pass

    @abstractmethod
    def get_d_loss_wrt_inputs(self, d_loss_wrt_outputs: cp.ndarray) -> cp.ndarray:
        pass

    @abstractmethod
    def backward(self, d_loss_wrt_outputs: cp.ndarray, lr: float = 0.001) -> cp.ndarray:
        pass

    def __call__(self, inputs: cp.ndarray) -> cp.ndarray:
        return self.forward(inputs)


class Dense(Layer):
    __weights: cp.ndarray
    __biases: cp.ndarray
    __cached_inputs: cp.ndarray
    __cached_outputs: cp.ndarray

    def __init__(self, input_size: int, output_size: int) -> None:
        # Xavier-style initialisation keeps the initial activations at a reasonable scale.
        self.__weights = cp.random.randn(input_size, output_size) * cp.sqrt(1 / (output_size + input_size))
        self.__biases = cp.zeros(output_size)

    def forward(self, inputs: cp.ndarray) -> cp.ndarray:
        self.__cached_inputs = inputs
        self.__cached_outputs = cp.dot(inputs, self.__weights) + self.__biases
        return self.__cached_outputs

    def get_d_loss_wrt_inputs(self, d_loss_wrt_outputs: cp.ndarray) -> cp.ndarray:
        return cp.dot(d_loss_wrt_outputs, self.__weights.T)

    @property
    def parameters(self) -> list[cp.ndarray]:
        return [self.__weights, self.__biases]

    def get_d_loss_wrt_parameters(self, d_loss_wrt_outputs: cp.ndarray) -> list[cp.ndarray]:
        d_loss_wrt_weights = cp.dot(self.__cached_inputs.T, d_loss_wrt_outputs)
        # Sum over the batch dimension so the bias gradient has the same shape as the biases.
        d_loss_wrt_biases = cp.sum(d_loss_wrt_outputs, axis=0)
        return [d_loss_wrt_weights, d_loss_wrt_biases]

    def backward(self, d_loss_wrt_outputs: cp.ndarray, lr: float = 0.001) -> cp.ndarray:
        # Scale the incoming gradient down by the layer's output width before the update
        # (done without mutating the caller's array).
        d_loss_wrt_outputs = d_loss_wrt_outputs / d_loss_wrt_outputs.shape[1]
        d_loss_wrt_inputs = self.get_d_loss_wrt_inputs(d_loss_wrt_outputs)
        d_loss_wrt_weights, d_loss_wrt_biases = self.get_d_loss_wrt_parameters(d_loss_wrt_outputs)
        self.__biases -= lr * d_loss_wrt_biases
        self.__weights -= lr * d_loss_wrt_weights
        return d_loss_wrt_inputs


class ReLU(Layer):
    __cached_inputs: cp.ndarray

    def forward(self, inputs: cp.ndarray) -> cp.ndarray:
        self.__cached_inputs = inputs
        return cp.maximum(0, inputs)

    @property
    def parameters(self) -> list[cp.ndarray] | None:
        return None

    def get_d_loss_wrt_parameters(self, d_loss_wrt_outputs: cp.ndarray) -> list[cp.ndarray] | None:
        return None

    def get_d_loss_wrt_inputs(self, d_loss_wrt_outputs: cp.ndarray) -> cp.ndarray:
        # Gradient only flows through positions where the input was positive.
        return (self.__cached_inputs > 0) * d_loss_wrt_outputs

    def backward(self, d_loss_wrt_outputs: cp.ndarray, lr: float = 0.01) -> cp.ndarray:
        return self.get_d_loss_wrt_inputs(d_loss_wrt_outputs)


class Softmax(Layer):
    __cached_outputs: cp.ndarray

    def forward(self, inputs: cp.ndarray) -> cp.ndarray:
        # Shift by the row-wise maximum so the exponentials cannot overflow.
        exps = cp.exp(inputs - cp.max(inputs, axis=1, keepdims=True))
        self.__cached_outputs = exps / cp.sum(exps, axis=1, keepdims=True)
        return self.__cached_outputs

    @property
    def parameters(self) -> list[cp.ndarray] | None:
        return None

    def get_d_loss_wrt_parameters(self, d_loss_wrt_outputs: cp.ndarray) -> list[cp.ndarray] | None:
        return None

    def get_d_loss_wrt_inputs(self, d_loss_wrt_outputs: cp.ndarray) -> cp.ndarray:
        # CategoricalCrossEntropy already hands back the combined softmax + cross-entropy
        # gradient (outputs - targets), so the softmax layer passes it through unchanged.
        return d_loss_wrt_outputs

    def backward(self, d_loss_wrt_outputs: cp.ndarray, lr: float = 0.01) -> cp.ndarray:
        return self.get_d_loss_wrt_inputs(d_loss_wrt_outputs)
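
Since Dense owns both the gradient computation and the SGD update, a finite-difference check is a quick way to validate get_d_loss_wrt_parameters. A minimal sketch, not part of this diff, assuming it is run from the src/ directory so that `layers` imports, and using a throwaway squared-error loss purely to exercise the backward maths:

# Hypothetical gradient check for the Dense layer (illustration only).
import cupy as cp
from layers import Dense

cp.random.seed(0)
layer = Dense(4, 3)
x = cp.random.randn(8, 4)
targets = cp.random.randn(8, 3)

def loss(out):
    # Simple squared-error surrogate, used only to exercise the backward pass.
    return float(cp.sum((out - targets) ** 2) / 2)

out = layer.forward(x)
d_loss_wrt_outputs = out - targets  # analytic dL/d_outputs for the surrogate loss
d_w, _ = layer.get_d_loss_wrt_parameters(d_loss_wrt_outputs)

# Central finite difference on a single weight entry (parameters returns references,
# so the in-place edits below really move the layer's weights).
w = layer.parameters[0]
eps = 1e-4
w[0, 0] += eps
loss_plus = loss(layer.forward(x))
w[0, 0] -= 2 * eps
loss_minus = loss(layer.forward(x))
w[0, 0] += eps
numeric = (loss_plus - loss_minus) / (2 * eps)
print(float(d_w[0, 0]), numeric)

The two printed numbers should agree to several decimal places; a large mismatch would point at the weight-gradient formula.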

13
src/loss.py Normal file

@@ -0,0 +1,13 @@
import cupy as cp
from abc import ABC, abstractmethod


class Loss(ABC):
    @abstractmethod
    def d_loss_wrt_inputs(self, outputs: cp.ndarray, targets: cp.ndarray) -> cp.ndarray:
        pass


class CategoricalCrossEntropy(Loss):
    def d_loss_wrt_inputs(self, outputs: cp.ndarray, targets: cp.ndarray) -> cp.ndarray:
        return outputs - targets
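
CategoricalCrossEntropy only implements the gradient, and it returns outputs - targets because it assumes the network ends in the Softmax layer from src/layers.py: composing the softmax Jacobian with the cross-entropy gradient collapses to "probabilities minus one-hot targets", which is also why Softmax.backward can pass the gradient through unchanged. A small numerical sketch of that identity (illustrative only, the helper functions below are not part of the diff):

# Hypothetical check of the combined softmax + cross-entropy gradient.
import cupy as cp

def softmax(z):
    shifted = z - cp.max(z, axis=1, keepdims=True)
    e = cp.exp(shifted)
    return e / cp.sum(e, axis=1, keepdims=True)

def cross_entropy(z, targets):
    # Mean negative log-likelihood of the one-hot targets.
    probs = softmax(z)
    return float(-cp.sum(targets * cp.log(probs)) / z.shape[0])

cp.random.seed(0)
z = cp.random.randn(5, 10)                 # pre-softmax logits
labels = cp.random.randint(0, 10, size=5)
targets = cp.zeros((5, 10))
targets[cp.arange(5), labels] = 1

analytic = (softmax(z) - targets) / z.shape[0]  # the outputs - targets shortcut (batch mean)

# Central finite difference for one logit entry.
eps = 1e-4
z[0, 0] += eps
loss_plus = cross_entropy(z, targets)
z[0, 0] -= 2 * eps
loss_minus = cross_entropy(z, targets)
z[0, 0] += eps
numeric = (loss_plus - loss_minus) / (2 * eps)
print(float(analytic[0, 0]), numeric)      # the two values should agree closely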

1
src/optimizer.py Normal file

@@ -0,0 +1 @@
import cupy as cp
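
src/optimizer.py is still just an import; the actual SGD update currently lives inside Dense.backward. If that update were ever factored out, it could look roughly like the sketch below. The SGD class, its step interface, and the single batch-size normalisation are assumptions rather than code from this diff; the sketch leans on the fact that Layer.parameters returns references to the underlying cupy arrays, so in-place updates mutate the layers.

# Hypothetical standalone SGD optimizer (not in this diff).
import cupy as cp

class SGD:
    def __init__(self, layers, lr: float = 0.001) -> None:
        self.layers = layers
        self.lr = lr

    def step(self, d_loss_wrt_outputs: cp.ndarray) -> None:
        # Assumes forward() was just called, so each layer's caches are fresh.
        # Average over the batch once, then walk the layers in reverse.
        d_loss = d_loss_wrt_outputs / d_loss_wrt_outputs.shape[0]
        for layer in self.layers[::-1]:
            grads = layer.get_d_loss_wrt_parameters(d_loss)
            params = layer.parameters
            if params is not None and grads is not None:
                for param, grad in zip(params, grads):
                    param -= self.lr * grad  # in-place update mutates the layer's arrays
            d_loss = layer.get_d_loss_wrt_inputs(d_loss)

Usage would replace the manual backward loop in src/test.py with something like opt = SGD(layers, lr=0.0001) followed by opt.step(loss_function.d_loss_wrt_inputs(output, y_train_one_hot)).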

103
src/test.py Normal file

@@ -0,0 +1,103 @@
import time
from layers import Dense, ReLU, Softmax
from loss import CategoricalCrossEntropy
import cupy as cp
import pandas as pd
from tqdm import tqdm
#layers = [
# Dense(2, 128),
# ReLU(),
# Dense(128, 128),
# ReLU(),
# Dense(128, 2),
# Softmax()
#]
##layers = [Dense(2, 2)]
#
#
#def forward(x):
# for layer in layers:
# x = layer.forward(x)
# return x
#
#
#SEQUENCE_LENGTH = 10000
#x1 = cp.random.random_integers(0, 1, SEQUENCE_LENGTH)
#x2 = cp.random.random_integers(0, 1, SEQUENCE_LENGTH)
#x = cp.array([x1, x2]).T
#y = cp.logical_xor(x1, x2)
#yt = cp.array([y == 0, y == 1]).T * 1.0
#
#out = forward(x)
#print(cp.sum(cp.argmax(out, axis=1) == y) * 100 / len(y))
#
#loss_function = CategoricalCrossEntropy()
#
#for i in range(100):
# #print(f"Epoch{i + 1}")
# output = forward(x)
# d_loss = loss_function.d_loss_wrt_inputs(output, yt)
# for layer in layers[::-1]:
# d_loss = layer.backward(d_loss)
#
#out = forward(x)
#print(cp.sum(cp.argmax(out, axis=1) == y) * 100 / len(y))
training_data_frame = pd.read_csv("./mnist_train.csv")
training_data_frame.head()
training_data = cp.array(training_data_frame)
y_train = training_data[:, 0]
x_train: cp.ndarray = training_data[:, 1:]
x_train = x_train.astype(float)
x_train /= cp.max(x_train)  # scale pixel intensities into [0, 1]
del training_data
del training_data_frame
y_train_one_hot = cp.zeros(
    (y_train.size, y_train.max().item() + 1)
)
y_train_one_hot[cp.arange(y_train.size), y_train] = 1
layers = [
    Dense(784, 128),
    ReLU(),
    Dense(128, 128),
    ReLU(),
    Dense(128, 10),
    Softmax()
]
def forward(x):
    for layer in layers:
        x = layer.forward(x)
    return x
out = forward(x_train)
print(cp.sum(cp.argmax(out, axis=1) == y_train) * 100 / len(y_train))
loss_function = CategoricalCrossEntropy()
for i in range(99999):
    output = forward(x_train)
    time.sleep(1)
    print(f"Epoch {i}:")
    print(cp.sum(cp.argmax(output, axis=1) == y_train) * 100 / len(y_train))
    d_loss = loss_function.d_loss_wrt_inputs(output, y_train_one_hot)
    for layer in layers[::-1]:
        d_loss = layer.backward(d_loss, lr=0.0001)
out = forward(x_train)
print(cp.sum(cp.argmax(out, axis=1) == y_train) * 100 / len(y_train))
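
The training loop above is full-batch: every epoch pushes all 60,000 images through the network at once. A hedged sketch of a mini-batch variant of the same loop (the batch size, epoch count, and per-epoch shuffling are assumptions, not something this diff does):

# Hypothetical mini-batch variant of the training loop above (not part of this diff).
BATCH_SIZE = 128
num_samples = x_train.shape[0]

for epoch in range(20):
    # Shuffle once per epoch so batches differ between epochs.
    order = cp.random.permutation(num_samples)
    for start in range(0, num_samples, BATCH_SIZE):
        idx = order[start:start + BATCH_SIZE]
        batch_out = forward(x_train[idx])
        d_loss = loss_function.d_loss_wrt_inputs(batch_out, y_train_one_hot[idx])
        for layer in layers[::-1]:
            d_loss = layer.backward(d_loss, lr=0.0001)
    out = forward(x_train)
    accuracy = cp.sum(cp.argmax(out, axis=1) == y_train) * 100 / len(y_train)
    print(f"Epoch {epoch}: {float(accuracy):.2f}% training accuracy")

Mini-batches trade a noisier gradient estimate for far more frequent parameter updates, which usually reaches a given accuracy in much less wall-clock time than the full-batch loop.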