Compare commits
10 Commits
c09dc9f7ec...fc7f02d62c
| Author | SHA1 | Date |
|---|---|---|
| | fc7f02d62c | |
| | a5de0ca7da | |
| | 9b4f3073f3 | |
| | 98b3b4e18a | |
| | 3fe69b3869 | |
| | b6cd28db7c | |
| | db5761cf4e | |
| | 40601130e0 | |
| | 06c981253c | |
| | 2188edacab | |
3 .gitignore (vendored)
@@ -1,6 +1,7 @@
 # ---> Python
 # Byte-compiled / optimized / DLL files
-__pycache__/
+*__pycache__/
+*.jukit/
 *.py[cod]
 *$py.class
 
README.md
@@ -1,3 +1,3 @@
 # neural-networks-from-scratch
 
-Implementing neural networks with only numpy
+Implementing neural networks with only numpy (Well, cupy which is numpy for GPUs)
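Editor's note: the README change above reflects the numpy-to-cupy switch in the files that follow. As a rough sketch (not part of these commits, names assumed), cupy mirrors most of the numpy array API, so array code like the layers below can often run on either backend by swapping the import:

```python
# Editor's illustration: pick cupy when a GPU build is available,
# otherwise fall back to numpy, which exposes the same interface here.
try:
    import cupy as xp
except ImportError:
    import numpy as xp

x = xp.random.randn(4, 3)
w = xp.random.randn(3, 2)
print(xp.dot(x, w).shape)  # (4, 2) on either backend
```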
60000 mnist_train.csv (new file)
File diff suppressed because it is too large.
36 src/layer.py (deleted)
@@ -1,36 +0,0 @@
import numpy as np
from abc import ABC, abstractmethod


class Layer(ABC):
    @abstractmethod
    def forward(self, x: np.ndarray) -> np.ndarray:
        """
        x = inputs
        if should_cache = True,
        additional caching will be done.
        Set this to true and then call forward right before calling backward
        """

    @property
    @abstractmethod
    def parameters(self) -> tuple[np.ndarray, ...]:
        """
        Returns the different parameters.
        The order is defined as per the sub class's convinience
        """

    @parameters.setter
    @abstractmethod
    def parameters(self, parameters: tuple[np.ndarray, ...]) -> None:
        """
        Write to parameters property
        """

    @abstractmethod
    def d_output_wrt_parameters(self, inputs: np.ndarray) -> tuple[np.ndarray, ...]:
        pass

    @abstractmethod
    def d_output_wrt_inputs(self) -> np.ndarray:
        pass
116 src/layers.py (new file)
@@ -0,0 +1,116 @@
import cupy as cp
from abc import ABC, abstractmethod


class Layer(ABC):
    @abstractmethod
    def forward(self, inputs: cp.ndarray) -> cp.ndarray:
        pass

    @property
    @abstractmethod
    def parameters(self) -> list[cp.ndarray] | None:
        pass

    @abstractmethod
    def get_d_loss_wrt_parameters(self, d_loss_wrt_outputs: cp.ndarray) -> cp.ndarray | None:
        pass

    @abstractmethod
    def get_d_loss_wrt_inputs(self, d_loss_wrt_outputs: cp.ndarray) -> cp.ndarray:
        pass

    @abstractmethod
    def backward(self, d_loss_wrt_outputs: cp.ndarray, lr: float = .001) -> cp.ndarray:
        pass

    def __call__(self, inputs: cp.ndarray) -> cp.ndarray:
        return self.forward(inputs)


class Dense(Layer):
    __weights: cp.ndarray
    __biases: cp.ndarray

    __cached_inputs: cp.ndarray
    __cached_outputs: cp.ndarray

    def __init__(self, input_size: cp.ndarray, output_size: cp.ndarray) -> None:
        self.__weights = (cp.random.randn(input_size, output_size)) * cp.sqrt(1 / (output_size + input_size))
        self.__biases = cp.zeros(output_size)

    def forward(self, inputs: cp.ndarray) -> cp.ndarray:
        self.__cached_inputs = inputs
        self.__cached_outputs = cp.dot(inputs, self.__weights) + self.__biases
        return self.__cached_outputs

    def get_d_loss_wrt_inputs(self, d_loss_wrt_outputs: cp.ndarray) -> cp.ndarray:
        return cp.dot(d_loss_wrt_outputs, self.__weights.T)

    @property
    def parameters(self) -> list[cp.ndarray]:
        return [self.__weights, self.__biases]

    def get_d_loss_wrt_parameters(self, d_loss_wrt_outputs: cp.ndarray) -> cp.ndarray:
        d_loss_wrt_weights = cp.dot(self.__cached_inputs.T, d_loss_wrt_outputs)
        d_loss_wrt_biases = cp.sum(d_loss_wrt_outputs)
        return [d_loss_wrt_weights, d_loss_wrt_biases]

    def backward(self, d_loss_wrt_outputs: cp.ndarray, lr: float = 0.001) -> cp.ndarray:
        d_loss_wrt_outputs /= d_loss_wrt_outputs.shape[1]
        d_loss_wrt_inputs = self.get_d_loss_wrt_inputs(d_loss_wrt_outputs)
        d_loss_wrt_weights, d_loss_wrt_biases = self.get_d_loss_wrt_parameters(d_loss_wrt_outputs)
        self.__biases -= lr * d_loss_wrt_biases
        self.__weights -= lr * d_loss_wrt_weights
        return d_loss_wrt_inputs


class ReLU(Layer):
    __cached_inputs: cp.ndarray

    def forward(self, inputs: cp.ndarray) -> cp.ndarray:
        self.__cached_inputs = inputs
        return cp.maximum(0, inputs)

    @property
    def parameters(self) -> list[cp.ndarray] | None:
        pass

    def get_d_loss_wrt_parameters(self, d_loss_wrt_outputs: cp.ndarray) -> cp.ndarray | None:
        pass

    def get_d_loss_wrt_inputs(self, d_loss_wrt_outputs: cp.ndarray) -> cp.ndarray:
        return (self.__cached_inputs > 0) * d_loss_wrt_outputs

    def backward(self, d_loss_wrt_outputs: cp.ndarray, lr: float = 0.01) -> cp.ndarray:
        return self.get_d_loss_wrt_inputs(d_loss_wrt_outputs)


class Softmax(Layer):
    __cached_outputs: cp.ndarray

    def forward(self, inputs: cp.ndarray) -> cp.ndarray:
        self.__cached_outputs = cp.exp(inputs) / (cp.sum(cp.exp(inputs), axis=1).reshape(inputs.shape[0], 1) + 0.001)
        return self.__cached_outputs

    @property
    def parameters(self) -> list[cp.ndarray] | None:
        pass

    def get_d_loss_wrt_parameters(self, d_loss_wrt_outputs: cp.ndarray) -> cp.ndarray | None:
        pass

    def get_d_loss_wrt_inputs(self, d_loss_wrt_outputs: cp.ndarray) -> cp.ndarray:
        return d_loss_wrt_outputs

    def backward(self, d_loss_wrt_outputs: cp.ndarray, lr: float = 0.01) -> cp.ndarray:
        #time.sleep(1)
        #out = self.__cached_outputs
        #jacobian = out.reshape(out.shape[0], 1, out.shape[1]).repeat(2, axis=1)
        #eye = cp.eye(out.shape[1]).reshape((1, out.shape[1], out.shape[1])).repeat(jacobian.shape[0], axis=0)
        #jacobian *= eye
        #jacobian -= cp.dot(out.T, out)
        #d_loss_wrt_outputs =\
        #    cp.matmul(jacobian, d_loss_wrt_outputs.reshape(out.shape[0], out.shape[1], 1)).reshape((out.shape[0], out.shape[1]))
        #return d_loss_wrt_outputs
        return self.get_d_loss_wrt_inputs(d_loss_wrt_outputs)
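Editor's note: Softmax.backward above leaves its Jacobian-based path commented out and simply passes the upstream gradient through. That is only valid because CategoricalCrossEntropy in src/loss.py already returns the combined softmax-plus-cross-entropy gradient (outputs - targets). As a hedged sketch (not part of the commit), a standalone softmax backward can be written as a batched Jacobian-vector product: for softmax outputs s and upstream gradient g, (J g)_i = s_i * (g_i - sum_j s_j * g_j) per row.

```python
# Editor's sketch, not part of the commit: standalone softmax backward.
import cupy as cp

def softmax_backward(cached_outputs: cp.ndarray, d_loss_wrt_outputs: cp.ndarray) -> cp.ndarray:
    s = cached_outputs          # softmax outputs, shape (batch, classes)
    g = d_loss_wrt_outputs      # upstream gradient, shape (batch, classes)
    dot = cp.sum(s * g, axis=1, keepdims=True)  # sum_j s_j * g_j for each row
    return s * (g - dot)        # Jacobian-vector product without building the full Jacobian
```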
13 src/loss.py (new file)
@@ -0,0 +1,13 @@
import cupy as cp
from abc import ABC, abstractmethod


class Loss(ABC):
    @abstractmethod
    def d_loss_wrt_inputs(self, outputs: cp.ndarray, targets: cp.ndarray) -> cp.ndarray:
        pass


class CategoricalCrossEntropy(Loss):
    def d_loss_wrt_inputs(self, outputs: cp.ndarray, targets: cp.ndarray) -> cp.ndarray:
        return outputs - targets
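Editor's note: the loss module only implements the gradient; `outputs - targets` is the standard gradient of cross-entropy taken through a softmax output with one-hot targets, which is why Softmax.backward can pass it through unchanged. The loss value itself is never computed in these commits; a minimal sketch of what it could look like (an assumption, with a small epsilon to avoid log(0)):

```python
# Editor's sketch, not part of the commit: categorical cross-entropy value.
import cupy as cp

def categorical_cross_entropy(outputs: cp.ndarray, targets: cp.ndarray, eps: float = 1e-7) -> float:
    clipped = cp.clip(outputs, eps, 1.0 - eps)              # guard against log(0)
    per_sample = -cp.sum(targets * cp.log(clipped), axis=1)  # one-hot targets select the true class
    return float(cp.mean(per_sample))                        # mean over the batch
```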
1 src/optimizer.py (new file)
@@ -0,0 +1 @@
import cupy as cp
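Editor's note: src/optimizer.py is only an import stub in these commits; the parameter update currently lives inline in Dense.backward. Purely as a hypothetical illustration of how that update could later be factored out into this module, a minimal SGD class:

```python
# Hypothetical sketch, not part of the commit: plain SGD over a layer's parameters.
import cupy as cp

class SGD:
    def __init__(self, lr: float = 0.001) -> None:
        self.lr = lr

    def step(self, parameters: list[cp.ndarray], gradients: list[cp.ndarray]) -> None:
        # Update in place so the layers keep referencing the same arrays.
        for p, g in zip(parameters, gradients):
            p -= self.lr * g
```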
103 src/test.py (new file)
@@ -0,0 +1,103 @@
import time
from layers import Dense, ReLU, Softmax
from loss import CategoricalCrossEntropy
import cupy as cp
import pandas as pd
from tqdm import tqdm


#layers = [
# Dense(2, 128),
# ReLU(),
# Dense(128, 128),
# ReLU(),
# Dense(128, 2),
# Softmax()
#]
##layers = [Dense(2, 2)]
#
#
#def forward(x):
# for layer in layers:
# x = layer.forward(x)
# return x
#
#
#SEQUENCE_LENGTH = 10000
#x1 = cp.random.random_integers(0, 1, SEQUENCE_LENGTH)
#x2 = cp.random.random_integers(0, 1, SEQUENCE_LENGTH)
#x = cp.array([x1, x2]).T
#y = cp.logical_xor(x1, x2)
#yt = cp.array([y == 0, y == 1]).T * 1.0
#
#out = forward(x)
#print(cp.sum(cp.argmax(out, axis=1) == y) * 100 / len(y))
#
#loss_function = CategoricalCrossEntropy()
#
#for i in range(100):
# #print(f"Epoch{i + 1}")
# output = forward(x)
# d_loss = loss_function.d_loss_wrt_inputs(output, yt)
# for layer in layers[::-1]:
# d_loss = layer.backward(d_loss)
#
#out = forward(x)
#print(cp.sum(cp.argmax(out, axis=1) == y) * 100 / len(y))







training_data_frame = pd.read_csv("./mnist_train.csv")
training_data_frame.head()

training_data = cp.array(training_data_frame)
y_train = training_data[:, 0]
x_train: cp.ndarray = training_data[:, 1:]
x_train = x_train.astype(float)
x_train /= cp.argmax(x_train)
del training_data
del training_data_frame


y_train_one_hot = cp.zeros(
    (y_train.size, y_train.max().item() + 1)
)
y_train_one_hot[cp.arange(y_train.size), y_train] = 1


layers = [
    Dense(784, 128),
    ReLU(),
    Dense(128, 128),
    ReLU(),
    Dense(128, 10),
    Softmax()
]


def forward(x):
    for layer in layers:
        x = layer.forward(x)
    return x


out = forward(x_train)
print(cp.sum(cp.argmax(out, axis=1) == y_train) * 100 / len(y_train))

loss_function = CategoricalCrossEntropy()

for i in range(99999):
    output = forward(x_train)
    time.sleep(1)
    print(f"Epoch {i}:")
    print(cp.sum(cp.argmax(output, axis=1) == y_train) * 100 / len(y_train))
    d_loss = loss_function.d_loss_wrt_inputs(output, y_train_one_hot)
    for layer in layers[::-1]:
        d_loss = layer.backward(d_loss, lr=0.0001)

out = forward(x_train)
print(cp.sum(cp.argmax(out, axis=1) == y_train) * 100 / len(y_train))
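Editor's note on the training script: cp.argmax returns the index of the largest element, not its value, so `x_train /= cp.argmax(x_train)` divides the pixels by an index; the conventional MNIST scaling divides by the maximum pixel value. (A similar question applies to Dense.backward, which appears to average by shape[1], the number of outputs, rather than the batch size shape[0].) A hedged sketch of the usual scaling, with stand-in data since the CSV diff is suppressed above:

```python
# Editor's sketch, an assumption about intent: scale pixels into [0, 1].
import cupy as cp

x_train = cp.random.randint(0, 256, size=(4, 784)).astype(float)  # stand-in for the CSV rows
x_train /= x_train.max()     # or x_train /= 255.0 for 8-bit pixel data
print(float(x_train.max()))  # 1.0
```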