In [319]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, Dataset
import numpy as np
from keras.datasets import mnist
from sklearn.model_selection import train_test_split
import time
import sys

In [320]:
"""
1. data parsing and loading (splitting into train_test (+validation))
2. creating the network structure (defining layers, defining forward)
3. Initializing the network, optimizers, loss functions, parameters etc
4. Training (train an epoch)
5. Testing (+validation)
6. main (calls train, test)

"""

'\n1. data parsing and loading (splitting into train_test (+validation))\n2. creating the network structure (defining layers, defining forward)\n3. Initializing the network, optimizers, loss functions, parameters etc\n4. Training (train an epoch)\n5. Testing (+validation)\n6. main (calls train, test)\n\n'

In [321]:
"""
IMPORT MNIST:
- database of handwritten digits (0-9)
- provided as train and test set but we'll merge and do our own split
"""

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x = np.concatenate((x_train, x_test))
y = np.concatenate((y_train, y_test))

train_size = 0.7
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=train_size)

x_train = x_train[:1000]
y_train = y_train[:1000]
x_test = x_test[:200]
y_test = y_test[:200]

In [322]:
#all objects need to be tensors, not numpy
#the type matters (i.e. int32, int64/long, float...)

x_train, x_test, y_train, y_test = torch.Tensor(x_train), torch.Tensor(x_test), torch.Tensor(y_train).long(), torch.Tensor(y_test).long()
#comment out this line if you want to use MyDataset as explained below

In [323]:
"""Dataloader: 
 - Takes in input of Dataset object. 
 - Often, you'll need to write your own Dataset class due to personalized needs. 
 - We'll start with using the default TensorDataset. After looking at later code,
   you'll see the benefits to creating a custom Dataset. I created one called MyDataset
   in the end. You may then come back and comment/uncomment based on instruction to use MyDataset instead 
"""
train_dataset = TensorDataset(x_train, y_train)
#train_dataset = MyDataset(x_train, y_train) #uncomment if you want to use MyDataset
train_loader = DataLoader(train_dataset, batch_size=64)

test_dataset = TensorDataset(x_test, y_test)
#test_dataset = MyDataset(x_test, y_test) #uncomment if you want to use MyDataset
test_loader = DataLoader(test_dataset, batch_size=64)

In [324]:
class simpleNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        
        # Inputs to hidden layer linear transformation
        self.hidden = nn.Linear(input_size, hidden_size)
        # Output layer, 10 units in our case - one for each digit
        self.output = nn.Linear(hidden_size, output_size)

        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=0)
        
    def forward(self, x):
        # Pass the input tensor through each of our operations
        #print("input x: ", x)
        x = self.hidden(x)
        x = self.sigmoid(x)
        x = self.output(x)
        #print("output b4 softmax: ", x)
        x = self.softmax(x)
        #print("softmax output: ", x)
        
        return x

#Link for other types of layers (conv, pool, dropout...): https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html#torch.nn.Conv2d

"""TIME-SAVING WAY OF INITIALIZING COMPLEX MODEL: nn.Sequential

class Simple_MLP(nn.Module):
    def __init__(self, size_list):
        super(Simple_MLP, self).__init__()
        layers = []
        self.size_list = size_list #define all the parameter values in a list and just index through it

        #added batchnorm1d to the model given in recitation
        for i in range(len(size_list) - 2):
            layers.append(nn.Linear(size_list[i],size_list[i+1]))
            layers.append(nn.BatchNorm1d(size_list[i+1]))
            layers.append(nn.ReLU())
            #layers.append(nn.Dropout(0.5))
        layers.append(nn.Linear(size_list[-2], size_list[-1]))

        self.net = nn.Sequential(*layers) #this function will sequentially call through each layer, effectively doing your forward function

    def forward(self, x):
        return self.net(x)""" #see how much time this saves? 

'TIME-SAVING WAY OF INITIALIZING COMPLEX MODEL: nn.Sequential\n\nclass Simple_MLP(nn.Module):\n    def __init__(self, size_list):\n        super(Simple_MLP, self).__init__()\n        layers = []\n        self.size_list = size_list #define all the parameter values in a list and just index through it\n\n        #added batchnorm1d to the model given in recitation\n        for i in range(len(size_list) - 2):\n            layers.append(nn.Linear(size_list[i],size_list[i+1]))\n            layers.append(nn.BatchNorm1d(size_list[i+1]))\n            layers.append(nn.ReLU())\n            #layers.append(nn.Dropout(0.5))\n        layers.append(nn.Linear(size_list[-2], size_list[-1]))\n\n        self.net = nn.Sequential(*layers) #this function will sequentially call through each layer, effectively doing your forward function\n\n    def forward(self, x):\n        return self.net(x)'

In [325]:
x_shape = x_train[0].shape

input_size = x_shape[0]*x_shape[1] #original x sample is shape (28,28), but our model takes in 1-d input so remember to flatten it later
print("input_size: ", input_size)
hidden_size = 256
output_size = 10

model = simpleNetwork(input_size, hidden_size, output_size)
device = torch.device("cpu")
model.to(device)
print("model: ", model)

input_size:  784
model:  simpleNetwork(
  (hidden): Linear(in_features=784, out_features=256, bias=True)
  (output): Linear(in_features=256, out_features=10, bias=True)
  (sigmoid): Sigmoid()
  (softmax): Softmax(dim=0)
)


In [326]:
#Define our optimizer and loss function

learning_rate = 1e-3

criterion = nn.MSELoss() #nn.CrossEntropyLoss(), nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) #optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)

#scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, verbose=True)

In [327]:
def train_epoch(model, train_loader, criterion, optimizer):

    model.train()

    running_loss = 0.0
    
    start_time = time.time()
    for batch_idx, (data, target) in enumerate(train_loader):   
        optimizer.zero_grad()   # .backward() accumulates gradients so we need to clear them
        #print("TRAINING data: ", data)
        
        data = torch.flatten(data, start_dim=1) #flatten to match model input
        
        idx = target.long()
        target = torch.zeros((len(target),10))
        target[torch.arange(len(target)),idx] = 1

        #NOTE: Do you see how we have to alter our data and labels each time within the loop? This time could be saved if 
        #we wrote our own custom dataset that automatically does this when retrieving samples from train_loader. I wrote
        #one called MyDataset afterwards

        #uncomment from data = torch.flatten... to target[idx] = 1 if using MyDataset

        data = data.to(device)
        target = target.to(device) # all data & model on same device

        outputs = model(data)
        loss = criterion(outputs, target)
        running_loss += loss.item()

        loss.backward()
        optimizer.step()
    #scheduler.step(running_loss)
    
    end_time = time.time()
    
    running_loss /= len(train_loader)
    print('Training Loss: ', running_loss, 'Time: ',end_time - start_time, 's')
    return running_loss

In [328]:
def test_model(model, test_loader, criterion):

    with torch.no_grad(): #testing so don't need gradients

        model.eval()

        total_predictions = 0.0
        correct_predictions = 0.0

        for batch_idx, (data, target) in enumerate(test_loader):  
            data = torch.flatten(data, start_dim=1) #flatten to match model input

            #uncomment flattening above if using MyDataset

            outputs = model(data)
            _, predicted = torch.max(outputs, 1)
            #target = torch.argmax(target, 1) #uncomment when using MyDataset)
            for i in range(len(predicted)):
                if predicted[i] == target[i]:
                  correct_predictions += 1
                total_predictions += 1

        acc = correct_predictions/total_predictions
        print('Testing accuracy: ', acc)

    return acc

In [329]:
n_epochs = 10
Train_loss = []
Test_loss = []
Test_acc = []

for i in range(n_epochs):
    print("Epoch: ", i+1)
    train_loss = train_epoch(model, train_loader, criterion, optimizer)
    acc = test_model(model, test_loader, criterion)


Epoch:  1
Training Loss:  0.09386827982962132 Time:  0.14947915077209473 s
Testing accuracy:  0.755
Epoch:  2
Training Loss:  0.0870783762075007 Time:  0.1395096778869629 s
Testing accuracy:  0.75
Epoch:  3
Training Loss:  0.08231420256197453 Time:  0.13604998588562012 s
Testing accuracy:  0.75
Epoch:  4
Training Loss:  0.07902503712102771 Time:  0.14116811752319336 s
Testing accuracy:  0.755
Epoch:  5
Training Loss:  0.07714120903983712 Time:  0.13157272338867188 s
Testing accuracy:  0.765
Epoch:  6
Training Loss:  0.07602649461477995 Time:  0.1564333438873291 s
Testing accuracy:  0.8
Epoch:  7
Training Loss:  0.07551217125728726 Time:  0.15763354301452637 s
Testing accuracy:  0.805
Epoch:  8
Training Loss:  0.07495519379153848 Time:  0.1345217227935791 s
Testing accuracy:  0.79
Epoch:  9
Training Loss:  0.0745693608187139 Time:  0.1268603801727295 s
Testing accuracy:  0.795
Epoch:  10
Training Loss:  0.07428902434185147 Time:  0.13258838653564453 s
Testing accuracy:  0.82


In [330]:
#BONUS: Writing our own Dataset class to adjust x,y accordingly
#Then you won't have to do all the flattening and making one-hot vector every loop iteration

class MyDataset(Dataset):
    def __init__(self, X, Y): 
        #if certain changes are fixed, you're better off doing it in init, b/c it's only ran once
        np.set_printoptions(threshold=sys.maxsize)
        
        self.X = np.array([x.flatten() for x in X]) #flatten each input from (28,28) to 28*28 = 784
        self.Y = Y
        self.Y_hot = np.zeros((len(Y),10))
        self.Y_hot[np.arange(len(Y)),Y] = 1 #creates one-hot vector for each label
        

    def __getitem__(self, idx):
        #called while enumerating through dataloader to retrieve samples

        data = torch.Tensor(self.X[idx]).flatten()
        target_onehot = torch.Tensor(self.Y_hot[idx]).float()

        return data, target_onehot


    def __len__(self):
        return len(self.X)