
Lecture 8: Regularization in Neural Networks Part 2 Code #

#@title 
from ipywidgets import widgets
out1 = widgets.Output()
with out1:
  from IPython.display import YouTubeVideo
  video = YouTubeVideo(id="oHtOFQR0Wng", width=854, height=480, fs=1, rel=0)
  print("Video available at https://youtube.com/watch?v=" + video.id)
  display(video)
display(out1)
#@title 
from IPython import display as IPyDisplay
IPyDisplay.HTML(
    f"""
  <div>
    <a href= "https://github.com/DL4CV-NPTEL/Deep-Learning-For-Computer-Vision/blob/main/Slides/Week_4/DL4CV_Week04_Part04.pdf" target="_blank">
    <img src="https://github.com/DL4CV-NPTEL/Deep-Learning-For-Computer-Vision/blob/main/Data/Slides_Logo.png?raw=1"
  alt="button link to Airtable" style="width:200px"></a>
    </div>""" )

Data Augmentations#

from torchvision import datasets
import torchvision
import random
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch
import PIL
import matplotlib.pyplot as plt
%matplotlib inline
##########################
### SETTINGS
##########################

RANDOM_SEED = 1
BATCH_SIZE = 128
NUM_EPOCHS = 100

##########################
### MNIST DATASET
##########################

# Note transforms.ToTensor() scales input images
# to 0-1 range

training_transforms = torchvision.transforms.Compose([
    torchvision.transforms.Resize(size=(32, 32)),
    torchvision.transforms.RandomCrop(size=(28, 28)),
    torchvision.transforms.RandomRotation(degrees=30, interpolation=torchvision.transforms.InterpolationMode.BILINEAR),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=(0.5,), std=(0.5,)),
    # normalize does (x_i - mean) / std
    # if images are [0, 1], they will be [-1, 1] afterwards
])

test_transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Resize(size=(32, 32)),
    torchvision.transforms.CenterCrop(size=(28, 28)),
    torchvision.transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# for more see
# https://pytorch.org/docs/stable/torchvision/transforms.html

train_dataset = datasets.MNIST(root='data', 
                               train=True, 
                               transform=training_transforms,
                               download=True)

test_dataset = datasets.MNIST(root='data', 
                              train=False, 
                              transform=test_transforms)


train_loader = DataLoader(dataset=train_dataset, 
                          batch_size=BATCH_SIZE, 
                          shuffle=True)

test_loader = DataLoader(dataset=test_dataset, 
                         batch_size=BATCH_SIZE, 
                         shuffle=False)
# Checking the dataset

random.seed(0)
torch.manual_seed(0)


for images, labels in train_loader:  
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break
    
fig, ax = plt.subplots(1, 4)
for i in range(4):
    ax[i].imshow(images[i].view(28, 28).numpy(), cmap='binary')
    
plt.tight_layout()
Image batch dimensions: torch.Size([128, 1, 28, 28])
Image label dimensions: torch.Size([128])
[Output figure: four randomly augmented MNIST digits]
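
Because Normalize((0.5,), (0.5,)) maps the [0, 1] pixel range to [-1, 1], imshow rescales the values automatically before display. A minimal sketch (an addition, not part of the lecture code, reusing the images batch from the cell above) that undoes the normalization explicitly:

# invert Normalize: x = x_normalized * std + mean
images_unnormalized = images * 0.5 + 0.5
print('min:', images_unnormalized.min().item(),
      '| max:', images_unnormalized.max().item())  # roughly 0.0 and 1.0
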
# compare without augmentation

train_dataset = datasets.MNIST(root='data', 
                               train=True, 
                               transform=torchvision.transforms.ToTensor(),
                               download=True)

train_loader = DataLoader(dataset=train_dataset, 
                          batch_size=BATCH_SIZE, 
                          shuffle=True)

torch.manual_seed(0)
for images, labels in train_loader:  
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break
    
fig, ax = plt.subplots(1, 4)
for i in range(4):
    ax[i].imshow(images[i].view(28, 28).numpy(), cmap='binary')
    
plt.tight_layout()
Image batch dimensions: torch.Size([128, 1, 28, 28])
Image label dimensions: torch.Size([128])
[Output figure: four original MNIST digits, no augmentation]
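To see what the augmentation pipeline actually does to a single example, here is a small sketch (an addition, not in the original notebook) that applies training_transforms several times to the same raw MNIST digit; every call draws a new random crop and rotation:

raw_dataset = datasets.MNIST(root='data', train=True, download=True)  # PIL images, no transform
pil_img, label = raw_dataset[0]

fig, ax = plt.subplots(1, 4)
for i in range(4):
    augmented = training_transforms(pil_img)          # tensor of shape (1, 28, 28) in [-1, 1]
    ax[i].imshow(augmented.squeeze(0).numpy(), cmap='binary')
plt.tight_layout()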

Helper functions#

Imports

import torch
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import sampler
from torchvision import datasets
from torch.utils.data import DataLoader
from torch.utils.data import SubsetRandomSampler
from torchvision import transforms
import time
import os
import random

Helper function for data loading

def get_dataloaders_mnist(batch_size, num_workers=0,
                          validation_fraction=None,
                          train_transforms=None,
                          test_transforms=None):

    if train_transforms is None:
        train_transforms = transforms.ToTensor()

    if test_transforms is None:
        test_transforms = transforms.ToTensor()

    train_dataset = datasets.MNIST(root='data',
                                   train=True,
                                   transform=train_transforms,
                                   download=True)

    valid_dataset = datasets.MNIST(root='data',
                                   train=True,
                                   transform=test_transforms)

    test_dataset = datasets.MNIST(root='data',
                                  train=False,
                                  transform=test_transforms)

    if validation_fraction is not None:
        num = int(validation_fraction * 60000)
        train_indices = torch.arange(0, 60000 - num)
        valid_indices = torch.arange(60000 - num, 60000)

        train_sampler = SubsetRandomSampler(train_indices)
        valid_sampler = SubsetRandomSampler(valid_indices)

        valid_loader = DataLoader(dataset=valid_dataset,
                                  batch_size=batch_size,
                                  num_workers=num_workers,
                                  sampler=valid_sampler)

        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=batch_size,
                                  num_workers=num_workers,
                                  drop_last=True,
                                  sampler=train_sampler)

    else:
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=batch_size,
                                  num_workers=num_workers,
                                  drop_last=True,
                                  shuffle=True)

    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=batch_size,
                             num_workers=num_workers,
                             shuffle=False)

    if validation_fraction is None:
        return train_loader, test_loader
    else:
        return train_loader, valid_loader, test_loader
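
As a quick sanity check, a usage sketch (an addition, with hypothetical loader names so the loaders created later are untouched): with validation_fraction=0.1, the last 6,000 of the 60,000 training images are held out for validation and the first 54,000 are used for training.

tr_loader, va_loader, te_loader = get_dataloaders_mnist(
    batch_size=256, validation_fraction=0.1)

print('Training images:  ', len(tr_loader.sampler))   # 54000
print('Validation images:', len(va_loader.sampler))   # 6000
print('Test images:      ', len(te_loader.dataset))   # 10000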

Helper function for evaluation

def set_all_seeds(seed):
    os.environ["PL_GLOBAL_SEED"] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


def set_deterministic():
    if torch.cuda.is_available():
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

 
def compute_accuracy(model, data_loader, device):

    with torch.no_grad():

        correct_pred, num_examples = 0, 0

        for i, (features, targets) in enumerate(data_loader):

            features = features.to(device)
            targets = targets.float().to(device)

            logits = model(features)
            _, predicted_labels = torch.max(logits, 1)

            num_examples += targets.size(0)
            correct_pred += (predicted_labels == targets).sum()
    return correct_pred.float()/num_examples * 100
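
A tiny sketch (hypothetical tensors, just to illustrate the logic inside compute_accuracy): torch.max over dim=1 returns (values, indices), and only the indices, i.e. the predicted class labels, are compared with the targets.

logits_demo = torch.tensor([[ 2.0, 0.1, -1.0],
                            [ 0.3, 0.2,  4.0]])
targets_demo = torch.tensor([0, 2])

_, predicted = torch.max(logits_demo, 1)                     # tensor([0, 2])
accuracy = (predicted == targets_demo).float().mean() * 100
print(f'{accuracy:.2f}%')                                    # 100.00%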

Helper function for training

def train_model(model, num_epochs, train_loader,
                valid_loader, test_loader, optimizer, device):

    start_time = time.time()
    minibatch_loss_list, train_acc_list, valid_acc_list = [], [], []
    for epoch in range(num_epochs):

        model.train()
        for batch_idx, (features, targets) in enumerate(train_loader):

            features = features.to(device)
            targets = targets.to(device)

            # ## FORWARD AND BACK PROP
            logits = model(features)
            loss = torch.nn.functional.cross_entropy(logits, targets)
            optimizer.zero_grad()

            loss.backward()

            # ## UPDATE MODEL PARAMETERS
            optimizer.step()

            # ## LOGGING
            minibatch_loss_list.append(loss.item())
            if not batch_idx % 50:
                print(f'Epoch: {epoch+1:03d}/{num_epochs:03d} '
                      f'| Batch {batch_idx:04d}/{len(train_loader):04d} '
                      f'| Loss: {loss:.4f}')

        model.eval()
        with torch.no_grad():  # save memory during inference
            train_acc = compute_accuracy(model, train_loader, device=device)
            valid_acc = compute_accuracy(model, valid_loader, device=device)
            print(f'Epoch: {epoch+1:03d}/{num_epochs:03d} '
                  f'| Train: {train_acc :.2f}% '
                  f'| Validation: {valid_acc :.2f}%')
            train_acc_list.append(train_acc.item())
            valid_acc_list.append(valid_acc.item())

        elapsed = (time.time() - start_time)/60
        print(f'Time elapsed: {elapsed:.2f} min')

    elapsed = (time.time() - start_time)/60
    print(f'Total Training Time: {elapsed:.2f} min')

    test_acc = compute_accuracy(model, test_loader, device=device)
    print(f'Test accuracy {test_acc :.2f}%')

    return minibatch_loss_list, train_acc_list, valid_acc_list

Helper functions for plotting

def plot_training_loss(minibatch_loss_list, num_epochs, iter_per_epoch,
                       results_dir=None, averaging_iterations=100):

    plt.figure()
    ax1 = plt.subplot(1, 1, 1)
    ax1.plot(range(len(minibatch_loss_list)),
             (minibatch_loss_list), label='Minibatch Loss')

    if len(minibatch_loss_list) > 1000:
        ax1.set_ylim([
            0, np.max(minibatch_loss_list[1000:])*1.5
            ])
    ax1.set_xlabel('Iterations')
    ax1.set_ylabel('Loss')

    ax1.plot(np.convolve(minibatch_loss_list,
                         np.ones(averaging_iterations,)/averaging_iterations,
                         mode='valid'),
             label='Running Average')
    ax1.legend()

    ###################
    # Set second x-axis (epochs)
    ax2 = ax1.twiny()
    newlabel = list(range(num_epochs+1))

    newpos = [e*iter_per_epoch for e in newlabel]

    ax2.set_xticks(newpos[::10])
    ax2.set_xticklabels(newlabel[::10])

    ax2.xaxis.set_ticks_position('bottom')
    ax2.xaxis.set_label_position('bottom')
    ax2.spines['bottom'].set_position(('outward', 45))
    ax2.set_xlabel('Epochs')
    ax2.set_xlim(ax1.get_xlim())
    ###################

    plt.tight_layout()

    if results_dir is not None:
        image_path = os.path.join(results_dir, 'plot_training_loss.pdf')
        plt.savefig(image_path)


def plot_accuracy(train_acc_list, valid_acc_list, results_dir):

    num_epochs = len(train_acc_list)

    plt.plot(np.arange(1, num_epochs+1),
             train_acc_list, label='Training')
    plt.plot(np.arange(1, num_epochs+1),
             valid_acc_list, label='Validation')

    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.tight_layout()

    if results_dir is not None:
        image_path = os.path.join(
            results_dir, 'plot_acc_training_validation.pdf')
        plt.savefig(image_path)


def show_examples(model, data_loader):

    for batch_idx, (features, targets) in enumerate(data_loader):

        with torch.no_grad():
            # forward pass on the first batch; argmax over the logits gives the predicted class
            logits = model(features)
            predictions = torch.argmax(logits, dim=1)
        break

    fig, axes = plt.subplots(nrows=3, ncols=5,
                             sharex=True, sharey=True)

    nhwc_img = np.transpose(features, axes=(0, 2, 3, 1))
    nhw_img = np.squeeze(nhwc_img.numpy(), axis=3)

    for idx, ax in enumerate(axes.ravel()):
        ax.imshow(nhw_img[idx], cmap='binary')
        ax.title.set_text(f'P: {predictions[idx]} | T: {targets[idx]}')
        ax.axison = False

    plt.tight_layout()
    plt.show()

Dropout#

Settings and Dataset

##########################
### SETTINGS
##########################

RANDOM_SEED = 123
BATCH_SIZE = 256
NUM_HIDDEN_1 = 50
NUM_HIDDEN_2 = 25
NUM_EPOCHS = 10
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
set_all_seeds(RANDOM_SEED)
set_deterministic()
##########################
### MNIST DATASET
##########################

train_loader, valid_loader, test_loader = get_dataloaders_mnist(
    batch_size=BATCH_SIZE,
    validation_fraction=0.1)

# Checking the dataset
for images, labels in train_loader:  
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    print('Class labels of 10 examples:', labels[:10])
    break
Image batch dimensions: torch.Size([256, 1, 28, 28])
Image label dimensions: torch.Size([256])
Class labels of 10 examples: tensor([4, 5, 8, 9, 9, 4, 9, 9, 3, 9])

Model

class MultilayerPerceptron(torch.nn.Module):

    def __init__(self, num_features, num_classes, drop_proba, 
                 num_hidden_1, num_hidden_2):
        super().__init__()
        
        self.my_network = torch.nn.Sequential(
            # 1st hidden layer
            torch.nn.Flatten(),
            torch.nn.Linear(num_features, num_hidden_1),
            torch.nn.ReLU(),
            torch.nn.Dropout(drop_proba),
            # 2nd hidden layer
            torch.nn.Linear(num_hidden_1, num_hidden_2),
            torch.nn.ReLU(),
            torch.nn.Dropout(drop_proba),
            # output layer
            torch.nn.Linear(num_hidden_2, num_classes)
        )
           
    def forward(self, x):
        logits = self.my_network(x)
        return logits
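
Before training, a short sketch (an addition, not part of the lecture code) of how torch.nn.Dropout behaves: in training mode each activation is zeroed with probability p and the survivors are scaled by 1/(1-p) (inverted dropout), while in eval mode the layer is the identity.

torch.manual_seed(0)
dropout = torch.nn.Dropout(p=0.5)
x = torch.ones(1, 8)

dropout.train()
print(dropout(x))   # a mix of 0.0 and 2.0 entries (survivors scaled by 1/(1 - 0.5))

dropout.eval()
print(dropout(x))   # identical to x: all ones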

Without Dropout

torch.manual_seed(RANDOM_SEED)
model = MultilayerPerceptron(num_features=28*28,
                             num_hidden_1=NUM_HIDDEN_1,
                             num_hidden_2=NUM_HIDDEN_2,
                             drop_proba=0.0,
                             num_classes=10)
model = model.to(DEVICE)

optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

minibatch_loss_list, train_acc_list, valid_acc_list = train_model(
    model=model,
    num_epochs=NUM_EPOCHS,
    train_loader=train_loader,
    valid_loader=valid_loader,
    test_loader=test_loader,
    optimizer=optimizer,
    device=DEVICE)

plot_training_loss(minibatch_loss_list=minibatch_loss_list,
                   num_epochs=NUM_EPOCHS,
                   iter_per_epoch=len(train_loader),
                   results_dir=None,
                   averaging_iterations=20)

plt.show()

plot_accuracy(train_acc_list=train_acc_list,
              valid_acc_list=valid_acc_list,
              results_dir=None)

plt.ylim([80, 100])
plt.show()
Epoch: 001/010 | Batch 0000/0210 | Loss: 2.2982
Epoch: 001/010 | Batch 0050/0210 | Loss: 1.9540
Epoch: 001/010 | Batch 0100/0210 | Loss: 0.9112
Epoch: 001/010 | Batch 0150/0210 | Loss: 0.6009
Epoch: 001/010 | Batch 0200/0210 | Loss: 0.4537
Epoch: 001/010 | Train: 86.79% | Validation: 89.57%
Time elapsed: 0.16 min
Epoch: 002/010 | Batch 0000/0210 | Loss: 0.4226
Epoch: 002/010 | Batch 0050/0210 | Loss: 0.4288
Epoch: 002/010 | Batch 0100/0210 | Loss: 0.3404
Epoch: 002/010 | Batch 0150/0210 | Loss: 0.3256
Epoch: 002/010 | Batch 0200/0210 | Loss: 0.2733
Epoch: 002/010 | Train: 89.21% | Validation: 91.32%
Time elapsed: 0.33 min
Epoch: 003/010 | Batch 0000/0210 | Loss: 0.4403
Epoch: 003/010 | Batch 0050/0210 | Loss: 0.3642
Epoch: 003/010 | Batch 0100/0210 | Loss: 0.3392
Epoch: 003/010 | Batch 0150/0210 | Loss: 0.3118
Epoch: 003/010 | Batch 0200/0210 | Loss: 0.2612
Epoch: 003/010 | Train: 91.54% | Validation: 93.28%
Time elapsed: 0.47 min
Epoch: 004/010 | Batch 0000/0210 | Loss: 0.1900
Epoch: 004/010 | Batch 0050/0210 | Loss: 0.3374
Epoch: 004/010 | Batch 0100/0210 | Loss: 0.2584
Epoch: 004/010 | Batch 0150/0210 | Loss: 0.2293
Epoch: 004/010 | Batch 0200/0210 | Loss: 0.1962
Epoch: 004/010 | Train: 92.19% | Validation: 93.52%
Time elapsed: 0.62 min
Epoch: 005/010 | Batch 0000/0210 | Loss: 0.3160
Epoch: 005/010 | Batch 0050/0210 | Loss: 0.3008
Epoch: 005/010 | Batch 0100/0210 | Loss: 0.3078
Epoch: 005/010 | Batch 0150/0210 | Loss: 0.2415
Epoch: 005/010 | Batch 0200/0210 | Loss: 0.1907
Epoch: 005/010 | Train: 93.13% | Validation: 94.28%
Time elapsed: 0.77 min
Epoch: 006/010 | Batch 0000/0210 | Loss: 0.1948
Epoch: 006/010 | Batch 0050/0210 | Loss: 0.1533
Epoch: 006/010 | Batch 0100/0210 | Loss: 0.2249
Epoch: 006/010 | Batch 0150/0210 | Loss: 0.2144
Epoch: 006/010 | Batch 0200/0210 | Loss: 0.2245
Epoch: 006/010 | Train: 94.30% | Validation: 95.48%
Time elapsed: 0.92 min
Epoch: 007/010 | Batch 0000/0210 | Loss: 0.1763
Epoch: 007/010 | Batch 0050/0210 | Loss: 0.2022
Epoch: 007/010 | Batch 0100/0210 | Loss: 0.1443
Epoch: 007/010 | Batch 0150/0210 | Loss: 0.1857
Epoch: 007/010 | Batch 0200/0210 | Loss: 0.1338
Epoch: 007/010 | Train: 94.69% | Validation: 95.67%
Time elapsed: 1.07 min
Epoch: 008/010 | Batch 0000/0210 | Loss: 0.1491
Epoch: 008/010 | Batch 0050/0210 | Loss: 0.1751
Epoch: 008/010 | Batch 0100/0210 | Loss: 0.2704
Epoch: 008/010 | Batch 0150/0210 | Loss: 0.1301
Epoch: 008/010 | Batch 0200/0210 | Loss: 0.1843
Epoch: 008/010 | Train: 94.93% | Validation: 95.88%
Time elapsed: 1.21 min
Epoch: 009/010 | Batch 0000/0210 | Loss: 0.1855
Epoch: 009/010 | Batch 0050/0210 | Loss: 0.1570
Epoch: 009/010 | Batch 0100/0210 | Loss: 0.1190
Epoch: 009/010 | Batch 0150/0210 | Loss: 0.1813
Epoch: 009/010 | Batch 0200/0210 | Loss: 0.2289
Epoch: 009/010 | Train: 95.64% | Validation: 96.32%
Time elapsed: 1.36 min
Epoch: 010/010 | Batch 0000/0210 | Loss: 0.1531
Epoch: 010/010 | Batch 0050/0210 | Loss: 0.2345
Epoch: 010/010 | Batch 0100/0210 | Loss: 0.2008
Epoch: 010/010 | Batch 0150/0210 | Loss: 0.1409
Epoch: 010/010 | Batch 0200/0210 | Loss: 0.1265
Epoch: 010/010 | Train: 95.96% | Validation: 96.42%
Time elapsed: 1.51 min
Total Training Time: 1.51 min
Test accuracy 95.63%
[Output figures: minibatch loss with running average; training vs. validation accuracy (no dropout)]

With 50% Dropout

torch.manual_seed(RANDOM_SEED)
model = MultilayerPerceptron(num_features=28*28,
                             num_hidden_1=NUM_HIDDEN_1,
                             num_hidden_2=NUM_HIDDEN_2,
                             drop_proba=0.5,
                             num_classes=10)
model = model.to(DEVICE)

optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

minibatch_loss_list, train_acc_list, valid_acc_list = train_model(
    model=model,
    num_epochs=NUM_EPOCHS,
    train_loader=train_loader,
    valid_loader=valid_loader,
    test_loader=test_loader,
    optimizer=optimizer,
    device=DEVICE)

plot_training_loss(minibatch_loss_list=minibatch_loss_list,
                   num_epochs=NUM_EPOCHS,
                   iter_per_epoch=len(train_loader),
                   results_dir=None,
                   averaging_iterations=20)
plt.show()

plot_accuracy(train_acc_list=train_acc_list,
              valid_acc_list=valid_acc_list,
              results_dir=None)
plt.ylim([80, 100])
plt.show()
Epoch: 001/010 | Batch 0000/0210 | Loss: 2.3018
Epoch: 001/010 | Batch 0050/0210 | Loss: 2.0694
Epoch: 001/010 | Batch 0100/0210 | Loss: 1.7909
Epoch: 001/010 | Batch 0150/0210 | Loss: 1.3392
Epoch: 001/010 | Batch 0200/0210 | Loss: 1.2475
Epoch: 001/010 | Train: 80.98% | Validation: 85.18%
Time elapsed: 0.15 min
Epoch: 002/010 | Batch 0000/0210 | Loss: 1.2265
Epoch: 002/010 | Batch 0050/0210 | Loss: 1.1502
Epoch: 002/010 | Batch 0100/0210 | Loss: 0.9517
Epoch: 002/010 | Batch 0150/0210 | Loss: 1.0110
Epoch: 002/010 | Batch 0200/0210 | Loss: 0.8153
Epoch: 002/010 | Train: 87.38% | Validation: 89.55%
Time elapsed: 0.30 min
Epoch: 003/010 | Batch 0000/0210 | Loss: 1.0311
Epoch: 003/010 | Batch 0050/0210 | Loss: 0.8248
Epoch: 003/010 | Batch 0100/0210 | Loss: 0.8777
Epoch: 003/010 | Batch 0150/0210 | Loss: 0.9017
Epoch: 003/010 | Batch 0200/0210 | Loss: 0.7630
Epoch: 003/010 | Train: 89.29% | Validation: 91.45%
Time elapsed: 0.44 min
Epoch: 004/010 | Batch 0000/0210 | Loss: 0.7227
Epoch: 004/010 | Batch 0050/0210 | Loss: 0.8405
Epoch: 004/010 | Batch 0100/0210 | Loss: 0.7044
Epoch: 004/010 | Batch 0150/0210 | Loss: 0.7248
Epoch: 004/010 | Batch 0200/0210 | Loss: 0.6005
Epoch: 004/010 | Train: 90.31% | Validation: 92.18%
Time elapsed: 0.59 min
Epoch: 005/010 | Batch 0000/0210 | Loss: 0.7448
Epoch: 005/010 | Batch 0050/0210 | Loss: 0.7122
Epoch: 005/010 | Batch 0100/0210 | Loss: 0.7613
Epoch: 005/010 | Batch 0150/0210 | Loss: 0.7275
Epoch: 005/010 | Batch 0200/0210 | Loss: 0.6863
Epoch: 005/010 | Train: 91.08% | Validation: 92.60%
Time elapsed: 0.74 min
Epoch: 006/010 | Batch 0000/0210 | Loss: 0.7163
Epoch: 006/010 | Batch 0050/0210 | Loss: 0.6041
Epoch: 006/010 | Batch 0100/0210 | Loss: 0.5916
Epoch: 006/010 | Batch 0150/0210 | Loss: 0.5960
Epoch: 006/010 | Batch 0200/0210 | Loss: 0.6973
Epoch: 006/010 | Train: 91.68% | Validation: 93.17%
Time elapsed: 0.89 min
Epoch: 007/010 | Batch 0000/0210 | Loss: 0.6481
Epoch: 007/010 | Batch 0050/0210 | Loss: 0.6638
Epoch: 007/010 | Batch 0100/0210 | Loss: 0.5803
Epoch: 007/010 | Batch 0150/0210 | Loss: 0.6064
Epoch: 007/010 | Batch 0200/0210 | Loss: 0.6294
Epoch: 007/010 | Train: 91.97% | Validation: 93.40%
Time elapsed: 1.05 min
Epoch: 008/010 | Batch 0000/0210 | Loss: 0.6046
Epoch: 008/010 | Batch 0050/0210 | Loss: 0.5835
Epoch: 008/010 | Batch 0100/0210 | Loss: 0.7137
Epoch: 008/010 | Batch 0150/0210 | Loss: 0.5571
Epoch: 008/010 | Batch 0200/0210 | Loss: 0.5737
Epoch: 008/010 | Train: 92.39% | Validation: 93.80%
Time elapsed: 1.21 min
Epoch: 009/010 | Batch 0000/0210 | Loss: 0.6368
Epoch: 009/010 | Batch 0050/0210 | Loss: 0.5321
Epoch: 009/010 | Batch 0100/0210 | Loss: 0.6144
Epoch: 009/010 | Batch 0150/0210 | Loss: 0.6012
Epoch: 009/010 | Batch 0200/0210 | Loss: 0.6647
Epoch: 009/010 | Train: 92.72% | Validation: 93.93%
Time elapsed: 1.35 min
Epoch: 010/010 | Batch 0000/0210 | Loss: 0.5607
Epoch: 010/010 | Batch 0050/0210 | Loss: 0.6563
Epoch: 010/010 | Batch 0100/0210 | Loss: 0.7113
Epoch: 010/010 | Batch 0150/0210 | Loss: 0.5406
Epoch: 010/010 | Batch 0200/0210 | Loss: 0.4692
Epoch: 010/010 | Train: 92.84% | Validation: 94.05%
Time elapsed: 1.50 min
Total Training Time: 1.50 min
Test accuracy 92.68%
[Output figures: minibatch loss with running average; training vs. validation accuracy (50% dropout)]

Acknowledgements

Code adapted from the excellent lectures of Sebastian Raschka

https://sebastianraschka.com/blog/2021/dl-course.html