import torch.nn as nn
import torch

# Create an input tensor with three features (shape: 1 sample x 3 features)
input_tensor = torch.tensor(
    [[0.3471, 0.4547, -0.2356]]
)

# Define our first linear layer: maps 3 input features to 2 outputs
linear_layer = nn.Linear(in_features=3, out_features=2)

# Pass input through the linear layer (computes x @ W.T + b)
output = linear_layer(input_tensor)
print(output)
# Example output: tensor([[-0.1275, 0.0318]], grad_fn=<AddmmBackward0>)

# The layer's learnable parameters (randomly initialized):
linear_layer.weight  # Parameter of shape (2, 3), e.g.
# tensor([[-0.0425, -0.1467, -0.2155],
#         [-0.0660, -0.1149, -0.2340]], requires_grad=True)
linear_layer.bias    # Parameter of shape (2,), e.g.
# tensor([-0.0968, 0.0519], requires_grad=True)
# A three-layer network: 10 -> 18 -> 20 -> 5
model = nn.Sequential(
    nn.Linear(10, 18),
    nn.Linear(18, 20),
    nn.Linear(20, 5)
)

# Sigmoid squashes any value into (0, 1)
input_tensor = torch.tensor([[6.0]])
sigmoid = nn.Sigmoid()
output = sigmoid(input_tensor)
# output: tensor([[0.9975]])

# Multi-class style head (softmax over the last dimension)
model = nn.Sequential(
    nn.Linear(6, 4),   # first linear layer
    nn.Linear(4, 1),   # second linear layer
    nn.Softmax(dim=-1)
)
# output = model(input_tensor)
# print(output)

# For binary class prediction: single logit + sigmoid
model = nn.Sequential(
    nn.Linear(8, 1),
    nn.Sigmoid()
)
# model.description()

## Loss Function
For multi-class classification, one-hot encoding is used to represent the target class when computing the loss for the final prediction.
# Transforming labels with one-hot encoding
import torch.nn.functional as F

F.one_hot(torch.tensor(0), num_classes=3)  # first class  -> tensor([1, 0, 0])
F.one_hot(torch.tensor(1), num_classes=3)  # second class -> tensor([0, 1, 0])
F.one_hot(torch.tensor(2), num_classes=3)  # third class  -> tensor([0, 0, 1])
### Loss function: CrossEntropyLoss
from torch.nn import CrossEntropyLoss

# Raw (pre-softmax) scores for one sample: one column per class, so a
# binary problem gives 2 columns for a single data point.
scores = torch.tensor([[-0.1211, 0.1059]])
# One-hot encoded target: class 0 -> [1, 0], class 1 -> [0, 1]
one_hot_target = torch.tensor([[1, 0]])
loss_func = CrossEntropyLoss()
loss = loss_func(scores.double(), one_hot_target.double())
# loss: tensor(0.8131, dtype=torch.float64)
## Derivatives for Minimizing Loss (Gradients)
# Create the model and run a forward pass
model = nn.Sequential(
    nn.Linear(16, 8),
    nn.Linear(8, 4),
    nn.Linear(4, 2)
)
# Dummy batch so the snippet is runnable: 1 sample, 16 features, class label 0
sample = torch.randn(1, 16)
target = torch.tensor([0])
prediction = model(sample)

# Calculate the loss and compute the gradients
# (was assigned to "criteria" but called as "criterion" — NameError)
criterion = CrossEntropyLoss()
loss = criterion(prediction, target)
loss.backward()

# Access each layer's gradients
model[0].weight.grad, model[0].bias.grad  # layer 1
model[1].weight.grad, model[1].bias.grad  # layer 2
model[2].weight.grad, model[2].bias.grad  # layer 3
Create the dataset and the dataloader
from torch.utils.data import TensorDataset, DataLoader
from torch import optim  # needed for optim.SGD below

# Dummy data so the snippet is runnable: 12 samples, 4 features, 1 target each
features = torch.rand(12, 4)
target = torch.rand(12, 1)
dataset = TensorDataset(features.float(), target.float())
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

# Create the model
model = nn.Sequential(
    nn.Linear(4, 2),
    nn.Linear(2, 1)
)

# Create the loss and optimizer
criterion = nn.MSELoss()
# model.parameters (without the call) passes the bound method itself,
# which SGD rejects — it must be called: model.parameters()
optimizer = optim.SGD(model.parameters(), lr=0.001)
ReLU and LeakyReLU

Layer Initialization
layer = nn.Linear(64, 128)
# Default init draws from U(-1/sqrt(in_features), 1/sqrt(in_features)),
# so min/max are roughly +/- 0.125 for 64 input features:
# tensor(-0.1250, ...) tensor(0.1250, ...)
print(layer.weight.min(), layer.weight.max())

# Re-initialize the weights in place from U(0, 1)
nn.init.uniform_(layer.weight)

# Now the min/max lie inside [0, 1]:
# tensor(0.0001, ...) tensor(0.9999, ...)
print(layer.weight.min(), layer.weight.max())
model = nn.Sequential(
    nn.Linear(64, 128),
    nn.Linear(128, 256)
)

# Freeze only the first layer's weight
for name, param in model.named_parameters():
    if name == '0.weight':
        param.requires_grad = False

# Freeze both layers entirely (weights and biases)
for name, param in model.named_parameters():
    # Check if the parameters belong to the first layer
    if name == '0.weight' or name == '0.bias':
        # Freeze the parameters
        param.requires_grad = False
    # Check if the parameters belong to the second layer
    if name == '1.weight' or name == '1.bias':
        # Freeze the parameters
        param.requires_grad = False
Weight Initialization Code
# Build two linear layers and re-initialize their weights from U(0, 1)
layer0 = nn.Linear(16, 32)
layer1 = nn.Linear(32, 64)
for lyr in (layer0, layer1):
    nn.init.uniform_(lyr.weight)

# Stack them into a single sequential model: 16 -> 32 -> 64
model = nn.Sequential(layer0, layer1)
Chapter 3

A Deeper Dive into Loading Data
Here we don't require the first and last columns for our features.
Extracting target values from the dataset.
Now converting our raw datasets to tensor datasets.
Adding them to a DataLoader for efficient processing and training of the model.
Creating tensor datasets from NumPy data.
import numpy as np
import torch
from torch.utils.data import TensorDataset

# Random NumPy data: 12 samples with 8 features and 1 target value each
np_features = np.array(np.random.rand(12, 8))
np_target = np.array(np.random.rand(12, 1))

# Wrap the NumPy arrays as PyTorch tensors
torch_features = torch.tensor(np_features)
torch_target = torch.tensor(np_target)

# Pair each feature row with its target row in a TensorDataset
dataset = TensorDataset(torch_features, torch_target)

# Show the final (features, target) pair of the dataset
print(dataset[-1])
Sample working code for DataLoader and TensorDataset
# Load the feature columns and the label column into two float tensors
# (assumes `dataframe` is a pandas DataFrame defined elsewhere — TODO confirm)
feature_cols = ['ph', 'Sulfate', 'Conductivity', 'Organic_carbon']
features = torch.tensor(dataframe[feature_cols].to_numpy()).float()
target = torch.tensor(dataframe['Potability'].to_numpy()).float()

# Pair them in a dataset and batch it with a dataloader
dataset = TensorDataset(features, target)
dataloader = DataLoader(dataset, shuffle=True, batch_size=2)
x, y = next(iter(dataloader))

# A small two-layer model built with the nn.Sequential API
model = nn.Sequential(nn.Linear(4, 1), nn.Linear(1, 1))
output = model(features)
print(output)
# Switch the model to evaluation mode (disables dropout, etc.)
model.eval()

validation_loss = 0.0
with torch.no_grad():  # no gradients needed during validation
    for data in validationloader:
        # data is a (features, labels) batch from the dataloader
        outputs = model(data[0])
        loss = criterion(outputs, data[1])
        # Sum the scalar batch loss into the running total
        validation_loss += loss.item()

# Mean loss over all validation batches
validation_loss_epoch = validation_loss / len(validationloader)
print(validation_loss_epoch)

# Switch the model back to training mode
model.train()
Dropout
# Using the same model shape, set the dropout probability to 0.8
model = nn.Sequential(
    nn.Linear(3072, 16),
    nn.ReLU(),
    nn.Dropout(p=0.8)
)
# The earlier input_tensor is shape (1, 1) but this model needs 3072
# features, which would raise a shape-mismatch error — build a matching input
input_tensor = torch.randn(1, 3072)
model(input_tensor)
Weight Decay
Improving Model Performance
# Random search over (learning rate, momentum) pairs
values = []
for idx in range(10):
    # Randomly sample a learning-rate factor between 2 and 4,
    # giving lr in [1e-4, 1e-2] on a log scale
    factor = np.random.uniform(2, 4)
    lr = 10 ** -factor
    # Randomly select a momentum between 0.85 and 0.99
    momentum = np.random.uniform(0.85, 0.99)
    values.append((lr, momentum))