Age Prediction
Solution for submission 175171
A detailed solution for submission 175171, submitted for the Age Prediction challenge.
In [5]:
%load_ext aicrowd.magic
In [6]:
%aicrowd login
In [7]:
import os
import time
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import argparse
import sys
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
from PIL import Image
# Locations of the training CSV, the test CSV and the image root directory.
# NOTE(review): all three are empty strings in this copy of the notebook —
# presumably they must be filled in before any later cell is run.
TRAIN_CSV_PATH = ''
TEST_CSV_PATH = ''
IMAGE_PATH = ''
In [8]:
# Run configuration consumed by the setup cell that follows.
# Index of the CUDA device to use; a negative value selects the CPU.
cuda = 1
# Random seed; -1 means "no seed" (RANDOM_SEED becomes None).
seed = 5
# Number of DataLoader worker processes.
numworkers = 16
# Directory that receives the training log and the saved checkpoint.
outpath = ''
In [9]:
# Derive the run-wide settings from the configuration cell and start the log.
NUM_WORKERS = numworkers

# A negative device index means "run on CPU".
if cuda >= 0:
    DEVICE = torch.device("cuda:%d" % cuda)
else:
    DEVICE = torch.device("cpu")

# -1 is the sentinel for "do not seed".
RANDOM_SEED = None if seed == -1 else seed

PATH = outpath
# An empty PATH means "current directory": nothing to create in that case.
# makedirs (instead of mkdir) also copes with nested output paths and with a
# directory that already exists.
if PATH:
    os.makedirs(PATH, exist_ok=True)
LOGFILE = os.path.join(PATH, 'training.log')
TEST_PREDICTIONS = os.path.join(PATH, 'test_predictions.log')
TEST_ALLPROBAS = os.path.join(PATH, 'test_allprobas.tensor')

# Logging
header = []
header.append('PyTorch Version: %s' % torch.__version__)
header.append('CUDA device available: %s' % torch.cuda.is_available())
header.append('Using CUDA device: %s' % DEVICE)
header.append('Random Seed: %s' % RANDOM_SEED)
header.append('Output Path: %s' % PATH)
header.append('Script: %s' % sys.argv[0])

# Opening with 'w' truncates any log left over from a previous run; the
# context manager flushes and closes the file on exit.
with open(LOGFILE, 'w') as f:
    for entry in header:
        print(entry)
        f.write('%s\n' % entry)
In [10]:
class DatasetAge(Dataset):
    """Dataset of face images labelled with a ten-year age bucket.

    The CSV must have an ``ImageID`` column. For every split other than
    ``'test'`` it must also have an ``age`` column whose values look like
    ``'30-40'``; the lower bound divided by ten becomes the integer label.

    Args:
        csv_path: Path of the CSV describing the split.
        img_dir: Root image directory; images are read from
            ``<img_dir>/<split>/<ImageID>.jpg``.
        split: Split name. ``'test'`` makes ``__getitem__`` return
            ``(image, image_id)`` instead of ``(image, label, levels)``.
        transform: Optional callable invoked as ``transform(image=img)`` and
            expected to return a mapping with an ``'image'`` entry.
    """

    def __init__(self, csv_path, img_dir, split, transform=None):
        df = pd.read_csv(csv_path)
        self.img_dir = os.path.join(img_dir, split)
        self.image_names = df["ImageID"].values
        self.split = split
        self.csv_path = csv_path
        if split == 'test' and 'age' not in df.columns:
            # Labels are never read for the test split, so an unlabelled
            # test CSV is acceptable.
            self.y = []
        else:
            self.y = [int(age.split('-')[0]) // 10
                      for age in df['age'].values]
        self.transform = transform

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            ``(img, label, levels)`` for non-test splits, where ``levels`` is
            a float32 vector of ``NUM_CLASSES - 1`` entries with ``label``
            leading ones; ``(img, image_id)`` for the test split.

        Raises:
            FileNotFoundError: If the image is missing or cannot be decoded.
        """
        img_path = os.path.join(self.img_dir, self.image_names[index]) + ".jpg"
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None rather than
            # raising; fail here with the offending path instead of deep
            # inside the transform pipeline.
            raise FileNotFoundError(
                'Could not read image (missing or undecodable): %s' % img_path)
        # NOTE(review): cv2.imread yields BGR channel order, while the
        # Normalize mean/std used in this file are the usual RGB-ordered
        # values. Left unchanged so previously trained checkpoints keep
        # seeing the same inputs — confirm whether BGR->RGB was intended.
        if self.transform is not None:
            augmented = self.transform(image=img)
            img = augmented['image']

        if self.split == 'test':
            return img, self.image_names[index]

        label = self.y[index]
        # Ordinal encoding: the first `label` thresholds are "passed".
        levels = [1] * label + [0] * (NUM_CLASSES - 1 - label)
        levels = torch.tensor(levels, dtype=torch.float32)
        return img, label, levels

    def __len__(self):
        # Count rows via the image ids so the length is valid even when no
        # labels were parsed.
        return len(self.image_names)
In [11]:
import albumentations
import albumentations as A
import cv2
import numpy as np
import torch
from albumentations.pytorch.transforms import ToTensorV2
# Training-time augmentation pipeline; DatasetAge calls it as transform(image=img).
train_transforms = A.Compose([
A.HorizontalFlip(),
# Rotation limited to 15 degrees, applied with probability 0.7; borders are filled with black.
A.Rotate(limit=15, p=0.7, interpolation=cv2.INTER_AREA, border_mode=cv2.BORDER_CONSTANT, value=(0, 0, 0)),
# NOTE(review): the positional arguments presumably mean (num_holes, max_h_size, max_w_size) —
# confirm against the installed albumentations version.
A.Cutout(8, 138, 138, p=0.7),
# These mean/std values are the commonly used ImageNet per-channel statistics.
A.Normalize(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225],
),
ToTensorV2()
])
BATCH_SIZE = 4
# Number of age buckets; DatasetAge.__getitem__ reads this global to build the level vector.
NUM_CLASSES =10
# Deterministic pipeline (normalisation + tensor conversion only) used for the test split.
val_transforms = A.Compose([
A.Normalize(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225],
),
ToTensorV2()])
# Training split with augmentation.
train_dataset = DatasetAge(csv_path=TRAIN_CSV_PATH,
img_dir=IMAGE_PATH,
split="train",
transform=train_transforms)
# Shuffled loader over the training split.
train_loader = DataLoader(dataset=train_dataset,
batch_size=BATCH_SIZE,
shuffle=True,
num_workers=NUM_WORKERS)
In [12]:
from efficientnet_pytorch import EfficientNet
class AgeModel(nn.Module):
    """EfficientNet-B6 backbone with an ordinal-regression head.

    The head is one bias-free linear unit shared by all thresholds plus
    ``num_classes - 1`` learnable biases, so each sample yields
    ``num_classes - 1`` logits that differ only by their bias.

    Args:
        num_classes: Number of ordered classes (age buckets).
    """

    def __init__(self, num_classes):
        super().__init__()
        self.num_classes = num_classes
        self.model = EfficientNet.from_pretrained('efficientnet-b6')
        self.adpool = torch.nn.AdaptiveAvgPool2d(1)
        # 2304 is hard-coded; presumably it is the channel count that
        # extract_features produces for efficientnet-b6 — adjust it if the
        # backbone is changed.
        self.fc = nn.Linear(2304, 1, bias=False)
        self.linear_1_bias = nn.Parameter(
            torch.zeros(self.num_classes - 1).float())

    def forward(self, x):
        """Return ``(logits, probas)``, each with ``num_classes - 1`` columns."""
        x = self.model.extract_features(x)
        x = self.adpool(x)
        x = x.view(x.size(0), -1)
        # The (batch, 1) output broadcasts against the per-threshold biases.
        logits = self.fc(x) + self.linear_1_bias
        probas = torch.sigmoid(logits)
        return logits, probas
In [13]:
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from torch.optim import SGD
In [14]:
# Optimiser and schedule hyper-parameters used by the training cell.
lr = 1e-3
momentum = 0.9
weight_decay = 1e-4
num_epochs = 200
In [ ]:
def cost_fn(logits, levels):
    """Return the mean ordinal binary cross-entropy over a batch.

    For each threshold the term is ``logsigmoid(logit)`` where the level is 1
    and ``logsigmoid(logit) - logit`` where it is 0. Terms are summed along
    dim 1, negated, and averaged over the remaining dimension.

    Args:
        logits: Tensor of threshold logits.
        levels: Tensor of the same shape holding the 0/1 level targets.

    Returns:
        Scalar tensor with the mean loss.
    """
    # Evaluate logsigmoid once and reuse it for both branches.
    log_sig = F.logsigmoid(logits)
    per_sample = -torch.sum(
        log_sig * levels + (log_sig - logits) * (1 - levels), dim=1)
    return torch.mean(per_sample)
# Seed the CPU and CUDA generators only when a seed was requested:
# RANDOM_SEED is None when seeding is disabled, and torch.manual_seed
# rejects None.
if RANDOM_SEED is not None:
    torch.manual_seed(RANDOM_SEED)
    torch.cuda.manual_seed(RANDOM_SEED)

# Build the network and move it to the selected device.
model = AgeModel(NUM_CLASSES)
model.to(DEVICE)

# SGD with momentum, plus a cosine schedule that restarts every 50 scheduler
# steps and anneals the learning rate down to 1e-5.
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum,
                            weight_decay=weight_decay)
scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
    optimizer, T_0=50, T_mult=1, eta_min=0.00001, last_epoch=-1)
def compute_mae_and_mse_and_accuracy(model, data_loader, device):
    """Compute MAE, MSE and accuracy of the predicted class index.

    The predicted label of a sample is the number of threshold probabilities
    above 0.5.

    Args:
        model: Callable returning ``(logits, probas)`` for a feature batch.
        data_loader: Iterable of ``(features, targets, levels)`` batches;
            ``levels`` is not used here.
        device: Device the features and targets are moved to.

    Returns:
        Tuple ``(mae, mse, accuracy)`` of float tensors.

    Raises:
        ValueError: If ``data_loader`` yields no samples.
    """
    mae, mse, accuracy, num_examples = 0, 0, 0, 0
    for features, targets, _levels in data_loader:
        features = features.to(device)
        targets = targets.to(device)

        _logits, probas = model(features)
        predicted_labels = torch.sum(probas > 0.5, dim=1)

        num_examples += targets.size(0)
        errors = predicted_labels - targets
        mae += torch.sum(torch.abs(errors))
        mse += torch.sum(errors ** 2)
        accuracy += torch.sum(predicted_labels == targets)

    if num_examples == 0:
        # Without this guard the accumulators are still plain ints and the
        # .float() calls below fail with an unrelated AttributeError.
        raise ValueError('data_loader yielded no samples')

    mae = mae.float() / num_examples
    mse = mse.float() / num_examples
    accuracy = accuracy.float() / num_examples
    return mae, mse, accuracy
# Train for num_epochs, evaluate after every epoch and checkpoint the weights
# with the lowest MAE seen so far.
start_time = time.time()
# best_accuracy is initialised with the other "best" values so the summary
# line cannot raise NameError when the first comparison below is False
# (for example when the MAE is NaN).
best_mae, best_rmse, best_accuracy, best_epoch = 999, 999, 0, -1

for epoch in range(num_epochs):
    model.train()
    for batch_idx, (features, _targets, levels) in enumerate(train_loader):
        features = features.to(DEVICE)
        levels = levels.to(DEVICE)

        # FORWARD AND BACK PROP
        logits, _probas = model(features)
        cost = cost_fn(logits, levels)
        optimizer.zero_grad()
        cost.backward()

        # UPDATE MODEL PARAMETERS
        optimizer.step()

        # LOGGING (every 50th batch)
        if not batch_idx % 50:
            s = ('Epoch: %03d/%03d | Batch %04d/%04d | Cost: %.4f'
                 % (epoch+1, num_epochs, batch_idx,
                    len(train_dataset)//BATCH_SIZE, cost.item()))
            print(s)
            with open(LOGFILE, 'a') as f:
                f.write('%s\n' % s)

    # NOTE(review): the flattened source does not show whether the scheduler
    # was stepped per batch or per epoch; per-epoch is assumed because T_0=50
    # divides the 200 epochs into four cycles — confirm.
    scheduler.step()

    model.eval()
    # NOTE(review): the "Valid" metrics are computed on train_loader, i.e. on
    # augmented training data; no separate validation loader exists in this
    # file.
    with torch.set_grad_enabled(False):
        valid_mae, valid_mse, valid_accuracy = compute_mae_and_mse_and_accuracy(
            model, train_loader, device=DEVICE)

    if valid_mae < best_mae:
        best_mae, best_rmse, best_epoch, best_accuracy = (
            valid_mae, torch.sqrt(valid_mse), epoch, valid_accuracy)
        ########## SAVE MODEL #############
        torch.save(model.state_dict(), os.path.join(PATH, 'best_model.pt'))

    s = 'MAE/RMSE/ACCURACY: | Current Valid: %.2f/%.2f/%.2f Ep. %d | Best Valid : %.2f/%.2f/%.2f Ep. %d' % (
        valid_mae, torch.sqrt(valid_mse), valid_accuracy, epoch,
        best_mae, best_rmse, best_accuracy, best_epoch)
    print(s)
    with open(LOGFILE, 'a') as f:
        f.write('%s\n' % s)

    s = 'Time elapsed: %.2f min' % ((time.time() - start_time)/60)
    print(s)
    with open(LOGFILE, 'a') as f:
        f.write('%s\n' % s)
In [17]:
# Test split: deterministic preprocessing only, read in CSV order so that
# predictions line up with the image ids.
# NOTE(review): the image root is the literal "data/" here, whereas the
# training dataset uses IMAGE_PATH — confirm this is intentional.
test_dataset = DatasetAge(
    csv_path=TEST_CSV_PATH,
    img_dir="data/",
    split="test",
    transform=val_transforms,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
)
In [18]:
# Bucket index -> age-range label: 0 -> '0-10', 1 -> '10-20', ..., 9 -> '90-100'.
map_ = {
    bucket: '%d-%d' % (10 * bucket, 10 * bucket + 10)
    for bucket in range(10)
}
In [ ]:
# Rebuild the network and load the best checkpoint written by the training
# loop, which saves to os.path.join(PATH, 'best_model.pt').
# NOTE(review): the original loaded the literal "out_finalv2/best_model.pt";
# set `outpath` accordingly if the checkpoint really lives there.
model = AgeModel(NUM_CLASSES)
model.load_state_dict(
    torch.load(os.path.join(PATH, 'best_model.pt'), map_location='cpu'))
model.eval()
model.to(DEVICE)

########## SAVE PREDICTIONS ######
all_pred = []
all_probas = []
with torch.set_grad_enabled(False):
    for features, image_ids in test_loader:
        features = features.to(DEVICE)
        _logits, probas = model(features)
        # Keep the stored probabilities on the CPU so they do not pile up in
        # device memory across the whole test set.
        all_probas.append(probas.cpu())
        # Predicted bucket = number of thresholds with probability > 0.5.
        predicted_labels = torch.sum(probas > 0.5, dim=1)
        all_pred.extend(
            (image_id, map_[int(label)])
            for label, image_id in zip(predicted_labels, image_ids))
In [ ]:
# Write the predictions as the submission CSV. The target directory is created
# first because to_csv does not create missing parent directories.
# NOTE(review): the DataFrame index is written as an extra first column
# (pandas default) — confirm the expected submission format.
os.makedirs("assets", exist_ok=True)
pd.DataFrame(all_pred, columns=["ImageID", "age"]).to_csv("assets/submission.csv")
In [ ]:
%aicrowd notebook submit -c age-prediction -a assets --no-verify
In [ ]:
Content
Comments
You must login before you can post a comment.
Thank you very much for posting your submission!