モデルビルダー
学習目標: 層を組み合わせて典型的なニューラルネットワークを構築するパターンを身につける
MLP (多層パーセプトロン)
柔軟な層数の MLP
import torch
import torch.nn as nn
class MLP(nn.Module):
    """Multi-layer perceptron assembled from a list of hidden-layer widths.

    Every hidden ``Linear`` is followed by an optional ``BatchNorm1d``, the
    chosen activation and an optional ``Dropout``. The final ``Linear`` is
    left bare so the network emits raw outputs.

    Args:
        input_dim: Number of input features.
        hidden_dims: Iterable of hidden-layer widths (may be empty).
        output_dim: Number of output features.
        activation: One of ``'relu'``, ``'gelu'``, ``'tanh'``, ``'leaky'``
            (``LeakyReLU`` with negative slope 0.2). Any other key raises
            ``KeyError``.
        dropout: Dropout probability; no ``Dropout`` layer is added unless
            this is greater than 0.
        batch_norm: Whether to insert ``BatchNorm1d`` after hidden layers.
    """

    def __init__(self, input_dim, hidden_dims, output_dim,
                 activation='relu', dropout=0.0, batch_norm=False):
        super().__init__()
        widths = [input_dim, *hidden_dims, output_dim]
        activation_factories = {
            'relu': nn.ReLU,
            'gelu': nn.GELU,
            'tanh': nn.Tanh,
            'leaky': lambda: nn.LeakyReLU(0.2),
        }
        make_activation = activation_factories[activation]

        output_layer_idx = len(widths) - 2
        modules = []
        for idx, (fan_in, fan_out) in enumerate(zip(widths, widths[1:])):
            modules.append(nn.Linear(fan_in, fan_out))
            if idx == output_layer_idx:
                # The output layer gets no norm / activation / dropout.
                continue
            if batch_norm:
                modules.append(nn.BatchNorm1d(fan_out))
            modules.append(make_activation())
            if dropout > 0:
                modules.append(nn.Dropout(dropout))
        self.net = nn.Sequential(*modules)

    def forward(self, x):
        """Run ``x`` through the stacked layers and return the result."""
        return self.net(x)
# Example: a 784 -> 512 -> 256 -> 128 -> 10 network with GELU activations,
# batch normalisation and 20% dropout on every hidden layer.
model = MLP(
    input_dim=784,
    hidden_dims=[512, 256, 128],
    output_dim=10,
    activation='gelu',
    dropout=0.2,
    batch_norm=True,
)
print(model)
タスク別の最終層
| タスク | 最終層 | 損失関数 |
|---|---|---|
| 多クラス分類 | Linear → softmaxは損失内 | CrossEntropyLoss |
| 多ラベル分類 | Linear → sigmoidは損失内 | BCEWithLogitsLoss |
| 回帰 | Linear(活性化なし) | MSELoss |
| 順序回帰 | Linear → cumulative | CORN / OrdinalLoss |
CNN ビルダー
基本的なCNN(VGGスタイル)
def conv_block(in_ch, out_ch, n_convs=2, pool=True):
    """Build a VGG-style stage: ``n_convs`` x (3x3 conv, BN, ReLU) + optional pool.

    Args:
        in_ch: Input channels of the first convolution.
        out_ch: Output channels of every convolution in the stage.
        n_convs: How many conv/BN/ReLU triples to stack.
        pool: Whether to append a ``MaxPool2d(2)`` at the end.

    Returns:
        An ``nn.Sequential`` holding the layers in order.
    """
    stage = []
    current_ch = in_ch
    for _ in range(n_convs):
        stage.append(nn.Conv2d(current_ch, out_ch, 3, padding=1))
        stage.append(nn.BatchNorm2d(out_ch))
        stage.append(nn.ReLU(inplace=True))
        # Only the first convolution sees the external channel count.
        current_ch = out_ch
    if pool:
        stage.append(nn.MaxPool2d(2))
    return nn.Sequential(*stage)
class SimpleCNN(nn.Module):
    """VGG-style classifier: stacked ``conv_block`` stages, global pool, linear head.

    Args:
        n_classes: Number of output logits.
        channels: Output channel count of each successive stage; the last
            entry is also the input width of the linear head.
        in_channels: Channel count of the input images. Defaults to 3, so
            existing callers are unaffected; pass 1 for grayscale input.
    """

    def __init__(self, n_classes=10, channels=(64, 128, 256, 512),
                 in_channels=3):
        super().__init__()
        blocks = []
        in_ch = in_channels
        for out_ch in channels:
            blocks.append(conv_block(in_ch, out_ch))
            in_ch = out_ch
        self.features = nn.Sequential(*blocks)
        # Global average pooling makes the head independent of input size.
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(channels[-1], n_classes)

    def forward(self, x):
        """Return class logits for ``x``.

        Assumes ``x`` is an image batch laid out (batch, in_channels, H, W)
        -- TODO confirm against callers.
        """
        x = self.features(x)
        x = self.pool(x).flatten(1)
        return self.fc(x)
残差ブロック (ResNet-style)
class ResBlock(nn.Module):
    """Basic two-convolution residual block (ResNet style).

    The main path is conv3x3 -> BN -> ReLU -> conv3x3 -> BN. The shortcut is
    the identity unless the stride or channel count changes, in which case a
    1x1 convolution + BN projects the input to the matching shape.

    Args:
        in_ch: Input channels.
        out_ch: Output channels.
        stride: Stride of the first convolution (and of the projection
            shortcut, when one is needed).
    """

    def __init__(self, in_ch, out_ch, stride=1):
        super().__init__()
        # Convolutions skip the bias because BatchNorm follows each of them.
        self.conv1 = nn.Conv2d(in_ch, out_ch, 3, stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_ch)
        self.conv2 = nn.Conv2d(out_ch, out_ch, 3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_ch)
        self.shortcut = (
            nn.Sequential(
                nn.Conv2d(in_ch, out_ch, 1, stride, bias=False),
                nn.BatchNorm2d(out_ch),
            ) if stride != 1 or in_ch != out_ch else nn.Identity()
        )

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        # The file never imports ``torch.nn.functional as F``; reach the same
        # function through the ``nn`` alias that is already in scope.
        relu = nn.functional.relu
        out = relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = out + self.shortcut(x)
        return relu(out)
Pre-trained backbone を使う
from torchvision import models
# Use an ImageNet-pretrained ResNet50 as the feature extractor.
backbone = models.resnet50(weights='IMAGENET1K_V1')

# Linear probing: freeze every pretrained weight first ...
for pretrained_param in backbone.parameters():
    pretrained_param.requires_grad = False

# ... then attach a fresh classification head. Parameters of a newly created
# nn.Linear require gradients by default, so only the head is trained.
# NOTE: `n_classes` must already be defined by the surrounding code.
backbone.fc = nn.Linear(backbone.fc.in_features, n_classes)
系列モデル
LSTM / GRU
class SeqClassifier(nn.Module):
    """Token-sequence classifier: embedding -> (bi)LSTM -> linear head.

    The classification feature is the final hidden state of the top LSTM
    layer (both directions concatenated when the LSTM is bidirectional).

    Args:
        vocab_size: Size of the embedding table. Index 0 is the padding index.
        embed_dim: Embedding dimension.
        hidden: LSTM hidden size per direction.
        n_classes: Number of output logits.
        bidirectional: Whether the LSTM runs in both directions.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, vocab_size, embed_dim=128, hidden=256, n_classes=2,
                 bidirectional=True, num_layers=2):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        # nn.LSTM applies dropout only between stacked layers and warns when
        # a non-zero value is requested for a single-layer network.
        inter_layer_dropout = 0.3 if num_layers > 1 else 0.0
        self.rnn = nn.LSTM(embed_dim, hidden, num_layers=num_layers,
                           batch_first=True, bidirectional=bidirectional,
                           dropout=inter_layer_dropout)
        n_dirs = 2 if bidirectional else 1
        self.fc = nn.Linear(hidden * n_dirs, n_classes)

    def forward(self, x, lengths=None):
        """Return class logits for a batch of token-id sequences.

        Args:
            x: Token ids; assumed shape (batch, seq_len) since the LSTM is
                built with ``batch_first=True``.
            lengths: Optional tensor of true sequence lengths. When given,
                the batch is packed so padding never reaches the LSTM.
        """
        e = self.embed(x)
        if lengths is not None:
            # pack_padded_sequence requires the lengths on the CPU.
            e = nn.utils.rnn.pack_padded_sequence(
                e, lengths.cpu(), batch_first=True, enforce_sorted=False)
        _, (h, _) = self.rnn(e)
        if self.rnn.bidirectional:
            # h is ordered layer-major, so the last two entries are the
            # forward and backward states of the top layer.
            h = torch.cat([h[-2], h[-1]], dim=1)
        else:
            # Unidirectional: h[-2] would be a *lower layer* (or not exist),
            # so use only the top layer's state.
            h = h[-1]
        return self.fc(h)
Mini Transformer
class MiniTransformer(nn.Module):
    """Small Transformer-encoder classifier with learned positions.

    Token and position embeddings are summed, passed through a stack of
    ``nn.TransformerEncoderLayer`` blocks (GELU, ``batch_first=True``), then
    mean-pooled over the sequence and fed to a linear head.

    Args:
        vocab_size: Size of the token embedding table.
        embed: Model / embedding dimension.
        n_heads: Attention heads per layer.
        n_layers: Number of encoder layers.
        max_len: Size of the position embedding table; longer inputs have
            no position embedding to look up.
        n_classes: Number of output logits.
    """

    def __init__(self, vocab_size, embed=128, n_heads=4, n_layers=4,
                 max_len=512, n_classes=2):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed)
        self.pos = nn.Embedding(max_len, embed)
        enc_layer = nn.TransformerEncoderLayer(embed, n_heads,
                                               dim_feedforward=embed*4,
                                               dropout=0.1,
                                               batch_first=True,
                                               activation='gelu')
        self.encoder = nn.TransformerEncoder(enc_layer, n_layers)
        self.cls = nn.Linear(embed, n_classes)

    def forward(self, x, mask=None):
        """Return class logits for a batch of token ids.

        Args:
            x: Token ids, indexed as (batch, seq_len).
            mask: Optional key-padding mask passed to the encoder as
                ``src_key_padding_mask``; expected to be (batch, seq_len)
                with True at padded positions (the PyTorch convention).
        """
        pos_ids = torch.arange(x.size(1), device=x.device)
        h = self.embed(x) + self.pos(pos_ids)
        h = self.encoder(h, src_key_padding_mask=mask)
        if mask is None:
            return self.cls(h.mean(dim=1))  # plain mean pooling
        # Masked mean pooling: average over real tokens only, so the logits
        # do not depend on how much padding the batch happens to contain.
        pad = mask.to(torch.bool).unsqueeze(-1)
        h = h.masked_fill(pad, 0.0)
        # clamp avoids 0/0 for a row that is entirely padding.
        n_valid = (~pad).sum(dim=1).clamp(min=1)
        return self.cls(h.sum(dim=1) / n_valid)
設定駆動でモデルを作る
YAML/dict で構成を表現し、コード変更なしで実験できる作り。Hydra や OmegaConf と相性◎。
def build_model(config):
    """Instantiate a model from a plain configuration mapping.

    ``config['type']`` selects the architecture:

    * ``'mlp'``    -- ``MLP``; requires ``input_dim``, ``hidden_dims`` and
      ``output_dim``; optional ``activation``, ``dropout``, ``batch_norm``.
    * ``'cnn'``    -- ``SimpleCNN``; requires ``n_classes``; optional
      ``channels``.
    * ``'resnet'`` -- the torchvision model named by ``backbone`` loaded with
      ``IMAGENET1K_V1`` weights, its ``fc`` layer replaced by a new
      ``n_classes``-way linear head.

    Raises:
        KeyError: If a required key is missing from ``config``.
        ValueError: If ``config['type']`` is not one of the values above.
    """
    kind = config['type']

    if kind == 'mlp':
        mlp_kwargs = {
            'input_dim': config['input_dim'],
            'hidden_dims': config['hidden_dims'],
            'output_dim': config['output_dim'],
            'activation': config.get('activation', 'relu'),
            'dropout': config.get('dropout', 0.0),
            'batch_norm': config.get('batch_norm', False),
        }
        return MLP(**mlp_kwargs)

    if kind == 'cnn':
        n_classes = config['n_classes']
        stage_channels = tuple(config.get('channels', (64, 128, 256, 512)))
        return SimpleCNN(n_classes=n_classes, channels=stage_channels)

    if kind == 'resnet':
        # Imported lazily so torchvision is only needed for this branch.
        from torchvision import models
        factory = getattr(models, config['backbone'])
        net = factory(weights='IMAGENET1K_V1')
        net.fc = nn.Linear(net.fc.in_features, config['n_classes'])
        return net

    raise ValueError(f"unknown type: {kind}")
# Usage example (in practice this mapping would be loaded from YAML).
config = dict(
    type='mlp',
    input_dim=784,
    hidden_dims=[512, 256],
    output_dim=10,
    activation='gelu',
    dropout=0.2,
    batch_norm=True,
)
model = build_model(config)
パラメータ数の確認とサマリ
def model_summary(model, input_size):
    """Print total / trainable parameter counts and a per-layer summary.

    Args:
        model: Module whose ``parameters()`` are counted.
        input_size: Forwarded to ``torchinfo.summary`` as ``input_size``.

    The per-layer table comes from ``torchinfo`` when it can be imported;
    otherwise the module's own repr is printed instead.
    """
    total = 0
    trainable = 0
    for param in model.parameters():
        count = param.numel()
        total += count
        if param.requires_grad:
            trainable += count
    print(f"Parameters: {total/1e6:.2f}M (trainable {trainable/1e6:.2f}M)")

    # Per-layer output sizes: torchinfo is the convenient way to get them.
    try:
        from torchinfo import summary
        summary(model, input_size=input_size)
    except ImportError:
        print(model)