nn.Module - 배움 에이아이

nn.Module은 PyTorch에서 모든 신경망 모델의 기본 클래스입니다. 모델의 구조 정의, 순전파 로직, 파라미터 관리를 하나의 클래스에서 처리합니다.

기본 모델 정의

import torch
import torch.nn as nn

class SimpleNet(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    Args:
        input_dim: Number of input features fed to the first linear layer.
        hidden_dim: Width of the hidden representation.
        output_dim: Number of features produced by the last linear layer.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Assigning modules as attributes registers them (and their
        # parameters) on this module.
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Apply fc1, the ReLU activation, then fc2 to ``x`` and return the result."""
        return self.fc2(self.relu(self.fc1(x)))

# Build the model and push one random batch through it
model = SimpleNet(784, 256, 10)  # input_dim, hidden_dim, output_dim
x = torch.randn((32, 784))  # batch of 32 samples with 784 features each
output = model(x)  # calling the module runs forward()
print(f"출력 shape: {output.shape}")  # torch.Size([32, 10])

nn.Sequential

간단한 순차 모델은 nn.Sequential로 더 간결하게 정의할 수 있습니다.

# Layers run in the order they are listed: 784 -> 256 -> 128 -> 10,
# with ReLU and Dropout(p=0.3) after each of the first two linear layers.
model = nn.Sequential(
    nn.Linear(in_features=784, out_features=256),
    nn.ReLU(),
    nn.Dropout(p=0.3),
    nn.Linear(in_features=256, out_features=128),
    nn.ReLU(),
    nn.Dropout(p=0.3),
    nn.Linear(in_features=128, out_features=10),
)

# Run one random batch of 32 samples through the stack
batch = torch.randn((32, 784))
output = model(batch)

파라미터 관리

model = SimpleNet(input_dim=784, hidden_dim=256, output_dim=10)

# Walk over every registered parameter together with its qualified name
for name, param in model.named_parameters():
    print(f"{name}: shape={param.shape}, requires_grad={param.requires_grad}")

# Count all elements, and separately those that will receive gradients
total = 0
trainable = 0
for p in model.parameters():
    count = p.numel()
    total += count
    if p.requires_grad:
        trainable += count
print(f"전체: {total:,}, 학습 가능: {trainable:,}")

커스텀 레이어

nn.Module을 상속하여 사용자 정의 레이어를 만들 수 있습니다.

class ResidualBlock(nn.Module):
    """Residual block: Linear -> ReLU -> Linear with a skip connection and LayerNorm.

    Args:
        dim: Feature size; both linear layers map ``dim`` to ``dim`` so the
            block output can be added to its input.
    """

    def __init__(self, dim):
        super().__init__()
        self.block = nn.Sequential(
            nn.Linear(in_features=dim, out_features=dim),
            nn.ReLU(),
            nn.Linear(in_features=dim, out_features=dim),
        )
        self.ln = nn.LayerNorm(normalized_shape=dim)

    def forward(self, x):
        """Return LayerNorm(x + block(x))."""
        transformed = self.block(x)
        # Skip connection: the untouched input is added to the transformed branch
        summed = x + transformed
        return self.ln(summed)


class DeepNet(nn.Module):
    """Deep network: input projection, a stack of residual blocks, output projection.

    Args:
        input_dim: Number of input features.
        hidden_dim: Feature size used by every residual block.
        output_dim: Number of output features.
        num_blocks: How many ``ResidualBlock`` instances to stack (default 4).
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_blocks=4):
        super().__init__()
        self.input_proj = nn.Linear(input_dim, hidden_dim)
        # nn.ModuleList (not a plain list) so each block is registered as a submodule
        self.blocks = nn.ModuleList(
            ResidualBlock(hidden_dim) for _ in range(num_blocks)
        )
        self.output_proj = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Project ``x``, pass it through each block in order, then project to the output size."""
        hidden = self.input_proj(x)
        for layer in self.blocks:
            hidden = layer(hidden)
        return self.output_proj(hidden)

서브 모듈은 반드시 nn.Module의 속성으로 직접 할당하여 등록하거나 nn.ModuleList, nn.ModuleDict에 담아야 합니다. 일반 Python list나 dict에 모듈을 넣으면 서브 모듈로 등록되지 않아 .parameters()에 해당 파라미터가 포함되지 않습니다. 그 결과 옵티마이저에 전달되지 않아 학습되지 않으며, .to(device)나 state_dict()에도 반영되지 않습니다.

디바이스 이동

# Use CUDA when it is available, otherwise fall back to the CPU
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Move every parameter and buffer of the model to the chosen device
model = SimpleNet(784, 256, 10)
model = model.to(device)

# The input must live on the same device as the model
x = torch.randn(32, 784)
x = x.to(device)
output = model(x)

모델 모드 전환

# Training mode: Dropout and BatchNorm layers switch to their training behavior
model.train()

# Evaluation mode: Dropout is disabled and BatchNorm uses its running statistics
model.eval()

파라미터 동결

전이학습에서 사전학습 가중치를 고정할 때 사용합니다.

# Freeze one layer: turn off gradient tracking for all of fc1's parameters
model.fc1.requires_grad_(False)

# Verify the freeze by counting the elements that still require gradients
still_trainable = [p for p in model.parameters() if p.requires_grad]
trainable = sum(p.numel() for p in still_trainable)
print(f"학습 가능 파라미터: {trainable:,}")

체크리스트

nn.Module을 상속하여 커스텀 모델을 정의할 수 있다
forward() 메서드에서 순전파 로직을 구현할 수 있다
nn.ModuleList와 Python list의 차이를 이해한다
.to(device)로 모델을 GPU로 이동할 수 있다

다음 문서

Dataset / DataLoader

모델에 데이터를 공급하는 파이프라인 구축

학습 루프

nn.Module을 활용한 학습/평가 루프 구현

​기본 모델 정의

​nn.Sequential

​파라미터 관리

​커스텀 레이어

​디바이스 이동

​모델 모드 전환

​파라미터 동결

​체크리스트

​다음 문서

Dataset / DataLoader

학습 루프

기본 모델 정의

nn.Sequential

파라미터 관리

커스텀 레이어

디바이스 이동

모델 모드 전환

파라미터 동결

체크리스트

다음 문서