itertools 모듈 - 배움 에이아이

학습 목표

product, permutations, combinations로 조합을 생성할 수 있다
chain, zip_longest, islice로 이터러블을 결합/분할할 수 있다
groupby로 데이터를 그룹핑할 수 있다
메모리 효율적인 반복 패턴을 이해한다

왜 중요한가

itertools는 메모리 효율적인 반복 도구를 제공합니다. 모든 조합을 리스트로 생성하는 대신, 필요할 때 하나씩 생성(Lazy Evaluation)하므로 대용량 데이터에서도 메모리를 절약합니다. ML에서 하이퍼파라미터 그리드 탐색, 데이터 조합, 배치 처리 등에 활용됩니다.

조합 생성

product - 카테시안 곱

from itertools import product

# Cartesian product of two sequences: every (color, size) pairing.
colors = ["빨강", "파랑"]
sizes = ["S", "M", "L"]

# The rightmost input varies fastest, so every size is listed for the
# first color before the second color starts.
pairs = product(colors, sizes)
for combo in pairs:
    print(combo)
# Prints one tuple per line:
# ('빨강', 'S'), ('빨강', 'M'), ('빨강', 'L'),
# ('파랑', 'S'), ('파랑', 'M'), ('파랑', 'L')

# repeat=2 multiplies the iterable with itself: two six-sided dice.
faces = range(1, 7)
dice = [*product(faces, repeat=2)]
dice_count = len(dice)
print(f"주사위 2개 조합: {dice_count}개")  # 6 * 6 = 36

# Hyperparameter grid: one (learning rate, batch size, optimizer) tuple
# per experiment.
lrs = [0.001, 0.01, 0.1]
batch_sizes = [16, 32]
optimizers = ["adam", "sgd"]

grid = [*product(lrs, batch_sizes, optimizers)]
print(f"총 실험 수: {len(grid)}")  # 3 * 2 * 2 = 12

permutations - 순열

from itertools import permutations

# Every ordering of all three items (order matters): 3! = 6 tuples,
# printed one per line.
items = ["A", "B", "C"]
print(*permutations(items), sep="\n")
# ('A', 'B', 'C'), ('A', 'C', 'B'), ('B', 'A', 'C'), ...

# A second argument limits each permutation to that many items:
# 3 * 2 = 6 tuples of length two.
print(*permutations(items, 2), sep="\n")
# ('A', 'B'), ('A', 'C'), ('B', 'A'), ('B', 'C'), ('C', 'A'), ('C', 'B')

combinations - 조합

from itertools import combinations, combinations_with_replacement

# Unordered selections of two items: ("A", "B") appears, ("B", "A") does not.
items = ["A", "B", "C", "D"]
pairs = combinations(items, 2)
for combo in pairs:
    print(combo)
# ('A', 'B'), ('A', 'C'), ('A', 'D'), ('B', 'C'), ('B', 'D'), ('C', 'D')

# Same selection rule, except an element may also be paired with itself.
pairs_with_repeats = combinations_with_replacement([1, 2, 3], 2)
for combo in pairs_with_repeats:
    print(combo)
# (1, 1), (1, 2), (1, 3), (2, 2), (2, 3), (3, 3)

이터러블 결합과 분할

chain - 여러 이터러블 연결

from itertools import chain

# Walk several lists back to back without building a combined list first.
a = [1, 2, 3]
b = [4, 5, 6]
c = [7, 8, 9]

# Unpack into a single print() call so the line is terminated by a newline.
# Printing each item with end=" " never emits one, so the next print()
# output would be appended to the same line after a trailing space.
print(*chain(a, b, c))
# 1 2 3 4 5 6 7 8 9

# Flatten one level of nesting: from_iterable chains the inner lists.
nested = [[1, 2], [3, 4], [5, 6]]
flat = list(chain.from_iterable(nested))
print(flat)  # [1, 2, 3, 4, 5, 6]

zip_longest

from itertools import zip_longest

# Pair up two sequences of different lengths.
names = ["김철수", "이영희", "박민수"]
scores = [85, 92]

# Built-in zip stops at the shorter input, so the third name is dropped.
truncated = list(zip(names, scores))
print(truncated)
# [('김철수', 85), ('이영희', 92)]

# zip_longest runs to the end of the longer input and pads the missing
# score with fillvalue.
padded = list(zip_longest(names, scores, fillvalue=0))
print(padded)
# [('김철수', 85), ('이영희', 92), ('박민수', 0)]

islice - 이터러블 슬라이싱

from itertools import islice

# Take just the first ten values of an iterable.
data = range(100)
first_10 = [*islice(data, 10)]
print(first_10)  # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# islice(iterable, start, stop, step): every second value from 20 up to,
# but not including, 30.
start, stop, step = 20, 30, 2
middle = [*islice(data, start, stop, step)]
print(middle)  # [20, 22, 24, 26, 28]

# Consume only a finite prefix of an endless generator.
def infinite_counter():
    """Yield 0, 1, 2, ... forever; the caller decides when to stop."""
    current = 0
    while True:
        yield current
        current += 1

# islice requests exactly five values and then stops pulling from the
# generator, so the endless loop above is never run past that point.
first_5 = [*islice(infinite_counter(), 5)]
print(first_5)  # [0, 1, 2, 3, 4]

그룹핑과 필터링

groupby

from itertools import groupby
from operator import itemgetter

# groupby only merges *adjacent* items that share a key, so the rows are
# sorted by that same key first. The list below is already in class order,
# but sorting explicitly keeps the grouping correct if a row is appended or
# reordered later (sorted() is stable, so names keep their relative order).
data = [
    ("A반", "김철수"), ("A반", "이영희"),
    ("B반", "박민수"), ("B반", "최지원"),
    ("C반", "정하늘"),
]
class_of = itemgetter(0)  # key function: the class label of a (class, name) row

for key, group in groupby(sorted(data, key=class_of), key=class_of):
    # group is a one-shot iterator over this class's rows; collect the names
    members = [name for _, name in group]
    print(f"{key}: {members}")
# A반: ['김철수', '이영희']
# B반: ['박민수', '최지원']
# C반: ['정하늘']

groupby는 연속된 동일 키만 그룹으로 묶습니다. 같은 키를 가진 항목을 하나의 그룹으로 모으려면, groupby에 넘기는 것과 동일한 key 함수로 먼저 정렬(sorted())해야 합니다.

# Pitfall: on unsorted input, equal values that are not adjacent end up
# in separate groups.
data = [1, 1, 2, 1, 1]
for k, g in groupby(data):
    run = list(g)
    print(k, run)
# 1 [1, 1]
# 2 [2]
# 1 [1, 1]  <- the value 1 is split across two groups

accumulate

from itertools import accumulate
import operator

# Running sum: with no function given, accumulate adds the items up.
numbers = [1, 2, 3, 4, 5]
cumsum = [*accumulate(numbers)]
print(cumsum)  # [1, 3, 6, 10, 15]

# Running product: pass a two-argument function to replace addition.
cumprod = [*accumulate(numbers, operator.mul)]
print(cumprod)  # [1, 2, 6, 24, 120]

# Running maximum: the built-in max works as the combining function too.
data = [3, 1, 4, 1, 5, 9, 2]
cummax = [*accumulate(data, max)]
print(cummax)  # [3, 3, 4, 4, 5, 9, 9]

AI/ML에서의 활용

from itertools import product, chain, islice, accumulate

# Hyperparameter grid search: expand the grid into one dict per experiment.
param_grid = {
    "learning_rate": [1e-4, 1e-3, 1e-2],
    "hidden_size": [128, 256],
    "dropout": [0.1, 0.3, 0.5],
}
# Both views come from the same dict, so keys and values line up by position.
keys = param_grid.keys()
values = param_grid.values()

experiments = [
    dict(zip(keys, combo))
    for combo in product(*values)
]
print(f"총 {len(experiments)}개 실험")  # 3 * 2 * 3 = 18

# Chain the datasets, then flatten one level. The result holds the
# individual items of every batch in order: [1, 2, 3, 4, 5, 6].
# chain() walks both lists in place; `train_batches + val_batches` would
# first build a throwaway combined list.
train_batches = [[1, 2], [3, 4]]
val_batches = [[5, 6]]
all_batches = list(chain.from_iterable(chain(train_batches, val_batches)))

# Running mean of the training loss: cumulative sum divided by the number
# of losses seen so far (enumerate starts at 1, so no manual +1 is needed).
losses = [0.9, 0.7, 0.5, 0.4, 0.3]
cumulative_avg = [
    running_total / step
    for step, running_total in enumerate(accumulate(losses), start=1)
]
print([f"{x:.2f}" for x in cumulative_avg])
# ['0.90', '0.80', '0.70', '0.62', '0.56']

itertools와 리스트 컴프리헨션의 차이는 무엇인가요?

리스트 컴프리헨션은 모든 결과를 메모리에 저장합니다. itertools는 지연 평가로 하나씩 생성하므로 대용량 데이터에서 메모리를 절약합니다. 결과 전체가 필요하면 리스트 컴프리헨션, 순회만 필요하면 itertools가 적합합니다.

repeat, count, cycle은 무엇인가요?

count와 cycle은 무한 이터레이터이고, repeat는 횟수를 생략하면 무한히 반복합니다. count(10): 10부터 무한 증가, cycle([1,2,3]): 무한 반복, repeat("x"): "x"를 무한 반복, repeat("x", 5): "x"를 5번만 반복(유한). 무한 이터레이터는 반드시 islice나 종료 조건으로 제한해서 사용합니다.

체크리스트

product, permutations, combinations의 차이를 설명할 수 있다
chain으로 여러 이터러블을 연결할 수 있다
groupby 사용 전 정렬이 필요한 이유를 이해한다
메모리 효율적인 반복 패턴의 이점을 설명할 수 있다

다음 문서

조건문

if/elif/else와 match-case 학습

collections

특화 컨테이너 복습

​학습 목표

​왜 중요한가

​조합 생성

​product - 카테시안 곱

​permutations - 순열

​combinations - 조합

​이터러블 결합과 분할

​chain - 여러 이터러블 연결

​zip_longest

​islice - 이터러블 슬라이싱

​그룹핑과 필터링

​groupby

​accumulate

​AI/ML에서의 활용

​체크리스트

​다음 문서

조건문