SQLAlchemy ORM: Session & Unit of Work

데이터베이스 작업을 객체 중심으로 수행하게 해주는 ORM(Object-Relational Mapping)은 백엔드 개발에서 널리 사용됩니다. SQLAlchemy와 같은 라이브러리는 이러한 ORM 기능을 제공하며, 그 중심에는 Session이라는 개념이 있습니다. Session은 데이터베이스로부터 가져온 엔티티(Entity)들을 특정 작업 범위 내에서 관리하고, 변경 사항을 추적하며, 최종적으로 데이터베이스에 반영하는 역할을 수행합니다.

본 포스트에서는 Session의 핵심 동작 원리인 Unit of Work 패턴과 Identity Map 개념을 직접 구현해보며 깊이 이해하는 시간을 갖겠습니다.

ORM과 Session의 필요성

코드 내에서 SQL 쿼리를 직접 작성하여 데이터베이스와 상호작용하는 대신, ORM을 사용하면 데이터베이스 테이블을 파이썬 객체(엔티티)로 매핑하여 다룰 수 있습니다. 이는 다음과 같은 이점을 제공합니다.

객체 중심 개발: 데이터베이스 구조가 아닌 객체 모델 중심으로 비즈니스 로직을 구현할 수 있습니다.
데이터베이스 의존성 감소: 사용 중인 데이터베이스 종류에 덜 구애받는 코드를 작성할 수 있습니다.
생산성 향상: 반복적인 CRUD(Create, Read, Update, Delete) 쿼리 작성을 줄여줍니다.

Session은 이러한 ORM 환경에서 특정 트랜잭션 또는 작업 단위를 관리하는 컨텍스트를 제공합니다. 데이터베이스에서 조회된 객체들을 Identity Map이라는 공간에 저장하여 동일한 객체에 대한 유일성을 보장하고, 객체의 상태 변화(생성, 수정, 삭제)를 추적합니다. 이러한 변경 추적 및 반영 패턴을 Unit of Work라고 합니다.

Unit of Work 패턴과 Identity Map 구현

개념을 더 명확히 이해하기 위해 간단한 User 엔티티와 가상 데이터베이스, 그리고 SimpleSession 클래스를 구현해보겠습니다.

# User 엔티티 정의
class User:
    def __init__(self, id: int, name: str, email: str):
        self.id = id
        self.name = name
        self.email = email

    def __repr__(self):
        return f"User(id={self.id}, name='{self.name}', email='{self.email}')"

    def __eq__(self, other):
        if not isinstance(other, User):
            return NotImplemented
        return self.id == other.id

    def __hash__(self):
        return hash(self.id)

# 가상 데이터베이스 정의
from typing import Dict

class Database:
    def __init__(self):
        self.users = {
            1: User(1, "John Doe", "john@example.com"),
            2: User(2, "Jane Doe", "jane@example.com"),
        }

    def fetch_user(self, id: int):
        return self.users.get(id)

    def insert(self, entity: User):
        self.users[entity.id] = entity

    def update(self, id: int, changes: Dict[str, any]):
        if id in self.users:
             # 실제 ORM에서는 필드별 업데이트가 일반적입니다.
             for key, value in changes.items():
                 setattr(self.users[id], key, value)

    def delete(self, id: int):
        if id in self.users:
             del self.users[id]

# 가상 데이터베이스 인스턴스 생성
database = Database()

이제 핵심인 SimpleSession 클래스를 구현합니다. 이 클래스는 Unit of Work 패턴의 주요 요소들을 포함합니다.

import copy
from typing import Dict, TypeVar, Generic, Set

T = TypeVar('T')

class SimpleSession(Generic[T]):

    def __init__(self):
        # Identity Map: 로드된 객체의 유일성 보장 (ID -> 객체)
        self.identity_map: Dict[int, T] = {}
        # 변경 추적을 위한 집합 (Set)
        self.new_objects: Set[T] = set()      # INSERT 대상
        self.dirty_objects: Set[T] = set()     # UPDATE 대상
        self.removed_objects: Set[T] = set()   # DELETE 대상
        # dirty 객체 비교를 위한 원본 상태 저장 (ID -> 원본 객체 복사본)
        self.original_states: Dict[int, T] = {}

    def _add_to_identity_map(self, entity: T):
        """Identity Map에 객체를 추가하고 원본 상태를 저장 (필요시)"""
        if hasattr(entity, 'id') and entity.id not in self.identity_map:
            self.identity_map[entity.id] = entity
            # 원본 상태 스냅샷 저장 (dirty 체크용) - 깊은 복사(deepcopy)가 중요합니다.
            try:
                self.original_states[entity.id] = copy.deepcopy(entity)
            except TypeError:
                 # deepcopy가 불가능한 경우 경고 및 얕은 복사(copy) 사용
                 print(f"Warning: Could not deepcopy entity {entity.id}. Using shallow copy.")
                 self.original_states[entity.id] = copy.copy(entity)

    def get(self, entity_id: int) -> T | None:
        """
        Identity Map에서 객체를 가져옵니다. 없으면 DB에서 로드합니다.
        """
        # 1. Identity Map 확인
        if entity_id in self.identity_map:
            return self.identity_map[entity_id]

        # 2. DB 조회 시뮬레이션
        print(f"Simulating DB fetch for entity ID: {entity_id}")
        entity_data = database.fetch_user(entity_id) # 실제 DB 조회 로직
        if entity_data:
            # 조회된 데이터로 객체 생성 또는 ORM 매퍼 사용
            # 여기서는 간단히 User 객체를 직접 생성합니다.
            entity = User(id=entity_data.id, name=entity_data.name, email=entity_data.email)
            self._add_to_identity_map(entity) # Identity Map에 추가 및 원본 상태 저장
            return entity
        else:
            return None

    def add(self, entity: T):
        """새 객체를 Unit of Work에 등록합니다 (INSERT 대상)."""
        if not hasattr(entity, 'id') or entity.id is None:
             print(f"Cannot add entity without ID: {entity}") # 또는 ID 자동 생성 로직 필요
             return
        if entity.id in self.identity_map:
            print(f"Entity {entity.id} is already managed.")
            return

        print(f"Registering NEW: {entity}")
        self._add_to_identity_map(entity)
        self.new_objects.add(entity)
        # 다른 상태 집합에서는 제거 (상태 충돌 방지)
        self.dirty_objects.discard(entity)
        self.removed_objects.discard(entity)

    def remove(self, entity: T):
        """객체를 Unit of Work에서 제거 대상으로 등록합니다 (DELETE 대상)."""
        if not hasattr(entity, 'id') or entity.id not in self.identity_map:
             print(f"Cannot remove entity {getattr(entity, 'id', 'N/A')}: Not managed by this session.")
             return

        entity_id = entity.id
        print(f"Registering REMOVED: {entity}")
        self.removed_objects.add(entity)
        # 다른 상태 집합에서는 제거
        self.new_objects.discard(entity)
        self.dirty_objects.discard(entity)

    def _detect_dirty_objects(self):
        """
        Identity Map의 객체와 원본 상태를 비교하여 자동으로 dirty 상태를 감지합니다.
        객체 비교 로직이 중요하며, 실제 ORM은 더 정교한 방식을 사용합니다.
        """
        print("Detecting dirty objects...")
        for entity_id, current_entity in list(self.identity_map.items()): # 순회 중 변경 고려
            # new 또는 removed 상태 객체는 dirty 검사 대상이 아님
            if current_entity in self.new_objects or current_entity in self.removed_objects:
                continue

            original_entity = self.original_states.get(entity_id)
            if original_entity:
                # 객체 비교: 여기서는 간단히 __dict__ 비교 (실제로는 속성별 비교 권장)
                # JPA를 포함한 몇몇 ORM에서는 수정된 상태를 'dirty'라고 표현합니다.
                if current_entity.__dict__ != original_entity.__dict__:
                     print(f"Detected DIRTY (via diff): {current_entity}")
                     self.dirty_objects.add(current_entity)
            # else: 원본 상태가 없으면 자동 감지 불가 (예: 세션 외부에서 로드된 객체)

    def commit(self):
        """변경 사항을 데이터베이스에 영속화합니다."""
        print("\n--- Starting Commit ---")

        # 1. 변경 감지 수행 (UPDATE 대상 확정)
        self._detect_dirty_objects()

        # 2. 실제 DB 작업 수행 (순서 중요: INSERT -> UPDATE -> DELETE)
        self._insert_new()
        self._update_dirty()
        self._delete_removed()

        # 3. Commit 후 상태 초기화 (다음 트랜잭션을 위해)
        # 성공적으로 DB에 반영된 객체들의 추적 상태를 초기화합니다.
        self.new_objects.clear()
        self.dirty_objects.clear()
        self.removed_objects.clear()
        # 원본 상태도 클리어해야 다음 커밋에서 정확한 비교가 가능합니다.
        self.original_states.clear()
        # Identity Map 자체는 세션 생명주기 동안 유지될 수 있습니다.
        # 필요시 self.identity_map.clear() 호출 가능

        # 커밋 후 identity map의 객체들을 원본 상태로 재설정
        for entity_id, entity in self.identity_map.items():
            try:
                 self.original_states[entity_id] = copy.deepcopy(entity)
            except TypeError:
                 print(f"Warning: Could not deepcopy entity {entity_id} after commit. Using shallow copy.")
                 self.original_states[entity_id] = copy.copy(entity)


        print("--- Commit Finished ---\n")

    def rollback(self):
        """변경 사항을 취소하고 추적 상태를 초기화합니다."""
        print("\n--- Starting Rollback ---")
        # DB 작업은 수행하지 않습니다.
        # 추적 상태만 초기화합니다.
        self.new_objects.clear()
        self.dirty_objects.clear()
        self.removed_objects.clear()

        # Identity Map의 객체들을 원본 상태로 되돌리는 로직 (선택적이며 복잡할 수 있음)
        # 여기서는 간단히 identity map과 original_states를 초기화합니다.
        self.identity_map.clear()
        self.original_states.clear()

        print("--- Rollback Finished ---\n")

    # --- Private Helper Methods for DB Interaction (Simulation) ---

    def _insert_new(self):
        """새 객체들을 데이터베이스에 INSERT 합니다."""
        print(f"INSERTING {len(self.new_objects)} new object(s):")
        for entity in list(self.new_objects): # 순회 중 변경될 수 있으므로 리스트 복사
            print(f"  INSERT INTO {type(entity).__name__} ({', '.join(entity.__dict__.keys())}) VALUES (...) for {entity}")
            database.insert(entity) # 실제 DB INSERT 호출
            # INSERT 성공 후, 이 객체는 더 이상 'new'가 아니므로 original_states 업데이트 가능
            # _add_to_identity_map 에서 이미 처리되었을 수 있으나, commit 시점 동기화 보장
            # try:
            #     self.original_states[entity.id] = copy.deepcopy(entity)
            # except TypeError:
            #      print(f"Warning: Could not deepcopy entity {entity.id} after insert. Using shallow copy.")
            #      self.original_states[entity.id] = copy.copy(entity)

    def _update_dirty(self):
        """변경된 객체들을 데이터베이스에 UPDATE 합니다."""
        print(f"UPDATING {len(self.dirty_objects)} dirty object(s):")
        for entity in list(self.dirty_objects):
            original = self.original_states.get(entity.id)
            if original:
                # 변경된 필드만 찾아 업데이트 (효율적)
                changes = self._diff_entities(original, entity)
                if changes:
                    print(f"  UPDATE {type(entity).__name__} SET {changes} WHERE id={entity.id} (Original: {original})")
                    database.update(entity.id, changes) # 실제 DB UPDATE 호출
                else:
                     print(f"  Skipping UPDATE for {entity.id}: No changes detected.")
            else:
                # 원본이 없으면 전체 필드 업데이트 또는 에러 처리 (상황에 따라 다름)
                print(f"  UPDATE {type(entity).__name__} SET ... WHERE id={entity.id} (Warning: No original state for diff)")
                # database.update_all_fields(entity) # 예시: 모든 필드 업데이트

            # UPDATE 성공 후, 이 객체는 더 이상 'dirty'가 아님
            # 원본 상태를 현재 상태로 업데이트 (다음 변경 감지를 위해)
            # try:
            #     self.original_states[entity.id] = copy.deepcopy(entity)
            # except TypeError:
            #     print(f"Warning: Could not deepcopy entity {entity.id} after update. Using shallow copy.")
            #     self.original_states[entity.id] = copy.copy(entity)

    def _delete_removed(self):
        """삭제 대상으로 표시된 객체들을 데이터베이스에서 DELETE 합니다."""
        print(f"DELETING {len(self.removed_objects)} removed object(s):")
        for entity in list(self.removed_objects):
            print(f"  DELETE FROM {type(entity).__name__} WHERE id={entity.id}")
            database.delete(entity.id) # 실제 DB DELETE 호출

            # DELETE 성공 후, Identity Map 및 원본 상태에서 제거
            entity_id = entity.id
            if entity_id in self.identity_map:
                del self.identity_map[entity_id]
            if entity_id in self.original_states:
                del self.original_states[entity_id]

    def _diff_entities(self, original: T, updated: T) -> Dict[str, any]:
        """두 객체 상태를 비교하여 변경된 속성을 찾아 반환합니다."""
        changes = {}
        if not original or not updated or type(original) is not type(updated):
            return {} # 비교 불가

        original_dict = original.__dict__
        updated_dict = updated.__dict__

        # ID는 보통 변경되지 않으므로 제외
        keys_to_compare = set(original_dict.keys()) | set(updated_dict.keys()) - {'id'}

        for key in keys_to_compare:
            val_orig = original_dict.get(key)
            val_upd = updated_dict.get(key)

            if val_orig != val_upd:
                changes[key] = val_upd # 변경된 값 저장

        return changes

SimpleSession의 핵심 로직은 다음과 같습니다.

identity_map: get 메소드를 통해 조회된 객체는 이 딕셔너리에 id를 키로 하여 저장됩니다. 동일 id로 다시 get을 호출하면 DB 조회를 생략하고 identity_map에서 객체를 반환하여 객체 동일성(Identity)을 보장합니다.
original_states: 객체가 identity_map에 처음 추가될 때, 객체의 상태를 깊은 복사(deep copy)하여 저장합니다. 이는 commit 시점에 현재 객체 상태와 비교하여 변경 여부(dirty)를 감지하는 데 사용됩니다. 얕은 복사(shallow copy)는 참조만 복사하므로 원본 추적에 적합하지 않습니다.
변경 추적 집합 (new_objects, dirty_objects, removed_objects):
- add(entity): 새로운 객체를 new_objects에 추가합니다 (INSERT 대상).
- remove(entity): 관리 중인 객체를 removed_objects에 추가합니다 (DELETE 대상).
- _detect_dirty_objects(): identity_map의 객체들을 original_states와 비교하여 변경된 객체를 찾아 dirty_objects에 추가합니다 (UPDATE 대상). 여기서는 간단히 __dict__를 비교했지만, 실제 ORM은 더 정교한 비교 로직이나 명시적인 dirty 마킹 방식을 사용할 수 있습니다.
commit(): Unit of Work의 핵심입니다. _detect_dirty_objects()를 호출하여 변경 사항을 최종 확인한 뒤, _insert_new(), _update_dirty(), _delete_removed() 메소드를 정해진 순서(보통 INSERT → UPDATE → DELETE)에 따라 호출하여 실제 DB 작업을 수행합니다. 이 과정에서 여러 객체 변경이 하나의 논리적 트랜잭션으로 묶여 DB에 반영됩니다. 작업 완료 후 추적 집합과 original_states를 초기화하여 다음 작업을 준비합니다.
rollback(): commit과 달리 DB 작업을 수행하지 않고, 추적 중인 모든 변경 사항(new_objects, dirty_objects, removed_objects)과 identity_map, original_states를 초기화하여 마지막 commit 이후의 모든 변경을 취소합니다.

이러한 Unit of Work 패턴은 객체 상태 변경 시마다 DB I/O를 발생시키는 대신, 변경 내용을 메모리(Session)에 누적했다가 commit 시점에 한 번에 처리함으로써 DB 접근 횟수를 최적화하는 효과를 가져옵니다.

실제 사용 예시

이제 SimpleSession을 사용하여 객체 변경, 삭제, 롤백 시나리오를 살펴보겠습니다.

1. 객체 수정 및 Commit

session = SimpleSession[User]()

# ID 2번 사용자를 세션에서 가져오기
user2 = session.get(2)
if user2:
    print(f"Retrieved from session: {user2}")
    # 세션 내 객체의 이메일 변경
    user2.email = "jane.doe.updated@example.com"
    # 이 시점까지는 DB에 반영되지 않습니다.

# 변경 사항을 DB에 반영
session.commit()

# Commit 후 상태 확인
print(f"Identity Map after commit: {session.identity_map}")
# Commit 후 original_states는 비어있어야 정상입니다 (새로운 스냅샷 생성은 내부 로직).
# print(f"Original States after commit: {session.original_states}")

# 실제 데이터베이스 상태 확인
print("Database state after commit:")
print(database.users)

실행 결과:

Simulating DB fetch for entity ID: 2
Retrieved from session: User(id=2, name='Jane Doe', email='jane@example.com')

--- Starting Commit ---
Detecting dirty objects...
Detected DIRTY (via diff): User(id=2, name='Jane Doe', email='jane.doe.updated@example.com')
INSERTING 0 new object(s):
UPDATING 1 dirty object(s):
  UPDATE User SET {'email': 'jane.doe.updated@example.com'} WHERE id=2 (Original: User(id=2, name='Jane Doe', email='jane@example.com'))
DELETING 0 removed object(s):
--- Commit Finished ---

Identity Map after commit: {2: User(id=2, name='Jane Doe', email='jane.doe.updated@example.com')}
Database state after commit:
{1: User(id=1, name='John Doe', email='john@example.com'), 2: User(id=2, name='Jane Doe', email='jane.doe.updated@example.com')}

user2.email 변경은 commit() 호출 시점에 감지되어 UPDATE 쿼리가 실행되었고, 데이터베이스 상태에도 반영된 것을 확인할 수 있습니다.

2. 객체 삭제 및 Commit

session = SimpleSession[User]() # 새 세션 시작

# ID 1번 사용자를 가져와 삭제 대상으로 등록
user1 = session.get(1)
if user1:
    print(f"Retrieved from session: {user1}")
    session.remove(user1) # 삭제 대상으로 등록

# 변경 사항(삭제)을 DB에 반영
session.commit()

# Commit 후 상태 확인
print(f"Identity Map after commit: {session.identity_map}")
# print(f"Original States after commit: {session.original_states}")

# 실제 데이터베이스 상태 확인
print("Database state after commit:")
print(database.users)

실행 결과:

Simulating DB fetch for entity ID: 1
Retrieved from session: User(id=1, name='John Doe', email='john@example.com')
Registering REMOVED: User(id=1, name='John Doe', email='john@example.com')

--- Starting Commit ---
Detecting dirty objects...
INSERTING 0 new object(s):
UPDATING 0 dirty object(s):
DELETING 1 removed object(s):
  DELETE FROM User WHERE id=1
--- Commit Finished ---

Identity Map after commit: {} # 삭제된 객체는 Identity Map에서도 제거됨
Database state after commit:
{2: User(id=2, name='Jane Doe', email='jane.doe.updated@example.com')}

session.remove(user1) 호출 후 commit()이 실행되자 DELETE 쿼리가 수행되었고, 데이터베이스에서 ID 1번 사용자가 제거되었습니다.

3. 변경 사항 롤백 (Rollback)

비즈니스 로직 수행 중 예외가 발생하거나 특정 조건에 따라 변경 사항을 데이터베이스에 반영하고 싶지 않을 때 rollback()을 사용합니다.

session = SimpleSession[User]() # 새 세션 시작

# 새 사용자 추가 시도
user4 = User(id=4, name="David", email="david@example.com")
session.add(user4)

# 기존 사용자 수정 시도
user2 = session.get(2)
if user2:
    user2.name = "Jane Smith" # 이름 변경 시도

# 커밋 대신 롤백 호출
session.rollback() # 모든 변경사항 취소

# Rollback 후 상태 확인
print(f"Identity Map after rollback: {session.identity_map}") # 롤백 정책에 따라 비어있거나 이전 상태
print(f"New objects after rollback: {session.new_objects}") # 비어있어야 함
print(f"Dirty objects after rollback: {session.dirty_objects}") # 비어있어야 함
print(f"Removed objects after rollback: {session.removed_objects}") # 비어있어야 함

# 데이터베이스 상태 확인 (변경되지 않았어야 함)
print("Database state after rollback:")
print(database.users)

실행 결과:

Registering NEW: User(id=4, name='David', email='david@example.com')
Simulating DB fetch for entity ID: 2

--- Starting Rollback ---
--- Rollback Finished ---

Identity Map after rollback: {} # 롤백으로 Identity Map이 초기화됨
New objects after rollback: set()
Dirty objects after rollback: set()
Removed objects after rollback: set()
Database state after rollback:
{2: User(id=2, name='Jane Doe', email='jane.doe.updated@example.com')} # 이전 commit 상태 유지

session.add(user4)와 user2.name 변경 시도가 있었지만, session.rollback() 호출로 인해 commit되지 않았습니다. 따라서 new_objects와 dirty_objects는 비워졌고, 데이터베이스 상태는 이전 commit 시점 그대로 유지되었습니다.

결론

지금까지 파이썬 ORM의 핵심 구성 요소인 Session과 그 기반이 되는 Unit of Work 패턴, Identity Map 개념을 간단한 코드로 구현하며 살펴보았습니다. Session은 객체 변경 사항을 효율적으로 추적하고, commit 시점에 데이터베이스에 반영하여 I/O를 최적화하며, rollback을 통해 작업 단위의 원자성을 보장하는 중요한 역할을 수행합니다.

이러한 내부 동작 원리를 이해하는 것은 SQLAlchemy와 같은 실제 ORM 라이브러리를 더욱 효과적으로 사용하고 문제를 해결하는 데 큰 도움이 됩니다. 다음 시리즈에서는 실제 SQLAlchemy 세션을 공부해보도록 하겠습니다.

SQLAlchemy 시리즈 - ORM의 핵심: Session과 Unit of Work 패턴 이해하기

ORM과 Session의 필요성

Unit of Work 패턴과 Identity Map 구현

실제 사용 예시

결론

Subscribe to my newsletter

roach

roach