From 214be3415e27e57036f3c252188d6eb06edc55b7 Mon Sep 17 00:00:00 2001 From: Josh Kingsley Date: Thu, 13 Nov 2025 13:37:19 +0200 Subject: docs: add CRDT design notes from Claude --- docs/design/crdt-design.md | 270 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 270 insertions(+) create mode 100644 docs/design/crdt-design.md (limited to 'docs/design/crdt-design.md') diff --git a/docs/design/crdt-design.md b/docs/design/crdt-design.md new file mode 100644 index 0000000..5e6dd2d --- /dev/null +++ b/docs/design/crdt-design.md @@ -0,0 +1,270 @@ +# CRDT Design for Music Notation Grid + +## Overview + +This document captures the design considerations for implementing a CRDT-based state management system for a music notation grid that supports rhythmic subdivisions (e.g., triplets, quintuplets). + +**Last Updated:** 2025-11-12 + +## Domain Context + +- **Grid Structure:** Rows of cells representing rhythmic divisions +- **Base Unit:** Cells represent musical durations (e.g., quarter notes) +- **Subdivisions:** Replacing N cells with M cells of different duration (e.g., 4 eighth notes → 3 eighth note triplets) +- **Critical Constraint:** Total rhythmic duration of each row must remain constant (width preservation) + +## Why Not Use Automerge/JSON CRDT? + +### Problems with Generic JSON CRDTs + +1. **Invariant Enforcement:** Automerge doesn't know about width constraints + - Concurrent edits might violate total duration + - No way to "reject" a CRDT merge after the fact + - Can't undo without breaking causality + +2. **Wrong Abstraction Level:** + - Automerge provides list/map primitives + - We need rhythmic subdivision semantics + - Like using text CRDT for spreadsheet formulas + +3. **Validation After Merge:** + ```typescript + const doc = Automerge.merge(docA, docB); + if (!isValidWidth(doc.grid.rows[0])) { + // Now what? Can't reject, can't undo easily + } + ``` + +## Recommended Approach: Operation-Based CRDT + +### Core Principles + +1. **Cell IDs instead of indices** - stable references across replicas +2. **Subdivide = delete + insert** - clear semantics that preserve duration +3. **Conflict detection** - width violations shown to users, not silently resolved +4. **Operation-level commutativity** - same result regardless of operation order + +### Type Definitions + +```typescript +export type CellId = string; // UUID + +export type Cell = Immutable<{ + id: CellId; + + // Fractional index for positioning (like Figma, Linear) + position: string; // e.g., "a0", "a0.5", "a1" + + // Rhythmic duration this cell represents (e.g., 0.25 = quarter note) + duration: number; + + // Reference to parent if this is a subdivision + parentCellId: CellId | null; + + // CRDT metadata + createdBy: string; // OpId + timestamp: HLCTimestamp; + deleted: boolean; // Tombstone for deletions +}>; + +export type CreateGrid = Immutable<{ + type: "createGrid"; + opId: string; + timestamp: HLCTimestamp; + gridId: string; + rows: number; + baseCellsPerRow: number; + + // Pre-generated cell IDs for initial cells + initialCellIds: CellId[][]; // [row][col] +}>; + +export type Subdivide = Immutable<{ + type: "subdivide"; + opId: string; + timestamp: HLCTimestamp; + gridId: string; + rowIndex: number; + + // Cells to mark as deleted (tombstone) + deletedCellIds: CellId[]; + + // New cells to insert + newCells: Array<{ + id: CellId; + position: string; // Fractional index between neighbors + duration: number; // Must sum to deleted cells' total duration + }>; +}>; +``` + +### Key Design Decisions + +#### 1. Fractional Positioning + +Uses fractional indexing (similar to Figma's approach) to allow insertion between any two cells without reindexing. + +```typescript +// Example positions: +"a0" // First cell +"a0.5" // Inserted between first and second +"a1" // Second cell +"a1.25" // Inserted between second and third +``` + +#### 2. Tombstones for Deletions + +Cells are never truly deleted, just marked with `deleted: true`. This ensures: +- Idempotent operations (deleting twice is safe) +- Causal consistency (can tell what existed when) +- Conflict detection (know if concurrent ops targeted same cells) + +#### 3. Duration Preservation by Construction + +Each `Subdivide` operation ensures: +```typescript +const deletedDuration = deletedCellIds.reduce( + (sum, id) => sum + getCellById(id).duration, + 0 +); + +const insertedDuration = newCells.reduce( + (sum, cell) => sum + cell.duration, + 0 +); + +// Must be equal (enforced when creating operation) +assert(deletedDuration === insertedDuration); +``` + +## Conflict Resolution Options + +### Option 1: Last-Writer-Wins Per Cell + +```typescript +function applySubdivide(cells: Cell[], op: Subdivide): Cell[] { + const targetCellsExist = op.deletedCellIds.every(id => + cells.find(c => c.id === id && !c.deleted) + ); + + if (!targetCellsExist) { + // Target cells already deleted by concurrent op + // This op becomes no-op (idempotent) + return cells; + } + + // Apply deletion + insertion + // ... +} +``` + +**Pros:** +- Simple, deterministic +- Guaranteed commutative (same result regardless of order) + +**Cons:** +- Silently drops one user's edit +- No way to recover lost intent + +### Option 2: Hierarchical Cell Model + +Subdivisions create nested structure: + +```typescript +export type Cell = Immutable<{ + id: CellId; + duration: number; + + // If subdivided, contains child cells + children: Cell[] | null; + + // For CRDT: track modifications + createdBy: string; + subdivideOps: Set; +}>; +``` + +**Pros:** +- Preserves both users' intent +- Can show both subdivisions in UI + +**Cons:** +- Complex merge logic +- Unclear what "subdivide an already-subdivided cell" means +- May require user resolution anyway + +### Option 3: Conflict Detection + User Resolution (RECOMMENDED) + +Allow concurrent operations to apply, but detect conflicts: + +```typescript +function validateRowWidth(row: Row): ValidationResult { + const activeCells = row.cells.filter(c => !c.deleted); + const totalDuration = activeCells.reduce((sum, c) => sum + c.duration, 0); + const expected = getExpectedDuration(row); + + if (totalDuration !== expected) { + return { + valid: false, + conflictingOps: findConflictingOps(row), + affectedCells: findAffectedCells(row), + suggestedResolution: { + // Provide options to user + } + }; + } + + return { valid: true }; +} +``` + +**Pros:** +- No silent data loss +- Users understand what happened +- Can choose best resolution per case + +**Cons:** +- Requires UI for conflict resolution +- Temporary invalid states exist +- More complex UX + +## Open Questions + +1. **Which conflict resolution method to use?** + - Pure LWW? + - Conflict detection + user resolution? + - Hybrid approach? + +2. **Fractional indexing implementation:** + - Use existing library or custom? + - How to handle position exhaustion? + +3. **Operation validation:** + - Client-side only or server-side too? + - How to handle invalid operations from old clients? + +4. **Snapshot strategy:** + - When to create snapshots for performance? + - How to compact operation history? + +## Next Steps + +- [ ] Decide on conflict resolution strategy +- [ ] Implement basic Cell ID system +- [ ] Replace index-based operations with ID-based +- [ ] Add operation IDs and timestamps (HLC) +- [ ] Implement fractional positioning +- [ ] Test commutativity with randomized operation order +- [ ] Design conflict UI (if using Option 3) + +## References + +- [Automerge Documentation](https://automerge.org/) +- [Yjs CRDT](https://docs.yjs.dev/) +- [Figma's Fractional Indexing](https://www.figma.com/blog/realtime-editing-of-ordered-sequences/) +- [Hybrid Logical Clocks Paper](https://jaredforsyth.com/posts/hybrid-logical-clocks/) + +## Related Code + +- Current implementation: `src/doc/index.ts` +- Grid realization logic: `src/doc/index.ts:87-125` -- cgit v1.2.3