From 214be3415e27e57036f3c252188d6eb06edc55b7 Mon Sep 17 00:00:00 2001 From: Josh Kingsley Date: Thu, 13 Nov 2025 13:37:19 +0200 Subject: docs: add CRDT design notes from Claude --- docs/design/fractional-indexing.md | 396 +++++++++++++++++++++++++++++++++++++ 1 file changed, 396 insertions(+) create mode 100644 docs/design/fractional-indexing.md (limited to 'docs/design/fractional-indexing.md') diff --git a/docs/design/fractional-indexing.md b/docs/design/fractional-indexing.md new file mode 100644 index 0000000..287e719 --- /dev/null +++ b/docs/design/fractional-indexing.md @@ -0,0 +1,396 @@ +# Fractional Indexing for Stable Cell Positions + +**Last Updated:** 2025-11-13 + +## Overview + +Fractional indexing provides stable, immutable identifiers for cell positions in an ordered sequence, enabling concurrent insertions without the coordination problems of array indices. + +## The Problem with Array Indices + +Consider this scenario with traditional array indices: + +``` +Initial state: +[0: A, 1: B, 2: C, 3: D] + +Alice subdivides cells 1-2 (B, C) +Bob subdivides cells 2-3 (C, D) + +After Alice's operation: +[0: A, 1: T1, 2: T2, 3: T3, 4: D] +Now "cell 2" means T2 + +After Bob's operation (he thinks "cell 2" is C): +[0: A, 1: T1, 2: T2, 3: T3, 4: Q1, 5: Q2, 6: Q3, 7: D] +But C is gone! Bob's operation is invalid. +``` + +**The problem:** Indices are relative positions that change when the array is modified. Concurrent operations reference different cells by the same index. + +## Fractional Indexing Solution + +Instead of indices, each cell has a **position string** that never changes: + +``` +Initial state: +Cell A: position="a0" +Cell B: position="a1" +Cell C: position="a2" +Cell D: position="a3" + +Alice subdivides [B, C] (position="a1" and "a2") +Bob subdivides [C, D] (position="a2" and "a3") + +Alice creates: + T1: position="a1.2" (between a1 and a2) + T2: position="a1.5" + T3: position="a1.8" + +Bob creates: + Q1: position="a2.2" (between a2 and a3) + Q2: position="a2.5" + Q3: position="a2.8" + +Merged result (sorted by position): +a0 (A), a1 (B, deleted), a1.2 (T1), a1.5 (T2), a1.8 (T3), +a2 (C, deleted), a2.2 (Q1), a2.5 (Q2), a2.8 (Q3), a3 (D, deleted) +``` + +**Key insight:** Position strings are immutable. Operations reference cells by their stable position, not their current index in the array. + +## Position String Format + +Positions are lexicographically sortable strings: + +``` +"a0" < "a0.5" < "a1" < "a1.25" < "a2" +``` + +### Base Structure + +``` +[prefix][integer].[fractional] +``` + +- **Prefix:** Letter(s) for global ordering (e.g., "a", "b", "z") +- **Integer:** Whole number part +- **Fractional:** Optional decimal part for insertions + +### Generating Positions + +```rust +pub struct FractionalIndex(String); + +impl FractionalIndex { + /// Create initial positions for a sequence + pub fn initial_sequence(count: usize) -> Vec { + (0..count) + .map(|i| FractionalIndex(format!("a{}", i))) + .collect() + } + + /// Generate position between two existing positions + pub fn between(before: &str, after: &str) -> Self { + // Simplified version - production needs more robust logic + let before_val = parse_position(before); + let after_val = parse_position(after); + let middle = (before_val + after_val) / 2.0; + + FractionalIndex(format_position(middle)) + } + + /// Generate position before the first element + pub fn before_first(first: &str) -> Self { + // e.g., "a0" → "a-1" + let val = parse_position(first); + FractionalIndex(format_position(val - 1.0)) + } + + /// Generate position after the last element + pub fn after_last(last: &str) -> Self { + // e.g., "a5" → "a6" + let val = parse_position(last); + FractionalIndex(format_position(val + 1.0)) + } +} + +fn parse_position(pos: &str) -> f64 { + // Strip prefix, parse number + let num_part = pos.trim_start_matches(|c: char| c.is_alphabetic()); + num_part.parse().unwrap_or(0.0) +} + +fn format_position(val: f64) -> String { + if val.fract() == 0.0 { + format!("a{}", val as i64) + } else { + format!("a{}", val) + } +} +``` + +### Insertion Examples + +```rust +// Insert between a0 and a1 +FractionalIndex::between("a0", "a1") // → "a0.5" + +// Insert between a0.5 and a1 +FractionalIndex::between("a0.5", "a1") // → "a0.75" + +// Insert between a0.75 and a1 +FractionalIndex::between("a0.75", "a1") // → "a0.875" + +// Insert at beginning +FractionalIndex::before_first("a0") // → "a-1" + +// Insert at end +FractionalIndex::after_last("a3") // → "a4" +``` + +## Subdivision Operation with Positions + +```rust +pub struct Subdivide { + pub deleted_cell_ids: Vec, + pub new_cells: Vec, +} + +pub struct NewCell { + pub id: Uuid, + pub position: String, // Fractional position + pub duration: f64, +} + +impl Subdivide { + /// Create a subdivide operation + pub fn new( + target_cells: &[Cell], + new_durations: Vec, + ) -> Self { + // Find position range for new cells + let first_pos = &target_cells.first().unwrap().position; + let last_pos = &target_cells.last().unwrap().position; + + // Generate evenly spaced positions between first and last + let new_cells = distribute_positions( + first_pos, + last_pos, + new_durations.len(), + ) + .into_iter() + .zip(new_durations) + .map(|(position, duration)| NewCell { + id: Uuid::new_v7(), + position, + duration, + }) + .collect(); + + Subdivide { + deleted_cell_ids: target_cells.iter().map(|c| c.id).collect(), + new_cells, + } + } +} + +fn distribute_positions( + first: &str, + last: &str, + count: usize, +) -> Vec { + let start = parse_position(first); + let end = parse_position(last); + let step = (end - start) / (count as f64 + 1.0); + + (1..=count) + .map(|i| format_position(start + step * i as f64)) + .collect() +} +``` + +**Example:** +```rust +// Subdivide cells at positions a1, a2, a3 into 5 new cells + +// Position range: a1 to a3 +// Generate 5 positions between them: +// a1.33, a1.67, a2.0, a2.33, a2.67 + +let subdivide = Subdivide { + deleted_cell_ids: [id_a1, id_a2, id_a3], + new_cells: [ + NewCell { id: new_id_1, position: "a1.33", duration: 0.15 }, + NewCell { id: new_id_2, position: "a1.67", duration: 0.15 }, + NewCell { id: new_id_3, position: "a2.0", duration: 0.15 }, + NewCell { id: new_id_4, position: "a2.33", duration: 0.15 }, + NewCell { id: new_id_5, position: "a2.67", duration: 0.15 }, + ], +}; +``` + +## Applying Operations + +```rust +impl Op { + fn apply(&self, realized: &mut RealizedDoc) { + match &self.payload { + OpPayload::Subdivide { new_cells, deleted_cell_ids, .. } => { + let row = realized.get_row_mut(self.row_id); + + // Mark deleted cells (tombstone) + for cell_id in deleted_cell_ids { + if let Some(cell) = row.cells.iter_mut().find(|c| c.id == *cell_id) { + cell.deleted = true; + } + } + + // Insert new cells + for new_cell in new_cells { + row.cells.push(Cell { + id: new_cell.id, + position: new_cell.position.clone(), + duration: new_cell.duration, + created_by: self.id, + deleted: false, + }); + } + + // Sort by position for display + row.cells.sort_by(|a, b| a.position.cmp(&b.position)); + } + _ => {} + } + } +} +``` + +## Handling Concurrent Insertions + +Fractional indexing ensures deterministic ordering even with concurrent insertions: + +``` +Initial: [a0: A, a1: B, a2: C] + +Alice inserts between A and B: position = "a0.5" +Bob inserts between A and B: position = "a0.5" + +Both generate the same position! +``` + +**Solution 1: Add randomness** +```rust +fn between_with_jitter(before: &str, after: &str, actor_id: &Uuid) -> String { + let base = between(before, after); + // Add tiny actor-specific offset + let offset = (actor_id.as_bytes()[0] as f64) / 1000000.0; + adjust_position(&base, offset) +} +``` + +**Solution 2: Use timestamp in position** +```rust +fn between_with_timestamp(before: &str, after: &str, timestamp: u64) -> String { + let middle = between(before, after); + format!("{}.{}", middle, timestamp % 1000) +} +``` + +**Solution 3: Accept ties (rely on cell ID for final ordering)** +```rust +impl Cell { + fn compare_for_display(&self, other: &Cell) -> Ordering { + // Primary: position + match self.position.cmp(&other.position) { + Ordering::Equal => { + // Tie-breaker: cell ID (lexicographic) + self.id.cmp(&other.id) + } + other => other, + } + } +} +``` + +## Position Space Exhaustion + +**Problem:** Repeatedly inserting between two positions eventually runs out of precision: + +``` +a0, a0.5, a0.75, a0.875, a0.9375, a0.96875, ... +``` + +**Solutions:** + +### 1. Rebalancing (Offline) +Periodically regenerate positions with even spacing: +```rust +fn rebalance_positions(cells: &mut [Cell]) { + let count = cells.len(); + for (i, cell) in cells.iter_mut().enumerate() { + cell.position = format!("a{}", i); + } +} +``` + +### 2. Multi-level Positions +Use hierarchical structure: +``` +"a0", "a0|b0", "a0|b1", "a1" +``` + +### 3. Arbitrary Precision +Use strings that can grow indefinitely: +``` +"a0", "a0.5", "a0.5.5", "a0.5.5.5", ... +``` + +## Production Implementation + +Consider using existing libraries: + +- **fractional-index** (Rust) - Figma-style implementation +- **string-ordering** - Lexicographic ordering primitives + +Example with `fractional-index`: +```rust +use fractional_index::{FractionalIndex, generate_key_between}; + +// Generate position between two cells +let pos = generate_key_between( + Some(&cell_a.position), + Some(&cell_b.position), +); +``` + +## Key Properties + +1. **Immutable** - Positions never change after creation +2. **Lexicographic** - String comparison gives correct order +3. **Dense** - Can always insert between any two positions +4. **Deterministic** - Same inputs produce same outputs +5. **Conflict-Free** - Concurrent insertions at same position are handled gracefully + +## Trade-offs + +**Pros:** +- No coordination needed for insertions +- Stable references across replicas +- Simple to reason about + +**Cons:** +- Positions can become long strings over time +- Requires periodic rebalancing in heavy-edit scenarios +- Slightly more complex than array indices + +## References + +- [Figma's Blog: Realtime Editing of Ordered Sequences](https://www.figma.com/blog/realtime-editing-of-ordered-sequences/) +- [fractional-index crate](https://crates.io/crates/fractional-index) +- [LSEQ CRDT](https://hal.archives-ouvertes.fr/hal-00921633/document) + +## Related Documents + +- See `crdt-design.md` for overall CRDT architecture +- See `branch-based-merge.md` for conflict detection and resolution -- cgit v1.2.3