233 lines · 6.0 KB
| 1 | /** |
| 2 | * Line-level diff using a simple LCS-based approach. |
| 3 | * Produces unified diff hunks with context. |
| 4 | */ |
| 5 | |
| 6 | export type DiffLineType = 'context' | 'add' | 'delete'; |
| 7 | |
| 8 | export interface DiffLine { |
| 9 | type: DiffLineType; |
| 10 | content: string; |
| 11 | oldLineNo: number | null; |
| 12 | newLineNo: number | null; |
| 13 | } |
| 14 | |
| 15 | export interface DiffHunk { |
| 16 | oldStart: number; |
| 17 | oldCount: number; |
| 18 | newStart: number; |
| 19 | newCount: number; |
| 20 | lines: DiffLine[]; |
| 21 | } |
| 22 | |
| 23 | /** |
| 24 | * Compute a unified diff between two strings. |
| 25 | * Returns hunks with context lines (default 3). |
| 26 | */ |
| 27 | export function computeDiff(oldText: string, newText: string, contextLines: number = 3): DiffHunk[] { |
| 28 | const oldLines = oldText.length > 0 ? oldText.split('\n') : []; |
| 29 | const newLines = newText.length > 0 ? newText.split('\n') : []; |
| 30 | |
| 31 | // Compute LCS to find matching lines |
| 32 | const editScript = computeEditScript(oldLines, newLines); |
| 33 | |
| 34 | // Build annotated diff lines |
| 35 | const allLines: DiffLine[] = []; |
| 36 | let oldNo = 1; |
| 37 | let newNo = 1; |
| 38 | |
| 39 | for (const op of editScript) { |
| 40 | if (op === 'equal') { |
| 41 | allLines.push({ |
| 42 | type: 'context', |
| 43 | content: oldLines[oldNo - 1], |
| 44 | oldLineNo: oldNo, |
| 45 | newLineNo: newNo, |
| 46 | }); |
| 47 | oldNo++; |
| 48 | newNo++; |
| 49 | } else if (op === 'delete') { |
| 50 | allLines.push({ |
| 51 | type: 'delete', |
| 52 | content: oldLines[oldNo - 1], |
| 53 | oldLineNo: oldNo, |
| 54 | newLineNo: null, |
| 55 | }); |
| 56 | oldNo++; |
| 57 | } else if (op === 'insert') { |
| 58 | allLines.push({ |
| 59 | type: 'add', |
| 60 | content: newLines[newNo - 1], |
| 61 | oldLineNo: null, |
| 62 | newLineNo: newNo, |
| 63 | }); |
| 64 | newNo++; |
| 65 | } |
| 66 | } |
| 67 | |
| 68 | // Group into hunks with context |
| 69 | return groupIntoHunks(allLines, contextLines); |
| 70 | } |
| 71 | |
| 72 | /** |
| 73 | * Compute an edit script using the LCS (Longest Common Subsequence) approach. |
| 74 | * Returns an array of operations: 'equal', 'delete', 'insert'. |
| 75 | */ |
| 76 | function computeEditScript(oldLines: string[], newLines: string[]): ('equal' | 'delete' | 'insert')[] { |
| 77 | const N = oldLines.length; |
| 78 | const M = newLines.length; |
| 79 | |
| 80 | // For very large files, use a simpler greedy approach |
| 81 | if (N + M > 10000) { |
| 82 | return greedyDiff(oldLines, newLines); |
| 83 | } |
| 84 | |
| 85 | // Standard LCS DP |
| 86 | // dp[i][j] = length of LCS of oldLines[0..i-1] and newLines[0..j-1] |
| 87 | const dp: number[][] = []; |
| 88 | for (let i = 0; i <= N; i++) { |
| 89 | dp[i] = new Array(M + 1).fill(0); |
| 90 | } |
| 91 | |
| 92 | for (let i = 1; i <= N; i++) { |
| 93 | for (let j = 1; j <= M; j++) { |
| 94 | if (oldLines[i - 1] === newLines[j - 1]) { |
| 95 | dp[i][j] = dp[i - 1][j - 1] + 1; |
| 96 | } else { |
| 97 | dp[i][j] = Math.max(dp[i - 1][j], dp[i][j - 1]); |
| 98 | } |
| 99 | } |
| 100 | } |
| 101 | |
| 102 | // Backtrack to build edit script |
| 103 | const script: ('equal' | 'delete' | 'insert')[] = []; |
| 104 | let i = N; |
| 105 | let j = M; |
| 106 | |
| 107 | while (i > 0 || j > 0) { |
| 108 | if (i > 0 && j > 0 && oldLines[i - 1] === newLines[j - 1]) { |
| 109 | script.unshift('equal'); |
| 110 | i--; |
| 111 | j--; |
| 112 | } else if (j > 0 && (i === 0 || dp[i][j - 1] >= dp[i - 1][j])) { |
| 113 | script.unshift('insert'); |
| 114 | j--; |
| 115 | } else { |
| 116 | script.unshift('delete'); |
| 117 | i--; |
| 118 | } |
| 119 | } |
| 120 | |
| 121 | return script; |
| 122 | } |
| 123 | |
| 124 | /** |
| 125 | * Simple greedy diff for very large files. |
| 126 | * Not optimal but avoids O(N*M) memory. |
| 127 | */ |
| 128 | function greedyDiff(oldLines: string[], newLines: string[]): ('equal' | 'delete' | 'insert')[] { |
| 129 | const script: ('equal' | 'delete' | 'insert')[] = []; |
| 130 | let i = 0; |
| 131 | let j = 0; |
| 132 | |
| 133 | while (i < oldLines.length && j < newLines.length) { |
| 134 | if (oldLines[i] === newLines[j]) { |
| 135 | script.push('equal'); |
| 136 | i++; |
| 137 | j++; |
| 138 | } else { |
| 139 | // Look ahead to find a match |
| 140 | let foundOld = -1; |
| 141 | let foundNew = -1; |
| 142 | const lookAhead = Math.min(10, Math.max(oldLines.length - i, newLines.length - j)); |
| 143 | |
| 144 | for (let k = 1; k <= lookAhead; k++) { |
| 145 | if (j + k < newLines.length && oldLines[i] === newLines[j + k] && foundNew === -1) { |
| 146 | foundNew = k; |
| 147 | } |
| 148 | if (i + k < oldLines.length && oldLines[i + k] === newLines[j] && foundOld === -1) { |
| 149 | foundOld = k; |
| 150 | } |
| 151 | } |
| 152 | |
| 153 | if (foundNew !== -1 && (foundOld === -1 || foundNew <= foundOld)) { |
| 154 | for (let k = 0; k < foundNew; k++) { |
| 155 | script.push('insert'); |
| 156 | j++; |
| 157 | } |
| 158 | } else if (foundOld !== -1) { |
| 159 | for (let k = 0; k < foundOld; k++) { |
| 160 | script.push('delete'); |
| 161 | i++; |
| 162 | } |
| 163 | } else { |
| 164 | script.push('delete'); |
| 165 | script.push('insert'); |
| 166 | i++; |
| 167 | j++; |
| 168 | } |
| 169 | } |
| 170 | } |
| 171 | |
| 172 | while (i < oldLines.length) { |
| 173 | script.push('delete'); |
| 174 | i++; |
| 175 | } |
| 176 | while (j < newLines.length) { |
| 177 | script.push('insert'); |
| 178 | j++; |
| 179 | } |
| 180 | |
| 181 | return script; |
| 182 | } |
| 183 | |
| 184 | /** |
| 185 | * Group diff lines into hunks with surrounding context. |
| 186 | */ |
| 187 | function groupIntoHunks(allLines: DiffLine[], contextLines: number): DiffHunk[] { |
| 188 | // Find indices of changed lines |
| 189 | const changeIndices: number[] = []; |
| 190 | for (let i = 0; i < allLines.length; i++) { |
| 191 | if (allLines[i].type !== 'context') { |
| 192 | changeIndices.push(i); |
| 193 | } |
| 194 | } |
| 195 | |
| 196 | if (changeIndices.length === 0) return []; |
| 197 | |
| 198 | // Group changes that are close together (within 2*context lines) |
| 199 | const groups: number[][] = []; |
| 200 | let currentGroup: number[] = [changeIndices[0]]; |
| 201 | |
| 202 | for (let i = 1; i < changeIndices.length; i++) { |
| 203 | if (changeIndices[i] - changeIndices[i - 1] <= contextLines * 2 + 1) { |
| 204 | currentGroup.push(changeIndices[i]); |
| 205 | } else { |
| 206 | groups.push(currentGroup); |
| 207 | currentGroup = [changeIndices[i]]; |
| 208 | } |
| 209 | } |
| 210 | groups.push(currentGroup); |
| 211 | |
| 212 | // Build hunks from groups |
| 213 | const hunks: DiffHunk[] = []; |
| 214 | for (const group of groups) { |
| 215 | const firstChange = group[0]; |
| 216 | const lastChange = group[group.length - 1]; |
| 217 | |
| 218 | const start = Math.max(0, firstChange - contextLines); |
| 219 | const end = Math.min(allLines.length, lastChange + contextLines + 1); |
| 220 | |
| 221 | const hunkLines = allLines.slice(start, end); |
| 222 | |
| 223 | const oldStart = hunkLines.find((l) => l.oldLineNo !== null)?.oldLineNo ?? 1; |
| 224 | const newStart = hunkLines.find((l) => l.newLineNo !== null)?.newLineNo ?? 1; |
| 225 | const oldCount = hunkLines.filter((l) => l.type !== 'add').length; |
| 226 | const newCount = hunkLines.filter((l) => l.type !== 'delete').length; |
| 227 | |
| 228 | hunks.push({ oldStart, oldCount, newStart, newCount, lines: hunkLines }); |
| 229 | } |
| 230 | |
| 231 | return hunks; |
| 232 | } |
| 233 |