233 lines · 6.0 KB
1 /**
2 * Line-level diff using a simple LCS-based approach.
3 * Produces unified diff hunks with context.
4 */
5
6 export type DiffLineType = 'context' | 'add' | 'delete';
7
8 export interface DiffLine {
9 type: DiffLineType;
10 content: string;
11 oldLineNo: number | null;
12 newLineNo: number | null;
13 }
14
15 export interface DiffHunk {
16 oldStart: number;
17 oldCount: number;
18 newStart: number;
19 newCount: number;
20 lines: DiffLine[];
21 }
22
23 /**
24 * Compute a unified diff between two strings.
25 * Returns hunks with context lines (default 3).
26 */
27 export function computeDiff(oldText: string, newText: string, contextLines: number = 3): DiffHunk[] {
28 const oldLines = oldText.length > 0 ? oldText.split('\n') : [];
29 const newLines = newText.length > 0 ? newText.split('\n') : [];
30
31 // Compute LCS to find matching lines
32 const editScript = computeEditScript(oldLines, newLines);
33
34 // Build annotated diff lines
35 const allLines: DiffLine[] = [];
36 let oldNo = 1;
37 let newNo = 1;
38
39 for (const op of editScript) {
40 if (op === 'equal') {
41 allLines.push({
42 type: 'context',
43 content: oldLines[oldNo - 1],
44 oldLineNo: oldNo,
45 newLineNo: newNo,
46 });
47 oldNo++;
48 newNo++;
49 } else if (op === 'delete') {
50 allLines.push({
51 type: 'delete',
52 content: oldLines[oldNo - 1],
53 oldLineNo: oldNo,
54 newLineNo: null,
55 });
56 oldNo++;
57 } else if (op === 'insert') {
58 allLines.push({
59 type: 'add',
60 content: newLines[newNo - 1],
61 oldLineNo: null,
62 newLineNo: newNo,
63 });
64 newNo++;
65 }
66 }
67
68 // Group into hunks with context
69 return groupIntoHunks(allLines, contextLines);
70 }
71
72 /**
73 * Compute an edit script using the LCS (Longest Common Subsequence) approach.
74 * Returns an array of operations: 'equal', 'delete', 'insert'.
75 */
76 function computeEditScript(oldLines: string[], newLines: string[]): ('equal' | 'delete' | 'insert')[] {
77 const N = oldLines.length;
78 const M = newLines.length;
79
80 // For very large files, use a simpler greedy approach
81 if (N + M > 10000) {
82 return greedyDiff(oldLines, newLines);
83 }
84
85 // Standard LCS DP
86 // dp[i][j] = length of LCS of oldLines[0..i-1] and newLines[0..j-1]
87 const dp: number[][] = [];
88 for (let i = 0; i <= N; i++) {
89 dp[i] = new Array(M + 1).fill(0);
90 }
91
92 for (let i = 1; i <= N; i++) {
93 for (let j = 1; j <= M; j++) {
94 if (oldLines[i - 1] === newLines[j - 1]) {
95 dp[i][j] = dp[i - 1][j - 1] + 1;
96 } else {
97 dp[i][j] = Math.max(dp[i - 1][j], dp[i][j - 1]);
98 }
99 }
100 }
101
102 // Backtrack to build edit script
103 const script: ('equal' | 'delete' | 'insert')[] = [];
104 let i = N;
105 let j = M;
106
107 while (i > 0 || j > 0) {
108 if (i > 0 && j > 0 && oldLines[i - 1] === newLines[j - 1]) {
109 script.unshift('equal');
110 i--;
111 j--;
112 } else if (j > 0 && (i === 0 || dp[i][j - 1] >= dp[i - 1][j])) {
113 script.unshift('insert');
114 j--;
115 } else {
116 script.unshift('delete');
117 i--;
118 }
119 }
120
121 return script;
122 }
123
124 /**
125 * Simple greedy diff for very large files.
126 * Not optimal but avoids O(N*M) memory.
127 */
128 function greedyDiff(oldLines: string[], newLines: string[]): ('equal' | 'delete' | 'insert')[] {
129 const script: ('equal' | 'delete' | 'insert')[] = [];
130 let i = 0;
131 let j = 0;
132
133 while (i < oldLines.length && j < newLines.length) {
134 if (oldLines[i] === newLines[j]) {
135 script.push('equal');
136 i++;
137 j++;
138 } else {
139 // Look ahead to find a match
140 let foundOld = -1;
141 let foundNew = -1;
142 const lookAhead = Math.min(10, Math.max(oldLines.length - i, newLines.length - j));
143
144 for (let k = 1; k <= lookAhead; k++) {
145 if (j + k < newLines.length && oldLines[i] === newLines[j + k] && foundNew === -1) {
146 foundNew = k;
147 }
148 if (i + k < oldLines.length && oldLines[i + k] === newLines[j] && foundOld === -1) {
149 foundOld = k;
150 }
151 }
152
153 if (foundNew !== -1 && (foundOld === -1 || foundNew <= foundOld)) {
154 for (let k = 0; k < foundNew; k++) {
155 script.push('insert');
156 j++;
157 }
158 } else if (foundOld !== -1) {
159 for (let k = 0; k < foundOld; k++) {
160 script.push('delete');
161 i++;
162 }
163 } else {
164 script.push('delete');
165 script.push('insert');
166 i++;
167 j++;
168 }
169 }
170 }
171
172 while (i < oldLines.length) {
173 script.push('delete');
174 i++;
175 }
176 while (j < newLines.length) {
177 script.push('insert');
178 j++;
179 }
180
181 return script;
182 }
183
184 /**
185 * Group diff lines into hunks with surrounding context.
186 */
187 function groupIntoHunks(allLines: DiffLine[], contextLines: number): DiffHunk[] {
188 // Find indices of changed lines
189 const changeIndices: number[] = [];
190 for (let i = 0; i < allLines.length; i++) {
191 if (allLines[i].type !== 'context') {
192 changeIndices.push(i);
193 }
194 }
195
196 if (changeIndices.length === 0) return [];
197
198 // Group changes that are close together (within 2*context lines)
199 const groups: number[][] = [];
200 let currentGroup: number[] = [changeIndices[0]];
201
202 for (let i = 1; i < changeIndices.length; i++) {
203 if (changeIndices[i] - changeIndices[i - 1] <= contextLines * 2 + 1) {
204 currentGroup.push(changeIndices[i]);
205 } else {
206 groups.push(currentGroup);
207 currentGroup = [changeIndices[i]];
208 }
209 }
210 groups.push(currentGroup);
211
212 // Build hunks from groups
213 const hunks: DiffHunk[] = [];
214 for (const group of groups) {
215 const firstChange = group[0];
216 const lastChange = group[group.length - 1];
217
218 const start = Math.max(0, firstChange - contextLines);
219 const end = Math.min(allLines.length, lastChange + contextLines + 1);
220
221 const hunkLines = allLines.slice(start, end);
222
223 const oldStart = hunkLines.find((l) => l.oldLineNo !== null)?.oldLineNo ?? 1;
224 const newStart = hunkLines.find((l) => l.newLineNo !== null)?.newLineNo ?? 1;
225 const oldCount = hunkLines.filter((l) => l.type !== 'add').length;
226 const newCount = hunkLines.filter((l) => l.type !== 'delete').length;
227
228 hunks.push({ oldStart, oldCount, newStart, newCount, lines: hunkLines });
229 }
230
231 return hunks;
232 }
233