198 lines · 5.6 KB
1 /**
2 * Git object types and serialization.
3 *
4 * Git has four object types: blob, tree, commit, tag.
5 * Each is stored as: "{type} {size}\0{content}" then zlib-compressed.
6 */
7
/** Shared UTF-8 encoder; used to turn object headers and tree entry prefixes into bytes. */
const encoder = new TextEncoder();

/**
 * Shared UTF-8 decoder for headers, tree entry names and commit text.
 * Constructed with defaults, so malformed byte sequences decode to U+FFFD
 * instead of throwing.
 */
const decoder = new TextDecoder();
10
/** The type token that appears in a git object header. */
export type GitObjectType =
  | 'blob'
  | 'tree'
  | 'commit'
  | 'tag';
12
/**
 * Numeric object type IDs as they appear in pack file entry headers.
 *
 * The value 5 is deliberately absent: the pack format reserves it.
 * The two delta kinds have no `GitObjectType` counterpart.
 */
export const PACK_OBJECT_TYPE = {
  /** Full commit object. */
  COMMIT: 1,
  /** Full tree object. */
  TREE: 2,
  /** Full blob object. */
  BLOB: 3,
  /** Full annotated tag object. */
  TAG: 4,
  /** Delta whose base is located by an offset within the same pack. */
  OFS_DELTA: 6,
  /** Delta whose base is named by its object ID. */
  REF_DELTA: 7,
} as const;
22
/** Union of every numeric value in `PACK_OBJECT_TYPE` (1 | 2 | 3 | 4 | 6 | 7). */
export type PackObjectTypeNum =
  (typeof PACK_OBJECT_TYPE)[keyof typeof PACK_OBJECT_TYPE];
24
/**
 * Map a pack-file numeric type ID to the matching git object type name.
 *
 * @param type - Numeric type read from a pack entry header.
 * @returns The object type name for commit, tree, blob and tag IDs.
 * @throws Error for any other value, including the two delta type IDs.
 */
export function packTypeToString(type: number): GitObjectType {
  if (type === PACK_OBJECT_TYPE.COMMIT) return 'commit';
  if (type === PACK_OBJECT_TYPE.TREE) return 'tree';
  if (type === PACK_OBJECT_TYPE.BLOB) return 'blob';
  if (type === PACK_OBJECT_TYPE.TAG) return 'tag';

  throw new Error(`Unknown pack object type: ${type}`);
}
34
/**
 * Map a git object type name to its pack-file numeric type ID.
 *
 * @param type - One of the four git object type names.
 * @returns The numeric ID used for that type in pack entry headers.
 * @throws Error if `type` is not a known object type. The compiler rules this
 *   out for well-typed callers, but a value that reached here through an
 *   unchecked cast would otherwise yield `undefined` from a function typed
 *   as returning `number`.
 */
export function stringToPackType(type: GitObjectType): number {
  switch (type) {
    case 'commit': return PACK_OBJECT_TYPE.COMMIT;
    case 'tree': return PACK_OBJECT_TYPE.TREE;
    case 'blob': return PACK_OBJECT_TYPE.BLOB;
    case 'tag': return PACK_OBJECT_TYPE.TAG;
    // Mirrors packTypeToString: fail loudly instead of falling off the end.
    default: throw new Error(`Unknown git object type: ${String(type)}`);
  }
}
43
/**
 * A parsed git object: what kind it is, its payload, and its computed SHA.
 */
export interface GitObject {
  /** Which of the four git object kinds this is. */
  type: GitObjectType;
  /** Object payload as raw bytes. */
  content: Uint8Array;
  /** The object's computed SHA (presumably lowercase hex; confirm against the producer). */
  sha: string;
}
50
/**
 * Build the uncompressed storable form of a git object.
 *
 * The output is the ASCII header `"{type} {byteLength}\0"` followed directly
 * by the payload bytes. Compression is left to the caller.
 *
 * @param type - Object type written into the header.
 * @param content - Payload bytes; copied into the result, not modified.
 * @returns A fresh buffer holding header + payload.
 */
export function serializeGitObject(type: GitObjectType, content: Uint8Array): Uint8Array {
  const prefix = encoder.encode(`${type} ${content.length}\0`);
  const payloadStart = prefix.length;

  const out = new Uint8Array(payloadStart + content.length);
  out.set(prefix);
  out.set(content, payloadStart);
  return out;
}
59
/** Runtime check that a header's type token names one of the four git object types. */
function isGitObjectType(value: string): value is GitObjectType {
  return value === 'blob' || value === 'tree' || value === 'commit' || value === 'tag';
}

/**
 * Parse a raw git object (after decompression).
 * Format: "{type} {size}\0{content}"
 *
 * @param raw - Decompressed object bytes, header included.
 * @returns The validated object type and a copy of the payload bytes.
 * @throws Error if the NUL separator or the header space is missing, if the
 *   type token is not blob/tree/commit/tag, if the size is not a plain decimal
 *   number, or if the declared size differs from the actual payload length.
 */
export function parseRawGitObject(raw: Uint8Array): { type: GitObjectType; content: Uint8Array } {
  // The first NUL byte ends the header; everything after it is payload.
  const nulIdx = raw.indexOf(0);
  if (nulIdx === -1) throw new Error('Invalid git object: no NUL separator');

  // A view is enough here: the header bytes are decoded and then discarded.
  const headerStr = decoder.decode(raw.subarray(0, nulIdx));
  const spaceIdx = headerStr.indexOf(' ');
  if (spaceIdx === -1) throw new Error('Invalid git object header');

  // Validate instead of casting, so the returned type really is a GitObjectType.
  const type = headerStr.slice(0, spaceIdx);
  if (!isGitObjectType(type)) {
    throw new Error(`Invalid git object type: ${type}`);
  }

  // parseInt would accept "5xyz" as 5; require the whole field to be digits.
  const sizeStr = headerStr.slice(spaceIdx + 1);
  if (!/^\d+$/.test(sizeStr)) {
    throw new Error(`Invalid git object size: ${sizeStr}`);
  }
  const size = Number(sizeStr);

  // slice (not subarray) so the result does not alias the caller's buffer.
  const content = raw.slice(nulIdx + 1);
  if (content.length !== size) {
    throw new Error(`Git object size mismatch: header says ${size}, got ${content.length}`);
  }

  return { type, content };
}
83
84 // --- Tree parsing ---
85
/** One entry of a git tree object. */
export interface TreeEntry {
  /**
   * File mode as the ASCII octal string found in the tree, e.g. "100644"
   * (regular file) or "120000" (symlink). Git itself writes the directory
   * mode as "40000" with no leading zero; "040000" is the padded form shown
   * by tools such as `git ls-tree`.
   */
  mode: string;
  /** Entry name as it appears in the tree. */
  name: string;
  /** Object ID of the entry as 40 hex characters. */
  sha: string;
}
91
/**
 * Parse a tree object's content into entries.
 * Tree format: repeated "{mode} {name}\0{20-byte-sha}"
 *
 * Parsing is best-effort with respect to trailing bytes: if no further space
 * or NUL separator can be found, the loop stops and the entries collected so
 * far are returned.
 *
 * @param content - Tree payload (object header already removed).
 * @returns Entries in the order they appear, with the SHA as lowercase hex.
 * @throws Error if an entry's name is followed by fewer than 20 SHA bytes.
 */
export function parseTree(content: Uint8Array): TreeEntry[] {
  const SHA_BYTES = 20;
  const entries: TreeEntry[] = [];
  let offset = 0;

  while (offset < content.length) {
    // Mode ends at the first space; the name may itself contain spaces.
    const spaceIdx = content.indexOf(0x20, offset);
    if (spaceIdx === -1) break;

    // Name ends at the first NUL after the mode.
    const nulIdx = content.indexOf(0, spaceIdx + 1);
    if (nulIdx === -1) break;

    const shaStart = nulIdx + 1;
    const shaEnd = shaStart + SHA_BYTES;
    if (shaEnd > content.length) {
      // Previously this surfaced as an opaque TypeError from reading past the buffer.
      throw new Error(
        `Invalid tree object: truncated SHA for entry at offset ${offset} ` +
          `(need ${SHA_BYTES} bytes, have ${content.length - shaStart})`,
      );
    }

    // subarray gives views; nothing here needs its own copy of the bytes.
    const mode = decoder.decode(content.subarray(offset, spaceIdx));
    const name = decoder.decode(content.subarray(spaceIdx + 1, nulIdx));

    let sha = '';
    for (const byte of content.subarray(shaStart, shaEnd)) {
      sha += byte.toString(16).padStart(2, '0');
    }

    entries.push({ mode, name, sha });
    // The SHA is raw binary and may contain 0x20/0x00, so skip it by length.
    offset = shaEnd;
  }

  return entries;
}
124
/**
 * Serialize tree entries into tree object content.
 *
 * Each entry becomes "{mode} {name}\0" followed by the 20 raw SHA bytes.
 * Entries are written in the order given; no sorting or mode normalization
 * is performed here.
 *
 * @param entries - Entries to write; `sha` must be exactly 40 hex characters.
 * @returns A fresh buffer with the concatenated entries (empty for no entries).
 * @throws Error if any entry's `sha` is not 40 hex characters. Without this
 *   check a short or non-hex SHA would be written as 0x00 bytes and silently
 *   produce a corrupt tree.
 */
export function serializeTree(entries: TreeEntry[]): Uint8Array {
  const SHA_BYTES = 20;
  const SHA_HEX = /^[0-9a-fA-F]{40}$/;

  // Validate and encode every prefix first so the output can be sized exactly.
  const encoded = entries.map(({ mode, name, sha }) => {
    if (!SHA_HEX.test(sha)) {
      throw new Error(`Invalid tree entry SHA for "${name}": ${sha}`);
    }
    return { prefix: encoder.encode(`${mode} ${name}\0`), sha };
  });

  const totalLen = encoded.reduce((sum, e) => sum + e.prefix.length + SHA_BYTES, 0);
  const result = new Uint8Array(totalLen);

  let offset = 0;
  for (const { prefix, sha } of encoded) {
    result.set(prefix, offset);
    offset += prefix.length;
    // Two hex digits per output byte.
    for (let i = 0; i < SHA_BYTES; i++) {
      result[offset++] = parseInt(sha.slice(i * 2, i * 2 + 2), 16);
    }
  }
  return result;
}
150
151 // --- Commit parsing ---
152
/** The fields extracted from a commit object by `parseCommit`. */
export interface ParsedCommit {
  /** Value of the `tree` header; empty string if the header is absent. */
  tree: string;
  /** Values of all `parent` headers, in the order they appear. */
  parents: string[];
  /** Unparsed remainder of the `author` header line; empty string if absent. */
  author: string;
  /** Unparsed remainder of the `committer` header line; empty string if absent. */
  committer: string;
  /** Everything after the first blank line, newlines preserved. */
  message: string;
}
160
/**
 * Parse a commit object's content.
 *
 * Header lines run up to the first empty line; only `tree`, `parent`,
 * `author` and `committer` are extracted, and every other header line is
 * ignored. Everything after that empty line is the message, returned verbatim.
 *
 * @param content - Commit payload (object header already removed).
 * @returns The extracted fields; headers that never appear stay empty.
 */
export function parseCommit(content: Uint8Array): ParsedCommit {
  const rows = decoder.decode(content).split('\n');

  // The first empty row ends the header section; without one, all rows are headers.
  const blankAt = rows.indexOf('');
  const headerRows = blankAt === -1 ? rows : rows.slice(0, blankAt);
  const messageRows = blankAt === -1 ? [] : rows.slice(blankAt + 1);

  const parsed: ParsedCommit = {
    tree: '',
    parents: [],
    author: '',
    committer: '',
    message: messageRows.join('\n'),
  };

  for (const row of headerRows) {
    // Key is the text before the first space; rows without a space carry no value.
    const sep = row.indexOf(' ');
    if (sep === -1) continue;

    const value = row.slice(sep + 1);
    switch (row.slice(0, sep)) {
      case 'tree':
        parsed.tree = value;
        break;
      case 'parent':
        parsed.parents.push(value);
        break;
      case 'author':
        parsed.author = value;
        break;
      case 'committer':
        parsed.committer = value;
        break;
    }
  }

  return parsed;
}
198