182 lines · 5.7 KB
1 /**
2 * Git pack file parser.
3 *
4 * Pack format (v2):
5 * - Header: "PACK" + version (4 bytes) + object count (4 bytes)
 * - Objects: repeated { type+size (variable-length) + base reference (delta entries only) + zlib-compressed data }
 * - Trailer: 20-byte SHA-1 checksum of all preceding pack bytes (header + objects)
8 *
9 * Used by git-receive-pack to parse incoming pushes.
10 */
11
12 import { PACK_OBJECT_TYPE, packTypeToString, type GitObjectType } from './object.js';
13 import { inflateFromBuffer } from './zlib.js';
14 import { gitObjectHash } from './sha1.js';
15 import { applyDelta, readOfsOffset } from './delta.js';
16
/** Module-wide UTF-8 text decoder, created once and shared by all callers. */
const decoder = new TextDecoder('utf-8');
18
/**
 * One fully materialised object extracted from a pack file.
 *
 * Delta entries are reported in this same shape after the delta has been
 * applied to its base.
 */
export interface PackObject {
  /** Object id, as produced by `gitObjectHash(type, content)`. */
  sha: string;
  /** Object type; a resolved delta carries the type of its base object. */
  type: GitObjectType;
  /** Object payload: the inflated entry data, or the result of applying a delta. */
  content: Uint8Array;
}
25
/**
 * Decode the four bytes starting at `offset` as a single big-endian
 * (most significant byte first) unsigned 32-bit integer.
 */
function readUint32BE(buf: Uint8Array, offset: number): number {
  let word = 0;
  for (let i = 0; i < 4; i++) {
    // Make room for the next byte and append it below the ones already read.
    word = (word << 8) | buf[offset + i];
  }
  // Bitwise operators yield signed 32-bit values; `>>> 0` reinterprets the
  // result as unsigned so values with the top bit set stay positive.
  return word >>> 0;
}
35
/**
 * Read the variable-length object header from a pack entry.
 *
 * Encoding: the first byte holds a continuation flag in bit 7, the type in
 * bits 6-4 and the lowest four size bits in bits 3-0. While the continuation
 * flag is set, each following byte contributes seven more size bits, least
 * significant group first.
 *
 * @param buf - Buffer containing the pack data.
 * @param offset - Index of the first header byte within `buf`.
 * @returns The raw type code, the decoded size, and the number of header
 *   bytes consumed.
 * @throws Error if the buffer ends inside the header, or if the encoded size
 *   cannot be represented exactly as a JavaScript number.
 */
function readPackObjectHeader(
  buf: Uint8Array,
  offset: number
): { type: number; size: number; bytesRead: number } {
  const first = buf[offset];
  if (first === undefined) {
    throw new Error(`Truncated pack object header at offset ${offset}`);
  }

  const type = (first >> 4) & 0x07;
  let size = first & 0x0f;
  // Accumulate with multiplication instead of `<<` / `|`: bitwise operators
  // work on signed 32-bit values, so sizes of 2 GiB and above would turn
  // negative and shift counts of 32 or more would wrap around.
  let scale = 16; // 2^4: the first byte already supplied four size bits
  let bytesRead = 1;
  let more = (first & 0x80) !== 0;

  while (more) {
    const next = buf[offset + bytesRead];
    if (next === undefined) {
      throw new Error(`Truncated pack object header at offset ${offset}`);
    }
    bytesRead++;

    size += (next & 0x7f) * scale;
    scale *= 128; // each continuation byte carries seven size bits
    if (!Number.isSafeInteger(size)) {
      throw new Error(`Pack object size too large at offset ${offset}`);
    }

    more = (next & 0x80) !== 0;
  }

  return { type, size, bytesRead };
}
60
/**
 * Parse a complete pack file buffer into individual git objects.
 *
 * Objects are returned in pack order. Delta entries are resolved against
 * their base and reported with the base object's type:
 * - OFS_DELTA: the base is the entry that starts at the given distance
 *   before the delta entry in this pack.
 * - REF_DELTA: the base is looked up by SHA among the objects already parsed
 *   from this pack, then through `existingObjects`. If neither has it, the
 *   delta is deferred and retried after the whole pack has been read, because
 *   a REF_DELTA base may be stored after the delta that references it.
 *
 * The pack trailer checksum is not verified by this function.
 *
 * @param packData - The raw pack bytes, starting at the "PACK" signature.
 * @param existingObjects - Optional lookup for REF_DELTA bases that are not
 *   part of this pack; resolves to `null` when the object is unknown.
 * @returns Every object in the pack, with deltas already applied.
 * @throws Error on an invalid signature, unsupported version, truncated data,
 *   unknown entry type, or a delta whose base cannot be found.
 */
export async function parsePack(
  packData: Uint8Array,
  existingObjects?: (sha: string) => Promise<{ type: GitObjectType; content: Uint8Array } | null>
): Promise<PackObject[]> {
  const HEADER_SIZE = 12;
  const SHA_SIZE = 20;

  // Validate header
  const sig = decoder.decode(packData.slice(0, 4));
  if (sig !== 'PACK') {
    throw new Error(`Invalid pack signature: ${sig}`);
  }
  if (packData.length < HEADER_SIZE) {
    throw new Error(`Truncated pack header: got ${packData.length} bytes`);
  }

  const version = readUint32BE(packData, 4);
  if (version !== 2 && version !== 3) {
    throw new Error(`Unsupported pack version: ${version}`);
  }

  const objectCount = readUint32BE(packData, 8);
  let offset = HEADER_SIZE; // Past header

  /** A delta whose base was not available yet when its entry was read. */
  type DeferredDelta = {
    slot: number;
    entryOffset: number;
    deltaData: Uint8Array;
    base: { kind: 'sha'; sha: string } | { kind: 'offset'; offset: number };
  };

  // One slot per pack entry so the result keeps pack order even when some
  // deltas are resolved late.
  const slots: (PackObject | undefined)[] = [];
  const objectsByOffset = new Map<number, PackObject>();
  const objectsBySha = new Map<string, PackObject>();
  const deferred: DeferredDelta[] = [];
  // Entry offsets of deferred deltas, so an OFS_DELTA pointing at one of them
  // can be told apart from an OFS_DELTA pointing at no entry at all.
  const deferredOffsets = new Set<number>();

  /** Hash a finished object and register it under its slot, offset and SHA. */
  const store = async (
    slot: number,
    entryOffset: number,
    type: GitObjectType,
    content: Uint8Array
  ): Promise<void> => {
    const sha = await gitObjectHash(type, content);
    const obj: PackObject = { type, content, sha };
    slots[slot] = obj;
    objectsByOffset.set(entryOffset, obj);
    objectsBySha.set(sha, obj);
  };

  for (let i = 0; i < objectCount; i++) {
    if (offset >= packData.length) {
      throw new Error(`Truncated pack: expected ${objectCount} objects, found ${i}`);
    }

    const entryOffset = offset;
    const header = readPackObjectHeader(packData, offset);
    offset += header.bytesRead;
    slots.push(undefined);

    if (
      header.type === PACK_OBJECT_TYPE.COMMIT ||
      header.type === PACK_OBJECT_TYPE.TREE ||
      header.type === PACK_OBJECT_TYPE.BLOB ||
      header.type === PACK_OBJECT_TYPE.TAG
    ) {
      // Regular object: decompress the data
      const { result, bytesConsumed } = await inflateFromBuffer(packData, offset, header.size);
      offset += bytesConsumed;

      await store(i, entryOffset, packTypeToString(header.type), result);
    } else if (header.type === PACK_OBJECT_TYPE.OFS_DELTA) {
      // OFS_DELTA: base is at a negative offset in this pack
      const { value: negOffset, bytesRead: ofsBytes } = readOfsOffset(packData, offset);
      offset += ofsBytes;

      const { result: deltaData, bytesConsumed } = await inflateFromBuffer(packData, offset, header.size);
      offset += bytesConsumed;

      const baseOffset = entryOffset - negOffset;
      const baseObj = objectsByOffset.get(baseOffset);
      if (baseObj) {
        await store(i, entryOffset, baseObj.type, applyDelta(baseObj.content, deltaData));
      } else if (deferredOffsets.has(baseOffset)) {
        // The base is itself a delta that is still waiting for its own base.
        deferred.push({
          slot: i,
          entryOffset,
          deltaData,
          base: { kind: 'offset', offset: baseOffset },
        });
        deferredOffsets.add(entryOffset);
      } else {
        throw new Error(`OFS_DELTA base not found at offset ${baseOffset}`);
      }
    } else if (header.type === PACK_OBJECT_TYPE.REF_DELTA) {
      // REF_DELTA: base is referenced by SHA-1 (20 bytes)
      if (offset + SHA_SIZE > packData.length) {
        throw new Error(`Truncated REF_DELTA base id at offset ${offset}`);
      }
      const baseShaBytes = packData.slice(offset, offset + SHA_SIZE);
      offset += SHA_SIZE;

      const baseSha = Array.from(baseShaBytes, (b) => b.toString(16).padStart(2, '0')).join('');

      const { result: deltaData, bytesConsumed } = await inflateFromBuffer(packData, offset, header.size);
      offset += bytesConsumed;

      // Look for base in already-parsed objects, then fall back to the
      // external lookup. Only the base's type and content are needed, so an
      // external base is not hashed again.
      let base: { type: GitObjectType; content: Uint8Array } | null | undefined =
        objectsBySha.get(baseSha);
      if (!base && existingObjects) {
        base = await existingObjects(baseSha);
      }

      if (base) {
        await store(i, entryOffset, base.type, applyDelta(base.content, deltaData));
      } else {
        // The base may still appear later in this pack; retry after the scan.
        deferred.push({
          slot: i,
          entryOffset,
          deltaData,
          base: { kind: 'sha', sha: baseSha },
        });
        deferredOffsets.add(entryOffset);
      }
    } else {
      throw new Error(`Unknown pack object type: ${header.type}`);
    }
  }

  // Resolve deferred deltas. Each pass resolves every delta whose base has
  // become available; a pass that makes no progress means a base is missing.
  let waiting = deferred;
  while (waiting.length > 0) {
    const stillWaiting: DeferredDelta[] = [];

    for (const delta of waiting) {
      const baseObj =
        delta.base.kind === 'sha'
          ? objectsBySha.get(delta.base.sha)
          : objectsByOffset.get(delta.base.offset);

      if (baseObj) {
        await store(
          delta.slot,
          delta.entryOffset,
          baseObj.type,
          applyDelta(baseObj.content, delta.deltaData)
        );
      } else {
        stillWaiting.push(delta);
      }
    }

    if (stillWaiting.length === waiting.length) {
      for (const delta of stillWaiting) {
        if (delta.base.kind === 'sha') {
          throw new Error(`REF_DELTA base not found: ${delta.base.sha}`);
        }
      }
      // Deferred OFS_DELTAs always chain back to a deferred REF_DELTA, so
      // this is only a safeguard.
      throw new Error('Unresolvable delta chain in pack');
    }

    waiting = stillWaiting;
  }

  return slots.filter((obj): obj is PackObject => obj !== undefined);
}
181
182