86 lines · 2.7 KB
| 1 | /** |
| 2 | * Zlib compression/decompression. |
| 3 | * |
| 4 | * Uses Node.js zlib module via nodejs_compat for reliable pack file parsing. |
| 5 | * The Web Streams DecompressionStream doesn't handle trailing data well in workerd, |
| 6 | * which is critical for parsing concatenated zlib streams in git pack files. |
| 7 | */ |
| 8 | |
| 9 | import { inflateSync, deflateSync, inflateRawSync } from 'node:zlib'; |
| 10 | |
/**
 * Compress `data` into a zlib-format stream.
 *
 * The work is done synchronously by `deflateSync`; the function is `async`
 * only so that callers receive a Promise.
 *
 * @param data - Raw bytes to compress.
 * @returns A fresh `Uint8Array` holding the compressed bytes.
 */
export async function deflate(data: Uint8Array): Promise<Uint8Array> {
  const compressed = deflateSync(data);
  // Copy into a plain Uint8Array so callers never see the Buffer subclass.
  return new Uint8Array(compressed);
}
| 15 | |
/**
 * Decompress a zlib-format stream.
 *
 * The work is done synchronously by `inflateSync`; the function is `async`
 * only so that callers receive a Promise. Errors thrown by `inflateSync`
 * surface as a rejected Promise.
 *
 * @param data - zlib-compressed bytes.
 * @returns A fresh `Uint8Array` holding the decompressed bytes.
 */
export async function inflate(data: Uint8Array): Promise<Uint8Array> {
  const decompressed = inflateSync(data);
  // Copy into a plain Uint8Array so callers never see the Buffer subclass.
  return new Uint8Array(decompressed);
}
| 20 | |
/** Shape returned by `inflateSync` when it is called with `{ info: true }`. */
interface InflateInfo {
  buffer: Uint8Array;
  engine: { bytesWritten: number };
}

/** Type guard: true when `value` is the `{ buffer, engine }` info object. */
function isInflateInfo(value: unknown): value is InflateInfo {
  // A bare Buffer also has a `.buffer` property, so rule it out first.
  if (value == null || value instanceof Uint8Array) {
    return false;
  }
  const candidate = value as Partial<InflateInfo>;
  return (
    candidate.buffer instanceof Uint8Array &&
    typeof candidate.engine?.bytesWritten === 'number'
  );
}

/**
 * Fallback: find the length of the zlib stream at the start of `data` by
 * binary-searching for the shortest raw-deflate prefix that inflates fully.
 *
 * Only valid after `data` has already been inflated successfully as zlib, so
 * a complete stream is known to fit inside it.
 */
function measureZlibStreamLength(data: Uint8Array, expectedSize: number): number {
  const headerSize = 2; // CMF + FLG bytes
  const trailerSize = 4; // Adler-32 checksum

  let lo = 1;
  let hi = data.length - headerSize - trailerSize;

  while (lo < hi) {
    const mid = lo + Math.floor((hi - lo) / 2);
    let complete: boolean;
    try {
      const probe = inflateRawSync(data.subarray(headerSize, headerSize + mid));
      complete = probe.length === expectedSize;
    } catch {
      // A truncated prefix is expected to throw; it just means "too short".
      complete = false;
    }
    if (complete) {
      hi = mid;
    } else {
      lo = mid + 1;
    }
  }

  return headerSize + lo + trailerSize;
}

/**
 * Inflate a zlib-compressed entry from a pack file buffer.
 *
 * Pack entries are concatenated zlib streams, so besides the inflated payload
 * the caller needs to know how many compressed bytes the entry occupied in
 * order to advance to the next one.
 *
 * Strategy: call `inflateSync` with `{ info: true }`, which returns the zlib
 * engine alongside the output; `engine.bytesWritten` is the number of input
 * bytes the inflater consumed, i.e. the exact length of this zlib stream
 * (header + deflate data + Adler-32). If the runtime ignores `info` and
 * returns only the bytes, fall back to a binary search over raw-deflate
 * prefixes.
 *
 * @param buf - Buffer containing the pack data.
 * @param offset - Index in `buf` where the zlib stream starts.
 * @param expectedSize - Inflated size recorded for the entry.
 * @returns The inflated bytes and the number of compressed bytes consumed.
 * @throws RangeError if `offset` is not an integer within `[0, buf.length]`.
 * @throws Error if the inflated size differs from `expectedSize`, or if zlib
 *   rejects the data.
 */
export async function inflateFromBuffer(
  buf: Uint8Array,
  offset: number,
  expectedSize: number
): Promise<{ result: Uint8Array; bytesConsumed: number }> {
  // A negative offset would otherwise be read as "from the end" and silently
  // inflate the wrong bytes.
  if (!Number.isInteger(offset) || offset < 0 || offset > buf.length) {
    throw new RangeError(
      `Pack offset out of range: ${offset} (buffer length ${buf.length})`
    );
  }

  // subarray() is a view; slice() would copy the rest of the pack per entry.
  const data = buf.subarray(offset);

  // Typed as unknown because the declared return type does not model `info`.
  const inflated: unknown = inflateSync(data, { info: true });

  let result: Uint8Array;
  let bytesConsumed: number | undefined;
  if (isInflateInfo(inflated)) {
    result = new Uint8Array(inflated.buffer);
    bytesConsumed = inflated.engine.bytesWritten;
  } else if (inflated instanceof Uint8Array) {
    // Runtime did not honour `info`; the length is measured below.
    result = new Uint8Array(inflated);
  } else {
    throw new TypeError('Unexpected result from inflateSync');
  }

  if (result.length !== expectedSize) {
    throw new Error(
      `Inflated size mismatch: expected ${expectedSize}, got ${result.length}`
    );
  }

  if (bytesConsumed === undefined) {
    bytesConsumed = measureZlibStreamLength(data, expectedSize);
  }

  return { result, bytesConsumed };
}
| 86 |