198 lines · 5.6 KB
| 1 | /** |
| 2 | * Git object types and serialization. |
| 3 | * |
| 4 | * Git has four object types: blob, tree, commit, tag. |
| 5 | * Each is stored as: "{type} {size}\0{content}" then zlib-compressed. |
| 6 | */ |
| 7 | |
/** Shared UTF-8 encoder used when writing object headers and tree entry text. */
const encoder = new TextEncoder();

/** Shared UTF-8 decoder used when reading object headers, tree entry text and commit text. */
const decoder = new TextDecoder('utf-8');
| 10 | |
/** The four kinds of git object; each string is the type name written into an object header. */
export type GitObjectType =
  | 'blob'
  | 'tree'
  | 'commit'
  | 'tag';
| 12 | |
/**
 * Numeric type IDs used in pack files.
 *
 * IDs 1-4 name the four concrete object kinds; 6 and 7 mark delta entries.
 * The value 5 is intentionally absent from this table.
 */
export const PACK_OBJECT_TYPE = {
  // Concrete objects
  COMMIT: 1,
  TREE: 2,
  BLOB: 3,
  TAG: 4,
  // Delta-encoded entries
  OFS_DELTA: 6,
  REF_DELTA: 7,
} as const;

/** Union of every numeric value that appears in {@link PACK_OBJECT_TYPE}. */
export type PackObjectTypeNum = typeof PACK_OBJECT_TYPE[keyof typeof PACK_OBJECT_TYPE];
| 24 | |
/**
 * Map a numeric pack-file type ID to the matching git object type name.
 *
 * @param type - Numeric type ID as found in a pack entry.
 * @returns The object type name for IDs 1-4.
 * @throws Error if `type` is not one of the four concrete object IDs
 *   (this includes the delta IDs, which have no object type name).
 */
export function packTypeToString(type: number): GitObjectType {
  if (type === PACK_OBJECT_TYPE.COMMIT) return 'commit';
  if (type === PACK_OBJECT_TYPE.TREE) return 'tree';
  if (type === PACK_OBJECT_TYPE.BLOB) return 'blob';
  if (type === PACK_OBJECT_TYPE.TAG) return 'tag';
  throw new Error(`Unknown pack object type: ${type}`);
}
| 34 | |
/**
 * Map a git object type name to its numeric pack-file type ID.
 *
 * @param type - One of the four git object type names.
 * @returns The numeric ID from `PACK_OBJECT_TYPE`.
 * @throws Error if `type` is not a known object type. The type system rules
 *   this out, but values that arrive through a cast can still be anything at
 *   runtime; throwing mirrors `packTypeToString` instead of silently
 *   returning `undefined`.
 */
export function stringToPackType(type: GitObjectType): number {
  switch (type) {
    case 'commit': return PACK_OBJECT_TYPE.COMMIT;
    case 'tree': return PACK_OBJECT_TYPE.TREE;
    case 'blob': return PACK_OBJECT_TYPE.BLOB;
    case 'tag': return PACK_OBJECT_TYPE.TAG;
    default: {
      // Compile-time exhaustiveness check plus a runtime guard.
      const unexpected: never = type;
      throw new Error(`Unknown git object type: ${String(unexpected)}`);
    }
  }
}
| 43 | |
/** A parsed git object: its type, its content bytes, and its SHA. */
export interface GitObject {
  /** Which of the four git object kinds this is. */
  type: GitObjectType;
  /** Object body, without the "{type} {size}\0" header. */
  content: Uint8Array;
  /** SHA identifying the object (the hashing itself is done outside this file). */
  sha: string;
}
| 50 | |
/**
 * Build the storable (pre-compression) form of a git object.
 *
 * @param type - Object type name written into the header.
 * @param content - Object body; its byte length is written into the header.
 * @returns A new buffer holding "{type} {size}\0" followed by `content`.
 */
export function serializeGitObject(type: GitObjectType, content: Uint8Array): Uint8Array {
  const prefix = encoder.encode(`${type} ${content.length}\0`);
  const bodyStart = prefix.length;

  const out = new Uint8Array(bodyStart + content.length);
  out.set(prefix);
  out.set(content, bodyStart);
  return out;
}
| 59 | |
/**
 * Parse a raw git object (after decompression).
 * Format: "{type} {size}\0{content}"
 *
 * @param raw - The full decompressed object, header included.
 * @returns The validated object type and a copy of the content bytes.
 * @throws Error if the NUL separator or the header space is missing, if the
 *   type is not one of blob/tree/commit/tag, if the size is not a plain
 *   decimal number, or if the size does not match the content length.
 */
export function parseRawGitObject(raw: Uint8Array): { type: GitObjectType; content: Uint8Array } {
  // Find the NUL byte separating header from content
  const nulIdx = raw.indexOf(0);
  if (nulIdx === -1) throw new Error('Invalid git object: no NUL separator');

  // The header is only read, so a view avoids copying it.
  const headerStr = decoder.decode(raw.subarray(0, nulIdx));
  const spaceIdx = headerStr.indexOf(' ');
  if (spaceIdx === -1) throw new Error('Invalid git object header');

  // Validate the type name instead of casting an arbitrary string.
  const typeStr = headerStr.slice(0, spaceIdx);
  let type: GitObjectType;
  switch (typeStr) {
    case 'blob':
    case 'tree':
    case 'commit':
    case 'tag':
      type = typeStr;
      break;
    default:
      throw new Error(`Invalid git object type: ${typeStr}`);
  }

  // Require digits only: parseInt would accept "12abc" or " 5" and turn
  // garbage into NaN, which then surfaces as a confusing size mismatch.
  const sizeStr = headerStr.slice(spaceIdx + 1);
  if (!/^\d+$/.test(sizeStr)) {
    throw new Error(`Invalid git object size: ${sizeStr}`);
  }
  const size = Number(sizeStr);

  // slice (not subarray) so the returned content does not alias `raw`.
  const content = raw.slice(nulIdx + 1);
  if (content.length !== size) {
    throw new Error(`Git object size mismatch: header says ${size}, got ${content.length}`);
  }

  return { type, content };
}
| 83 | |
| 84 | // --- Tree parsing --- |
| 85 | |
/** One entry of a tree object. */
export interface TreeEntry {
  /**
   * File mode as octal text, e.g. "100644", "040000", "120000".
   * The string is written and read back verbatim; no normalisation is applied.
   */
  mode: string;
  /** Entry name as it appears in the tree. */
  name: string;
  /** Object SHA as 40 hex characters. */
  sha: string;
}
| 91 | |
/**
 * Parse a tree object's content into entries.
 * Tree format: repeated "{mode} {name}\0{20-byte-sha}"
 *
 * Parsing stops quietly when no further space or NUL delimiter can be found,
 * returning the entries read so far. An entry whose delimiters are present
 * but whose SHA is cut short is reported as an error.
 *
 * @param content - Tree object body (without the object header).
 * @returns Entries in the order they appear, with the SHA as lowercase hex.
 * @throws Error if fewer than 20 SHA bytes follow an entry's NUL separator.
 */
export function parseTree(content: Uint8Array): TreeEntry[] {
  const SHA_BYTES = 20;
  const entries: TreeEntry[] = [];
  let offset = 0;

  while (offset < content.length) {
    // Find space separating mode from name
    const spaceIdx = content.indexOf(0x20, offset);
    if (spaceIdx === -1) break;

    // Find NUL separating name from SHA
    const nulIdx = content.indexOf(0, spaceIdx + 1);
    if (nulIdx === -1) break;

    // Reading past the end would yield undefined bytes and crash below,
    // so fail with a clear message instead.
    const shaStart = nulIdx + 1;
    const shaEnd = shaStart + SHA_BYTES;
    if (shaEnd > content.length) {
      throw new Error(
        `Invalid tree object: truncated SHA for entry at offset ${offset}`,
      );
    }

    // Views are enough for decoding; no need to copy the bytes.
    const mode = decoder.decode(content.subarray(offset, spaceIdx));
    const name = decoder.decode(content.subarray(spaceIdx + 1, nulIdx));

    // Next 20 bytes are the binary SHA-1
    let sha = '';
    for (let i = shaStart; i < shaEnd; i++) {
      sha += content[i].toString(16).padStart(2, '0');
    }

    entries.push({ mode, name, sha });
    offset = shaEnd;
  }

  return entries;
}
| 124 | |
/**
 * Serialize tree entries into tree object content.
 *
 * Each entry is written as "{mode} {name}\0" followed by the 20 raw SHA
 * bytes. Entries are written in the order given and `mode`/`name` are
 * written verbatim.
 *
 * @param entries - Entries to write.
 * @returns The concatenated tree object body.
 * @throws Error if an entry's `sha` is not exactly 40 hexadecimal characters.
 *   Without this check a short or non-hex SHA would be written as zero
 *   bytes, producing a tree that points at the wrong object.
 */
export function serializeTree(entries: TreeEntry[]): Uint8Array {
  const SHA_BYTES = 20;
  const SHA_HEX = /^[0-9a-fA-F]{40}$/;
  const parts: Uint8Array[] = [];
  let totalLen = 0;

  for (const entry of entries) {
    if (!SHA_HEX.test(entry.sha)) {
      throw new Error(`Invalid tree entry SHA for "${entry.name}": ${entry.sha}`);
    }

    const modeAndName = encoder.encode(`${entry.mode} ${entry.name}\0`);
    const entryBuf = new Uint8Array(modeAndName.length + SHA_BYTES);
    entryBuf.set(modeAndName, 0);
    // Decode the hex SHA two characters (one byte) at a time.
    for (let i = 0; i < SHA_BYTES; i++) {
      entryBuf[modeAndName.length + i] = parseInt(entry.sha.substring(i * 2, i * 2 + 2), 16);
    }

    parts.push(entryBuf);
    totalLen += entryBuf.length;
  }

  const result = new Uint8Array(totalLen);
  let offset = 0;
  for (const part of parts) {
    result.set(part, offset);
    offset += part.length;
  }
  return result;
}
| 150 | |
| 151 | // --- Commit parsing --- |
| 152 | |
/** The fields extracted from a commit object by `parseCommit`. */
export interface ParsedCommit {
  /** Value of the `tree` header; empty string if the header is absent. */
  tree: string;
  /** Values of every `parent` header, in order of appearance. */
  parents: string[];
  /** Raw value of the `author` header; empty string if absent. */
  author: string;
  /** Raw value of the `committer` header; empty string if absent. */
  committer: string;
  /** Everything after the first blank line. */
  message: string;
}
| 160 | |
/**
 * Parse a commit object's content.
 *
 * Header lines are read up to the first empty line; `tree`, `parent`,
 * `author` and `committer` are recorded and any other header line is
 * ignored. Everything after the empty line becomes the message.
 *
 * @param content - Commit object body (without the object header).
 * @returns The extracted fields; headers that never appear stay empty.
 */
export function parseCommit(content: Uint8Array): ParsedCommit {
  const rows = decoder.decode(content).split('\n');

  let tree = '';
  const parents: string[] = [];
  let author = '';
  let committer = '';

  let cursor = 0;
  while (cursor < rows.length) {
    const row = rows[cursor++];

    // The first empty line ends the headers; cursor already points past it.
    if (row === '') break;

    // Split "key value" at the first space; lines without one carry no header.
    const sep = row.indexOf(' ');
    if (sep === -1) continue;

    const value = row.slice(sep + 1);
    switch (row.slice(0, sep)) {
      case 'tree':
        tree = value;
        break;
      case 'parent':
        parents.push(value);
        break;
      case 'author':
        author = value;
        break;
      case 'committer':
        committer = value;
        break;
      default:
        // Unrecognised headers and continuation lines are skipped.
        break;
    }
  }

  // Whatever follows the header section is the commit message.
  const message = rows.slice(cursor).join('\n');

  return { tree, parents, author, committer, message };
}
| 198 |