zip.zig 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484
  1. /// The .ZIP File Format Specification is found here:
  2. /// https://pkwaredownloads.blob.core.windows.net/pem/APPNOTE.txt
  3. const std = @import("std");
  4. const testing = std.testing;
  5. pub const File = @import("zip/test.zig").File;
  6. pub const FileCache = @import("zip/test.zig").FileCache;
  7. pub const writeFile = @import("zip/test.zig").writeFile;
  /// Compression method identifier as stored in the 16-bit
  /// `compression_method` field of ZIP file headers.
  ///
  /// The enum is non-exhaustive: any other 16-bit value read from an archive
  /// is representable and must be handled through the `_` case of a switch.
  pub const CompressionMethod = enum(u16) {
      /// Method 0.
      store = 0,
      /// Method 8.
      deflate = 8,
      /// Method 9.
      deflate64 = 9,
      /// Every other method identifier.
      _,
  };
  /// Four-byte signature expected at the start of a central directory file header.
  pub const central_file_header_sig: [4]u8 = .{ 'P', 'K', 1, 2 };
  /// Four-byte signature expected at the start of a local file header.
  pub const local_file_header_sig: [4]u8 = .{ 'P', 'K', 3, 4 };
  /// Four-byte signature expected at the start of the end-of-central-directory record.
  pub const end_of_central_directory_sig: [4]u8 = .{ 'P', 'K', 5, 6 };
  /// The fixed 30-byte part of a local file header.
  ///
  /// `deserialize` and `serialize` convert between this struct and its
  /// on-disk form; every multi-byte integer is stored little-endian.
  /// Neither function validates `signature`; callers compare it themselves.
  pub const LocalFileHeader = struct {
      signature: [4]u8,
      minimum_version: u16,
      flags: u16,
      compression_method: CompressionMethod,
      last_modification_time: u16,
      last_modification_date: u16,
      crc32: u32,
      compressed_size: u32,
      uncompressed_size: u32,
      filename_len: u16,
      extra_len: u16,

      /// Reads a little-endian u16 located `at` bytes into `raw`.
      fn getU16(raw: *const [30]u8, comptime at: usize) u16 {
          return std.mem.readInt(u16, raw[at..][0..2], .little);
      }

      /// Reads a little-endian u32 located `at` bytes into `raw`.
      fn getU32(raw: *const [30]u8, comptime at: usize) u32 {
          return std.mem.readInt(u32, raw[at..][0..4], .little);
      }

      /// Stores `value` as a little-endian u16 `at` bytes into `out`.
      fn putU16(out: *[30]u8, comptime at: usize, value: u16) void {
          std.mem.writeInt(u16, out[at..][0..2], value, .little);
      }

      /// Stores `value` as a little-endian u32 `at` bytes into `out`.
      fn putU32(out: *[30]u8, comptime at: usize, value: u32) void {
          std.mem.writeInt(u32, out[at..][0..4], value, .little);
      }

      /// Decodes the 30 header bytes into a `LocalFileHeader`.
      pub fn deserialize(bytes: [30]u8) LocalFileHeader {
          const raw = &bytes;
          return LocalFileHeader{
              .signature = raw[0..4].*,
              .minimum_version = getU16(raw, 4),
              .flags = getU16(raw, 6),
              .compression_method = @enumFromInt(getU16(raw, 8)),
              .last_modification_time = getU16(raw, 10),
              .last_modification_date = getU16(raw, 12),
              .crc32 = getU32(raw, 14),
              .compressed_size = getU32(raw, 18),
              .uncompressed_size = getU32(raw, 22),
              .filename_len = getU16(raw, 26),
              .extra_len = getU16(raw, 28),
          };
      }

      /// Encodes the header into its 30-byte on-disk form.
      pub fn serialize(self: LocalFileHeader) [30]u8 {
          // Every byte of `out` is overwritten below before it is returned.
          var out: [30]u8 = undefined;
          out[0..4].* = self.signature;
          putU16(&out, 4, self.minimum_version);
          putU16(&out, 6, self.flags);
          putU16(&out, 8, @intFromEnum(self.compression_method));
          putU16(&out, 10, self.last_modification_time);
          putU16(&out, 12, self.last_modification_date);
          putU32(&out, 14, self.crc32);
          putU32(&out, 18, self.compressed_size);
          putU32(&out, 22, self.uncompressed_size);
          putU16(&out, 26, self.filename_len);
          putU16(&out, 28, self.extra_len);
          return out;
      }
  };
  /// The fixed 46-byte part of a central directory file header.
  ///
  /// `deserialize` and `serialize` convert between this struct and its
  /// on-disk form; every multi-byte integer is stored little-endian.
  /// Neither function validates `signature`; callers compare it themselves.
  pub const CentralDirectoryFileHeader = struct {
      signature: [4]u8,
      version: u16,
      minimum_version: u16,
      flags: u16,
      compression_method: CompressionMethod,
      last_modification_time: u16,
      last_modification_date: u16,
      crc32: u32,
      compressed_size: u32,
      uncompressed_size: u32,
      filename_len: u16,
      extra_len: u16,
      comment_len: u16,
      disk_number: u16,
      internal_file_attributes: u16,
      external_file_attributes: u32,
      local_file_header_offset: u32,

      /// Reads a little-endian u16 located `at` bytes into `raw`.
      fn getU16(raw: *const [46]u8, comptime at: usize) u16 {
          return std.mem.readInt(u16, raw[at..][0..2], .little);
      }

      /// Reads a little-endian u32 located `at` bytes into `raw`.
      fn getU32(raw: *const [46]u8, comptime at: usize) u32 {
          return std.mem.readInt(u32, raw[at..][0..4], .little);
      }

      /// Stores `value` as a little-endian u16 `at` bytes into `out`.
      fn putU16(out: *[46]u8, comptime at: usize, value: u16) void {
          std.mem.writeInt(u16, out[at..][0..2], value, .little);
      }

      /// Stores `value` as a little-endian u32 `at` bytes into `out`.
      fn putU32(out: *[46]u8, comptime at: usize, value: u32) void {
          std.mem.writeInt(u32, out[at..][0..4], value, .little);
      }

      /// Decodes the 46 header bytes into a `CentralDirectoryFileHeader`.
      pub fn deserialize(bytes: [46]u8) CentralDirectoryFileHeader {
          const raw = &bytes;
          return CentralDirectoryFileHeader{
              .signature = raw[0..4].*,
              .version = getU16(raw, 4),
              .minimum_version = getU16(raw, 6),
              .flags = getU16(raw, 8),
              .compression_method = @enumFromInt(getU16(raw, 10)),
              .last_modification_time = getU16(raw, 12),
              .last_modification_date = getU16(raw, 14),
              .crc32 = getU32(raw, 16),
              .compressed_size = getU32(raw, 20),
              .uncompressed_size = getU32(raw, 24),
              .filename_len = getU16(raw, 28),
              .extra_len = getU16(raw, 30),
              .comment_len = getU16(raw, 32),
              .disk_number = getU16(raw, 34),
              .internal_file_attributes = getU16(raw, 36),
              .external_file_attributes = getU32(raw, 38),
              .local_file_header_offset = getU32(raw, 42),
          };
      }

      /// Encodes the header into its 46-byte on-disk form.
      pub fn serialize(self: CentralDirectoryFileHeader) [46]u8 {
          // Every byte of `out` is overwritten below before it is returned.
          var out: [46]u8 = undefined;
          out[0..4].* = self.signature;
          putU16(&out, 4, self.version);
          putU16(&out, 6, self.minimum_version);
          putU16(&out, 8, self.flags);
          putU16(&out, 10, @intFromEnum(self.compression_method));
          putU16(&out, 12, self.last_modification_time);
          putU16(&out, 14, self.last_modification_date);
          putU32(&out, 16, self.crc32);
          putU32(&out, 20, self.compressed_size);
          putU32(&out, 24, self.uncompressed_size);
          putU16(&out, 28, self.filename_len);
          putU16(&out, 30, self.extra_len);
          putU16(&out, 32, self.comment_len);
          putU16(&out, 34, self.disk_number);
          putU16(&out, 36, self.internal_file_attributes);
          putU32(&out, 38, self.external_file_attributes);
          putU32(&out, 42, self.local_file_header_offset);
          return out;
      }
  };
  /// The fixed 22-byte end-of-central-directory record, without its
  /// four-byte signature (the signature is implied, not stored as a field).
  ///
  /// Every multi-byte integer is stored little-endian.
  pub const EndOfCentralDirectoryRecord = struct {
      disk_number: u16,
      central_directory_disk_number: u16,
      record_count_disk: u16,
      record_count_total: u16,
      central_directory_size: u32,
      central_directory_offset: u32,
      comment_len: u16,

      /// Reads a little-endian u16 located `at` bytes into `raw`.
      fn getU16(raw: *const [22]u8, comptime at: usize) u16 {
          return std.mem.readInt(u16, raw[at..][0..2], .little);
      }

      /// Reads a little-endian u32 located `at` bytes into `raw`.
      fn getU32(raw: *const [22]u8, comptime at: usize) u32 {
          return std.mem.readInt(u32, raw[at..][0..4], .little);
      }

      /// Decodes the record from its 22-byte on-disk form.
      /// The signature in `bytes[0..4]` is not inspected here.
      pub fn read(bytes: [22]u8) EndOfCentralDirectoryRecord {
          const raw = &bytes;
          return .{
              .disk_number = getU16(raw, 4),
              .central_directory_disk_number = getU16(raw, 6),
              .record_count_disk = getU16(raw, 8),
              .record_count_total = getU16(raw, 10),
              .central_directory_size = getU32(raw, 12),
              .central_directory_offset = getU32(raw, 16),
              .comment_len = getU16(raw, 20),
          };
      }

      /// Encodes the record into its 22-byte on-disk form, always starting
      /// with `end_of_central_directory_sig`.
      pub fn serialize(self: EndOfCentralDirectoryRecord) [22]u8 {
          // Every byte of `out` is overwritten below before it is returned.
          var out: [22]u8 = undefined;
          out[0..4].* = end_of_central_directory_sig;
          std.mem.writeInt(u16, out[4..6], self.disk_number, .little);
          std.mem.writeInt(u16, out[6..8], self.central_directory_disk_number, .little);
          std.mem.writeInt(u16, out[8..10], self.record_count_disk, .little);
          std.mem.writeInt(u16, out[10..12], self.record_count_total, .little);
          std.mem.writeInt(u32, out[12..16], self.central_directory_size, .little);
          std.mem.writeInt(u32, out[16..20], self.central_directory_offset, .little);
          std.mem.writeInt(u16, out[20..22], self.comment_len, .little);
          return out;
      }
  };
  /// Searches `file` backwards from its end for the end-of-central-directory
  /// record and returns its 22 raw bytes.
  ///
  /// Errors:
  /// - `error.ZipTruncated`: the file is shorter than 22 bytes, or a read
  ///   returned fewer bytes than requested.
  /// - `error.ZipMissingEocdr`: no matching record was found within the
  ///   last 22 + 65535 bytes, or before the start of the file.
  /// - any error from `getEndPos`, `seekFromEnd` or `readAll`.
  ///
  /// The file position is left wherever the last read ended.
  pub fn findEocdr(file: std.fs.File) ![22]u8 {
      // The EOCD record can be followed by a variable-length comment, which
      // makes ZIP parsing ambiguous in general: a valid comment may itself
      // contain the bytes of another valid EOCD record. We simply accept the
      // candidate closest to the end of the file whose signature matches and
      // whose comment-length field equals the number of bytes that follow it.
      // TODO: make this more efficient
      // we need a backward_buffered_reader
      const eocdr_len = 22;
      const total_len = try file.getEndPos();
      if (total_len < eocdr_len)
          return error.ZipTruncated;

      // `window` always holds the 22 bytes that start `trailing` bytes
      // before the last possible record position.
      var window: [eocdr_len]u8 = undefined;
      try file.seekFromEnd(-eocdr_len);
      if ((try file.readAll(&window)) != eocdr_len)
          return error.ZipTruncated;

      var trailing: u16 = 0;
      while (!std.mem.eql(u8, window[0..4], &end_of_central_directory_sig) or
          std.mem.readInt(u16, window[20..22], .little) != trailing)
      {
          if (trailing == std.math.maxInt(u16))
              return error.ZipMissingEocdr;

          // Slide the window one byte towards the start of the file: shift
          // the contents right, then fetch the new leading byte.
          std.mem.copyBackwards(u8, window[1..], window[0 .. window.len - 1]);
          trailing += 1;
          if (@as(u64, eocdr_len) + @as(u64, trailing) > total_len)
              return error.ZipMissingEocdr;

          try file.seekFromEnd(-eocdr_len - @as(i64, trailing));
          if ((try file.readAll(window[0..1])) != 1)
              return error.ZipTruncated;
      }
      return window;
  }
  /// Returns a reader type that wraps `UnderlyingReader` and hands out at
  /// most `remaining` bytes in total; once the budget is used up every read
  /// reports 0 bytes (end of stream) without touching the wrapped reader.
  fn LimitedReader(comptime UnderlyingReader: type) type {
      return struct {
          underlying_reader: UnderlyingReader,
          /// Bytes that may still be read through this wrapper.
          remaining: usize,

          const Self = @This();
          pub const Error = UnderlyingReader.Error;
          pub const Reader = std.io.Reader(*Self, Error, read);

          /// Reads into `buffer`, never requesting more than the remaining
          /// budget, and deducts the bytes actually read from that budget.
          fn read(self: *Self, buffer: []u8) Error!usize {
              const allowed = if (buffer.len < self.remaining) buffer.len else self.remaining;
              if (allowed == 0) return 0;
              const got = try self.underlying_reader.read(buffer[0..allowed]);
              self.remaining -= got;
              return got;
          }

          pub fn reader(self: *Self) Reader {
              return .{ .context = self };
          }
      };
  }
  /// Wraps `reader` in a `LimitedReader` that yields at most `limit` bytes.
  fn limited_reader(reader: anytype, limit: usize) LimitedReader(@TypeOf(reader)) {
      const Limited = LimitedReader(@TypeOf(reader));
      return Limited{
          .remaining = limit,
          .underlying_reader = reader,
      };
  }
  /// Copies one archive entry from `reader` to `writer`, undoing `method`,
  /// and returns the actual CRC-32 of the bytes that were written. The caller
  /// should validate that value against the expected entry.crc32 value.
  ///
  /// - `compressed_size`: number of bytes of entry data available from `reader`.
  /// - `uncompressed_size`: number of bytes the entry must produce.
  /// - `reader`: positioned at the first byte of the entry data.
  /// - `writer`: can be anything with a
  ///   `writeAll(self: *Self, chunk: []const u8) anyerror!void` method.
  ///
  /// Errors:
  /// - `error.ZipUncompressSizeMismatch`: a stored entry whose two sizes
  ///   differ, or a deflated entry that produces fewer or more bytes than
  ///   `uncompressed_size`. Output that would exceed `uncompressed_size` is
  ///   rejected before it is written.
  /// - `error.UnsupportedCompressionMethod`: `method` is not one of the
  ///   named `CompressionMethod` values.
  /// - any error from `reader`, `writer` or the flate decompressor.
  pub fn decompress(
      method: CompressionMethod,
      compressed_size: u32,
      uncompressed_size: u32,
      reader: anytype,
      writer: anytype,
  ) !u32 {
      var hash = std.hash.Crc32.init();
      switch (method) {
          .store => {
              if (compressed_size != uncompressed_size)
                  return error.ZipUncompressSizeMismatch;
              var buf: [std.mem.page_size]u8 = undefined;
              var remaining: u32 = compressed_size;
              while (remaining > 0) {
                  const chunk = buf[0..@min(remaining, buf.len)];
                  try reader.readNoEof(chunk);
                  try writer.writeAll(chunk);
                  hash.update(chunk);
                  // chunk.len <= remaining, so the cast and subtraction are safe.
                  remaining -= @intCast(chunk.len);
              }
          },
          // NOTE(review): method 9 is fed to the same flate decompressor as
          // method 8 — confirm that this is intended for real-world Deflate64
          // streams and not only for the archives produced by this file's tests.
          .deflate, .deflate64 => {
              var br = std.io.bufferedReader(reader);
              // Keep the decompressor from consuming bytes past this entry.
              var lr = limited_reader(br.reader(), compressed_size);
              var decompressor = std.compress.flate.decompressor(lr.reader());
              // Count down from the declared size rather than up in a u32:
              // an untrusted stream may expand beyond 4 GiB, which would
              // overflow an accumulating u32 counter. Bailing out before the
              // write also stops an oversized stream from being written out.
              var remaining_out: u32 = uncompressed_size;
              while (try decompressor.next()) |chunk| {
                  if (chunk.len > remaining_out)
                      return error.ZipUncompressSizeMismatch;
                  try writer.writeAll(chunk);
                  hash.update(chunk);
                  remaining_out -= @intCast(chunk.len);
              }
              if (remaining_out != 0)
                  return error.ZipUncompressSizeMismatch;
          },
          _ => return error.UnsupportedCompressionMethod,
      }
      return hash.final();
  }
  /// Iterates over the central directory file headers of a single-disk ZIP
  /// archive. The iterator does not own `file`; the caller keeps it open for
  /// as long as the iterator and its entries are used.
  pub const Iterator = struct {
      file: std.fs.File,
      eocdr: EndOfCentralDirectoryRecord,
      /// Number of headers already returned by `next`.
      next_central_header_index: u16,
      /// Offset of the next header, relative to `eocdr.central_directory_offset`.
      next_central_header_offset: u64,

      /// Locates and decodes the end-of-central-directory record of `file`.
      ///
      /// Returns `error.ZipUnsupportedMultiDisk` when the record describes a
      /// multi-disk archive, and forwards any error from `findEocdr`.
      pub fn init(file: std.fs.File) !Iterator {
          const eocdr = blk: {
              const eocdr_bytes = try findEocdr(file);
              break :blk EndOfCentralDirectoryRecord.read(eocdr_bytes);
          };
          // Don't support multi-disk archives.
          if (eocdr.disk_number != 0 or
              eocdr.central_directory_disk_number != 0 or
              eocdr.record_count_disk != eocdr.record_count_total)
          {
              return error.ZipUnsupportedMultiDisk;
          }
          return .{
              .file = file,
              .eocdr = eocdr,
              .next_central_header_offset = 0,
              .next_central_header_index = 0,
          };
      }

      /// Reads the next central directory file header, or returns null once
      /// `eocdr.record_count_total` headers have been returned.
      ///
      /// Errors:
      /// - `error.ZipTruncated`: the file ends inside the 46-byte header.
      /// - `error.ZipHeader`: the header signature is wrong.
      /// - `error.ZipUnsupportedMultiDisk`: the entry lives on another disk.
      /// - any error from seeking or reading `file`.
      pub fn next(self: *Iterator) !?Entry {
          if (self.next_central_header_index >= self.eocdr.record_count_total) {
              return null;
          }
          const header_file_offset: u64 = @as(u64, self.eocdr.central_directory_offset) + self.next_central_header_offset;
          const header = blk: {
              try self.file.seekTo(header_file_offset);
              var header: [46]u8 = undefined;
              const len = try self.file.readAll(&header);
              if (len != header.len)
                  return error.ZipTruncated;
              break :blk CentralDirectoryFileHeader.deserialize(header);
          };
          if (!std.mem.eql(u8, &header.signature, &central_file_header_sig))
              return error.ZipHeader;
          self.next_central_header_index += 1;
          // Widen every term to u64 before adding. The three length fields
          // are u16 and come straight from the (untrusted) archive, so adding
          // them at their native width overflows as soon as the sum exceeds
          // 65535.
          self.next_central_header_offset += @as(u64, 46) +
              @as(u64, header.filename_len) +
              @as(u64, header.extra_len) +
              @as(u64, header.comment_len);
          if (header.disk_number != 0)
              return error.ZipUnsupportedMultiDisk;
          return .{
              .header_file_offset = header_file_offset,
              .header = header,
          };
      }

      /// One central directory file header together with its position in
      /// the archive.
      pub const Entry = struct {
          /// Absolute file offset of the 46-byte central directory header.
          header_file_offset: u64,
          header: CentralDirectoryFileHeader,

          /// Returns true when `filename` must not be extracted: it is
          /// empty, absolute, or contains a `..` path component that could
          /// resolve outside the destination directory.
          fn isBadFilename(filename: []const u8) bool {
              if (filename.len == 0 or filename[0] == '/')
                  return true;
              if (std.fs.path.isAbsolute(filename))
                  return true;
              // Walk the components using the host's notion of a separator,
              // because that is what decides where the file is created.
              var start: usize = 0;
              for (filename, 0..) |byte, i| {
                  if (!std.fs.path.isSep(byte)) continue;
                  if (std.mem.eql(u8, filename[start..i], ".."))
                      return true;
                  start = i + 1;
              }
              return std.mem.eql(u8, filename[start..], "..");
          }

          /// Extracts this entry from `zip_file` into `dest` and returns
          /// the CRC-32 of the extracted data, which the caller should
          /// compare against `header.crc32`.
          ///
          /// `filename_buf` is scratch space that receives the entry name;
          /// it must hold at least `header.filename_len` bytes. Names ending
          /// in '/' create a directory tree and return the CRC-32 of empty
          /// input. Files are created exclusively, so an existing file makes
          /// the call fail.
          ///
          /// Errors:
          /// - `error.ZipInsufficientBuffer`: `filename_buf` is too small.
          /// - `error.ZipTruncated`: the archive ends inside a header or name.
          /// - `error.ZipHeader`: the local file header signature is wrong.
          /// - `error.ZipRedundancyFail`: a non-zero local header field
          ///   disagrees with the central directory.
          /// - `error.Invalid`: the name is empty, absolute, or contains a
          ///   `..` component.
          /// - `error.ZipInvalid`: a directory entry with a non-zero size.
          /// - any error from the file system or from `decompress`.
          pub fn extract(self: Entry, zip_file: std.fs.File, filename_buf: []u8, dest: std.fs.Dir) !u32 {
              if (filename_buf.len < self.header.filename_len)
                  return error.ZipInsufficientBuffer;
              const filename = filename_buf[0..self.header.filename_len];
              // The name directly follows the 46-byte central header.
              try zip_file.seekTo(self.header_file_offset + 46);
              {
                  const len = try zip_file.readAll(filename);
                  if (len != filename.len)
                      return error.ZipTruncated;
              }
              // Number of variable-length bytes (name + extra field) that
              // sit between the fixed local header and the entry data.
              const local_data_header_offset: u64 = local_data_header_offset: {
                  const local_header = blk: {
                      try zip_file.seekTo(self.header.local_file_header_offset);
                      var local_header: [30]u8 = undefined;
                      const len = try zip_file.readAll(&local_header);
                      if (len != local_header.len)
                          return error.ZipTruncated;
                      break :blk LocalFileHeader.deserialize(local_header);
                  };
                  if (!std.mem.eql(u8, &local_header.signature, &local_file_header_sig))
                      return error.ZipHeader;
                  // TODO: verify minimum_version
                  // TODO: verify flags
                  // TODO: verify compression method
                  // TODO: verify last_mod_time
                  // TODO: verify last_mod_date
                  // TODO: verify filename_len and filename?
                  // TODO: extra?
                  // A zero in the local header is accepted as "not recorded
                  // here"; only non-zero values must match.
                  if (local_header.crc32 != 0 and local_header.crc32 != self.header.crc32)
                      return error.ZipRedundancyFail;
                  if (local_header.compressed_size != 0 and
                      local_header.compressed_size != self.header.compressed_size)
                      return error.ZipRedundancyFail;
                  if (local_header.uncompressed_size != 0 and
                      local_header.uncompressed_size != self.header.uncompressed_size)
                      return error.ZipRedundancyFail;
                  break :local_data_header_offset @as(u64, local_header.filename_len) +
                      @as(u64, local_header.extra_len);
              };
              // The name comes from an untrusted archive: refuse anything
              // that could be created outside of `dest`.
              if (isBadFilename(filename)) {
                  return error.Invalid;
              }
              // All entries that end in '/' are directories
              if (filename[filename.len - 1] == '/') {
                  if (self.header.uncompressed_size != 0)
                      return error.ZipInvalid;
                  try dest.makePath(filename[0 .. filename.len - 1]);
                  return std.hash.Crc32.hash(&.{});
              }
              const out_file = blk: {
                  if (std.fs.path.dirname(filename)) |dirname| {
                      var parent_dir = try dest.makeOpenPath(dirname, .{});
                      defer parent_dir.close();
                      const basename = std.fs.path.basename(filename);
                      break :blk try parent_dir.createFile(basename, .{ .exclusive = true });
                  }
                  break :blk try dest.createFile(filename, .{ .exclusive = true });
              };
              defer out_file.close();
              const local_data_file_offset: u64 =
                  @as(u64, self.header.local_file_header_offset) +
                  @as(u64, 30) +
                  local_data_header_offset;
              try zip_file.seekTo(local_data_file_offset);
              return try decompress(
                  self.header.compression_method,
                  self.header.compressed_size,
                  self.header.uncompressed_size,
                  zip_file.reader(),
                  out_file.writer(),
              );
          }
      };
  };
  /// Extracts every entry of the ZIP archive `file` into the directory `dest`.
  ///
  /// Stops at the first failure. Returns `error.ZipCrcMismatch` when the
  /// CRC-32 computed while extracting an entry differs from the one recorded
  /// in its central directory header, and forwards any error from
  /// `Iterator.init`, `Iterator.next` or `Entry.extract`.
  pub fn pipeToFileSystem(dest: std.fs.Dir, file: std.fs.File) !void {
      // Scratch space that receives each entry's name during extraction.
      var name_storage: [std.fs.MAX_PATH_BYTES]u8 = undefined;
      var entries = try Iterator.init(file);
      while (true) {
          const entry = (try entries.next()) orelse break;
          const actual_crc = try entry.extract(file, &name_storage, dest);
          if (actual_crc != entry.header.crc32)
              return error.ZipCrcMismatch;
      }
  }
  /// Test helper: runs `testZipWithCache` for `files`, providing one
  /// `FileCache` slot per file.
  fn testZip(comptime files: []const File) !void {
      var cache_slots: [files.len]FileCache = undefined;
      return testZipWithCache(files, &cache_slots);
  }
  /// Test helper: writes `files` into an archive named "zip" inside a fresh
  /// temporary directory, extracts that archive into the same directory and
  /// checks that each extracted file has the expected content.
  /// `cache` is passed through to `writeFile` unchanged.
  fn testZipWithCache(files: []const File, cache: []FileCache) !void {
      var tmp = testing.tmpDir(.{ .no_follow = true });
      defer tmp.cleanup();
      const root = tmp.dir;

      // Write the archive, closing it before it is reopened for reading.
      {
          var archive_out = try root.createFile("zip", .{});
          defer archive_out.close();
          try writeFile(archive_out, files, cache);
      }

      var archive_in = try root.openFile("zip", .{});
      defer archive_in.close();
      try pipeToFileSystem(root, archive_in);

      for (files) |expected| {
          var extracted = try root.openFile(expected.name, .{});
          defer extracted.close();
          // Only the first 4096 bytes of each extracted file are compared.
          var content_buf: [4096]u8 = undefined;
          const content_len = try extracted.readAll(&content_buf);
          try testing.expectEqualStrings(expected.content, content_buf[0..content_len]);
      }
  }
  // Round-trips an archive that holds a single stored file.
  test "zip one file" {
      const files = [_]File{
          .{ .name = "onefile.txt", .content = "Just a single file\n", .compression = .store },
      };
      try testZip(&files);
  }
  // Round-trips several stored files, including ones in nested directories.
  test "zip multiple files" {
      const files = [_]File{
          .{ .name = "foo", .content = "a foo file\n", .compression = .store },
          .{ .name = "subdir/bar", .content = "bar is this right?\nanother newline\n", .compression = .store },
          .{ .name = "subdir/another/baz", .content = "bazzy mc bazzerson", .compression = .store },
      };
      try testZip(&files);
  }
  // Round-trips an archive that mixes deflate, deflate64 and stored entries.
  test "zip deflated" {
      const files = [_]File{
          .{ .name = "deflateme", .content = "This is a deflated file.\nIt should be smaller in the Zip file1\n", .compression = .deflate },
          .{ .name = "deflateme64", .content = "The 64k version of deflate!\n", .compression = .deflate64 },
          .{ .name = "raw", .content = "Not all files need to be deflated in the same Zip.\n", .compression = .store },
      };
      try testZip(&files);
  }
  437. }