/// The .ZIP File Format Specification is found here:
/// https://pkwaredownloads.blob.core.windows.net/pem/APPNOTE.txt
///
/// Note that this file uses the abbreviation "cd" for "central directory"
///
const builtin = @import("builtin");
const std = @import("std");
const testing = std.testing;

pub const testutil = @import("zip/test.zig");
const File = testutil.File;
const FileStore = testutil.FileStore;

pub const CompressionMethod = enum(u16) {
    store = 0,
    deflate = 8,
    _,
};

pub const central_file_header_sig = [4]u8{ 'P', 'K', 1, 2 };
pub const local_file_header_sig = [4]u8{ 'P', 'K', 3, 4 };
pub const end_record_sig = [4]u8{ 'P', 'K', 5, 6 };
pub const end_record64_sig = [4]u8{ 'P', 'K', 6, 6 };
pub const end_locator64_sig = [4]u8{ 'P', 'K', 6, 7 };

pub const ExtraHeader = enum(u16) {
    zip64_info = 0x1,
    _,
};

const GeneralPurposeFlags = packed struct(u16) {
    encrypted: bool,
    _: u15,
};

pub const LocalFileHeader = extern struct {
    signature: [4]u8 align(1),
    version_needed_to_extract: u16 align(1),
    flags: GeneralPurposeFlags align(1),
    compression_method: CompressionMethod align(1),
    last_modification_time: u16 align(1),
    last_modification_date: u16 align(1),
    crc32: u32 align(1),
    compressed_size: u32 align(1),
    uncompressed_size: u32 align(1),
    filename_len: u16 align(1),
    extra_len: u16 align(1),
};

pub const CentralDirectoryFileHeader = extern struct {
    signature: [4]u8 align(1),
    version_made_by: u16 align(1),
    version_needed_to_extract: u16 align(1),
    flags: GeneralPurposeFlags align(1),
    compression_method: CompressionMethod align(1),
    last_modification_time: u16 align(1),
    last_modification_date: u16 align(1),
    crc32: u32 align(1),
    compressed_size: u32 align(1),
    uncompressed_size: u32 align(1),
    filename_len: u16 align(1),
    extra_len: u16 align(1),
    comment_len: u16 align(1),
    disk_number: u16 align(1),
    internal_file_attributes: u16 align(1),
    external_file_attributes: u32 align(1),
    local_file_header_offset: u32 align(1),
};

pub const EndRecord64 = extern struct {
    signature: [4]u8 align(1),
    end_record_size: u64 align(1),
    version_made_by: u16 align(1),
    version_needed_to_extract: u16 align(1),
    disk_number: u32 align(1),
    central_directory_disk_number: u32 align(1),
    record_count_disk: u64 align(1),
    record_count_total: u64 align(1),
    central_directory_size: u64 align(1),
    central_directory_offset: u64 align(1),
};

pub const EndLocator64 = extern struct {
    signature: [4]u8 align(1),
    zip64_disk_count: u32 align(1),
    record_file_offset: u64 align(1),
    total_disk_count: u32 align(1),
};

pub const EndRecord = extern struct {
    signature: [4]u8 align(1),
    disk_number: u16 align(1),
    central_directory_disk_number: u16 align(1),
    record_count_disk: u16 align(1),
    record_count_total: u16 align(1),
    central_directory_size: u32 align(1),
    central_directory_offset: u32 align(1),
    comment_len: u16 align(1),

    pub fn need_zip64(self: EndRecord) bool {
        return isMaxInt(self.record_count_disk) or
            isMaxInt(self.record_count_total) or
            isMaxInt(self.central_directory_size) or
            isMaxInt(self.central_directory_offset);
    }
};
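
// Usage sketch: `need_zip64` reports whether any end-record counter is saturated
// at its maximum value, which signals that the real value lives in the zip64 end
// record. This is a minimal sketch using a zeroed record; the test name is ours.
test "EndRecord.need_zip64 usage sketch" {
    var rec = std.mem.zeroes(EndRecord);
    try testing.expect(!rec.need_zip64());
    rec.record_count_total = std.math.maxInt(u16);
    try testing.expect(rec.need_zip64());
}
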
/// Find and return the end record for the given seekable zip stream.
/// Note that `seekable_stream` must be an instance of `std.io.SeekableStream` and
/// its context must also have a `.reader()` method that returns an instance of
/// `std.io.Reader`.
pub fn findEndRecord(seekable_stream: anytype, stream_len: u64) !EndRecord {
    var buf: [@sizeOf(EndRecord) + std.math.maxInt(u16)]u8 = undefined;
    const record_len_max = @min(stream_len, buf.len);
    var loaded_len: u32 = 0;
    var comment_len: u16 = 0;
    while (true) {
        const record_len: u32 = @as(u32, comment_len) + @sizeOf(EndRecord);
        if (record_len > record_len_max)
            return error.ZipNoEndRecord;
        if (record_len > loaded_len) {
            const new_loaded_len = @min(loaded_len + 300, record_len_max);
            const read_len = new_loaded_len - loaded_len;
            try seekable_stream.seekTo(stream_len - @as(u64, new_loaded_len));
            const read_buf: []u8 = buf[buf.len - new_loaded_len ..][0..read_len];
            const len = try seekable_stream.context.reader().readAll(read_buf);
            if (len != read_len)
                return error.ZipTruncated;
            loaded_len = new_loaded_len;
        }
        const record_bytes = buf[buf.len - record_len ..][0..@sizeOf(EndRecord)];
        if (std.mem.eql(u8, record_bytes[0..4], &end_record_sig) and
            std.mem.readInt(u16, record_bytes[20..22], .little) == comment_len)
        {
            const record: *align(1) EndRecord = @ptrCast(record_bytes.ptr);
            if (builtin.target.cpu.arch.endian() != .little) {
                std.mem.byteSwapAllFields(@TypeOf(record.*), record);
            }
            return record.*;
        }
        if (comment_len == std.math.maxInt(u16))
            return error.ZipNoEndRecord;
        comment_len += 1;
    }
}
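
// Usage sketch: a minimal sketch of calling findEndRecord directly, assuming a
// FixedBufferStream's seekableStream() (whose context exposes `.reader()`, as the
// doc comment above requires). The buffer is just an empty archive's end record:
// the signature followed by zeroed fields and no comment.
test "findEndRecord usage sketch" {
    var buf = [_]u8{0} ** @sizeOf(EndRecord);
    @memcpy(buf[0..4], &end_record_sig);
    var fbs = std.io.fixedBufferStream(&buf);
    const record = try findEndRecord(fbs.seekableStream(), buf.len);
    try testing.expectEqual(@as(u16, 0), record.comment_len);
    try testing.expectEqual(@as(u16, 0), record.record_count_total);
}
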
/// Decompresses the given data from `reader` into `writer`. Fails if more than
/// `uncompressed_size` bytes are produced and verifies that exactly that
/// number of bytes is decompressed. Returns the CRC-32 of the uncompressed data.
/// `writer` can be anything with a `writeAll(self: *Self, chunk: []const u8) anyerror!void` method.
pub fn decompress(
    method: CompressionMethod,
    uncompressed_size: u64,
    reader: anytype,
    writer: anytype,
) !u32 {
    var hash = std.hash.Crc32.init();
    var total_uncompressed: u64 = 0;
    switch (method) {
        .store => {
            var buf: [std.mem.page_size]u8 = undefined;
            while (true) {
                const len = try reader.read(&buf);
                if (len == 0) break;
                try writer.writeAll(buf[0..len]);
                hash.update(buf[0..len]);
                total_uncompressed += @intCast(len);
            }
        },
        .deflate => {
            var br = std.io.bufferedReader(reader);
            var decompressor = std.compress.flate.decompressor(br.reader());
            while (try decompressor.next()) |chunk| {
                try writer.writeAll(chunk);
                hash.update(chunk);
                total_uncompressed += @intCast(chunk.len);
                if (total_uncompressed > uncompressed_size)
                    return error.ZipUncompressSizeTooSmall;
            }
            if (br.end != br.start)
                return error.ZipDeflateTruncated;
        },
        _ => return error.UnsupportedCompressionMethod,
    }
    if (total_uncompressed != uncompressed_size)
        return error.ZipUncompressSizeMismatch;
    return hash.final();
}
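
// Usage sketch: a minimal sketch of calling decompress on stored (method 0) data,
// assuming only that the reader has `read` and the writer has `writeAll` (an
// ArrayList(u8) writer satisfies the latter).
test "decompress stored data usage sketch" {
    const data = "hello zip";
    var fbs = std.io.fixedBufferStream(data);
    var out = std.ArrayList(u8).init(testing.allocator);
    defer out.deinit();
    const crc = try decompress(.store, data.len, fbs.reader(), out.writer());
    try testing.expectEqualStrings(data, out.items);
    try testing.expectEqual(std.hash.Crc32.hash(data), crc);
}
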
fn isBadFilename(filename: []const u8) bool {
    if (filename.len == 0 or filename[0] == '/')
        return true;
    var it = std.mem.splitScalar(u8, filename, '/');
    while (it.next()) |part| {
        if (std.mem.eql(u8, part, ".."))
            return true;
    }
    return false;
}
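
// Usage sketch: these are the filename checks that `Entry.extract` applies before
// writing anything to disk; empty names, absolute paths and ".." components are
// all rejected. The test name is ours.
test "isBadFilename usage sketch" {
    try testing.expect(isBadFilename(""));
    try testing.expect(isBadFilename("/absolute/path"));
    try testing.expect(isBadFilename("has/../dotdot"));
    try testing.expect(!isBadFilename("plain/relative/path.txt"));
}
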
fn isMaxInt(uint: anytype) bool {
    return uint == std.math.maxInt(@TypeOf(uint));
}

const FileExtents = struct {
    uncompressed_size: u64,
    compressed_size: u64,
    local_file_header_offset: u64,
};

fn readZip64FileExtents(header: CentralDirectoryFileHeader, extents: *FileExtents, data: []u8) !void {
    var data_offset: usize = 0;
    if (isMaxInt(header.uncompressed_size)) {
        if (data_offset + 8 > data.len)
            return error.ZipBadCd64Size;
        extents.uncompressed_size = std.mem.readInt(u64, data[data_offset..][0..8], .little);
        data_offset += 8;
    }
    if (isMaxInt(header.compressed_size)) {
        if (data_offset + 8 > data.len)
            return error.ZipBadCd64Size;
        extents.compressed_size = std.mem.readInt(u64, data[data_offset..][0..8], .little);
        data_offset += 8;
    }
    if (isMaxInt(header.local_file_header_offset)) {
        if (data_offset + 8 > data.len)
            return error.ZipBadCd64Size;
        extents.local_file_header_offset = std.mem.readInt(u64, data[data_offset..][0..8], .little);
        data_offset += 8;
    }
    if (isMaxInt(header.disk_number)) {
        if (data_offset + 4 > data.len)
            return error.ZipInvalid;
        const disk_number = std.mem.readInt(u32, data[data_offset..][0..4], .little);
        if (disk_number != 0)
            return error.ZipMultiDiskUnsupported;
        data_offset += 4;
    }
    if (data_offset > data.len)
        return error.ZipBadCd64Size;
}

pub fn Iterator(comptime SeekableStream: type) type {
    return struct {
        stream: SeekableStream,

        cd_record_count: u64,
        cd_zip_offset: u64,
        cd_size: u64,

        cd_record_index: u64 = 0,
        cd_record_offset: u64 = 0,

        const Self = @This();

        pub fn init(stream: SeekableStream) !Self {
            const stream_len = try stream.getEndPos();

            const end_record = try findEndRecord(stream, stream_len);

            if (!isMaxInt(end_record.record_count_disk) and end_record.record_count_disk > end_record.record_count_total)
                return error.ZipDiskRecordCountTooLarge;

            if (end_record.disk_number != 0 or end_record.central_directory_disk_number != 0)
                return error.ZipMultiDiskUnsupported;

            {
                const counts_valid = !isMaxInt(end_record.record_count_disk) and !isMaxInt(end_record.record_count_total);
                if (counts_valid and end_record.record_count_disk != end_record.record_count_total)
                    return error.ZipMultiDiskUnsupported;
            }

            var result = Self{
                .stream = stream,
                .cd_record_count = end_record.record_count_total,
                .cd_zip_offset = end_record.central_directory_offset,
                .cd_size = end_record.central_directory_size,
            };
            if (!end_record.need_zip64()) return result;

            const locator_end_offset: u64 = @as(u64, end_record.comment_len) + @sizeOf(EndRecord) + @sizeOf(EndLocator64);
            if (locator_end_offset > stream_len)
                return error.ZipTruncated;
            try stream.seekTo(stream_len - locator_end_offset);
            const locator = try readStructEndian(stream.context.reader(), EndLocator64, .little);
            if (!std.mem.eql(u8, &locator.signature, &end_locator64_sig))
                return error.ZipBadLocatorSig;
            if (locator.zip64_disk_count != 0)
                return error.ZipUnsupportedZip64DiskCount;
            if (locator.total_disk_count != 1)
                return error.ZipMultiDiskUnsupported;

            try stream.seekTo(locator.record_file_offset);
            const record64 = try readStructEndian(stream.context.reader(), EndRecord64, .little);

            if (!std.mem.eql(u8, &record64.signature, &end_record64_sig))
                return error.ZipBadEndRecord64Sig;
            if (record64.end_record_size < @sizeOf(EndRecord64) - 12)
                return error.ZipEndRecord64SizeTooSmall;
            if (record64.end_record_size > @sizeOf(EndRecord64) - 12)
                return error.ZipEndRecord64UnhandledExtraData;
            if (record64.version_needed_to_extract > 45)
                return error.ZipUnsupportedVersion;

            {
                const is_multidisk = record64.disk_number != 0 or
                    record64.central_directory_disk_number != 0 or
                    record64.record_count_disk != record64.record_count_total;
                if (is_multidisk)
                    return error.ZipMultiDiskUnsupported;
            }

            if (isMaxInt(end_record.record_count_total)) {
                result.cd_record_count = record64.record_count_total;
            } else if (end_record.record_count_total != record64.record_count_total)
                return error.Zip64RecordCountTotalMismatch;

            if (isMaxInt(end_record.central_directory_offset)) {
                result.cd_zip_offset = record64.central_directory_offset;
            } else if (end_record.central_directory_offset != record64.central_directory_offset)
                return error.Zip64CentralDirectoryOffsetMismatch;

            if (isMaxInt(end_record.central_directory_size)) {
                result.cd_size = record64.central_directory_size;
            } else if (end_record.central_directory_size != record64.central_directory_size)
                return error.Zip64CentralDirectorySizeMismatch;

            return result;
        }

        pub fn next(self: *Self) !?Entry {
            if (self.cd_record_index == self.cd_record_count) {
                if (self.cd_record_offset != self.cd_size)
                    return if (self.cd_size > self.cd_record_offset)
                        error.ZipCdOversized
                    else
                        error.ZipCdUndersized;

                return null;
            }

            const header_zip_offset = self.cd_zip_offset + self.cd_record_offset;
            try self.stream.seekTo(header_zip_offset);
            const header = try readStructEndian(self.stream.context.reader(), CentralDirectoryFileHeader, .little);
            if (!std.mem.eql(u8, &header.signature, &central_file_header_sig))
                return error.ZipBadCdOffset;

            self.cd_record_index += 1;
            self.cd_record_offset += @sizeOf(CentralDirectoryFileHeader) + header.filename_len + header.extra_len + header.comment_len;
            // Note: checking the version_needed_to_extract doesn't seem to be helpful, e.g. the zip file
            // at https://github.com/ninja-build/ninja/releases/download/v1.12.0/ninja-linux.zip
            // has an undocumented version 788 but extracts just fine.
            if (header.flags.encrypted)
                return error.ZipEncryptionUnsupported;
            // TODO: check/verify more flags
            if (header.disk_number != 0)
                return error.ZipMultiDiskUnsupported;

            var extents: FileExtents = .{
                .uncompressed_size = header.uncompressed_size,
                .compressed_size = header.compressed_size,
                .local_file_header_offset = header.local_file_header_offset,
            };
            if (header.extra_len > 0) {
                var extra_buf: [std.math.maxInt(u16)]u8 = undefined;
                const extra = extra_buf[0..header.extra_len];

                {
                    try self.stream.seekTo(header_zip_offset + @sizeOf(CentralDirectoryFileHeader) + header.filename_len);
                    const len = try self.stream.context.reader().readAll(extra);
                    if (len != extra.len)
                        return error.ZipTruncated;
                }

                var extra_offset: usize = 0;
                while (extra_offset + 4 <= extra.len) {
                    const header_id = std.mem.readInt(u16, extra[extra_offset..][0..2], .little);
                    const data_size = std.mem.readInt(u16, extra[extra_offset..][2..4], .little);
                    const end = extra_offset + 4 + data_size;
                    if (end > extra.len)
                        return error.ZipBadExtraFieldSize;
                    const data = extra[extra_offset + 4 .. end];
                    switch (@as(ExtraHeader, @enumFromInt(header_id))) {
                        .zip64_info => try readZip64FileExtents(header, &extents, data),
                        else => {}, // ignore
                    }
                    extra_offset = end;
                }
            }

            return .{
                .version_needed_to_extract = header.version_needed_to_extract,
                .flags = header.flags,
                .compression_method = header.compression_method,
                .last_modification_time = header.last_modification_time,
                .last_modification_date = header.last_modification_date,
                .header_zip_offset = header_zip_offset,
                .crc32 = header.crc32,
                .filename_len = header.filename_len,
                .compressed_size = extents.compressed_size,
                .uncompressed_size = extents.uncompressed_size,
                .file_offset = extents.local_file_header_offset,
            };
        }

        pub const Entry = struct {
            version_needed_to_extract: u16,
            flags: GeneralPurposeFlags,
            compression_method: CompressionMethod,
            last_modification_time: u16,
            last_modification_date: u16,
            header_zip_offset: u64,
            crc32: u32,
            filename_len: u32,
            compressed_size: u64,
            uncompressed_size: u64,
            file_offset: u64,

            pub fn extract(
                self: Entry,
                stream: SeekableStream,
                options: ExtractOptions,
                filename_buf: []u8,
                dest: std.fs.Dir,
            ) !u32 {
                if (filename_buf.len < self.filename_len)
                    return error.ZipInsufficientBuffer;
                const filename = filename_buf[0..self.filename_len];

                try stream.seekTo(self.header_zip_offset + @sizeOf(CentralDirectoryFileHeader));

                {
                    const len = try stream.context.reader().readAll(filename);
                    if (len != filename.len)
                        return error.ZipBadFileOffset;
                }

                const local_data_header_offset: u64 = local_data_header_offset: {
                    const local_header = blk: {
                        try stream.seekTo(self.file_offset);
                        break :blk try readStructEndian(stream.context.reader(), LocalFileHeader, .little);
                    };
                    if (!std.mem.eql(u8, &local_header.signature, &local_file_header_sig))
                        return error.ZipBadFileOffset;
                    if (local_header.version_needed_to_extract != self.version_needed_to_extract)
                        return error.ZipMismatchVersionNeeded;
                    if (local_header.last_modification_time != self.last_modification_time)
                        return error.ZipMismatchModTime;
                    if (local_header.last_modification_date != self.last_modification_date)
                        return error.ZipMismatchModDate;
                    if (@as(u16, @bitCast(local_header.flags)) != @as(u16, @bitCast(self.flags)))
                        return error.ZipMismatchFlags;
                    if (local_header.crc32 != 0 and local_header.crc32 != self.crc32)
                        return error.ZipMismatchCrc32;
                    if (local_header.compressed_size != 0 and
                        local_header.compressed_size != self.compressed_size)
                        return error.ZipMismatchCompLen;
                    if (local_header.uncompressed_size != 0 and
                        local_header.uncompressed_size != self.uncompressed_size)
                        return error.ZipMismatchUncompLen;
                    if (local_header.filename_len != self.filename_len)
                        return error.ZipMismatchFilenameLen;

                    break :local_data_header_offset @as(u64, local_header.filename_len) +
                        @as(u64, local_header.extra_len);
                };

                if (isBadFilename(filename))
                    return error.ZipBadFilename;

                if (options.allow_backslashes) {
                    std.mem.replaceScalar(u8, filename, '\\', '/');
                } else {
                    if (std.mem.indexOfScalar(u8, filename, '\\')) |_|
                        return error.ZipFilenameHasBackslash;
                }

                // All entries that end in '/' are directories
                if (filename[filename.len - 1] == '/') {
                    if (self.uncompressed_size != 0)
                        return error.ZipBadDirectorySize;
                    try dest.makePath(filename[0 .. filename.len - 1]);
                    return std.hash.Crc32.hash(&.{});
                }

                const out_file = blk: {
                    if (std.fs.path.dirname(filename)) |dirname| {
                        var parent_dir = try dest.makeOpenPath(dirname, .{});
                        defer parent_dir.close();

                        const basename = std.fs.path.basename(filename);
                        break :blk try parent_dir.createFile(basename, .{ .exclusive = true });
                    }
                    break :blk try dest.createFile(filename, .{ .exclusive = true });
                };
                defer out_file.close();

                const local_data_file_offset: u64 =
                    @as(u64, self.file_offset) +
                    @as(u64, @sizeOf(LocalFileHeader)) +
                    local_data_header_offset;
                try stream.seekTo(local_data_file_offset);
                var limited_reader = std.io.limitedReader(stream.context.reader(), self.compressed_size);
                const crc = try decompress(
                    self.compression_method,
                    self.uncompressed_size,
                    limited_reader.reader(),
                    out_file.writer(),
                );
                if (limited_reader.bytes_left != 0)
                    return error.ZipDecompressTruncated;
                return crc;
            }
        };
    };
}
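
// Usage sketch: a minimal sketch of walking the central directory with `Iterator`
// without extracting anything, assuming the in-memory archive produced by the
// `testutil.makeZip` helper (as used by the tests below).
test "Iterator usage sketch" {
    var zip_buf: [4096]u8 = undefined;
    var fbs = try testutil.makeZip(&zip_buf, &[_]File{
        .{ .name = "a.txt", .content = "A", .compression = .store },
        .{ .name = "b.txt", .content = "B", .compression = .store },
    }, .{});
    const stream = fbs.seekableStream();
    var iter = try Iterator(@TypeOf(stream)).init(stream);
    var count: usize = 0;
    while (try iter.next()) |entry| {
        try testing.expect(entry.filename_len > 0);
        count += 1;
    }
    try testing.expectEqual(@as(usize, 2), count);
}
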
// returns true if `filename` starts with `root` followed by a forward slash
fn filenameInRoot(filename: []const u8, root: []const u8) bool {
    return (filename.len >= root.len + 1) and
        (filename[root.len] == '/') and
        std.mem.eql(u8, filename[0..root.len], root);
}

pub const Diagnostics = struct {
    allocator: std.mem.Allocator,

    /// The common root directory for all extracted files if there is one.
    root_dir: []const u8 = "",

    saw_first_file: bool = false,

    pub fn deinit(self: *Diagnostics) void {
        self.allocator.free(self.root_dir);
        self.* = undefined;
    }
    // This function assumes `name` is a filename from a zip file that has already been
    // verified not to start with a slash, that backslashes have been normalized to
    // forward slashes, and that directory names always end in a slash.
    pub fn nextFilename(self: *Diagnostics, name: []const u8) error{OutOfMemory}!void {
        if (!self.saw_first_file) {
            self.saw_first_file = true;
            std.debug.assert(self.root_dir.len == 0);
            const root_len = std.mem.indexOfScalar(u8, name, '/') orelse return;
            std.debug.assert(root_len > 0);
            self.root_dir = try self.allocator.dupe(u8, name[0..root_len]);
        } else if (self.root_dir.len > 0) {
            if (!filenameInRoot(name, self.root_dir)) {
                self.allocator.free(self.root_dir);
                self.root_dir = "";
            }
        }
    }
};
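
// Usage sketch: a minimal sketch of how `Diagnostics.root_dir` tracks the common
// root directory as filenames are reported, and how it resets to "" once a file
// outside that root shows up.
test "Diagnostics root_dir usage sketch" {
    var d: Diagnostics = .{ .allocator = testing.allocator };
    defer d.deinit();
    try d.nextFilename("pkg/a.txt");
    try testing.expectEqualStrings("pkg", d.root_dir);
    try d.nextFilename("pkg/sub/b.txt");
    try testing.expectEqualStrings("pkg", d.root_dir);
    try d.nextFilename("other/c.txt");
    try testing.expectEqualStrings("", d.root_dir);
}
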
pub const ExtractOptions = struct {
    /// Allow filenames within the zip to use backslashes. Backslashes are normalized
    /// to forward slashes before being passed on to platform APIs.
    allow_backslashes: bool = false,

    diagnostics: ?*Diagnostics = null,
};
/// Extract the zipped files inside `seekable_stream` to the given `dest` directory.
/// Note that `seekable_stream` must be an instance of `std.io.SeekableStream` and
/// its context must also have a `.reader()` method that returns an instance of
/// `std.io.Reader`.
pub fn extract(dest: std.fs.Dir, seekable_stream: anytype, options: ExtractOptions) !void {
    const SeekableStream = @TypeOf(seekable_stream);
    var iter = try Iterator(SeekableStream).init(seekable_stream);

    var filename_buf: [std.fs.MAX_PATH_BYTES]u8 = undefined;
    while (try iter.next()) |entry| {
        const crc32 = try entry.extract(seekable_stream, options, &filename_buf, dest);
        if (crc32 != entry.crc32)
            return error.ZipCrcMismatch;
        if (options.diagnostics) |d| {
            try d.nextFilename(filename_buf[0..entry.filename_len]);
        }
    }
}
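
// Usage sketch: a minimal sketch of calling `extract` with Diagnostics to learn
// the archive's common root directory, assuming the in-memory archive produced by
// the `testutil.makeZip` helper used by the tests below.
test "extract with diagnostics usage sketch" {
    var zip_buf: [4096]u8 = undefined;
    var fbs = try testutil.makeZip(&zip_buf, &[_]File{
        .{ .name = "root/hello.txt", .content = "hi\n", .compression = .store },
    }, .{});
    var tmp = testing.tmpDir(.{ .no_follow = true });
    defer tmp.cleanup();
    var diagnostics: Diagnostics = .{ .allocator = testing.allocator };
    defer diagnostics.deinit();
    try extract(tmp.dir, fbs.seekableStream(), .{ .diagnostics = &diagnostics });
    try testing.expectEqualStrings("root", diagnostics.root_dir);
}
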
fn testZip(options: ExtractOptions, comptime files: []const File, write_opt: testutil.WriteZipOptions) !void {
    var store: [files.len]FileStore = undefined;
    try testZipWithStore(options, files, write_opt, &store);
}

fn testZipWithStore(
    options: ExtractOptions,
    test_files: []const File,
    write_opt: testutil.WriteZipOptions,
    store: []FileStore,
) !void {
    var zip_buf: [4096]u8 = undefined;
    var fbs = try testutil.makeZipWithStore(&zip_buf, test_files, write_opt, store);

    var tmp = testing.tmpDir(.{ .no_follow = true });
    defer tmp.cleanup();
    try extract(tmp.dir, fbs.seekableStream(), options);
    try testutil.expectFiles(test_files, tmp.dir, .{});
}

fn testZipError(expected_error: anyerror, file: File, options: ExtractOptions) !void {
    var zip_buf: [4096]u8 = undefined;
    var store: [1]FileStore = undefined;
    var fbs = try testutil.makeZipWithStore(&zip_buf, &[_]File{file}, .{}, &store);
    var tmp = testing.tmpDir(.{ .no_follow = true });
    defer tmp.cleanup();
    try testing.expectError(expected_error, extract(tmp.dir, fbs.seekableStream(), options));
}

test "zip one file" {
    try testZip(.{}, &[_]File{
        .{ .name = "onefile.txt", .content = "Just a single file\n", .compression = .store },
    }, .{});
}

test "zip multiple files" {
    try testZip(.{ .allow_backslashes = true }, &[_]File{
        .{ .name = "foo", .content = "a foo file\n", .compression = .store },
        .{ .name = "subdir/bar", .content = "bar is this right?\nanother newline\n", .compression = .store },
        .{ .name = "subdir\\whoa", .content = "you can do backslashes", .compression = .store },
        .{ .name = "subdir/another/baz", .content = "bazzy mc bazzerson", .compression = .store },
    }, .{});
}

test "zip deflated" {
    try testZip(.{}, &[_]File{
        .{ .name = "deflateme", .content = "This is a deflated file.\nIt should be smaller in the Zip file1\n", .compression = .deflate },
        // TODO: re-enable this if/when we add support for deflate64
        //.{ .name = "deflateme64", .content = "The 64k version of deflate!\n", .compression = .deflate64 },
        .{ .name = "raw", .content = "Not all files need to be deflated in the same Zip.\n", .compression = .store },
    }, .{});
}

test "zip verify filenames" {
    // no empty filenames
    try testZipError(error.ZipBadFilename, .{ .name = "", .content = "", .compression = .store }, .{});
    // no absolute paths
    try testZipError(error.ZipBadFilename, .{ .name = "/", .content = "", .compression = .store }, .{});
    try testZipError(error.ZipBadFilename, .{ .name = "/foo", .content = "", .compression = .store }, .{});
    try testZipError(error.ZipBadFilename, .{ .name = "/foo/bar", .content = "", .compression = .store }, .{});
    // no '..' components
    try testZipError(error.ZipBadFilename, .{ .name = "..", .content = "", .compression = .store }, .{});
    try testZipError(error.ZipBadFilename, .{ .name = "foo/..", .content = "", .compression = .store }, .{});
    try testZipError(error.ZipBadFilename, .{ .name = "foo/bar/..", .content = "", .compression = .store }, .{});
    try testZipError(error.ZipBadFilename, .{ .name = "foo/bar/../", .content = "", .compression = .store }, .{});
    // no backslashes
    try testZipError(error.ZipFilenameHasBackslash, .{ .name = "foo\\bar", .content = "", .compression = .store }, .{});
}
test "zip64" {
    const test_files = [_]File{
        .{ .name = "fram", .content = "fram foo fro fraba", .compression = .store },
        .{ .name = "subdir/barro", .content = "aljdk;jal;jfd;lajkf", .compression = .store },
    };

    try testZip(.{}, &test_files, .{
        .end = .{
            .zip64 = .{},
            .record_count_disk = std.math.maxInt(u16), // trigger zip64
        },
    });
    try testZip(.{}, &test_files, .{
        .end = .{
            .zip64 = .{},
            .record_count_total = std.math.maxInt(u16), // trigger zip64
        },
    });
    try testZip(.{}, &test_files, .{
        .end = .{
            .zip64 = .{},
            .record_count_disk = std.math.maxInt(u16), // trigger zip64
            .record_count_total = std.math.maxInt(u16), // trigger zip64
        },
    });
    try testZip(.{}, &test_files, .{
        .end = .{
            .zip64 = .{},
            .central_directory_size = std.math.maxInt(u32), // trigger zip64
        },
    });
    try testZip(.{}, &test_files, .{
        .end = .{
            .zip64 = .{},
            .central_directory_offset = std.math.maxInt(u32), // trigger zip64
        },
    });
}

test "bad zip files" {
    var tmp = testing.tmpDir(.{ .no_follow = true });
    defer tmp.cleanup();
    var zip_buf: [4096]u8 = undefined;

    const file_a = [_]File{.{ .name = "a", .content = "", .compression = .store }};

    {
        var fbs = try testutil.makeZip(&zip_buf, &.{}, .{ .end = .{ .sig = [_]u8{ 1, 2, 3, 4 } } });
        try testing.expectError(error.ZipNoEndRecord, extract(tmp.dir, fbs.seekableStream(), .{}));
    }
    {
        var fbs = try testutil.makeZip(&zip_buf, &.{}, .{ .end = .{ .comment_len = 1 } });
        try testing.expectError(error.ZipNoEndRecord, extract(tmp.dir, fbs.seekableStream(), .{}));
    }
    {
        var fbs = try testutil.makeZip(&zip_buf, &.{}, .{ .end = .{ .comment = "a", .comment_len = 0 } });
        try testing.expectError(error.ZipNoEndRecord, extract(tmp.dir, fbs.seekableStream(), .{}));
    }
    {
        var fbs = try testutil.makeZip(&zip_buf, &.{}, .{ .end = .{ .disk_number = 1 } });
        try testing.expectError(error.ZipMultiDiskUnsupported, extract(tmp.dir, fbs.seekableStream(), .{}));
    }
    {
        var fbs = try testutil.makeZip(&zip_buf, &.{}, .{ .end = .{ .central_directory_disk_number = 1 } });
        try testing.expectError(error.ZipMultiDiskUnsupported, extract(tmp.dir, fbs.seekableStream(), .{}));
    }
    {
        var fbs = try testutil.makeZip(&zip_buf, &.{}, .{ .end = .{ .record_count_disk = 1 } });
        try testing.expectError(error.ZipDiskRecordCountTooLarge, extract(tmp.dir, fbs.seekableStream(), .{}));
    }
    {
        var fbs = try testutil.makeZip(&zip_buf, &.{}, .{ .end = .{ .central_directory_size = 1 } });
        try testing.expectError(error.ZipCdOversized, extract(tmp.dir, fbs.seekableStream(), .{}));
    }
    {
        var fbs = try testutil.makeZip(&zip_buf, &file_a, .{ .end = .{ .central_directory_size = 0 } });
        try testing.expectError(error.ZipCdUndersized, extract(tmp.dir, fbs.seekableStream(), .{}));
    }
    {
        var fbs = try testutil.makeZip(&zip_buf, &file_a, .{ .end = .{ .central_directory_offset = 0 } });
        try testing.expectError(error.ZipBadCdOffset, extract(tmp.dir, fbs.seekableStream(), .{}));
    }
    {
        var fbs = try testutil.makeZip(&zip_buf, &file_a, .{
            .end = .{
                .zip64 = .{ .locator_sig = [_]u8{ 1, 2, 3, 4 } },
                .central_directory_size = std.math.maxInt(u32), // trigger 64
            },
        });
        try testing.expectError(error.ZipBadLocatorSig, extract(tmp.dir, fbs.seekableStream(), .{}));
    }
}
const native_endian = @import("builtin").target.cpu.arch.endian();

/// Reads a `T` from `reader` (via `readStruct`) and byte-swaps its fields when
/// the host endianness differs from `endian`.
pub fn readStructEndian(reader: anytype, comptime T: type, endian: std.builtin.Endian) anyerror!T {
    var res = try reader.readStruct(T);
    if (native_endian != endian) {
        byteSwapAllFields(T, &res);
    }
    return res;
}

/// Byte-swaps every field of a struct (or every element of an array) in place,
/// recursing into nested structs and arrays and handling packed structs and enums.
pub fn byteSwapAllFields(comptime S: type, ptr: *S) void {
    switch (@typeInfo(S)) {
        .Struct => {
            inline for (std.meta.fields(S)) |f| {
                switch (@typeInfo(f.type)) {
                    .Struct => |struct_info| if (struct_info.backing_integer) |Int| {
                        @field(ptr, f.name) = @bitCast(@byteSwap(@as(Int, @bitCast(@field(ptr, f.name)))));
                    } else {
                        byteSwapAllFields(f.type, &@field(ptr, f.name));
                    },
                    .Array => byteSwapAllFields(f.type, &@field(ptr, f.name)),
                    .Enum => {
                        @field(ptr, f.name) = @enumFromInt(@byteSwap(@intFromEnum(@field(ptr, f.name))));
                    },
                    else => {
                        @field(ptr, f.name) = @byteSwap(@field(ptr, f.name));
                    },
                }
            }
        },
        .Array => {
            for (ptr) |*item| {
                switch (@typeInfo(@TypeOf(item.*))) {
                    .Struct, .Array => byteSwapAllFields(@TypeOf(item.*), item),
                    .Enum => {
                        item.* = @enumFromInt(@byteSwap(@intFromEnum(item.*)));
                    },
                    else => {
                        item.* = @byteSwap(item.*);
                    },
                }
            }
        },
        else => @compileError("byteSwapAllFields expects a struct or array as the first argument"),
    }
}