/// The .ZIP File Format Specification is found here:
/// https://pkwaredownloads.blob.core.windows.net/pem/APPNOTE.txt
- const std = @import("std");
- const testing = std.testing;
- pub const File = @import("zip/test.zig").File;
- pub const FileCache = @import("zip/test.zig").FileCache;
- pub const writeFile = @import("zip/test.zig").writeFile;
/// Compression method identifier as stored in the 16-bit `compression_method`
/// field of the local and central directory headers.
/// The enum is non-exhaustive so that method numbers not listed here can still
/// be represented after being read from an archive.
pub const CompressionMethod = enum(u16) {
    /// Method number 0.
    store = 0,
    /// Method number 8.
    deflate = 8,
    /// Method number 9.
    deflate64 = 9,
    _,
};
/// Four-byte signature expected at the start of a central directory file header.
pub const central_file_header_sig: [4]u8 = .{ 'P', 'K', 1, 2 };
/// Four-byte signature expected at the start of a local file header.
pub const local_file_header_sig: [4]u8 = .{ 'P', 'K', 3, 4 };
/// Four-byte signature expected at the start of the end-of-central-directory record.
pub const end_of_central_directory_sig: [4]u8 = .{ 'P', 'K', 5, 6 };
/// The fixed 30-byte part of a local file header, converted to and from its
/// little-endian byte layout. The variable-length filename and extra field
/// that follow it in the archive are not part of this struct.
pub const LocalFileHeader = struct {
    signature: [4]u8,
    minimum_version: u16,
    flags: u16,
    compression_method: CompressionMethod,
    last_modification_time: u16,
    last_modification_date: u16,
    crc32: u32,
    compressed_size: u32,
    uncompressed_size: u32,
    filename_len: u16,
    extra_len: u16,

    const readInt = std.mem.readInt;
    const writeInt = std.mem.writeInt;

    /// Decodes the 30 header bytes into a `LocalFileHeader`.
    /// The signature is copied as-is; it is not validated here.
    pub fn deserialize(bytes: [30]u8) LocalFileHeader {
        const raw_method = readInt(u16, bytes[8..10], .little);
        return LocalFileHeader{
            .signature = bytes[0..4].*,
            .minimum_version = readInt(u16, bytes[4..6], .little),
            .flags = readInt(u16, bytes[6..8], .little),
            .compression_method = @enumFromInt(raw_method),
            .last_modification_time = readInt(u16, bytes[10..12], .little),
            .last_modification_date = readInt(u16, bytes[12..14], .little),
            .crc32 = readInt(u32, bytes[14..18], .little),
            .compressed_size = readInt(u32, bytes[18..22], .little),
            .uncompressed_size = readInt(u32, bytes[22..26], .little),
            .filename_len = readInt(u16, bytes[26..28], .little),
            .extra_len = readInt(u16, bytes[28..30], .little),
        };
    }

    /// Encodes the header into its 30-byte little-endian form, using the same
    /// byte offsets as `deserialize`.
    pub fn serialize(self: LocalFileHeader) [30]u8 {
        var out: [30]u8 = undefined;
        out[0..4].* = self.signature;
        writeInt(u16, out[4..6], self.minimum_version, .little);
        writeInt(u16, out[6..8], self.flags, .little);
        writeInt(u16, out[8..10], @intFromEnum(self.compression_method), .little);
        writeInt(u16, out[10..12], self.last_modification_time, .little);
        writeInt(u16, out[12..14], self.last_modification_date, .little);
        writeInt(u32, out[14..18], self.crc32, .little);
        writeInt(u32, out[18..22], self.compressed_size, .little);
        writeInt(u32, out[22..26], self.uncompressed_size, .little);
        writeInt(u16, out[26..28], self.filename_len, .little);
        writeInt(u16, out[28..30], self.extra_len, .little);
        return out;
    }
};
/// The fixed 46-byte part of a central directory file header, converted to
/// and from its little-endian byte layout. The filename, extra field and
/// comment that follow it in the archive are not part of this struct.
pub const CentralDirectoryFileHeader = struct {
    signature: [4]u8,
    version: u16,
    minimum_version: u16,
    flags: u16,
    compression_method: CompressionMethod,
    last_modification_time: u16,
    last_modification_date: u16,
    crc32: u32,
    compressed_size: u32,
    uncompressed_size: u32,
    filename_len: u16,
    extra_len: u16,
    comment_len: u16,
    disk_number: u16,
    internal_file_attributes: u16,
    external_file_attributes: u32,
    local_file_header_offset: u32,

    const readInt = std.mem.readInt;
    const writeInt = std.mem.writeInt;

    /// Decodes the 46 header bytes into a `CentralDirectoryFileHeader`.
    /// The signature is copied as-is; it is not validated here.
    pub fn deserialize(bytes: [46]u8) CentralDirectoryFileHeader {
        const raw_method = readInt(u16, bytes[10..12], .little);
        return CentralDirectoryFileHeader{
            .signature = bytes[0..4].*,
            .version = readInt(u16, bytes[4..6], .little),
            .minimum_version = readInt(u16, bytes[6..8], .little),
            .flags = readInt(u16, bytes[8..10], .little),
            .compression_method = @enumFromInt(raw_method),
            .last_modification_time = readInt(u16, bytes[12..14], .little),
            .last_modification_date = readInt(u16, bytes[14..16], .little),
            .crc32 = readInt(u32, bytes[16..20], .little),
            .compressed_size = readInt(u32, bytes[20..24], .little),
            .uncompressed_size = readInt(u32, bytes[24..28], .little),
            .filename_len = readInt(u16, bytes[28..30], .little),
            .extra_len = readInt(u16, bytes[30..32], .little),
            .comment_len = readInt(u16, bytes[32..34], .little),
            .disk_number = readInt(u16, bytes[34..36], .little),
            .internal_file_attributes = readInt(u16, bytes[36..38], .little),
            .external_file_attributes = readInt(u32, bytes[38..42], .little),
            .local_file_header_offset = readInt(u32, bytes[42..46], .little),
        };
    }

    /// Encodes the header into its 46-byte little-endian form, using the same
    /// byte offsets as `deserialize`.
    pub fn serialize(self: CentralDirectoryFileHeader) [46]u8 {
        var out: [46]u8 = undefined;
        out[0..4].* = self.signature;
        writeInt(u16, out[4..6], self.version, .little);
        writeInt(u16, out[6..8], self.minimum_version, .little);
        writeInt(u16, out[8..10], self.flags, .little);
        writeInt(u16, out[10..12], @intFromEnum(self.compression_method), .little);
        writeInt(u16, out[12..14], self.last_modification_time, .little);
        writeInt(u16, out[14..16], self.last_modification_date, .little);
        writeInt(u32, out[16..20], self.crc32, .little);
        writeInt(u32, out[20..24], self.compressed_size, .little);
        writeInt(u32, out[24..28], self.uncompressed_size, .little);
        writeInt(u16, out[28..30], self.filename_len, .little);
        writeInt(u16, out[30..32], self.extra_len, .little);
        writeInt(u16, out[32..34], self.comment_len, .little);
        writeInt(u16, out[34..36], self.disk_number, .little);
        writeInt(u16, out[36..38], self.internal_file_attributes, .little);
        writeInt(u32, out[38..42], self.external_file_attributes, .little);
        writeInt(u32, out[42..46], self.local_file_header_offset, .little);
        return out;
    }
};
/// The fixed 22-byte end-of-central-directory record, minus its signature.
/// The signature occupies bytes 0..4 of the encoded form: `read` skips it and
/// `serialize` always writes `end_of_central_directory_sig` there.
pub const EndOfCentralDirectoryRecord = struct {
    disk_number: u16,
    central_directory_disk_number: u16,
    record_count_disk: u16,
    record_count_total: u16,
    central_directory_size: u32,
    central_directory_offset: u32,
    comment_len: u16,

    /// Decodes the little-endian fields from bytes 4..22 of `bytes`.
    /// Bytes 0..4 (the signature) are neither inspected nor validated.
    pub fn read(bytes: [22]u8) EndOfCentralDirectoryRecord {
        var record: EndOfCentralDirectoryRecord = undefined;
        record.disk_number = std.mem.readInt(u16, bytes[4..6], .little);
        record.central_directory_disk_number = std.mem.readInt(u16, bytes[6..8], .little);
        record.record_count_disk = std.mem.readInt(u16, bytes[8..10], .little);
        record.record_count_total = std.mem.readInt(u16, bytes[10..12], .little);
        record.central_directory_size = std.mem.readInt(u32, bytes[12..16], .little);
        record.central_directory_offset = std.mem.readInt(u32, bytes[16..20], .little);
        record.comment_len = std.mem.readInt(u16, bytes[20..22], .little);
        return record;
    }

    /// Encodes the record into 22 bytes: the EOCD signature followed by the
    /// fields in little-endian order.
    pub fn serialize(self: EndOfCentralDirectoryRecord) [22]u8 {
        var out: [22]u8 = undefined;
        out[0..4].* = end_of_central_directory_sig;
        std.mem.writeInt(u16, out[4..6], self.disk_number, .little);
        std.mem.writeInt(u16, out[6..8], self.central_directory_disk_number, .little);
        std.mem.writeInt(u16, out[8..10], self.record_count_disk, .little);
        std.mem.writeInt(u16, out[10..12], self.record_count_total, .little);
        std.mem.writeInt(u32, out[12..16], self.central_directory_size, .little);
        std.mem.writeInt(u32, out[16..20], self.central_directory_offset, .little);
        std.mem.writeInt(u16, out[20..22], self.comment_len, .little);
        return out;
    }
};
/// Locates the end-of-central-directory (EOCD) record in `file` and returns
/// its fixed 22 bytes.
///
/// The EOCD record may be followed by a variable-length comment, which makes
/// ZIP parsing ambiguous in general: a comment could itself contain the bytes
/// of another valid EOCD record. This function scans backwards from the end of
/// the file and accepts the first position where the EOCD signature is present
/// and the record's comment-length field equals the number of bytes that
/// follow the record.
///
/// Errors:
/// - `error.ZipTruncated` if the file is shorter than 22 bytes or a read
///   comes back short.
/// - `error.ZipMissingEocdr` if no matching record is found within the
///   maximum comment length (65535) or before the start of the file.
pub fn findEocdr(file: std.fs.File) ![22]u8 {
    // TODO: make this more efficient
    //       we need a backward_buffered_reader
    const record_len = 22;
    const file_size = try file.getEndPos();
    if (file_size < record_len)
        return error.ZipTruncated;

    // Start with the last 22 bytes of the file, i.e. assume an empty comment.
    var window: [record_len]u8 = undefined;
    try file.seekFromEnd(-record_len);
    if ((try file.readAll(&window)) != record_len)
        return error.ZipTruncated;

    // Number of bytes between the end of `window` and the end of the file.
    var trailing: u16 = 0;
    while (true) {
        const sig_matches = std.mem.eql(u8, window[0..4], &end_of_central_directory_sig);
        const declared_comment_len = std.mem.readInt(u16, window[20..22], .little);
        if (sig_matches and declared_comment_len == trailing)
            return window;

        if (trailing == std.math.maxInt(u16))
            return error.ZipMissingEocdr;
        trailing += 1;
        if (@as(u64, record_len) + @as(u64, trailing) > file_size)
            return error.ZipMissingEocdr;

        // Slide the window one byte towards the start of the file: shift the
        // existing bytes right by one and read the new leading byte.
        std.mem.copyBackwards(u8, window[1..], window[0 .. window.len - 1]);
        try file.seekFromEnd(-record_len - @as(i64, trailing));
        if ((try file.readAll(window[0..1])) != 1)
            return error.ZipTruncated;
    }
}
/// Returns a reader type that wraps an `UnderlyingReader` and hands out at
/// most `remaining` bytes in total. Once the budget is used up, `read`
/// returns 0 without touching the underlying reader.
fn LimitedReader(comptime UnderlyingReader: type) type {
    return struct {
        underlying_reader: UnderlyingReader,
        /// Bytes that may still be read; decremented by every successful read.
        remaining: usize,

        const Self = @This();
        pub const Error = UnderlyingReader.Error;
        pub const Reader = std.io.Reader(*Self, Error, read);

        /// Reads into `buffer`, capped at the remaining budget. Returns the
        /// number of bytes the underlying reader produced.
        fn read(self: *Self, buffer: []u8) Error!usize {
            const capped = buffer[0..@min(buffer.len, self.remaining)];
            if (capped.len == 0) return 0;
            const n = try self.underlying_reader.read(capped);
            self.remaining -= n;
            return n;
        }

        /// Returns a `std.io` reader interface backed by this limited reader.
        pub fn reader(self: *Self) Reader {
            return .{ .context = self };
        }
    };
}
/// Wraps `reader` in a `LimitedReader` that yields at most `limit` bytes.
fn limitedReader(reader: anytype, limit: usize) LimitedReader(@TypeOf(reader)) {
    const Limited = LimitedReader(@TypeOf(reader));
    return Limited{
        .underlying_reader = reader,
        .remaining = limit,
    };
}
/// Streams one archive entry from `reader` to `writer`, decoding it according
/// to `method`, and returns the actual CRC-32 of the decompressed bytes. The
/// caller should validate that value against the expected entry crc32.
///
/// `reader` must yield exactly the entry's compressed bytes and then report
/// end of stream (a `read` returning 0).
/// `writer` can be anything with a
/// `writeAll(self: *Self, chunk: []const u8) anyerror!void` method.
///
/// Errors (in addition to those of `reader` and `writer`):
/// - `error.ZipDeflateTruncated` if buffered input is left over after the
///   deflate stream ends.
/// - `error.ZipUncompressSizeMismatch` if the number of bytes produced differs
///   from `uncompressed_size`; this is checked for stored entries as well.
/// - `error.UnsupportedCompressionMethod` for any other method number.
pub fn decompress(
    method: CompressionMethod,
    uncompressed_size: u32,
    reader: anytype,
    writer: anytype,
) !u32 {
    var hash = std.hash.Crc32.init();
    // Counted in u64 so that a stream producing more than 4 GiB cannot
    // overflow the counter; it simply fails the size comparison below.
    var total_uncompressed: u64 = 0;
    switch (method) {
        .store => {
            var buf: [std.mem.page_size]u8 = undefined;
            while (true) {
                const len = try reader.read(&buf);
                if (len == 0) break;
                try writer.writeAll(buf[0..len]);
                hash.update(buf[0..len]);
                total_uncompressed += len;
            }
        },
        // NOTE(review): deflate64 entries are routed through the same flate
        // decompressor as deflate — confirm it really accepts Deflate64 data.
        .deflate, .deflate64 => {
            var br = std.io.bufferedReader(reader);
            var decompressor = std.compress.flate.decompressor(br.reader());
            while (try decompressor.next()) |chunk| {
                try writer.writeAll(chunk);
                hash.update(chunk);
                total_uncompressed += chunk.len;
            }
            // Bytes still sitting in the buffer were not consumed by the
            // decompressor, so the compressed data has trailing content.
            if (br.end != br.start)
                return error.ZipDeflateTruncated;
        },
        _ => return error.UnsupportedCompressionMethod,
    }
    if (total_uncompressed != uncompressed_size)
        return error.ZipUncompressSizeMismatch;
    return hash.final();
}
/// Walks the central directory of a single-disk ZIP archive, yielding one
/// `Entry` per central directory file header.
pub const Iterator = struct {
    file: std.fs.File,
    eocdr: EndOfCentralDirectoryRecord,
    /// Number of central directory headers already returned by `next`.
    next_central_header_index: u16,
    /// Offset of the next header, relative to `eocdr.central_directory_offset`.
    next_central_header_offset: u64,

    /// Locates and decodes the end-of-central-directory record of `file`.
    /// Returns `error.ZipUnsupportedMultiDisk` if the record describes an
    /// archive that spans more than one disk.
    pub fn init(file: std.fs.File) !Iterator {
        const eocdr = blk: {
            const eocdr_bytes = try findEocdr(file);
            break :blk EndOfCentralDirectoryRecord.read(eocdr_bytes);
        };

        // Don't support multi-disk archives.
        if (eocdr.disk_number != 0 or
            eocdr.central_directory_disk_number != 0 or
            eocdr.record_count_disk != eocdr.record_count_total)
        {
            return error.ZipUnsupportedMultiDisk;
        }

        return .{
            .file = file,
            .eocdr = eocdr,
            .next_central_header_offset = 0,
            .next_central_header_index = 0,
        };
    }

    /// Reads the next central directory file header, or returns null once
    /// `eocdr.record_count_total` headers have been returned.
    ///
    /// Errors: `error.ZipTruncated` on a short read, `error.ZipHeader` if the
    /// header signature is wrong, `error.ZipUnsupportedMultiDisk` if the
    /// header refers to a disk other than 0.
    pub fn next(self: *Iterator) !?Entry {
        if (self.next_central_header_index >= self.eocdr.record_count_total) {
            return null;
        }

        const header_file_offset: u64 = @as(u64, self.eocdr.central_directory_offset) + self.next_central_header_offset;
        const header = blk: {
            try self.file.seekTo(header_file_offset);
            var header: [46]u8 = undefined;
            const len = try self.file.readAll(&header);
            if (len != header.len)
                return error.ZipTruncated;
            break :blk CentralDirectoryFileHeader.deserialize(header);
        };
        if (!std.mem.eql(u8, &header.signature, &central_file_header_sig))
            return error.ZipHeader;

        self.next_central_header_index += 1;
        // Widen every term to u64 before adding: the three length fields are
        // u16 values taken from the archive, and summing them as u16 would
        // overflow for large (possibly hostile) values.
        self.next_central_header_offset += @as(u64, 46) +
            @as(u64, header.filename_len) +
            @as(u64, header.extra_len) +
            @as(u64, header.comment_len);

        if (header.disk_number != 0)
            return error.ZipUnsupportedMultiDisk;
        return .{
            .header_file_offset = header_file_offset,
            .header = header,
        };
    }

    /// One central directory record together with its position in the file.
    pub const Entry = struct {
        /// Absolute file offset of the central directory file header.
        header_file_offset: u64,
        header: CentralDirectoryFileHeader,

        /// Returns true for names that must not be extracted: empty names,
        /// names starting with '/', and names containing a ".." component
        /// (components are split on both '/' and '\\').
        fn isBadFilename(filename: []const u8) bool {
            if (filename.len == 0 or filename[0] == '/')
                return true;
            var components = std.mem.splitAny(u8, filename, "/\\");
            while (components.next()) |component| {
                if (std.mem.eql(u8, component, ".."))
                    return true;
            }
            return false;
        }

        /// Extracts this entry from `zip_file` into `dest` and returns the
        /// CRC-32 computed over the extracted bytes; the caller is expected
        /// to compare it with `header.crc32`.
        ///
        /// `filename_buf` is scratch space for the entry name and must hold at
        /// least `header.filename_len` bytes. Names ending in '/' are created
        /// as directories; anything else is created as a new file (creation
        /// fails if the file already exists), with parent directories made as
        /// needed.
        ///
        /// Errors include `error.ZipInsufficientBuffer`, `error.ZipTruncated`,
        /// `error.ZipHeader`, `error.ZipRedundancyFail`,
        /// `error.ZipBadFilename`, `error.ZipBadDirectorySize`,
        /// `error.ZipDecompressTruncated`, plus whatever `decompress` and the
        /// file system operations return.
        pub fn extract(self: Entry, zip_file: std.fs.File, filename_buf: []u8, dest: std.fs.Dir) !u32 {
            if (filename_buf.len < self.header.filename_len)
                return error.ZipInsufficientBuffer;
            const filename = filename_buf[0..self.header.filename_len];

            // The filename directly follows the 46-byte fixed header.
            try zip_file.seekTo(self.header_file_offset + 46);
            {
                const len = try zip_file.readAll(filename);
                if (len != filename.len)
                    return error.ZipTruncated;
            }

            // Number of variable-length bytes (filename + extra field) that
            // sit between the fixed local header and the entry data.
            const local_data_header_offset: u64 = local_data_header_offset: {
                const local_header = blk: {
                    try zip_file.seekTo(self.header.local_file_header_offset);
                    var local_header: [30]u8 = undefined;
                    const len = try zip_file.readAll(&local_header);
                    if (len != local_header.len)
                        return error.ZipTruncated;
                    break :blk LocalFileHeader.deserialize(local_header);
                };
                if (!std.mem.eql(u8, &local_header.signature, &local_file_header_sig))
                    return error.ZipHeader;
                // TODO: verify minimum_version
                // TODO: verify flags
                // TODO: verify compression method
                // TODO: verify last_mod_time
                // TODO: verify last_mod_date
                // TODO: verify filename_len and filename?
                // TODO: extra?

                // A zero in the local header is tolerated; a non-zero value
                // must agree with the central directory.
                if (local_header.crc32 != 0 and local_header.crc32 != self.header.crc32)
                    return error.ZipRedundancyFail;
                if (local_header.compressed_size != 0 and
                    local_header.compressed_size != self.header.compressed_size)
                    return error.ZipRedundancyFail;
                if (local_header.uncompressed_size != 0 and
                    local_header.uncompressed_size != self.header.uncompressed_size)
                    return error.ZipRedundancyFail;

                break :local_data_header_offset @as(u64, local_header.filename_len) +
                    @as(u64, local_header.extra_len);
            };

            // Entry names come from the archive and are untrusted: refuse
            // anything that could resolve outside of `dest`.
            if (isBadFilename(filename))
                return error.ZipBadFilename;

            // All entries that end in '/' are directories
            if (filename[filename.len - 1] == '/') {
                if (self.header.uncompressed_size != 0)
                    return error.ZipBadDirectorySize;
                try dest.makePath(filename[0 .. filename.len - 1]);
                // CRC-32 of zero bytes, so the caller's comparison still works.
                return std.hash.Crc32.hash(&.{});
            }

            const out_file = blk: {
                if (std.fs.path.dirname(filename)) |dirname| {
                    var parent_dir = try dest.makeOpenPath(dirname, .{});
                    defer parent_dir.close();

                    const basename = std.fs.path.basename(filename);
                    break :blk try parent_dir.createFile(basename, .{ .exclusive = true });
                }
                break :blk try dest.createFile(filename, .{ .exclusive = true });
            };
            defer out_file.close();

            const local_data_file_offset: u64 =
                @as(u64, self.header.local_file_header_offset) +
                @as(u64, 30) +
                local_data_header_offset;
            try zip_file.seekTo(local_data_file_offset);

            // Cap reads at compressed_size so the decoder cannot run into
            // whatever follows this entry's data in the archive.
            var limited_reader = limitedReader(zip_file.reader(), self.header.compressed_size);
            const crc = try decompress(
                self.header.compression_method,
                self.header.uncompressed_size,
                limited_reader.reader(),
                out_file.writer(),
            );
            if (limited_reader.remaining != 0)
                return error.ZipDecompressTruncated;
            return crc;
        }
    };
};
/// Extracts every entry of the ZIP archive `file` into the directory `dest`.
/// Stops at the first failure; returns `error.ZipCrcMismatch` when the CRC-32
/// computed during extraction differs from the one recorded for the entry.
pub fn pipeToFileSystem(dest: std.fs.Dir, file: std.fs.File) !void {
    // Scratch buffer reused for the name of each entry.
    var name_storage: [std.fs.MAX_PATH_BYTES]u8 = undefined;
    var entries = try Iterator.init(file);
    while (true) {
        const entry = (try entries.next()) orelse break;
        const actual_crc = try entry.extract(file, &name_storage, dest);
        if (actual_crc != entry.header.crc32)
            return error.ZipCrcMismatch;
    }
}
/// Test helper: allocates one `FileCache` slot per file on the stack and runs
/// the write-then-extract round trip over `files`.
fn testZip(comptime files: []const File) !void {
    var cache_storage: [files.len]FileCache = undefined;
    return testZipWithCache(files, &cache_storage);
}
/// Test helper: writes `files` into an archive named "zip" inside a temporary
/// directory, extracts that archive into the same directory, and checks that
/// each extracted file matches the expected content.
fn testZipWithCache(files: []const File, cache: []FileCache) !void {
    var tmp_dir = testing.tmpDir(.{ .no_follow = true });
    defer tmp_dir.cleanup();
    const root = tmp_dir.dir;

    // Write the archive and close it before reopening it for reading.
    {
        var new_archive = try root.createFile("zip", .{});
        defer new_archive.close();
        try writeFile(new_archive, files, cache);
    }

    var archive = try root.openFile("zip", .{});
    defer archive.close();
    try pipeToFileSystem(root, archive);

    for (files) |expected| {
        var extracted = try root.openFile(expected.name, .{});
        defer extracted.close();
        // Only the first 4096 bytes of each extracted file are compared.
        var contents: [4096]u8 = undefined;
        const contents_len = try extracted.reader().readAll(&contents);
        try testing.expectEqualStrings(expected.content, contents[0..contents_len]);
    }
}
// Round-trips an archive holding a single stored (uncompressed) file.
test "zip one file" {
    const files = [_]File{
        .{ .name = "onefile.txt", .content = "Just a single file\n", .compression = .store },
    };
    try testZip(&files);
}
// Round-trips several stored files, including ones in nested subdirectories.
test "zip multiple files" {
    const files = [_]File{
        .{ .name = "foo", .content = "a foo file\n", .compression = .store },
        .{ .name = "subdir/bar", .content = "bar is this right?\nanother newline\n", .compression = .store },
        .{ .name = "subdir/another/baz", .content = "bazzy mc bazzerson", .compression = .store },
    };
    try testZip(&files);
}
// Round-trips an archive that mixes deflate, deflate64 and stored entries.
test "zip deflated" {
    const files = [_]File{
        .{ .name = "deflateme", .content = "This is a deflated file.\nIt should be smaller in the Zip file1\n", .compression = .deflate },
        .{ .name = "deflateme64", .content = "The 64k version of deflate!\n", .compression = .deflate64 },
        .{ .name = "raw", .content = "Not all files need to be deflated in the same Zip.\n", .compression = .store },
    };
    try testZip(&files);
}
|