//! zip.zig: command-line tool that writes the given files into a ZIP archive.

  1. const builtin = @import("builtin");
  2. const std = @import("std");
  3. fn oom(e: error{OutOfMemory}) noreturn {
  4. @panic(@errorName(e));
  5. }
  6. fn fatal(comptime fmt: []const u8, args: anytype) noreturn {
  7. std.log.err(fmt, args);
  8. std.process.exit(0xff);
  9. }
  10. fn usage() noreturn {
  11. std.io.getStdErr().writer().writeAll(
  12. "Usage: zip [-options] ZIP_FILE FILES/DIRS..\n",
  13. ) catch |e| @panic(@errorName(e));
  14. std.process.exit(1);
  15. }
  16. var windows_args_arena = if (builtin.os.tag == .windows)
  17. std.heap.ArenaAllocator.init(std.heap.page_allocator) else struct{}{};
  18. pub fn cmdlineArgs() [][*:0]u8 {
  19. if (builtin.os.tag == .windows) {
  20. const slices = std.process.argsAlloc(windows_args_arena.allocator()) catch |err| switch (err) {
  21. error.OutOfMemory => oom(error.OutOfMemory),
  22. //error.InvalidCmdLine => @panic("InvalidCmdLine"),
  23. error.Overflow => @panic("Overflow while parsing command line"),
  24. };
  25. const args = windows_args_arena.allocator().alloc([*:0]u8, slices.len - 1) catch |e| oom(e);
  26. for (slices[1..], 0..) |slice, i| {
  27. args[i] = slice.ptr;
  28. }
  29. return args;
  30. }
  31. return std.os.argv.ptr[1 .. std.os.argv.len];
  32. }
  33. pub fn main() !void {
  34. var arena_instance = std.heap.ArenaAllocator.init(std.heap.page_allocator);
  35. defer arena_instance.deinit();
  36. const arena = arena_instance.allocator();
  37. const cmd_args = blk: {
  38. const cmd_args = cmdlineArgs();
  39. var arg_index: usize = 0;
  40. var non_option_len: usize = 0;
  41. while (arg_index < cmd_args.len) : (arg_index += 1) {
  42. const arg = std.mem.span(cmd_args[arg_index]);
  43. if (!std.mem.startsWith(u8, arg, "-")) {
  44. cmd_args[non_option_len] = arg;
  45. non_option_len += 1;
  46. } else {
  47. fatal("unknown cmdline option '{s}'", .{arg});
  48. }
  49. }
  50. break :blk cmd_args[0 .. non_option_len];
  51. };
  52. if (cmd_args.len < 2) usage();
  53. const zip_file_arg = std.mem.span(cmd_args[0]);
  54. const paths_to_include = cmd_args[1..];
  55. // expand cmdline arguments to a list of files
  56. var file_entries: std.ArrayListUnmanaged(FileEntry) = .{};
  57. for (paths_to_include) |path_ptr| {
  58. const path = std.mem.span(path_ptr);
  59. const stat = std.fs.cwd().statFile(path) catch |err| switch (err) {
  60. error.FileNotFound => fatal("path '{s}' is not found", .{path}),
  61. else => |e| return e,
  62. };
  63. switch (stat.kind) {
  64. .directory => {
  65. @panic("todo: directories");
  66. },
  67. .file => {
  68. if (isBadFilename(path))
  69. fatal("filename '{s}' is invalid for zip files", .{path});
  70. try file_entries.append(arena, .{
  71. .path = path,
  72. .size = stat.size,
  73. });
  74. },
  75. .sym_link => fatal("todo: symlinks", .{}),
  76. .block_device,
  77. .character_device,
  78. .named_pipe,
  79. .unix_domain_socket,
  80. .whiteout,
  81. .door,
  82. .event_port,
  83. .unknown => fatal("file '{s}' is an unsupported type {s}", .{path, @tagName(stat.kind)}),
  84. }
  85. }
  86. const store = try arena.alloc(FileStore, file_entries.items.len);
  87. // no need to free
  88. {
  89. const zip_file = std.fs.cwd().createFile(zip_file_arg, .{}) catch |err|
  90. fatal("create file '{s}' failed: {s}", .{zip_file_arg, @errorName(err)});
  91. defer zip_file.close();
  92. try writeZip(zip_file, file_entries.items, store);
  93. }
  94. // go fix up the local file headers
  95. {
  96. const zip_file = std.fs.cwd().openFile(zip_file_arg, .{ .mode = .read_write }) catch |err|
  97. fatal("open file '{s}' failed: {s}", .{zip_file_arg, @errorName(err)});
  98. defer zip_file.close();
  99. for (file_entries.items, 0..) |file, i| {
  100. try zip_file.seekTo(store[i].file_offset);
  101. const hdr: std.zip.LocalFileHeader = .{
  102. .signature = std.zip.local_file_header_sig,
  103. .version_needed_to_extract = 10,
  104. .flags = .{ .encrypted = false, ._ = 0 },
  105. .compression_method = store[i].compression,
  106. .last_modification_time = 0,
  107. .last_modification_date = 0,
  108. .crc32 = store[i].crc32,
  109. .compressed_size = store[i].compressed_size,
  110. .uncompressed_size = @intCast(file.size),
  111. .filename_len = @intCast(file.path.len),
  112. .extra_len = 0,
  113. };
  114. try writeStructEndian(zip_file.writer(), hdr, .little);
  115. }
  116. }
  117. }
  118. const FileEntry = struct {
  119. path: []const u8,
  120. size: u64,
  121. };
  122. fn writeZip(
  123. out_zip: std.fs.File,
  124. file_entries: []const FileEntry,
  125. store: []FileStore,
  126. ) !void {
  127. var zipper = initZipper(out_zip.writer());
  128. for (file_entries, 0..) |file_entry, i| {
  129. const file_offset = zipper.counting_writer.bytes_written;
  130. const compression: std.zip.CompressionMethod = .deflate;
  131. try zipper.writeFileHeader(file_entry.path, compression);
  132. var file = try std.fs.cwd().openFile(file_entry.path, .{});
  133. defer file.close();
  134. var crc32: u32 = undefined;
  135. var compressed_size = file_entry.size;
  136. switch (compression) {
  137. .store => {
  138. var hash = std.hash.Crc32.init();
  139. var full_rw_buf: [std.mem.page_size]u8 = undefined;
  140. var remaining = file_entry.size;
  141. while (remaining > 0) {
  142. const buf = full_rw_buf[0 .. @min(remaining, full_rw_buf.len)];
  143. const read_len = try file.reader().read(buf);
  144. std.debug.assert(read_len == buf.len);
  145. hash.update(buf);
  146. try zipper.counting_writer.writer().writeAll(buf);
  147. remaining -= buf.len;
  148. }
  149. crc32 = hash.final();
  150. },
  151. .deflate => {
  152. const start_offset = zipper.counting_writer.bytes_written;
  153. var br = std.io.bufferedReader(file.reader());
  154. var cr = Crc32Reader(@TypeOf(br.reader())){ .underlying_reader = br.reader() };
  155. try std.compress.flate.deflate.compress(
  156. .raw,
  157. cr.reader(),
  158. zipper.counting_writer.writer(),
  159. .{ .level = .best },
  160. );
  161. if (br.end != br.start) fatal("deflate compressor didn't read all data", .{});
  162. compressed_size = zipper.counting_writer.bytes_written - start_offset;
  163. crc32 = cr.crc32.final();
  164. },
  165. else => @panic("codebug"),
  166. }
  167. store[i] = .{
  168. .file_offset = file_offset,
  169. .compression = compression,
  170. .uncompressed_size = @intCast(file_entry.size),
  171. .crc32 = crc32,
  172. .compressed_size = @intCast(compressed_size),
  173. };
  174. }
  175. for (file_entries, 0..) |file, i| {
  176. try zipper.writeCentralRecord(store[i], .{
  177. .name = file.path,
  178. });
  179. }
  180. try zipper.writeEndRecord();
  181. }
  182. pub fn Crc32Reader(comptime ReaderType: type) type {
  183. return struct {
  184. underlying_reader: ReaderType,
  185. crc32: std.hash.Crc32 = std.hash.Crc32.init(),
  186. pub const Error = ReaderType.Error;
  187. pub const Reader = std.io.Reader(*Self, Error, read);
  188. const Self = @This();
  189. pub fn read(self: *Self, dest: []u8) Error!usize {
  190. const len = try self.underlying_reader.read(dest);
  191. self.crc32.update(dest[0..len]);
  192. return len;
  193. }
  194. pub fn reader(self: *Self) Reader {
  195. return .{ .context = self };
  196. }
  197. };
  198. }
  199. fn isBadFilename(filename: []const u8) bool {
  200. if (std.mem.indexOfScalar(u8, filename, '\\')) |_|
  201. return true;
  202. if (filename.len == 0 or filename[0] == '/' or filename[0] == '\\')
  203. return true;
  204. var it = std.mem.splitAny(u8, filename, "/\\");
  205. while (it.next()) |part| {
  206. if (std.mem.eql(u8, part, ".."))
  207. return true;
  208. }
  209. return false;
  210. }
  211. // Used to store any data from writing a file to the zip archive that's needed
  212. // when writing the corresponding central directory record.
  213. pub const FileStore = struct {
  214. file_offset: u64,
  215. compression: std.zip.CompressionMethod,
  216. uncompressed_size: u32,
  217. crc32: u32,
  218. compressed_size: u32,
  219. };
  220. pub fn initZipper(writer: anytype) Zipper(@TypeOf(writer)) {
  221. return .{ .counting_writer = std.io.countingWriter(writer) };
  222. }
  223. fn Zipper(comptime Writer: type) type {
  224. return struct {
  225. counting_writer: std.io.CountingWriter(Writer),
  226. central_count: u64 = 0,
  227. first_central_offset: ?u64 = null,
  228. last_central_limit: ?u64 = null,
  229. const Self = @This();
  230. pub fn writeFileHeader(
  231. self: *Self,
  232. name: []const u8,
  233. compression: std.zip.CompressionMethod,
  234. ) !void {
  235. const writer = self.counting_writer.writer();
  236. const hdr: std.zip.LocalFileHeader = .{
  237. .signature = std.zip.local_file_header_sig,
  238. .version_needed_to_extract = 10,
  239. .flags = .{ .encrypted = false, ._ = 0 },
  240. .compression_method = compression,
  241. .last_modification_time = 0,
  242. .last_modification_date = 0,
  243. .crc32 = 0,
  244. .compressed_size = 0,
  245. .uncompressed_size = 0,
  246. .filename_len = @intCast(name.len),
  247. .extra_len = 0,
  248. };
  249. try writeStructEndian(writer, hdr, .little);
  250. try writer.writeAll(name);
  251. }
  252. pub fn writeCentralRecord(
  253. self: *Self,
  254. store: FileStore,
  255. opt: struct {
  256. name: []const u8,
  257. version_needed_to_extract: u16 = 10,
  258. },
  259. ) !void {
  260. if (self.first_central_offset == null) {
  261. self.first_central_offset = self.counting_writer.bytes_written;
  262. }
  263. self.central_count += 1;
  264. const hdr: std.zip.CentralDirectoryFileHeader = .{
  265. .signature = std.zip.central_file_header_sig,
  266. .version_made_by = 0,
  267. .version_needed_to_extract = opt.version_needed_to_extract,
  268. .flags = .{ .encrypted = false, ._ = 0 },
  269. .compression_method = store.compression,
  270. .last_modification_time = 0,
  271. .last_modification_date = 0,
  272. .crc32 = store.crc32,
  273. .compressed_size = store.compressed_size,
  274. .uncompressed_size = @intCast(store.uncompressed_size),
  275. .filename_len = @intCast(opt.name.len),
  276. .extra_len = 0,
  277. .comment_len = 0,
  278. .disk_number = 0,
  279. .internal_file_attributes = 0,
  280. .external_file_attributes = 0,
  281. .local_file_header_offset = @intCast(store.file_offset),
  282. };
  283. try writeStructEndian(self.counting_writer.writer(), hdr, .little);
  284. try self.counting_writer.writer().writeAll(opt.name);
  285. self.last_central_limit = self.counting_writer.bytes_written;
  286. }
  287. pub fn writeEndRecord(self: *Self) !void {
  288. const cd_offset = self.first_central_offset orelse 0;
  289. const cd_end = self.last_central_limit orelse 0;
  290. const hdr: std.zip.EndRecord = .{
  291. .signature = std.zip.end_record_sig,
  292. .disk_number = 0,
  293. .central_directory_disk_number = 0,
  294. .record_count_disk = @intCast(self.central_count),
  295. .record_count_total = @intCast(self.central_count),
  296. .central_directory_size = @intCast(cd_end - cd_offset),
  297. .central_directory_offset = @intCast(cd_offset),
  298. .comment_len = 0,
  299. };
  300. try writeStructEndian(self.counting_writer.writer(), hdr, .little);
  301. }
  302. };
  303. }
  304. const native_endian = @import("builtin").target.cpu.arch.endian();
  305. fn writeStructEndian(writer: anytype, value: anytype, endian: std.builtin.Endian) anyerror!void {
  306. // TODO: make sure this value is not a reference type
  307. if (native_endian == endian) {
  308. return writer.writeStruct(value);
  309. } else {
  310. var copy = value;
  311. byteSwapAllFields(@TypeOf(value), &copy);
  312. return writer.writeStruct(copy);
  313. }
  314. }
  315. pub fn byteSwapAllFields(comptime S: type, ptr: *S) void {
  316. switch (@typeInfo(S)) {
  317. .Struct => {
  318. inline for (std.meta.fields(S)) |f| {
  319. switch (@typeInfo(f.type)) {
  320. .Struct => |struct_info| if (struct_info.backing_integer) |Int| {
  321. @field(ptr, f.name) = @bitCast(@byteSwap(@as(Int, @bitCast(@field(ptr, f.name)))));
  322. } else {
  323. byteSwapAllFields(f.type, &@field(ptr, f.name));
  324. },
  325. .Array => byteSwapAllFields(f.type, &@field(ptr, f.name)),
  326. .Enum => {
  327. @field(ptr, f.name) = @enumFromInt(@byteSwap(@intFromEnum(@field(ptr, f.name))));
  328. },
  329. else => {
  330. @field(ptr, f.name) = @byteSwap(@field(ptr, f.name));
  331. },
  332. }
  333. }
  334. },
  335. .Array => {
  336. for (ptr) |*item| {
  337. switch (@typeInfo(@TypeOf(item.*))) {
  338. .Struct, .Array => byteSwapAllFields(@TypeOf(item.*), item),
  339. .Enum => {
  340. item.* = @enumFromInt(@byteSwap(@intFromEnum(item.*)));
  341. },
  342. else => {
  343. item.* = @byteSwap(item.*);
  344. },
  345. }
  346. }
  347. },
  348. else => @compileError("byteSwapAllFields expects a struct or array as the first argument"),
  349. }
  350. }