zip.zig 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390
  1. const builtin = @import("builtin");
  2. const std = @import("std");
  /// Aborts the program by panicking with the name of the allocation error.
  fn oom(e: error{OutOfMemory}) noreturn {
      const error_name = @errorName(e);
      @panic(error_name);
  }
  /// Reports `fmt`/`args` through `std.log.err` and then exits the process
  /// with status 255.
  fn fatal(comptime fmt: []const u8, args: anytype) noreturn {
      const failure_exit_code: u8 = 255;
      std.log.err(fmt, args);
      std.process.exit(failure_exit_code);
  }
  /// Writes the usage line to stderr and exits with status 1.
  /// If the write to stderr fails, panics with the name of that error.
  fn usage() noreturn {
      const usage_text = "Usage: zip [-options] ZIP_FILE FILES/DIRS..\n";
      const stderr = std.io.getStdErr().writer();
      stderr.writeAll(usage_text) catch |err| @panic(@errorName(err));
      std.process.exit(1);
  }
  /// Arena that owns the argument list built by `cmdlineArgs` on Windows.
  /// On every other OS this is an empty struct value.
  var windows_args_arena = if (builtin.os.tag != .windows)
      struct {}{}
  else
      std.heap.ArenaAllocator.init(std.heap.page_allocator);
  /// Returns the process arguments without the program name (argv[0]).
  ///
  /// On Windows the arguments are allocated from `windows_args_arena` and are
  /// never freed; allocation failure and overflow while parsing the command
  /// line both panic. On other systems the result is a view into
  /// `std.os.argv`.
  ///
  /// An empty argument vector yields an empty slice rather than an
  /// out-of-bounds slice or an underflowing length.
  pub fn cmdlineArgs() [][*:0]u8 {
      if (builtin.os.tag == .windows) {
          const allocator = windows_args_arena.allocator();
          const slices = std.process.argsAlloc(allocator) catch |err| switch (err) {
              error.OutOfMemory => oom(error.OutOfMemory),
              error.Overflow => @panic("Overflow while parsing command line"),
          };
          // Saturating subtraction: with no arguments at all there is no
          // program name to drop, and `len - 1` would underflow.
          const arg_count = slices.len -| 1;
          const args = allocator.alloc([*:0]u8, arg_count) catch |e| oom(e);
          for (args, slices[slices.len - arg_count ..]) |*arg, slice| {
              arg.* = slice.ptr;
          }
          return args;
      }
      const argv = std.os.argv;
      // Slicing from index 1 is only valid when argv[0] exists.
      if (argv.len == 0) return argv;
      return argv[1..];
  }
  /// Entry point: `zip ZIP_FILE FILES..`.
  ///
  /// Collects the regular files named on the command line, writes them to
  /// ZIP_FILE through `writeZip`, then reopens the archive and overwrites each
  /// local file header with the CRC and sizes recorded while writing.
  pub fn main() !void {
      var arena_instance = std.heap.ArenaAllocator.init(std.heap.page_allocator);
      defer arena_instance.deinit();
      const arena = arena_instance.allocator();

      // Compact the positional arguments to the front of the argument slice.
      // No options are implemented yet, so anything starting with '-' is fatal.
      const cmd_args = blk: {
          const all_args = cmdlineArgs();
          var kept: usize = 0;
          for (all_args) |arg_ptr| {
              const arg = std.mem.span(arg_ptr);
              if (std.mem.startsWith(u8, arg, "-"))
                  fatal("unknown cmdline option '{s}'", .{arg});
              all_args[kept] = arg;
              kept += 1;
          }
          break :blk all_args[0..kept];
      };
      if (cmd_args.len < 2) usage();

      const zip_file_arg = std.mem.span(cmd_args[0]);
      const paths_to_include = cmd_args[1..];

      // Expand the command line arguments into the list of files to archive.
      var file_entries: std.ArrayListUnmanaged(FileEntry) = .{};
      for (paths_to_include) |path_ptr| {
          const path = std.mem.span(path_ptr);
          const stat = std.fs.cwd().statFile(path) catch |err| switch (err) {
              error.FileNotFound => fatal("path '{s}' is not found", .{path}),
              else => |e| return e,
          };
          switch (stat.kind) {
              .file => {
                  if (isBadFilename(path))
                      fatal("filename '{s}' is invalid for zip files", .{path});
                  const entry: FileEntry = .{ .path = path, .size = stat.size };
                  try file_entries.append(arena, entry);
              },
              .directory => @panic("todo: directories"),
              .sym_link => fatal("todo: symlinks", .{}),
              .block_device,
              .character_device,
              .named_pipe,
              .unix_domain_socket,
              .whiteout,
              .door,
              .event_port,
              .unknown,
              => fatal("file '{s}' is an unsupported type {s}", .{ path, @tagName(stat.kind) }),
          }
      }

      const entries = file_entries.items;
      // Arena-owned, so it is released together with the arena; no explicit free.
      const store = try arena.alloc(FileStore, entries.len);

      // First pass: write the whole archive with placeholder local headers.
      {
          const zip_file = std.fs.cwd().createFile(zip_file_arg, .{}) catch |err|
              fatal("create file '{s}' failed: {s}", .{ zip_file_arg, @errorName(err) });
          defer zip_file.close();
          try writeZip(zip_file, entries, store);
      }

      // Second pass: go back and fix up the local file headers, now that the
      // CRC and compressed size of every entry are known.
      {
          const zip_file = std.fs.cwd().openFile(zip_file_arg, .{ .mode = .read_write }) catch |err|
              fatal("open file '{s}' failed: {s}", .{ zip_file_arg, @errorName(err) });
          defer zip_file.close();
          for (entries, store) |entry, stored| {
              try zip_file.seekTo(stored.file_offset);
              const hdr: std.zip.LocalFileHeader = .{
                  .signature = std.zip.local_file_header_sig,
                  .version_needed_to_extract = 10,
                  .flags = .{ .encrypted = false, ._ = 0 },
                  .compression_method = stored.compression,
                  .last_modification_time = 0,
                  .last_modification_date = 0,
                  .crc32 = stored.crc32,
                  .compressed_size = stored.compressed_size,
                  .uncompressed_size = @intCast(entry.size),
                  .filename_len = @intCast(entry.path.len),
                  .extra_len = 0,
              };
              try writeStructEndian(zip_file.writer(), hdr, .little);
          }
      }
  }
  /// A regular file selected for inclusion in the archive.
  const FileEntry = struct {
      /// Path as given on the command line; also used as the name stored in
      /// the archive.
      path: []const u8,
      /// Size in bytes reported by `stat` when the arguments were expanded.
      size: u64,
  };
  /// Writes a complete zip archive containing `file_entries` to `out_zip`.
  ///
  /// For every entry a placeholder local file header is written, followed by
  /// the file's deflate-compressed contents. The header offset, CRC-32 and
  /// sizes of entry `i` are recorded in `store[i]` so that the caller can patch
  /// the local headers afterwards. The central directory records and the end
  /// record are written last. `store` must hold at least `file_entries.len`
  /// elements.
  ///
  /// Returns `error.FileTooBig` when a size does not fit the 32-bit fields of
  /// `FileStore`, and `error.EndOfStream` when a stored (uncompressed) file
  /// turns out to be shorter than the size in its entry.
  fn writeZip(
      out_zip: std.fs.File,
      file_entries: []const FileEntry,
      store: []FileStore,
  ) !void {
      var zipper = initZipper(out_zip.writer());
      for (file_entries, 0..) |file_entry, i| {
          // Reject oversized inputs before anything is written, instead of
          // relying on an unchecked narrowing cast afterwards.
          const uncompressed_size = std.math.cast(u32, file_entry.size) orelse
              return error.FileTooBig;

          const file_offset = zipper.counting_writer.bytes_written;
          const compression: std.zip.CompressionMethod = .deflate;
          try zipper.writeFileHeader(file_entry.path, compression);

          const file = try std.fs.cwd().openFile(file_entry.path, .{});
          defer file.close();

          var crc32: u32 = undefined;
          var compressed_size = file_entry.size;
          switch (compression) {
              .store => {
                  var hash = std.hash.Crc32.init();
                  var full_rw_buf: [std.mem.page_size]u8 = undefined;
                  var remaining = file_entry.size;
                  while (remaining > 0) {
                      const buf = full_rw_buf[0..@min(remaining, full_rw_buf.len)];
                      // A plain `read` may return fewer bytes than requested;
                      // `readNoEof` keeps reading until the chunk is full and
                      // fails with `error.EndOfStream` on a truncated file.
                      try file.reader().readNoEof(buf);
                      hash.update(buf);
                      try zipper.counting_writer.writer().writeAll(buf);
                      remaining -= buf.len;
                  }
                  crc32 = hash.final();
              },
              .deflate => {
                  const start_offset = zipper.counting_writer.bytes_written;
                  var br = std.io.bufferedReader(file.reader());
                  // The CRC is computed over the bytes handed to the compressor.
                  var cr = Crc32Reader(@TypeOf(br.reader())){ .underlying_reader = br.reader() };
                  try std.compress.flate.deflate.compress(
                      .raw,
                      cr.reader(),
                      zipper.counting_writer.writer(),
                      .{ .level = .best },
                  );
                  // Leftover buffered bytes would mean data was read from the
                  // file but never compressed.
                  if (br.end != br.start) fatal("deflate compressor didn't read all data", .{});
                  compressed_size = zipper.counting_writer.bytes_written - start_offset;
                  crc32 = cr.crc32.final();
              },
              else => @panic("codebug"),
          }

          store[i] = .{
              .file_offset = file_offset,
              .compression = compression,
              .uncompressed_size = uncompressed_size,
              .crc32 = crc32,
              .compressed_size = std.math.cast(u32, compressed_size) orelse
                  return error.FileTooBig,
          };
      }
      for (file_entries, 0..) |file, i| {
          try zipper.writeCentralRecord(store[i], .{
              .name = file.path,
          });
      }
      try zipper.writeEndRecord();
  }
  /// Returns a reader type that forwards reads to a `ReaderType` and feeds
  /// every byte it hands out into a running CRC-32 (the `crc32` field).
  pub fn Crc32Reader(comptime ReaderType: type) type {
      return struct {
          const Self = @This();

          pub const Error = ReaderType.Error;
          pub const Reader = std.io.Reader(*Self, Error, read);

          underlying_reader: ReaderType,
          crc32: std.hash.Crc32 = std.hash.Crc32.init(),

          /// Returns a `std.io` reader backed by this instance.
          pub fn reader(self: *Self) Reader {
              return Reader{ .context = self };
          }

          /// Reads into `dest` from the underlying reader and updates the
          /// checksum with exactly the bytes that were read.
          pub fn read(self: *Self, dest: []u8) Error!usize {
              const bytes_read = try self.underlying_reader.read(dest);
              const filled = dest[0..bytes_read];
              self.crc32.update(filled);
              return bytes_read;
          }
      };
  }
  /// Reports whether `filename` must not be stored in the archive.
  ///
  /// A name is rejected when it is empty, contains a backslash, starts with
  /// '/', or has a ".." path component.
  fn isBadFilename(filename: []const u8) bool {
      if (filename.len == 0) return true;
      if (std.mem.indexOfScalar(u8, filename, '\\') != null) return true;
      if (filename[0] == '/') return true;

      // No backslash can be present at this point, so '/' is the only separator.
      var components = std.mem.splitAny(u8, filename, "/");
      while (components.next()) |component| {
          if (std.mem.eql(u8, component, "..")) return true;
      }
      return false;
  }
  /// Per-file results captured while writing an entry to the zip archive;
  /// they are needed again when writing the corresponding central directory
  /// record.
  pub const FileStore = struct {
      /// Number of bytes written to the archive before this entry's local
      /// file header.
      file_offset: u64,
      /// Compression method recorded for the entry.
      compression: std.zip.CompressionMethod,
      /// Size of the file's data before compression, in bytes.
      uncompressed_size: u32,
      /// CRC-32 of the file's uncompressed data.
      crc32: u32,
      /// Number of bytes the entry's data occupies in the archive.
      compressed_size: u32,
  };
  /// Creates a `Zipper` whose output goes through a `std.io.countingWriter`
  /// wrapped around `writer`.
  pub fn initZipper(writer: anytype) Zipper(@TypeOf(writer)) {
      const counting = std.io.countingWriter(writer);
      return .{ .counting_writer = counting };
  }
  /// Returns a type that emits zip archive records to a `Writer`.
  ///
  /// Intended call order: `writeFileHeader` for a file (the caller then writes
  /// that file's data through `counting_writer`), repeated per file; then
  /// `writeCentralRecord` once per file; then `writeEndRecord`.
  ///
  /// Values that do not fit the fixed-width header fields are reported as
  /// errors (`error.NameTooLong`, `error.ArchiveTooLarge`,
  /// `error.TooManyEntries`) instead of going through an unchecked cast.
  fn Zipper(comptime Writer: type) type {
      return struct {
          /// All output goes through this writer; its byte count provides offsets.
          counting_writer: std.io.CountingWriter(Writer),
          /// Number of central directory records written so far.
          central_count: u64 = 0,
          /// Byte count at which the first central directory record started.
          first_central_offset: ?u64 = null,
          /// Byte count just past the most recent central directory record.
          last_central_limit: ?u64 = null,

          const Self = @This();

          /// Writes a local file header for `name` followed by the name itself.
          /// CRC and sizes are written as zero; the caller is expected to
          /// rewrite the header once they are known.
          pub fn writeFileHeader(
              self: *Self,
              name: []const u8,
              compression: std.zip.CompressionMethod,
          ) !void {
              const filename_len = std.math.cast(u16, name.len) orelse
                  return error.NameTooLong;
              const writer = self.counting_writer.writer();
              const hdr: std.zip.LocalFileHeader = .{
                  .signature = std.zip.local_file_header_sig,
                  .version_needed_to_extract = 10,
                  .flags = .{ .encrypted = false, ._ = 0 },
                  .compression_method = compression,
                  .last_modification_time = 0,
                  .last_modification_date = 0,
                  .crc32 = 0,
                  .compressed_size = 0,
                  .uncompressed_size = 0,
                  .filename_len = filename_len,
                  .extra_len = 0,
              };
              try writeStructEndian(writer, hdr, .little);
              try writer.writeAll(name);
          }

          /// Writes the central directory record for one file, using the
          /// values captured in `store`, followed by `opt.name`.
          pub fn writeCentralRecord(
              self: *Self,
              store: FileStore,
              opt: struct {
                  name: []const u8,
                  version_needed_to_extract: u16 = 10,
              },
          ) !void {
              // Validate before touching any state so a failure leaves the
              // bookkeeping fields unchanged.
              const filename_len = std.math.cast(u16, opt.name.len) orelse
                  return error.NameTooLong;
              const header_offset = std.math.cast(u32, store.file_offset) orelse
                  return error.ArchiveTooLarge;

              if (self.first_central_offset == null) {
                  self.first_central_offset = self.counting_writer.bytes_written;
              }
              self.central_count += 1;

              const hdr: std.zip.CentralDirectoryFileHeader = .{
                  .signature = std.zip.central_file_header_sig,
                  .version_made_by = 0,
                  .version_needed_to_extract = opt.version_needed_to_extract,
                  .flags = .{ .encrypted = false, ._ = 0 },
                  .compression_method = store.compression,
                  .last_modification_time = 0,
                  .last_modification_date = 0,
                  .crc32 = store.crc32,
                  .compressed_size = store.compressed_size,
                  .uncompressed_size = store.uncompressed_size,
                  .filename_len = filename_len,
                  .extra_len = 0,
                  .comment_len = 0,
                  .disk_number = 0,
                  .internal_file_attributes = 0,
                  .external_file_attributes = 0,
                  .local_file_header_offset = header_offset,
              };
              try writeStructEndian(self.counting_writer.writer(), hdr, .little);
              try self.counting_writer.writer().writeAll(opt.name);
              self.last_central_limit = self.counting_writer.bytes_written;
          }

          /// Writes the end-of-central-directory record. When no central
          /// record was written, the directory offset and size are both zero.
          pub fn writeEndRecord(self: *Self) !void {
              const cd_offset = self.first_central_offset orelse 0;
              const cd_end = self.last_central_limit orelse 0;

              const record_count = std.math.cast(u16, self.central_count) orelse
                  return error.TooManyEntries;
              const directory_size = std.math.cast(u32, cd_end - cd_offset) orelse
                  return error.ArchiveTooLarge;
              const directory_offset = std.math.cast(u32, cd_offset) orelse
                  return error.ArchiveTooLarge;

              const hdr: std.zip.EndRecord = .{
                  .signature = std.zip.end_record_sig,
                  .disk_number = 0,
                  .central_directory_disk_number = 0,
                  .record_count_disk = record_count,
                  .record_count_total = record_count,
                  .central_directory_size = directory_size,
                  .central_directory_offset = directory_offset,
                  .comment_len = 0,
              };
              try writeStructEndian(self.counting_writer.writer(), hdr, .little);
          }
      };
  }
  const native_endian = @import("builtin").target.cpu.arch.endian();

  /// Writes `value` to `writer` with `writer.writeStruct`, byte-swapping every
  /// field first (via `byteSwapAllFields`) when `endian` differs from the
  /// target's native byte order.
  ///
  /// `value` must be a struct passed by value. Anything else, in particular a
  /// pointer to a struct, is rejected at compile time. Errors are whatever
  /// `writer.writeStruct` returns.
  fn writeStructEndian(writer: anytype, value: anytype, endian: std.builtin.Endian) !void {
      const T = @TypeOf(value);
      if (@typeInfo(T) != .Struct)
          @compileError("writeStructEndian expects a struct value, found " ++ @typeName(T));

      if (native_endian == endian) return writer.writeStruct(value);

      // Swap on a local copy so the caller's value is never modified.
      var copy = value;
      byteSwapAllFields(T, &copy);
      return writer.writeStruct(copy);
  }
  /// Reverses, in place, the byte order of every field of the struct (or every
  /// element of the array) that `ptr` points to.
  ///
  /// Struct fields: a struct with a backing integer is swapped as that single
  /// integer; other structs and arrays are processed recursively; enums are
  /// swapped through their integer tag; everything else goes through
  /// `@byteSwap`. Array elements follow the same rules, except that struct
  /// elements are always processed recursively, field by field.
  ///
  /// Any other `S` is a compile error.
  pub fn byteSwapAllFields(comptime S: type, ptr: *S) void {
      switch (@typeInfo(S)) {
          .Array => for (ptr) |*element| {
              const Elem = @TypeOf(element.*);
              switch (@typeInfo(Elem)) {
                  .Enum => element.* = @enumFromInt(@byteSwap(@intFromEnum(element.*))),
                  .Struct, .Array => byteSwapAllFields(Elem, element),
                  else => element.* = @byteSwap(element.*),
              }
          },
          .Struct => inline for (std.meta.fields(S)) |field| {
              const Field = field.type;
              switch (@typeInfo(Field)) {
                  .Enum => {
                      const tag = @intFromEnum(@field(ptr, field.name));
                      @field(ptr, field.name) = @enumFromInt(@byteSwap(tag));
                  },
                  .Array => byteSwapAllFields(Field, &@field(ptr, field.name)),
                  .Struct => |struct_info| {
                      if (struct_info.backing_integer) |Int| {
                          // Swap the whole backing integer in one go.
                          const bits: Int = @bitCast(@field(ptr, field.name));
                          @field(ptr, field.name) = @bitCast(@byteSwap(bits));
                      } else {
                          byteSwapAllFields(Field, &@field(ptr, field.name));
                      }
                  },
                  else => @field(ptr, field.name) = @byteSwap(@field(ptr, field.name)),
              }
          },
          else => @compileError("byteSwapAllFields expects a struct or array as the first argument"),
      }
  }
  353. }