blockdiff.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
#!/usr/bin/python2
#
# Copyright (C) 2013 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Block diff utility."""
from __future__ import print_function

# pylint: disable=import-error
import argparse
import sys
  22. class BlockDiffError(Exception):
  23. pass
  24. def BlockDiff(block_size, file1, file2, name1, name2, max_length=-1):
  25. """Performs a binary diff of two files by blocks.
  26. Args:
  27. block_size: the size of a block to diff by
  28. file1: first file object
  29. file2: second file object
  30. name1: name of first file (for error reporting)
  31. name2: name of second file (for error reporting)
  32. max_length: the maximum length to read/diff in bytes (optional)
  33. Returns:
  34. A list of (start, length) pairs representing block extents that differ
  35. between the two files.
  36. Raises:
  37. BlockDiffError if there were errors while diffing.
  38. """
  39. if max_length < 0:
  40. max_length = sys.maxint
  41. diff_list = []
  42. num_blocks = extent_start = extent_length = 0
  43. while max_length or extent_length:
  44. read_length = min(max_length, block_size)
  45. data1 = file1.read(read_length)
  46. data2 = file2.read(read_length)
  47. if len(data1) != len(data2):
  48. raise BlockDiffError('read %d bytes from %s but %d bytes from %s' %
  49. (len(data1), name1, len(data2), name2))
  50. if data1 != data2:
  51. # Data is different, mark it down.
  52. if extent_length:
  53. # Stretch the current diff extent.
  54. extent_length += 1
  55. else:
  56. # Start a new diff extent.
  57. extent_start = num_blocks
  58. extent_length = 1
  59. elif extent_length:
  60. # Record the previous extent.
  61. diff_list.append((extent_start, extent_length))
  62. extent_length = 0
  63. # Are we done reading?
  64. if not data1:
  65. break
  66. max_length -= len(data1)
  67. num_blocks += 1
  68. return diff_list
  69. def main(argv):
  70. # Parse command-line arguments.
  71. parser = argparse.ArgumentParser(
  72. description='Compare FILE1 and FILE2 by blocks.',
  73. formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  74. parser.add_argument('-b', '--block-size', metavar='NUM', type=int,
  75. default=4096, help='the block size to use')
  76. parser.add_argument('-m', '--max-length', metavar='NUM', type=int, default=-1,
  77. help='maximum number of bytes to compare')
  78. parser.add_argument('file1', metavar='FILE1')
  79. parser.add_argument('file2', metavar='FILE2')
  80. args = parser.parse_args(argv[1:])
  81. # Perform the block diff.
  82. try:
  83. with open(args.file1) as file1:
  84. with open(args.file2) as file2:
  85. diff_list = BlockDiff(args.block_size, file1, file2,
  86. args.file1, args.file2, args.max_length)
  87. except BlockDiffError as e:
  88. print('Error: ' % e, file=sys.stderr)
  89. return 2
  90. # Print the diff, if such was found.
  91. if diff_list:
  92. total_diff_blocks = 0
  93. for extent_start, extent_length in diff_list:
  94. total_diff_blocks += extent_length
  95. print('%d->%d (%d)' %
  96. (extent_start, extent_start + extent_length, extent_length))
  97. print('total diff: %d blocks' % total_diff_blocks)
  98. return 1
  99. return 0
  100. if __name__ == '__main__':
  101. sys.exit(main(sys.argv))