#!/usr/bin/python
"""Normalize CRLF line endings in a Subversion dump file.

Reads a version 2 Subversion dumpfile, and for every node that has the
svn:eol-style property set, rewrites CRLF line endings in the node text to
LF, updating the affected length and checksum headers.  Everything else is
copied through unchanged.

The dump is handled as raw bytes throughout, so the script runs on both
Python 2.6+ and Python 3.
"""

# TODO: Handle very large files

import hashlib
import io
import re
import sys

# Header lines look like "Name: value".  The name is matched lazily so that
# a value which itself contains a colon (e.g. a Node-path) stays in one
# piece instead of being split at its last colon.
header_re = re.compile(br'^(.*?): ?(.*)$')


def _to_text(raw):
    """Return header bytes as a native string for diagnostic messages."""
    if isinstance(raw, str):  # Python 2, where bytes is str
        return raw
    return raw.decode('utf-8', 'replace')


def _ascii(value):
    """Return str(value) encoded as ASCII bytes, for use as a header value."""
    return str(value).encode('ascii')


def parse_header_block(fp):
    """Read one block of "Name: value" header lines from fp.

    fp is a binary stream.  Reading stops at the first blank line or at end
    of input.

    Returns a tuple (order, headers, eof): order is the list of header names
    in the order they appeared, headers maps each name to its value, and eof
    is 1 if end of input was reached, 0 if a blank line ended the block.

    Raises Exception if a non-blank line is not a valid header line.
    """
    order = []
    headers = {}
    while True:
        line = fp.readline()
        if not line:
            return order, headers, 1
        line = line.strip()
        if not line:
            return order, headers, 0
        matches = header_re.match(line)
        if not matches:
            raise Exception('Malformed header block')
        name, value = matches.groups()
        order.append(name)
        headers[name] = value


def dump_header_block(fp, order, headers):
    """Write the headers named in order to fp, followed by a blank line."""
    for header in order:
        fp.write(header + b': ' + headers[header] + b'\n')
    fp.write(b'\n')


def parse_props(fp):
    """Parse a property block ("K n" / "V n" pairs ending with PROPS-END).

    fp is a binary stream positioned at the start of the block.

    Returns a dict mapping property names to property values (both bytes).

    Raises Exception if the block is truncated or otherwise malformed.
    """
    props = {}
    while True:
        line = fp.readline()
        if not line:
            # Ran out of input before seeing PROPS-END.
            raise Exception('Malformed hash block')
        line = line.strip()
        if line == b'PROPS-END':
            return props
        if line[:2] != b'K ':
            raise Exception('Malformed hash block')
        key = fp.read(int(line[2:]))
        fp.read(1)  # Skip the newline
        line = fp.readline().strip()
        if line[:2] != b'V ':
            raise Exception('Malformed hash block')
        props[key] = fp.read(int(line[2:]))
        fp.read(1)  # Skip the newline


def filter_dump(fp, fp_out):
    """Copy the dump on fp to fp_out, fixing CRLF in svn:eol-style files.

    fp and fp_out are binary streams.  Progress and the final count of fixed
    files are reported on standard error.

    Exits via sys.exit() if the dump is not format version 2, or if
    svn:eol-style is first set on a node record that carries no text.
    """
    # Paths that currently have svn:eol-style set.  The value is None until
    # a fix has been reported for the path, then 1.
    eol_style_files = {}
    fixed = 0

    while True:
        # Parse a block of headers
        order, headers, eof = parse_header_block(fp)

        if b'SVN-fs-dump-format-version' in headers:
            if int(headers[b'SVN-fs-dump-format-version']) != 2:
                sys.exit('Error: Can only handle version 2 dump files')

        if b'Revision-number' in headers:
            # This is a revision header block
            sys.stderr.write('Checking revision '
                             + _to_text(headers[b'Revision-number']) + '\n')

            # Copy the revision properties through untouched
            prop_len = int(headers.get(b'Prop-content-length', 0))
            revprops = fp.read(prop_len)

            dump_header_block(fp_out, order, headers)
            fp_out.write(revprops)

        elif b'Node-path' in headers:
            # This is a node header block
            node_path = headers[b'Node-path']

            # A record with no Text-content-length header carries no text at
            # all; that is different from carrying an empty text.
            has_text = b'Text-content-length' in headers
            props_len = int(headers.get(b'Prop-content-length', 0))
            props_string = fp.read(props_len)
            text_len = int(headers.get(b'Text-content-length', 0))
            text = fp.read(text_len)

            if props_len > 0:
                props = parse_props(io.BytesIO(props_string))
                if b'svn:eol-style' in props:
                    if not has_text and node_path not in eol_style_files:
                        sys.exit("ERROR: svn:eol-style was set on a revision "
                                 "which does not have any text\n"
                                 "changes. This is not a problem except that "
                                 "this tool cannot handle it).")
                    eol_style_files[node_path] = None
                elif node_path in eol_style_files:
                    # The property was removed from this path
                    del eol_style_files[node_path]

            if (text_len > 0 and node_path in eol_style_files
                    and b'\r\n' in text):
                # Make sure the file does not have CRNL EOLs
                if eol_style_files[node_path] is None:
                    sys.stderr.write(' Fixing "' + _to_text(node_path)
                                     + '"\n')
                    eol_style_files[node_path] = 1
                    fixed += 1
                text = text.replace(b'\r\n', b'\n')
                headers[b'Text-content-md5'] = _ascii(
                    hashlib.md5(text).hexdigest())
                if b'Text-content-sha1' in headers:
                    # Keep the optional SHA-1 checksum in step with the text
                    headers[b'Text-content-sha1'] = _ascii(
                        hashlib.sha1(text).hexdigest())
                headers[b'Text-content-length'] = _ascii(len(text))
                headers[b'Content-length'] = _ascii(len(text) + props_len)

            dump_header_block(fp_out, order, headers)
            fp_out.write(props_string)
            fp_out.write(text)

        elif order or not eof:
            # Any other block, including the empty blocks produced by the
            # blank separator lines, is copied through as-is.
            dump_header_block(fp_out, order, headers)

        if eof:
            break

    sys.stderr.write('Fixed %d files.\n' % fixed)


def usage():
    """Print the command-line help to standard output and exit with 0."""
    sys.stdout.write(
        'Usage: ' + sys.argv[0] + ' [INPUT DUMPFILE] [OUTPUT DUMPFILE]\n'
        '\n'
        'Reads a Subversion dumpfile from INPUT DUMPFILE (or standard\n'
        'input if not provided) and writes to OUTPUT DUMPFILE (or standard\n'
        'output if not provided), making sure that all files with\n'
        'svn:eol-style set have valid end-of-lines in the repository.\n')
    sys.exit(0)


def main():
    """Command-line entry point: filter argv[1] (or stdin) to argv[2] (or stdout)."""
    if len(sys.argv) > 1 and sys.argv[1] == '--help':
        usage()

    fp = None
    fp_out = None
    try:
        if len(sys.argv) > 1:
            fp = open(sys.argv[1], 'rb')
        if len(sys.argv) > 2:
            # The output must be opened for writing, in binary mode
            fp_out = open(sys.argv[2], 'wb')

        # On Python 3 the standard streams are text; use their byte buffers.
        source = fp
        if source is None:
            source = getattr(sys.stdin, 'buffer', sys.stdin)
        sink = fp_out
        if sink is None:
            sink = getattr(sys.stdout, 'buffer', sys.stdout)

        filter_dump(source, sink)
    finally:
        # Only close what was opened here, never the standard streams
        for handle in (fp, fp_out):
            if handle is not None:
                handle.close()


if __name__ == '__main__':
    main()