blob: a734bcdd56899a5ef004ac0cbe9e749fc80f594d [file] [edit]
#!/usr/bin/env python3
#
#===- clang-format-diff.py - ClangFormat Diff Reformatter ----*- python -*--===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#
"""
This script reads input from a unified diff and reformats all the changed
lines. This is useful to reformat all the lines touched by a specific patch.
Example usage for git/svn users:
git diff -U0 --no-color HEAD^ | clang-format-diff.py -p1 -i
svn diff --diff-cmd=diff -x-U0 | clang-format-diff.py -i
"""
import argparse
import difflib
import re
import subprocess
import sys
if sys.version_info.major >= 3:
from io import StringIO
else:
from io import BytesIO as StringIO
# Optional whitespace on either side of a '*' or '&'. Substituting r'\1' for
# each match removes that whitespace, so two lines that differ only in
# pointer/reference spacing normalize to the same string.
_POINTER_SPACING_RE = re.compile(r'\s*([*&])\s*')


def _collect_changed_lines(diff_lines, strip_prefix, regex, iregex):
  """Extract the changed line ranges for each selected file of a unified diff.

  Args:
    diff_lines: iterable of text lines making up the unified diff.
    strip_prefix: number of leading slash-terminated path components to drop
      from the path on each '+++' header line.
    regex: case-sensitive pattern a path must match entirely to be selected,
      or None to fall back to `iregex`.
    iregex: case-insensitive pattern used when `regex` is None.

  Returns:
    A dict mapping each selected file name to a flat list of clang-format
    arguments of the form ['-lines', 'START:END', '-lines', ...].
  """
  header_re = re.compile(r'^\+\+\+\ (.*?/){%s}(\S*)' % strip_prefix)
  hunk_re = re.compile(r'^@@.*\+(\d+)(,(\d+))?')
  # The pattern is wrapped in a non-capturing group so that the anchors apply
  # to the whole pattern. Without the group, 'foo|bar' would expand to
  # '^foo|bar$' and select every path that merely starts with 'foo'.
  if regex is not None:
    path_re = re.compile('^(?:%s)$' % regex)
  else:
    path_re = re.compile('^(?:%s)$' % iregex, re.IGNORECASE)

  filename = None
  lines_by_file = {}
  for line in diff_lines:
    match = header_re.search(line)
    if match:
      filename = match.group(2)
    if filename is None:
      continue
    if not path_re.match(filename):
      continue

    match = hunk_re.search(line)
    if not match:
      continue
    start_line = int(match.group(1))
    # A hunk header without an explicit count covers exactly one line.
    line_count = int(match.group(3)) if match.group(3) else 1
    if line_count == 0:
      # Nothing was added by this hunk, so there is nothing to reformat.
      continue
    end_line = start_line + line_count - 1
    lines_by_file.setdefault(filename, []).extend(
        ['-lines', str(start_line) + ':' + str(end_line)])
  return lines_by_file


def _drop_pointer_spacing_changes(code, formatted_code):
  """Merge clang-format output with the original, ignoring '*'/'&' spacing.

  Between clang-format 18 and 20+, lines such as

    const char* x;
    const char *x;

  started being reported as formatting errors and failing CI. Those changes
  are treated as a clang-format regression, not as real formatting
  problems, so they are filtered out here while every other change made by
  clang-format is kept.

  Args:
    code: list of lines of the file as it currently is.
    formatted_code: list of lines produced by clang-format.

  Returns:
    A list of lines equal to `formatted_code`, except that a line replaced
    1:1 keeps its original text when the only difference is whitespace
    directly adjacent to an asterisk (*) or ampersand (&).
  """
  adjusted = []
  matcher = difflib.SequenceMatcher(None, code, formatted_code)
  for tag, i1, i2, j1, j2 in matcher.get_opcodes():
    if tag == 'equal':
      adjusted.extend(code[i1:i2])
    elif tag == 'replace' and (i2 - i1) == (j2 - j1):
      # Same number of lines on both sides: compare them pairwise.
      for original, formatted in zip(code[i1:i2], formatted_code[j1:j2]):
        norm_orig = _POINTER_SPACING_RE.sub(r'\1', original)
        norm_fmt = _POINTER_SPACING_RE.sub(r'\1', formatted)
        adjusted.append(original if norm_orig == norm_fmt else formatted)
    else:
      # Non 1:1 replacement (e.g. line wrapping or breaking), insertion or
      # deletion: accept clang-format's output as is.
      adjusted.extend(formatted_code[j1:j2])
  return adjusted


def main():
  """Reformat the lines touched by the unified diff read from stdin.

  Runs clang-format on the changed line ranges of every selected file,
  discards changes that only move whitespace around '*' or '&', and then
  either rewrites the file (-i) or prints a unified diff of the remaining
  changes. Exits with clang-format's return code if it fails on a file.
  """
  parser = argparse.ArgumentParser(description=__doc__,
                                   formatter_class=
                                   argparse.RawDescriptionHelpFormatter)
  parser.add_argument('-i', action='store_true', default=False,
                      help='apply edits to files instead of displaying a diff')
  # The value ends up inside a regex quantifier, so insist on an integer.
  parser.add_argument('-p', metavar='NUM', default=0, type=int,
                      help='strip the smallest prefix containing P slashes')
  parser.add_argument('-regex', metavar='PATTERN', default=None,
                      help='custom pattern selecting file paths to reformat '
                      '(case sensitive, overrides -iregex)')
  parser.add_argument('-iregex', metavar='PATTERN', default=
                      r'.*\.(cpp|cc|c\+\+|cxx|c|cl|h|hh|hpp|m|mm|inc|js|ts|proto'
                      r'|protodevel|java|cs)',
                      help='custom pattern selecting file paths to reformat '
                      '(case insensitive, overridden by -regex)')
  parser.add_argument('-sort-includes', action='store_true', default=False,
                      help='let clang-format sort include blocks')
  parser.add_argument('-v', '--verbose', action='store_true',
                      help='be more verbose, ineffective without -i')
  parser.add_argument('-style',
                      help='formatting style to apply (LLVM, Google, Chromium, '
                      'Mozilla, WebKit)')
  parser.add_argument('-binary', default='clang-format',
                      help='location of binary to use for clang-format')
  args = parser.parse_args()

  # Extract changed lines for each file.
  lines_by_file = _collect_changed_lines(sys.stdin, args.p, args.regex,
                                         args.iregex)

  # Reformat files containing changes.
  for filename, lines in lines_by_file.items():
    if args.i and args.verbose:
      print('Formatting {}'.format(filename))
    command = [args.binary, filename]
    # '-i' is deliberately not passed to clang-format: its stdout is captured
    # so that whitespace-only pointer/reference changes can be filtered out
    # before anything is written.
    if args.sort_includes:
      command.append('-sort-includes')
    command.extend(lines)
    if args.style:
      command.extend(['-style', args.style])
    p = subprocess.Popen(command,
                         stdout=subprocess.PIPE,
                         stderr=None,
                         stdin=subprocess.PIPE,
                         universal_newlines=True)
    stdout, _ = p.communicate()
    if p.returncode != 0:
      sys.exit(p.returncode)

    # NOTE(review): both the file and clang-format's output are handled in
    # text mode, which normalizes line endings on Python 3; a CRLF file that
    # gets rewritten below may come back with the platform's line separator.
    # Confirm this is acceptable.
    with open(filename) as f:
      code = f.readlines()
    formatted_code = StringIO(stdout).readlines()
    adjusted_formatted_code = _drop_pointer_spacing_changes(code,
                                                            formatted_code)

    if args.i:
      # In-place mode: write back ONLY if there are actual changes left.
      if code != adjusted_formatted_code:
        with open(filename, 'w') as f:
          f.writelines(adjusted_formatted_code)
    else:
      # Otherwise, print the unified diff based on the adjusted code.
      diff = difflib.unified_diff(code, adjusted_formatted_code,
                                  filename, filename,
                                  '(before formatting)', '(after formatting)')
      diff_string = ''.join(diff)
      if diff_string:
        sys.stdout.write(diff_string)
# Script entry point: run the reformatter only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
  main()