#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0-or-later """ Example: ./source/tools/utils/code_clean.py /src/cmake_debug --match ".*/editmesh_.*" --fix=use_const_vars Note: currently this is limited to paths in "source/" and "intern/", we could change this if it's needed. """ import argparse import re import subprocess import sys import os import string from typing import ( Any, Dict, Generator, List, Optional, Sequence, Set, Tuple, Type, ) # List of (source_file, all_arguments) ProcessedCommands = List[Tuple[str, str]] USE_MULTIPROCESS = True VERBOSE = False # Print the output of the compiler (_very_ noisy, only useful for troubleshooting compiler issues). VERBOSE_COMPILER = False # Print the result of each attempted edit: # # - Causes code not to compile. # - Compiles but changes the resulting behavior. # - Succeeds. VERBOSE_EDIT_ACTION = False BASE_DIR = os.path.abspath(os.path.dirname(__file__)) SOURCE_DIR = os.path.normpath(os.path.join(BASE_DIR, "..", "..", "..")) # ----------------------------------------------------------------------------- # Generic Constants # Sorted numeric types. # Intentionally missing are "unsigned". BUILT_IN_NUMERIC_TYPES = ( "bool", "char", "char32_t", "double", "float", "int", "int16_t", "int32_t", "int64_t", "int8_t", "intptr_t", "long", "off_t", "ptrdiff_t", "short", "size_t", "ssize_t", "uchar", "uint", "uint16_t", "uint32_t", "uint64_t", "uint8_t", "uintptr_t", "ulong", "ushort", ) IDENTIFIER_CHARS = set(string.ascii_letters + "_" + string.digits) # ----------------------------------------------------------------------------- # General Utilities # Note that we could use a hash, however there is no advantage, compare it's contents. def file_as_bytes(filename: str) -> bytes: with open(filename, 'rb') as fh: return fh.read() def line_from_span(text: str, start: int, end: int) -> str: while start > 0 and text[start - 1] != '\n': start -= 1 while end < len(text) and text[end] != '\n': end += 1 return text[start:end] def files_recursive_with_ext(path: str, ext: Tuple[str, ...]) -> Generator[str, None, None]: for dirpath, dirnames, filenames in os.walk(path): # skip '.git' and other dot-files. dirnames[:] = [d for d in dirnames if not d.startswith(".")] for filename in filenames: if filename.endswith(ext): yield os.path.join(dirpath, filename) def text_matching_bracket_forward( data: str, pos_beg: int, pos_limit: int, beg_bracket: str, end_bracket: str, ) -> int: """ Return the matching bracket or -1. .. note:: This is not sophisticated, brackets in strings will confuse the function. """ level = 1 # The next bracket. pos = pos_beg + 1 # Clamp the limit. limit = min(pos_beg + pos_limit, len(data)) while pos < limit: c = data[pos] if c == beg_bracket: level += 1 elif c == end_bracket: level -= 1 if level == 0: return pos pos += 1 return -1 def text_matching_bracket_backward( data: str, pos_end: int, pos_limit: int, beg_bracket: str, end_bracket: str, ) -> int: """ Return the matching bracket or -1. .. note:: This is not sophisticated, brackets in strings will confuse the function. """ level = 1 # The next bracket. pos = pos_end - 1 # Clamp the limit. limit = max(0, pos_limit) while pos >= limit: c = data[pos] if c == end_bracket: level += 1 elif c == beg_bracket: level -= 1 if level == 0: return pos pos -= 1 return -1 # ----------------------------------------------------------------------------- # Execution Wrappers def run(args: Sequence[str], *, cwd: Optional[str], quiet: bool) -> int: if VERBOSE_COMPILER and not quiet: out = sys.stdout.fileno() else: out = subprocess.DEVNULL p = subprocess.Popen(args, stdout=out, stderr=out, cwd=cwd) p.wait() return p.returncode # ----------------------------------------------------------------------------- # Build System Access def cmake_cache_var(cmake_dir: str, var: str) -> Optional[str]: with open(os.path.join(cmake_dir, "CMakeCache.txt"), encoding='utf-8') as cache_file: lines = [ l_strip for l in cache_file if (l_strip := l.strip()) if not l_strip.startswith(("//", "#")) ] for l in lines: if l.split(":")[0] == var: return l.split("=", 1)[-1] return None def cmake_cache_var_is_true(cmake_var: Optional[str]) -> bool: if cmake_var is None: return False cmake_var = cmake_var.upper() if cmake_var in {"ON", "YES", "TRUE", "Y"}: return True if cmake_var.isdigit() and cmake_var != "0": return True return False RE_CFILE_SEARCH = re.compile(r"\s\-c\s([\S]+)") def process_commands(cmake_dir: str, data: Sequence[str]) -> Optional[ProcessedCommands]: compiler_c = cmake_cache_var(cmake_dir, "CMAKE_C_COMPILER") compiler_cxx = cmake_cache_var(cmake_dir, "CMAKE_CXX_COMPILER") if compiler_c is None: sys.stderr.write("Can't find C compiler in %r\n" % cmake_dir) return None if compiler_cxx is None: sys.stderr.write("Can't find C++ compiler in %r\n" % cmake_dir) return None # Check for unsupported configurations. for arg in ("WITH_UNITY_BUILD", "WITH_COMPILER_CCACHE"): if cmake_cache_var_is_true(cmake_cache_var(cmake_dir, arg)): sys.stderr.write("The option '%s' must be disabled for proper functionality\n" % arg) return None file_args = [] for l in data: if ( (compiler_c in l) or (compiler_cxx in l) ): # Extract: # -c SOME_FILE c_file_search = re.search(RE_CFILE_SEARCH, l) if c_file_search is not None: c_file = c_file_search.group(1) file_args.append((c_file, l)) else: # could print, NO C FILE FOUND? pass file_args.sort() return file_args def find_build_args_ninja(build_dir: str) -> Optional[ProcessedCommands]: import time cmake_dir = build_dir make_exe = "ninja" process = subprocess.Popen( [make_exe, "-t", "commands"], stdout=subprocess.PIPE, cwd=build_dir, ) while process.poll(): time.sleep(1) assert process.stdout is not None out = process.stdout.read() process.stdout.close() # print("done!", len(out), "bytes") data = out.decode("utf-8", errors="ignore").split("\n") return process_commands(cmake_dir, data) def find_build_args_make(build_dir: str) -> Optional[ProcessedCommands]: import time make_exe = "make" process = subprocess.Popen( [make_exe, "--always-make", "--dry-run", "--keep-going", "VERBOSE=1"], stdout=subprocess.PIPE, cwd=build_dir, ) while process.poll(): time.sleep(1) assert process.stdout is not None out = process.stdout.read() process.stdout.close() # print("done!", len(out), "bytes") data = out.decode("utf-8", errors="ignore").split("\n") return process_commands(build_dir, data) # ----------------------------------------------------------------------------- # Create Edit Lists # Create an edit list from a file, in the format: # # [((start_index, end_index), text_to_replace), ...] # # Note that edits should not overlap, in the _very_ rare case overlapping edits are needed, # this could be run multiple times on the same code-base. # # Although this seems like it's not a common use-case. from collections import namedtuple Edit = namedtuple( "Edit", ( # Keep first, for sorting. "span", "content", "content_fail", # Optional. "extra_build_args", ), defaults=( # `extra_build_args`. None, ) ) del namedtuple class EditGenerator: __slots__ = () def __new__(cls, *args: Tuple[Any], **kwargs: Dict[str, Any]) -> Any: raise RuntimeError("%s should not be instantiated" % cls) @staticmethod def edit_list_from_file(_source: str, _data: str, _shared_edit_data: Any) -> List[Edit]: raise RuntimeError("This function must be overridden by it's subclass!") return [] @staticmethod def setup() -> Any: return None @staticmethod def teardown(_shared_edit_data: Any) -> None: pass class edit_generators: # fake module. class sizeof_fixed_array(EditGenerator): """ Use fixed size array syntax with `sizeof`: Replace: sizeof(float) * 4 * 4 With: sizeof(float[4][4]) """ @staticmethod def edit_list_from_file(_source: str, data: str, _shared_edit_data: Any) -> List[Edit]: edits = [] for match in re.finditer(r"sizeof\(([a-zA-Z_]+)\) \* (\d+) \* (\d+)", data): edits.append(Edit( span=match.span(), content='sizeof(%s[%s][%s])' % (match.group(1), match.group(2), match.group(3)), content_fail='__ALWAYS_FAIL__', )) for match in re.finditer(r"sizeof\(([a-zA-Z_]+)\) \* (\d+)", data): edits.append(Edit( span=match.span(), content='sizeof(%s[%s])' % (match.group(1), match.group(2)), content_fail='__ALWAYS_FAIL__', )) for match in re.finditer(r"\b(\d+) \* sizeof\(([a-zA-Z_]+)\)", data): edits.append(Edit( span=match.span(), content='sizeof(%s[%s])' % (match.group(2), match.group(1)), content_fail='__ALWAYS_FAIL__', )) return edits class use_const(EditGenerator): """ Use const variables: Replace: float abc[3] = {0, 1, 2}; With: const float abc[3] = {0, 1, 2}; Replace: float abc[3] With: const float abc[3] As well as casts. Replace: (float *) With: (const float *) Replace: (float (*)) With: (const float (*)) """ @staticmethod def edit_list_from_file(_source: str, data: str, _shared_edit_data: Any) -> List[Edit]: edits = [] # `float abc[3] = {0, 1, 2};` -> `const float abc[3] = {0, 1, 2};` for match in re.finditer(r"(\(|, | )([a-zA-Z_0-9]+ [a-zA-Z_0-9]+\[)\b([^\n]+ = )", data): edits.append(Edit( span=match.span(), content='%s const %s%s' % (match.group(1), match.group(2), match.group(3)), content_fail='__ALWAYS_FAIL__', )) # `float abc[3]` -> `const float abc[3]` for match in re.finditer(r"(\(|, )([a-zA-Z_0-9]+ [a-zA-Z_0-9]+\[)", data): edits.append(Edit( span=match.span(), content='%s const %s' % (match.group(1), match.group(2)), content_fail='__ALWAYS_FAIL__', )) # `(float *)` -> `(const float *)` # `(float (*))` -> `(const float (*))` # `(float (*)[4])` -> `(const float (*)[4])` for match in re.finditer( r"(\()" r"([a-zA-Z_0-9]+\s*)" r"(\*+\)|\(\*+\))" r"(|\[[a-zA-Z_0-9]+\])", data, ): edits.append(Edit( span=match.span(), content='%sconst %s%s%s' % (match.group(1), match.group(2), match.group(3), match.group(4)), content_fail='__ALWAYS_FAIL__', )) return edits class use_zero_before_float_suffix(EditGenerator): """ Use zero before the float suffix. Replace: 1.f With: 1.0f Replace: 1.0F With: 1.0f """ @staticmethod def edit_list_from_file(_source: str, data: str, _shared_edit_data: Any) -> List[Edit]: edits = [] # `1.f` -> `1.0f` for match in re.finditer(r"\b(\d+)\.([fF])\b", data): edits.append(Edit( span=match.span(), content='%s.0%s' % (match.group(1), match.group(2)), content_fail='__ALWAYS_FAIL__', )) # `1.0F` -> `1.0f` for match in re.finditer(r"\b(\d+\.\d+)F\b", data): edits.append(Edit( span=match.span(), content='%sf' % (match.group(1),), content_fail='__ALWAYS_FAIL__', )) return edits class use_brief_types(EditGenerator): """ Use less verbose unsigned types. Replace: unsigned int With: uint """ @staticmethod def edit_list_from_file(_source: str, data: str, _shared_edit_data: Any) -> List[Edit]: edits = [] # `unsigned char` -> `uchar`. for match in re.finditer(r"(unsigned)\s+([a-z]+)", data): edits.append(Edit( span=match.span(), content='u%s' % match.group(2), content_fail='__ALWAYS_FAIL__', )) # There may be some remaining uses of `unsigned` without any integer type afterwards. # `unsigned` -> `uint`. for match in re.finditer(r"\bunsigned\b", data): edits.append(Edit( span=match.span(), content='uint', content_fail='__ALWAYS_FAIL__', )) return edits class use_nullptr(EditGenerator): """ Use ``nullptr`` instead of ``NULL`` for C++ code. Replace: NULL With: nullptr """ @staticmethod def edit_list_from_file(source: str, data: str, _shared_edit_data: Any) -> List[Edit]: edits = [] # The user might exclude C++, if they forget, it is better not to operate on C. if not source.lower().endswith((".h", ".c")): return edits # `NULL` -> `nullptr`. for match in re.finditer(r"\bNULL\b", data): edits.append(Edit( span=match.span(), content='nullptr', content_fail='__ALWAYS_FAIL__', )) # There may be some remaining uses of `unsigned` without any integer type afterwards. # `unsigned` -> `uint`. for match in re.finditer(r"\bunsigned\b", data): edits.append(Edit( span=match.span(), content='uint', content_fail='__ALWAYS_FAIL__', )) return edits class unused_arg_as_comment(EditGenerator): """ Replace `UNUSED(argument)` in C++ code. Replace: void function(int UNUSED(arg)) {...} With: void function(int /*arg*/) {...} """ @staticmethod def edit_list_from_file(source: str, data: str, _shared_edit_data: Any) -> List[Edit]: edits: List[Edit] = [] # The user might exclude C++, if they forget, it is better not to operate on C. if not source.lower().endswith((".h", ".c")): return edits # `UNUSED(arg)` -> `/*arg*/`. for match in re.finditer( r"\b(UNUSED)" # # Opening parenthesis. r"\(" # Capture the identifier as group 1. r"([" + "".join(list(IDENTIFIER_CHARS)) + "]+)" # # Capture any non-identifier characters as group 2. # (e.g. `[3]`) which need to be added outside the comment. r"([^\)]*)" # Closing parenthesis of `UNUSED(..)`. r"\)", data, ): edits.append(Edit( span=match.span(), content='/*%s*/%s' % (match.group(2), match.group(3)), content_fail='__ALWAYS_FAIL__(%s%s)' % (match.group(2), match.group(3)), )) return edits class use_elem_macro(EditGenerator): """ Use the `ELEM` macro for more abbreviated expressions. Replace: (a == b || a == c) (a != b && a != c) With: (ELEM(a, b, c)) (!ELEM(a, b, c)) """ @staticmethod def edit_list_from_file(_source: str, data: str, _shared_edit_data: Any) -> List[Edit]: edits = [] for use_brackets in (True, False): test_equal = ( r'([^\|\(\)]+)' # group 1 (no (|)) r'\s+==\s+' r'([^\|\(\)]+)' # group 2 (no (|)) ) test_not_equal = ( r'([^\|\(\)]+)' # group 1 (no (|)) r'\s+!=\s+' r'([^\|\(\)]+)' # group 2 (no (|)) ) if use_brackets: test_equal = r'\(' + test_equal + r'\)' test_not_equal = r'\(' + test_not_equal + r'\)' for is_equal in (True, False): for n in reversed(range(2, 64)): if is_equal: re_str = r'\(' + r'\s+\|\|\s+'.join([test_equal] * n) + r'\)' else: re_str = r'\(' + r'\s+\&\&\s+'.join([test_not_equal] * n) + r'\)' for match in re.finditer(re_str, data): var = match.group(1) var_rest = [] groups = match.groups() groups_paired = [(groups[i * 2], groups[i * 2 + 1]) for i in range(len(groups) // 2)] found = True for a, b in groups_paired: # Unlikely but possible the checks are swapped. if b == var and a != var: a, b = b, a if a != var: found = False break var_rest.append(b) if found: edits.append(Edit( span=match.span(), content='(%sELEM(%s, %s))' % ( ('' if is_equal else '!'), var, ', '.join(var_rest), ), # Use same expression otherwise this can change values # inside assert when it shouldn't. content_fail='(%s__ALWAYS_FAIL__(%s, %s))' % ( ('' if is_equal else '!'), var, ', '.join(var_rest), ), )) return edits class use_str_elem_macro(EditGenerator): """ Use `STR_ELEM` macro: Replace: (STREQ(a, b) || STREQ(a, c)) With: (STR_ELEM(a, b, c)) """ @staticmethod def edit_list_from_file(_source: str, data: str, _shared_edit_data: Any) -> List[Edit]: edits = [] for use_brackets in (True, False): test_equal = ( r'STREQ' r'\(' r'([^\|\(\),]+)' # group 1 (no (|,)) r',\s+' r'([^\|\(\),]+)' # group 2 (no (|,)) r'\)' ) test_not_equal = ( '!' # Only difference. r'STREQ' r'\(' r'([^\|\(\),]+)' # group 1 (no (|,)) r',\s+' r'([^\|\(\),]+)' # group 2 (no (|,)) r'\)' ) if use_brackets: test_equal = r'\(' + test_equal + r'\)' test_not_equal = r'\(' + test_not_equal + r'\)' for is_equal in (True, False): for n in reversed(range(2, 64)): if is_equal: re_str = r'\(' + r'\s+\|\|\s+'.join([test_equal] * n) + r'\)' else: re_str = r'\(' + r'\s+\&\&\s+'.join([test_not_equal] * n) + r'\)' for match in re.finditer(re_str, data): var = match.group(1) var_rest = [] groups = match.groups() groups_paired = [(groups[i * 2], groups[i * 2 + 1]) for i in range(len(groups) // 2)] found = True for a, b in groups_paired: # Unlikely but possible the checks are swapped. if b == var and a != var: a, b = b, a if a != var: found = False break var_rest.append(b) if found: edits.append(Edit( span=match.span(), content='(%sSTR_ELEM(%s, %s))' % ( ('' if is_equal else '!'), var, ', '.join(var_rest), ), # Use same expression otherwise this can change values # inside assert when it shouldn't. content_fail='(%s__ALWAYS_FAIL__(%s, %s))' % ( ('' if is_equal else '!'), var, ', '.join(var_rest), ), )) return edits class use_const_vars(EditGenerator): """ Use `const` where possible: Replace: float abc[3] = {0, 1, 2}; With: const float abc[3] = {0, 1, 2}; """ @staticmethod def edit_list_from_file(_source: str, data: str, _shared_edit_data: Any) -> List[Edit]: edits = [] # for match in re.finditer(r"( [a-zA-Z0-9_]+ [a-zA-Z0-9_]+ = [A-Z][A-Z_0-9_]*;)", data): # edits.append(Edit( # span=match.span(), # content='const %s' % (match.group(1).lstrip()), # content_fail='__ALWAYS_FAIL__', # )) for match in re.finditer(r"( [a-zA-Z0-9_]+ [a-zA-Z0-9_]+ = .*;)", data): edits.append(Edit( span=match.span(), content='const %s' % (match.group(1).lstrip()), content_fail='__ALWAYS_FAIL__', )) return edits class remove_return_parens(EditGenerator): """ Remove redundant parenthesis around return arguments: Replace: return (value); With: return value; """ @staticmethod def edit_list_from_file(_source: str, data: str, _shared_edit_data: Any) -> List[Edit]: edits = [] # Remove `return (NULL);` for match in re.finditer(r"return \(([a-zA-Z_0-9]+)\);", data): edits.append(Edit( span=match.span(), content='return %s;' % (match.group(1)), content_fail='return __ALWAYS_FAIL__;', )) return edits class use_streq_macro(EditGenerator): """ Use `STREQ` macro: Replace: strcmp(a, b) == 0 With: STREQ(a, b) Replace: strcmp(a, b) != 0 With: !STREQ(a, b) """ @staticmethod def edit_list_from_file(_source: str, data: str, _shared_edit_data: Any) -> List[Edit]: edits = [] # `strcmp(a, b) == 0` -> `STREQ(a, b)` for match in re.finditer(r"\bstrcmp\((.*)\) == 0", data): edits.append(Edit( span=match.span(), content='STREQ(%s)' % (match.group(1)), content_fail='__ALWAYS_FAIL__', )) for match in re.finditer(r"!strcmp\((.*)\)", data): edits.append(Edit( span=match.span(), content='STREQ(%s)' % (match.group(1)), content_fail='__ALWAYS_FAIL__', )) # `strcmp(a, b) != 0` -> `!STREQ(a, b)` for match in re.finditer(r"\bstrcmp\((.*)\) != 0", data): edits.append(Edit( span=match.span(), content='!STREQ(%s)' % (match.group(1)), content_fail='__ALWAYS_FAIL__', )) for match in re.finditer(r"\bstrcmp\((.*)\)", data): edits.append(Edit( span=match.span(), content='!STREQ(%s)' % (match.group(1)), content_fail='__ALWAYS_FAIL__', )) return edits class use_array_size_macro(EditGenerator): """ Use macro for an error checked array size: Replace: sizeof(foo) / sizeof(*foo) With: ARRAY_SIZE(foo) """ @staticmethod def edit_list_from_file(_source: str, data: str, _shared_edit_data: Any) -> List[Edit]: edits = [] # Note that this replacement is only valid in some cases, # so only apply with validation that binary output matches. for match in re.finditer(r"\bsizeof\((.*)\) / sizeof\([^\)]+\)", data): edits.append(Edit( span=match.span(), content='ARRAY_SIZE(%s)' % match.group(1), content_fail='__ALWAYS_FAIL__', )) return edits class parenthesis_cleanup(EditGenerator): """ Use macro for an error checked array size: Replace: ((a + b)) With: (a + b) Replace: (func(a + b)) With: func(a + b) Note that the `CFLAGS` should be set so missing parentheses that contain assignments - error instead of warn: With GCC: `-Werror=parentheses` """ @staticmethod def edit_list_from_file(_source: str, data: str, _shared_edit_data: Any) -> List[Edit]: edits = [] # Give up after searching for a bracket this many characters and finding none. bracket_seek_limit = 4000 # Don't match double brackets because this will not match multiple overlapping matches # Where 3 brackets should be checked as two separate pairs. for match in re.finditer(r"(\()", data): outer_beg = match.span()[0] inner_beg = outer_beg + 1 if data[inner_beg] != "(": continue inner_end = text_matching_bracket_forward(data, inner_beg, inner_beg + bracket_seek_limit, "(", ")") if inner_end == -1: continue outer_beg = inner_beg - 1 outer_end = text_matching_bracket_forward(data, outer_beg, inner_end + 1, "(", ")") if outer_end != inner_end + 1: continue text = data[inner_beg:inner_end + 1] edits.append(Edit( span=(outer_beg, outer_end + 1), content=text, content_fail='(__ALWAYS_FAIL__)', )) # Handle `(func(a + b))` -> `func(a + b)` for match in re.finditer(r"(\))", data): inner_end = match.span()[0] outer_end = inner_end + 1 if data[outer_end] != ")": continue inner_beg = text_matching_bracket_backward(data, inner_end, inner_end - bracket_seek_limit, "(", ")") if inner_beg == -1: continue outer_beg = text_matching_bracket_backward(data, outer_end, outer_end - bracket_seek_limit, "(", ")") if outer_beg == -1: continue # The text between the first two opening brackets: # `(function_name(a + b))` -> `function_name`. text = data[outer_beg + 1:inner_beg] # Handled in the first loop looking for forward brackets. if text == "": continue # Don't convert `prefix(func(a + b))` -> `prefixfunc(a + b)` if data[outer_beg - 1] in IDENTIFIER_CHARS: continue # Don't convert `static_cast<float>(foo(bar))` -> `static_cast<float>foo(bar)` # While this will always fail to compile it slows down tests. if data[outer_beg - 1] == ">": continue # Exact rule here is arbitrary, in general though spaces mean there are operations # that can use the brackets. if " " in text: continue # Search back an arbitrary number of chars 8 should be enough # but manual formatting can add additional white-space, so increase # the size to account for that. prefix = data[max(outer_beg - 20, 0):outer_beg].strip() if prefix: # Avoid `if (SOME_MACRO(..)) {..}` -> `if SOME_MACRO(..) {..}` # While correct it relies on parenthesis within the macro which isn't ideal. if prefix.split()[-1] in {"if", "while", "switch"}: continue # Avoid `*(--foo)` -> `*--foo`. # While correct it reads badly. if data[outer_beg - 1] == "*": continue text_no_parens = data[outer_beg + 1: outer_end] edits.append(Edit( span=(outer_beg, outer_end + 1), content=text_no_parens, content_fail='__ALWAYS_FAIL__', )) return edits class header_clean(EditGenerator): """ Clean headers, ensuring that the headers removed are not used directly or indirectly. Note that the `CFLAGS` should be set so missing prototypes error instead of warn: With GCC: `-Werror=missing-prototypes` """ @staticmethod def _header_guard_from_filename(f: str) -> str: return '__%s__' % os.path.basename(f).replace('.', '_').upper() @classmethod def setup(cls) -> Any: # For each file replace `pragma once` with old-style header guard. # This is needed so we can remove the header with the knowledge the source file didn't use it indirectly. files: List[Tuple[str, str, str, str]] = [] shared_edit_data = { 'files': files, } for f in files_recursive_with_ext( os.path.join(SOURCE_DIR, 'source'), ('.h', '.hh', '.inl', '.hpp', '.hxx'), ): with open(f, 'r', encoding='utf-8') as fh: data = fh.read() for match in re.finditer(r'^[ \t]*#\s*(pragma\s+once)\b', data, flags=re.MULTILINE): header_guard = cls._header_guard_from_filename(f) start, end = match.span() src = data[start:end] dst = ( '#ifndef %s\n#define %s' % (header_guard, header_guard) ) dst_footer = '\n#endif /* %s */\n' % header_guard files.append((f, src, dst, dst_footer)) data = data[:start] + dst + data[end:] + dst_footer with open(f, 'w', encoding='utf-8') as fh: fh.write(data) break return shared_edit_data @staticmethod def teardown(shared_edit_data: Any) -> None: files = shared_edit_data['files'] for f, src, dst, dst_footer in files: with open(f, 'r', encoding='utf-8') as fh: data = fh.read() data = data.replace( dst, src, ).replace( dst_footer, '', ) with open(f, 'w', encoding='utf-8') as fh: fh.write(data) @classmethod def edit_list_from_file(cls, _source: str, data: str, _shared_edit_data: Any) -> List[Edit]: edits = [] # Remove include. for match in re.finditer(r"^(([ \t]*#\s*include\s+\")([^\"]+)(\"[^\n]*\n))", data, flags=re.MULTILINE): header_name = match.group(3) header_guard = cls._header_guard_from_filename(header_name) edits.append(Edit( span=match.span(), content='', # Remove the header. content_fail='%s__ALWAYS_FAIL__%s' % (match.group(2), match.group(4)), extra_build_args=('-D' + header_guard, ), )) return edits class use_function_style_cast(EditGenerator): """ Use function call style casts (C++ only). Replace: (float)(a + b) With: float(a + b) Also support more complex cases involving right hand bracket insertion. Replace: (float)foo(a + b) With: float(foo(a + b)) """ @staticmethod def edit_list_from_file(_source: str, data: str, _shared_edit_data: Any) -> List[Edit]: any_number_re = "(" + "|".join(BUILT_IN_NUMERIC_TYPES) + ")" edits = [] # Handle both: # - Simple case: `(float)(a + b)` -> `float(a + b)`. # - Complex Case: `(float)foo(a + b) + c` -> `float(foo(a + b)) + c` for match in re.finditer( "(\\()" + # 1st group. any_number_re + # 2nd group. "(\\))", # 3rd group. data, ): beg, end = match.span() # This could be ignored, but `sizeof` accounts for such a large number # of cases that should be left as-is, that it's best to explicitly ignore them. if ( (beg > 6) and (data[beg - 6: beg] == 'sizeof') and (not data[beg - 7].isalpha()) ): continue char_after = data[end] if char_after == "(": # Simple case. edits.append(Edit( span=(beg, end), content=match.group(2), content_fail='__ALWAYS_FAIL__', )) else: # The complex case is involved as brackets need to be added. # Currently this is not handled in a clever way, just try add in brackets # and rely on matching build output to know if they were added in the right place. text = match.group(2) span = (beg, end) for offset_end in range(end + 1, len(data)): # Not technically correct, but it's rare that this will span lines. if "\n" == data[offset_end]: break if ( (data[offset_end - 1] in IDENTIFIER_CHARS) and (data[offset_end] in IDENTIFIER_CHARS) ): continue # Include `text_tail` in fail content in case it contains comments. text_tail = "(" + data[end:offset_end] + ")" edits.append(Edit( span=(beg, offset_end), content=text + text_tail, content_fail='(__ALWAYS_FAIL__)' + text_tail, )) # Simple case: `static_cast<float>(a + b)` => `float(a + b)`. for match in re.finditer( r"\b(static_cast<)" + # 1st group. any_number_re + # 2nd group. "(>)", # 3rd group. data, ): edits.append(Edit( span=match.span(), content='%s' % match.group(2), content_fail='__ALWAYS_FAIL__', )) return edits def test_edit( source: str, output: str, output_bytes: Optional[bytes], build_args: Sequence[str], build_cwd: Optional[str], data: str, data_test: str, keep_edits: bool = True, expect_failure: bool = False, ) -> bool: """ Return true if `data_test` has the same object output as `data`. """ if os.path.exists(output): os.remove(output) with open(source, 'w', encoding='utf-8') as fh: fh.write(data_test) ret = run(build_args, cwd=build_cwd, quiet=expect_failure) if ret == 0: output_bytes_test = file_as_bytes(output) if (output_bytes is None) or (file_as_bytes(output) == output_bytes): if not keep_edits: with open(source, 'w', encoding='utf-8') as fh: fh.write(data) return True else: if VERBOSE_EDIT_ACTION: print("Changed code, skip...", hex(hash(output_bytes)), hex(hash(output_bytes_test))) else: if not expect_failure: if VERBOSE_EDIT_ACTION: print("Failed to compile, skip...") with open(source, 'w', encoding='utf-8') as fh: fh.write(data) return False # ----------------------------------------------------------------------------- # List Fix Functions def edit_function_get_all() -> List[str]: fixes = [] for name in dir(edit_generators): value = getattr(edit_generators, name) if type(value) is type and issubclass(value, EditGenerator): fixes.append(name) fixes.sort() return fixes def edit_class_from_id(name: str) -> Type[EditGenerator]: result = getattr(edit_generators, name) assert issubclass(result, EditGenerator) # MYPY 0.812 doesn't recognize the assert above. return result # type: ignore # ----------------------------------------------------------------------------- # Accept / Reject Edits def apply_edit(data: str, text_to_replace: str, start: int, end: int, *, verbose: bool) -> str: if verbose: line_before = line_from_span(data, start, end) data = data[:start] + text_to_replace + data[end:] if verbose: end += len(text_to_replace) - (end - start) line_after = line_from_span(data, start, end) print("") print("Testing edit:") print(line_before) print(line_after) return data def wash_source_with_edits( source: str, output: str, build_args: Sequence[str], build_cwd: Optional[str], edit_to_apply: str, skip_test: bool, shared_edit_data: Any, ) -> None: # build_args = build_args + " -Werror=duplicate-decl-specifier" with open(source, 'r', encoding='utf-8') as fh: data = fh.read() edit_generator_class = edit_class_from_id(edit_to_apply) # After performing all edits, store the result in this set. # # This is a heavy solution that guarantees edits never oscillate between # multiple states, so re-visiting a previously visited state will always exit. data_states: Set[str] = set() # When overlapping edits are found, keep attempting edits. edit_again = True while edit_again: edit_again = False edits = edit_generator_class.edit_list_from_file(source, data, shared_edit_data) # Sort by span, in a way that tries shorter spans first # This is more efficient when testing multiple overlapping edits, # since when a smaller edit succeeds, it's less likely to have to try as many edits that span wider ranges. # (This applies to `use_function_style_cast`). edits.sort(reverse=True, key=lambda edit: (edit.span[0], -edit.span[1])) if not edits: return if skip_test: # Just apply all edits. for (start, end), text, _text_always_fail, _extra_build_args in edits: data = apply_edit(data, text, start, end, verbose=VERBOSE) with open(source, 'w', encoding='utf-8') as fh: fh.write(data) return test_edit( source, output, None, build_args, build_cwd, data, data, keep_edits=False, ) if not os.path.exists(output): # raise Exception("Failed to produce output file: " + output) # NOTE(@campbellbarton): This fails very occasionally and needs to be investigated why. # For now skip, as it's disruptive to force-quit in the middle of all other changes. print("Failed to produce output file, skipping:", repr(output)) return output_bytes = file_as_bytes(output) # Dummy value that won't cause problems. edit_prev_start = len(data) + 1 for (start, end), text, text_always_fail, extra_build_args in edits: if end >= edit_prev_start: # Run the edits again, in case this would have succeeded, # but was skipped due to edit-overlap. edit_again = True continue build_args_for_edit = build_args if extra_build_args: # Add directly after the compile command. build_args_for_edit = build_args[:1] + extra_build_args + build_args[1:] data_test = apply_edit(data, text, start, end, verbose=VERBOSE) if test_edit( source, output, output_bytes, build_args_for_edit, build_cwd, data, data_test, keep_edits=False, ): # This worked, check if the change would fail if replaced with 'text_always_fail'. data_test_always_fail = apply_edit(data, text_always_fail, start, end, verbose=False) if test_edit( source, output, output_bytes, build_args_for_edit, build_cwd, data, data_test_always_fail, expect_failure=True, keep_edits=False, ): if VERBOSE_EDIT_ACTION: print("Edit at", (start, end), "doesn't fail, assumed to be ifdef'd out, continuing") continue # Apply the edit. data = data_test with open(source, 'w', encoding='utf-8') as fh: fh.write(data) # Update the last successful edit, the end of the next edit must not overlap this one. edit_prev_start = start # Finished applying `edits`, check if further edits should be applied. if edit_again: data_states_len = len(data_states) data_states.add(data) if data_states_len == len(data_states): # Avoid the *extremely* unlikely case that edits re-visit previously visited states. edit_again = False else: # It is interesting to know how many passes run when debugging. # print("Passes for: ", source, len(data_states)) pass # ----------------------------------------------------------------------------- # Edit Source Code From Args def run_edits_on_directory( build_dir: str, regex_list: List[re.Pattern[str]], edit_to_apply: str, skip_test: bool = False, ) -> int: # currently only supports ninja or makefiles build_file_ninja = os.path.join(build_dir, "build.ninja") build_file_make = os.path.join(build_dir, "Makefile") if os.path.exists(build_file_ninja): print("Using Ninja") args = find_build_args_ninja(build_dir) elif os.path.exists(build_file_make): print("Using Make") args = find_build_args_make(build_dir) else: sys.stderr.write( "Can't find Ninja or Makefile (%r or %r), aborting" % (build_file_ninja, build_file_make) ) return 1 if args is None: # Error will have been reported. return 1 # needed for when arguments are referenced relatively os.chdir(build_dir) # Weak, but we probably don't want to handle extern. # this limit could be removed. source_paths = ( os.path.join("intern", "ghost"), os.path.join("intern", "guardedalloc"), os.path.join("source"), ) def split_build_args_with_cwd(build_args_str: str) -> Tuple[Sequence[str], Optional[str]]: import shlex build_args = shlex.split(build_args_str) cwd = None if len(build_args) > 3: if build_args[0] == "cd" and build_args[2] == "&&": cwd = build_args[1] del build_args[0:3] return build_args, cwd def output_from_build_args(build_args: Sequence[str], cwd: Optional[str]) -> str: i = build_args.index("-o") # Assume the output is a relative path is a CWD was set. if cwd: return os.path.join(cwd, build_args[i + 1]) return build_args[i + 1] def test_path(c: str) -> bool: # Skip any generated source files (files in the build directory). if os.path.abspath(c).startswith(build_dir): return False # Raise an exception since this should never happen, # we want to know about it early if it does, as it will cause failure # when attempting to compile the missing file. if not os.path.exists(c): raise Exception("Missing source file: " + c) for source_path in source_paths: index = c.rfind(source_path) # print(c) if index != -1: # Remove first part of the path, we don't want to match # against paths in Blender's repo. # print(source_path) c_strip = c[index:] for regex in regex_list: if regex.match(c_strip) is not None: return True return False # Filter out build args. args_orig_len = len(args) args_with_cwd = [ (c, *split_build_args_with_cwd(build_args_str)) for (c, build_args_str) in args if test_path(c) ] del args print("Operating on %d of %d files..." % (len(args_with_cwd), args_orig_len)) for (c, build_args, build_cwd) in args_with_cwd: print(" ", c) del args_orig_len edit_generator_class = edit_class_from_id(edit_to_apply) shared_edit_data = edit_generator_class.setup() try: if USE_MULTIPROCESS: args_expanded = [( c, output_from_build_args(build_args, build_cwd), build_args, build_cwd, edit_to_apply, skip_test, shared_edit_data, ) for (c, build_args, build_cwd) in args_with_cwd] import multiprocessing job_total = multiprocessing.cpu_count() pool = multiprocessing.Pool(processes=job_total * 2) pool.starmap(wash_source_with_edits, args_expanded) del args_expanded else: # now we have commands for c, build_args, build_cwd in args_with_cwd: wash_source_with_edits( c, output_from_build_args(build_args, build_cwd), build_args, build_cwd, edit_to_apply, skip_test, shared_edit_data, ) except Exception as ex: raise ex finally: edit_generator_class.teardown(shared_edit_data) print("\n" "Exit without errors") return 0 def create_parser() -> argparse.ArgumentParser: from textwrap import indent, dedent edits_all = edit_function_get_all() # Create docstring for edits. edits_all_docs = [] for edit in edits_all: edits_all_docs.append( " %s\n%s" % ( edit, indent(dedent(getattr(edit_generators, edit).__doc__ or '').strip('\n') + '\n', ' '), ) ) parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawTextHelpFormatter, ) parser.add_argument( "build_dir", help="list of files or directories to check", ) parser.add_argument( "--match", nargs='+', required=True, metavar="REGEX", help="Match file paths against this expression", ) parser.add_argument( "--edit", dest="edit", choices=edits_all, help="Specify the edit preset to run.\n\n" + "\n".join(edits_all_docs) + "\n", required=True, ) parser.add_argument( "--skip-test", dest="skip_test", default=False, action='store_true', help=( "Perform all edits without testing if they perform functional changes. " "Use to quickly preview edits, or to perform edits which are manually checked (default=False)" ), required=False, ) return parser def main() -> int: parser = create_parser() args = parser.parse_args() build_dir = args.build_dir regex_list = [] for expr in args.match: try: regex_list.append(re.compile(expr)) except Exception as ex: print(f"Error in expression: {expr}\n {ex}") return 1 return run_edits_on_directory(build_dir, regex_list, args.edit, args.skip_test) if __name__ == "__main__": sys.exit(main())