Commit 4186fe8f authored by Campbell Barton

Initial code-clean utility

Perform automated edits to source files, where each edit is validated
to produce identical binary output.

Currently this has only been tested to work with GCC on Linux.
parent ea2a2fa5
#!/usr/bin/env python3
# ##### BEGIN GPL LICENSE BLOCK #####
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# ##### END GPL LICENSE BLOCK #####
# <pep8-80 compliant>
"""
Example:
./source/tools/utils/code_clean.py /src/cmake_debug --match ".*/editmesh_.*"
Note: currently this is limited to paths in "source/" and "intern/",
we could change this if it's needed.
"""
import re
import subprocess
import sys
import os
USE_MULTIPROCESS = False
VERBOSE = True
# Print the output of the compiler (_very_ noisy, only useful for troubleshooting compiler issues).
VERBOSE_COMPILER = False
# -----------------------------------------------------------------------------
# General Utilities
# Note: we could use a hash here, however there is no real advantage,
# so simply compare the file's contents.
def file_as_bytes(filename):
with open(filename, 'rb') as fh:
return fh.read()
def line_from_span(text, start, end):
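    """
    Return the full line(s) of `text` which contain the span `start:end`.
    """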
while start > 0 and text[start - 1] != '\n':
start -= 1
while end < len(text) and text[end] != '\n':
end += 1
return text[start:end]
# -----------------------------------------------------------------------------
# Execution Wrappers
def run(args, *, quiet):
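    """
    Run the command `args` (a single shell-style string), returning its exit code.
    Output is discarded unless `VERBOSE_COMPILER` is enabled and `quiet` is False.
    """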
if VERBOSE_COMPILER and not quiet:
out = sys.stdout
else:
out = subprocess.DEVNULL
import shlex
p = subprocess.Popen(shlex.split(args), stdout=out, stderr=out)
p.wait()
return p.returncode
# -----------------------------------------------------------------------------
# Build System Access
def cmake_cache_var(cmake_dir, var):
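    """
    Return the value of the cache variable `var` from the `CMakeCache.txt`
    in `cmake_dir`, or None when it's not found.
    """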
    with open(os.path.join(cmake_dir, "CMakeCache.txt"), encoding='utf-8') as cache_file:
        lines = [
            l_strip for l in cache_file for l_strip in (l.strip(),)
            if l_strip and not l_strip.startswith(("//", "#"))
        ]
for l in lines:
if l.split(":")[0] == var:
return l.split("=", 1)[-1]
return None
RE_CFILE_SEARCH = re.compile(r"\s\-c\s([\S]+)")
def process_commands(cmake_dir, data):
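    """
    Return a sorted list of `(source_file, build_command)` pairs for every
    C/C++ compiler invocation found in `data` (the build system's command listing).
    """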
compiler_c = cmake_cache_var(cmake_dir, "CMAKE_C_COMPILER")
compiler_cxx = cmake_cache_var(cmake_dir, "CMAKE_CXX_COMPILER")
file_args = []
for l in data:
if (
(compiler_c in l) or
(compiler_cxx in l)
):
# Extract:
# -c SOME_FILE
c_file_search = re.search(RE_CFILE_SEARCH, l)
if c_file_search is not None:
c_file = c_file_search.group(1)
file_args.append((c_file, l))
            else:
                # Could warn here: no "-c FILE" argument was found in this command.
                pass
file_args.sort()
return file_args
def find_build_args_ninja(build_dir):
cmake_dir = build_dir
make_exe = "ninja"
process = subprocess.Popen(
[make_exe, "-t", "commands"],
stdout=subprocess.PIPE,
cwd=build_dir,
)
    # Wait for the process to finish & read all of its output.
    out, _ = process.communicate()
# print("done!", len(out), "bytes")
data = out.decode("utf-8", errors="ignore").split("\n")
return process_commands(cmake_dir, data)
def find_build_args_make(build_dir):
    cmake_dir = build_dir
    make_exe = "make"
process = subprocess.Popen(
[make_exe, "--always-make", "--dry-run", "--keep-going", "VERBOSE=1"],
stdout=subprocess.PIPE,
cwd=build_dir,
)
    # Wait for the process to finish & read all of its output.
    out, _ = process.communicate()
# print("done!", len(out), "bytes")
data = out.decode("utf-8", errors="ignore").split("\n")
return process_commands(cmake_dir, data)
# -----------------------------------------------------------------------------
# Create Edit Lists
# Create an edit list from a file, in the format:
#
#    [((start_index, end_index), text_to_replace, text_always_fail), ...]
#
# Where `text_always_fail` is a replacement which must fail to compile,
# used to detect edits in code the pre-processor has excluded.
#
# Note that edits should not overlap. In the _very_ rare case overlapping edits
# are needed, this utility can simply be run multiple times on the same code-base.
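#
# A minimal hypothetical example (indices made up for illustration):
# an edit replacing the text "sizeof(float) * 3" at characters 100..117
# with "sizeof(float[3])" would be expressed as:
#
#    [((100, 117), 'sizeof(float[3])', '__ALWAYS_FAIL__')]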
def edit_list_from_file__sizeof_fixed_array(_source, data):
edits = []
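    # Replace:
    #   sizeof(float) * 4 * 4
    # With:
    #   sizeof(float[4][4])
    # (the loops below also handle the single-dimension variants).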
for match in re.finditer(r"sizeof\(([a-zA-Z_]+)\) \* (\d+) \* (\d+)", data):
edits.append((
match.span(),
'sizeof(%s[%s][%s])' % (match.group(1), match.group(2), match.group(3)),
'__ALWAYS_FAIL__',
))
for match in re.finditer(r"sizeof\(([a-zA-Z_]+)\) \* (\d+)", data):
edits.append((
match.span(),
'sizeof(%s[%s])' % (match.group(1), match.group(2)),
'__ALWAYS_FAIL__',
))
for match in re.finditer(r"\b(\d+) \* sizeof\(([a-zA-Z_]+)\)", data):
edits.append((
match.span(),
'sizeof(%s[%s])' % (match.group(2), match.group(1)),
'__ALWAYS_FAIL__',
))
return edits
def edit_list_from_file__use_const(_source, data):
edits = []
# Replace:
# float abc[3] = {0, 1, 2};
# With:
# const float abc[3] = {0, 1, 2};
for match in re.finditer(r"(\(|, | )([a-zA-Z_0-9]+ [a-zA-Z_0-9]+\[)\b([^\n]+ = )", data):
edits.append((
match.span(),
'%s const %s%s' % (match.group(1), match.group(2), match.group(3)),
'__ALWAYS_FAIL__',
))
# Replace:
# float abc[3]
# With:
# const float abc[3]
for match in re.finditer(r"(\(|, )([a-zA-Z_0-9]+ [a-zA-Z_0-9]+\[)", data):
edits.append((
match.span(),
'%s const %s' % (match.group(1), match.group(2)),
'__ALWAYS_FAIL__',
))
return edits
def edit_list_from_file__use_const_vars(_source, data):
edits = []
    # Replace:
    #   float abc = 1.0f;
    # With:
    #   const float abc = 1.0f;
# for match in re.finditer(r"( [a-zA-Z0-9_]+ [a-zA-Z0-9_]+ = [A-Z][A-Z_0-9_]*;)", data):
# edits.append((
# match.span(),
# 'const %s' % (match.group(1).lstrip()),
# '__ALWAYS_FAIL__',
# ))
for match in re.finditer(r"( [a-zA-Z0-9_]+ [a-zA-Z0-9_]+ = .*;)", data):
edits.append((
match.span(),
'const %s' % (match.group(1).lstrip()),
'__ALWAYS_FAIL__',
))
return edits
def edit_list_from_file__return_parens(_source, data):
edits = []
    # Replace:
    #   return (value);
    # With:
    #   return value;
for match in re.finditer(r"return \(([a-zA-Z_0-9]+)\);", data):
edits.append((
match.span(),
'return %s;' % (match.group(1)),
'return __ALWAYS_FAIL__;',
))
return edits
def edit_list_from_file__use_streq_macro(_source, data):
edits = []
# Replace:
# strcmp(a, b) == 0
# With:
# STREQ(a, b)
for match in re.finditer(r"\bstrcmp\((.*)\) == 0", data):
edits.append((
match.span(),
'STREQ(%s)' % (match.group(1)),
'__ALWAYS_FAIL__',
))
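    # Replace:
    #   !strcmp(a, b)
    # With:
    #   STREQ(a, b)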
for match in re.finditer(r"!strcmp\((.*)\)", data):
edits.append((
match.span(),
'STREQ(%s)' % (match.group(1)),
'__ALWAYS_FAIL__',
))
# Replace:
# strcmp(a, b) != 0
# With:
# !STREQ(a, b)
for match in re.finditer(r"\bstrcmp\((.*)\) != 0", data):
edits.append((
match.span(),
'!STREQ(%s)' % (match.group(1)),
'__ALWAYS_FAIL__',
))
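    # Replace:
    #   strcmp(a, b)
    # With:
    #   !STREQ(a, b)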
for match in re.finditer(r"\bstrcmp\((.*)\)", data):
edits.append((
match.span(),
'!STREQ(%s)' % (match.group(1)),
'__ALWAYS_FAIL__',
))
return edits
def edit_list_from_file__use_array_size_macro(_source, data):
edits = []
# Replace:
# sizeof(foo) / sizeof(*foo)
# With:
# ARRAY_SIZE(foo)
#
# Note that this replacement is only valid in some cases,
# so only apply with validation that binary output matches.
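    # For example, the pattern below also matches expressions such as
    # `sizeof(values) / sizeof(int)`, where the divisor is a type rather
    # than `*values` and so isn't generally equal to ARRAY_SIZE(values).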
for match in re.finditer(r"\bsizeof\((.*)\) / sizeof\([^\)]+\)", data):
edits.append((
match.span(),
'ARRAY_SIZE(%s)' % match.group(1),
'__ALWAYS_FAIL__',
))
return edits
def test_edit(
        source, output, output_bytes, build_args, data, data_test,
        keep_edits=True, expect_failure=False,
):
    """
    Return True if `data_test` produces the same object output as `data`,
    compiling `source` with `build_args` and comparing the result against `output_bytes`.
    """
if os.path.exists(output):
os.remove(output)
with open(source, 'w', encoding='utf-8') as fh:
fh.write(data_test)
ret = run(build_args, quiet=expect_failure)
if ret == 0:
output_bytes_test = file_as_bytes(output)
        if (output_bytes is None) or (output_bytes_test == output_bytes):
if not keep_edits:
with open(source, 'w', encoding='utf-8') as fh:
fh.write(data)
return True
else:
print("Changed code, skip...", hex(hash(output_bytes)), hex(hash(output_bytes_test)))
else:
if not expect_failure:
print("Failed to compile, skip...")
with open(source, 'w', encoding='utf-8') as fh:
fh.write(data)
return False
# -----------------------------------------------------------------------------
# Accept / Reject Edits
def apply_edit(data, text_to_replace, start, end, *, verbose):
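    """
    Return `data` with the range `start:end` replaced by `text_to_replace`,
    printing the affected line before & after the edit when `verbose` is set.
    """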
if verbose:
line_before = line_from_span(data, start, end)
data = data[:start] + text_to_replace + data[end:]
if verbose:
end += len(text_to_replace) - (end - start)
line_after = line_from_span(data, start, end)
print("")
print("Testing edit:")
print(line_before)
print(line_after)
return data
def wash_source_with_edits(arg_group):
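    """
    Apply the edits for a single source file, keeping only the edits which
    leave the compiled object file byte-for-byte identical
    (unless `skip_test` is set, in which case all edits are applied unconditionally).
    """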
(source, output, build_args, skip_test) = arg_group
# build_args = build_args + " -Werror=duplicate-decl-specifier"
with open(source, 'r', encoding='utf-8') as fh:
data = fh.read()
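    # Select the edit pass to run by calling one of the
    # `edit_list_from_file__*` functions here (a single pass per run).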
edits = edit_list_from_file__use_const_vars(source, data)
edits.sort(reverse=True)
if not edits:
return
if skip_test:
# Just apply all edits.
for (start, end), text, text_always_fail in edits:
data = apply_edit(data, text, start, end, verbose=VERBOSE)
with open(source, 'w', encoding='utf-8') as fh:
fh.write(data)
return
test_edit(
source, output, None, build_args, data, data,
keep_edits=False,
)
if not os.path.exists(output):
raise Exception("Failed to produce output file: " + output)
output_bytes = file_as_bytes(output)
for (start, end), text, text_always_fail in edits:
data_test = apply_edit(data, text, start, end, verbose=VERBOSE)
if test_edit(
source, output, output_bytes, build_args, data, data_test,
keep_edits=False,
):
# This worked, check if the change would fail if replaced with 'text_always_fail'.
data_test_always_fail = apply_edit(data, text_always_fail, start, end, verbose=False)
if test_edit(
source, output, output_bytes, build_args, data, data_test_always_fail,
expect_failure=True, keep_edits=False,
):
print("Edit at", (start, end), "doesn't fail, assumed to be ifdef'd out, continuing")
continue
# Apply the edit.
data = data_test
with open(source, 'w', encoding='utf-8') as fh:
fh.write(data)
# -----------------------------------------------------------------------------
# Edit Source Code From Args
def header_clean_all(build_dir, regex_list, skip_test=False):
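    """
    Look up the build command for every source file compiled in `build_dir`,
    then run the edit pass over each file whose path matches `regex_list`.
    """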
    # Currently only Ninja and Makefile based builds are supported.
build_file_ninja = os.path.join(build_dir, "build.ninja")
build_file_make = os.path.join(build_dir, "Makefile")
if os.path.exists(build_file_ninja):
print("Using Ninja")
args = find_build_args_ninja(build_dir)
elif os.path.exists(build_file_make):
print("Using Make")
args = find_build_args_make(build_dir)
else:
sys.stderr.write(
"Can't find Ninja or Makefile (%r or %r), aborting" %
(build_file_ninja, build_file_make)
)
return
    # Needed when build arguments reference paths relative to the build directory.
    os.chdir(build_dir)
    # Weak, but we probably don't want to handle "extern/"
    # (this limit could be removed).
source_paths = (
os.path.join("intern", "ghost"),
os.path.join("intern", "guardedalloc"),
os.path.join("source"),
)
def output_from_build_args(build_args):
import shlex
build_args = shlex.split(build_args)
i = build_args.index("-o")
return build_args[i + 1]
def test_path(c):
for source_path in source_paths:
index = c.rfind(source_path)
print(c)
if index != -1:
                # Strip the leading portion of the path so the regular
                # expressions only need to match repository relative paths.
print(source_path)
c_strip = c[index:]
for regex in regex_list:
if regex.match(c_strip) is not None:
return True
return False
    # Filter out files that don't match the supplied expressions.
args_orig_len = len(args)
args = [
(c, build_args)
for (c, build_args) in args
if test_path(c)
]
print("Operating on %d of %d files..." % (len(args), args_orig_len))
for (c, build_args) in args:
print(" ", c)
del args_orig_len
if USE_MULTIPROCESS:
args = [
(c, output_from_build_args(build_args), build_args, skip_test)
for (c, build_args) in args
]
import multiprocessing
job_total = multiprocessing.cpu_count()
pool = multiprocessing.Pool(processes=job_total * 2)
pool.map(wash_source_with_edits, args)
else:
        # Now we have the commands, process them one at a time.
for i, (c, build_args) in enumerate(args):
wash_source_with_edits(
(c, output_from_build_args(build_args), build_args, skip_test)
)
print("\n" "Exit without errors")
def create_parser():
import argparse
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.RawTextHelpFormatter,
)
parser.add_argument(
"build_dir",
help="list of files or directories to check",
)
parser.add_argument(
"--match",
nargs='+',
required=True,
metavar="REGEX",
help="Match file paths against this expression",
)
parser.add_argument(
"--skip-test",
dest="skip_test",
default=False,
action='store_true',
help=(
"Perform all edits without testing if they perform functional changes. "
"Use to quickly preview edits, or to perform edits which are manually checked (default=False)"
),
required=False,
)
return parser
def main():
parser = create_parser()
args = parser.parse_args()
build_dir = args.build_dir
regex_list = []
    for expr in args.match:
try:
regex_list.append(re.compile(expr))
except Exception as ex:
print(f"Error in expression: {expr}\n {ex}")
return 1
return header_clean_all(build_dir, regex_list, args.skip_test)
if __name__ == "__main__":
sys.exit(main())