diff --git a/check_source/check_spelling.py b/check_source/check_spelling.py
index 728fe97ad93e1b2732d56d4e1dcb0cec1af45d1d..43f276b1871f5893607938d978a5ccf51be1fd7e 100755
--- a/check_source/check_spelling.py
+++ b/check_source/check_spelling.py
@@ -70,15 +70,68 @@ else:
     COLOR_WORD = ""
     COLOR_ENDC = ""
 
-
-import enchant
-dict_spelling = enchant.Dict("en_US")
-
 from check_spelling_c_config import (
     dict_custom,
     dict_ignore,
+    dict_ignore_hyphenated_prefix,
 )
 
+# -----------------------------------------------------------------------------
+# Dictionary Utilities
+
+def dictionary_create():  # type: ignore
+    """Create the ``en_US`` dictionary, extended with the words from ``dict_custom``."""
+    import enchant  # type: ignore
+    dict_spelling = enchant.Dict("en_US")
+
+    # Words from `dict_ignore` are not added, otherwise they would be suggested.
+    for w in dict_custom:
+        # Use `add_to_session`: `add` writes to the user's personal word list.
+        dict_spelling.add_to_session(w)
+    return dict_spelling
+
+
+def dictionary_check(w: str) -> bool:
+    """
+    Return True when ``w`` is ignored or spelled correctly.
+
+    A hyphenated word rejected as a whole is accepted when every
+    hyphen-separated part is ignored or spelled correctly; a leading part
+    listed in ``dict_ignore_hyphenated_prefix`` is not checked.
+    """
+    w_lower = w.lower()
+    if w_lower in dict_ignore:
+        return True
+
+    is_correct: bool = _dict.check(w)
+    # Split by hyphenation and check.
+    if not is_correct:
+        if "-" in w:
+            is_correct = True
+
+            # Allow: `un-word`, `re-word`.
+            w_split = w.strip("-").split("-")
+            if w_split and w_split[0].lower() in dict_ignore_hyphenated_prefix:
+                del w_split[0]
+
+            for w_sub in w_split:
+                if w_sub:
+                    w_sub_lower = w_sub.lower()
+                    if w_sub_lower in dict_ignore:
+                        continue
+                    if not _dict.check(w_sub):
+                        is_correct = False
+                        break
+    return is_correct
+
+
+def dictionary_suggest(w: str) -> List[str]:
+    """Return the dictionary's spelling suggestions for ``w``."""
+    return _dict.suggest(w)  # type: ignore
+
+
+_dict = dictionary_create()  # type: ignore
+
 # -----------------------------------------------------------------------------
 # General Utilities
 
@@ -207,9 +260,9 @@ class Comment:
 
 
 def extract_code_strings(filepath: str) -> Tuple[List[Comment], Set[str]]:
-    import pygments
+    import pygments  # type: ignore
     from pygments import lexers
-    from pygments.token import Token
+    from pygments.token import Token  # type: ignore
 
     comments = []
     code_words = set()
@@ -375,7 +428,7 @@ def spell_check_report(filepath: str, report: Report) -> None:
 
     suggest = _suggest_map.get(w_lower)
     if suggest is None:
-        _suggest_map[w_lower] = suggest = " ".join(dict_spelling.suggest(w))
+        _suggest_map[w_lower] = suggest = " ".join(dictionary_suggest(w))
 
     print("%s:%d:%d: %s%s%s, suggest (%s)" % (
         filepath,
@@ -403,19 +456,10 @@ def spell_check_file(
     for comment in comment_list:
         for w, pos in comment.parse():
             w_lower = w.lower()
-            if w_lower in dict_custom or w_lower in dict_ignore:
+            if w_lower in dict_ignore:
                 continue
 
-            is_good_spelling = dict_spelling.check(w)
-            if not is_good_spelling:
-                if "-" in w:
-                    is_good_spelling = True
-                    for w_sub in w.split("-"):
-                        if w_sub:
-                            if not dict_spelling.check(w_sub):
-                                is_good_spelling = False
-                                break
-
+            is_good_spelling = dictionary_check(w)
             if not is_good_spelling:
                 # Ignore literals that show up in code,
                 # gets rid of a lot of noise from comments that reference variables.
diff --git a/check_source/check_spelling_c_config.py b/check_source/check_spelling_c_config.py
index ad626ea11fb12d8eeb1cabc901e52eec71ac9e72..4adbc7898775332076067c072a48f1f9a69982a4 100644
--- a/check_source/check_spelling_c_config.py
+++ b/check_source/check_spelling_c_config.py
@@ -111,7 +111,7 @@ dict_custom = {
     "parameterization",
     "parentless",
     "passepartout",
-    "pixelated", "pixelisation",
+    "pixelate", "pixelated", "pixelisation",
     "planarity",
     "polytope",
     "postprocessed",
@@ -422,3 +422,15 @@ dict_ignore = {
     "bugprone-suspicious-enum-usage",
     "bugprone-use-after-move",
 }
+
+# Prefixes that may be hyphenated onto a word (`un-word`, `re-word`, ...),
+# for these only the remaining `word` is spell checked.
+dict_ignore_hyphenated_prefix = {
+    "de",
+    "mis",
+    "non",
+    "post",
+    "pre",
+    "re",
+    "un",
+}