Commit adb5af0c authored by Campbell Barton

check_spelling: Scan code for terms & skip reporting these

Comments often refer to structs & variable names,
skip these when reporting spelling errors.
parent aa9cc189
@@ -53,6 +53,9 @@ from check_spelling_c_config import (
 )
 
+import re
+
+re_vars = re.compile("[A-Za-z]+")
 
 def words_from_text(text):
     """ Extract words to treat as English for spell checking.
     """
@@ -144,19 +147,25 @@ def extract_py_comments(filepath):
     source = open(filepath, encoding='utf-8')
 
     comments = []
+    code_words = set()
 
     prev_toktype = token.INDENT
 
     tokgen = tokenize.generate_tokens(source.readline)
     for toktype, ttext, (slineno, scol), (elineno, ecol), ltext in tokgen:
-        if toktype == token.STRING and prev_toktype == token.INDENT:
-            comments.append(Comment(filepath, ttext, slineno, 'DOCSTRING'))
+        if toktype == token.STRING:
+            if prev_toktype == token.INDENT:
+                comments.append(Comment(filepath, ttext, slineno, 'DOCSTRING'))
         elif toktype == tokenize.COMMENT:
             # non standard hint for commented CODE that we can ignore
             if not ttext.startswith("#~"):
                 comments.append(Comment(filepath, ttext, slineno, 'COMMENT'))
+        else:
+            for match in re_vars.finditer(ttext):
+                code_words.add(match.group(0))
         prev_toktype = toktype
 
-    return comments
+    return comments, code_words
 
 
 def extract_c_comments(filepath):
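
On the Python side the extractor leans on the tokenize module: a STRING token that directly follows an INDENT is recorded as a docstring, COMMENT tokens stay comments, and the text of every other token is now mined for code words. A small self-contained demonstration of how tokenize classifies these tokens (the sample source below is invented):

import io
import token
import tokenize

# Invented sample source, just to show the token classification.
src = 'def area(width, height):\n    """Compute the area."""\n    return width * height  # multiply the dimenssions\n'

for tok in tokenize.generate_tokens(io.StringIO(src).readline):
    if tok.type == token.STRING:
        print("STRING :", tok.string)
    elif tok.type == tokenize.COMMENT:
        print("COMMENT:", tok.string)
# STRING : """Compute the area."""
# COMMENT: # multiply the dimenssions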
@@ -167,7 +176,6 @@ def extract_c_comments(filepath):
          * This is a multi-line comment, notice the '*'s are aligned.
          */
     """
-    i = 0
     text = open(filepath, encoding='utf-8').read()
 
     BEGIN = "/*"
@@ -210,19 +218,53 @@ def extract_c_comments(filepath):
                     break
             block_split[i] = l
 
-    comments = []
+    comment_ranges = []
 
-    while i >= 0:
+    i = 0
+    while i != -1:
         i = text.find(BEGIN, i)
         if i != -1:
             i_next = text.find(END, i)
             if i_next != -1:
-                # not essential but seek ack to find beginning of line
+                # Not essential but seek back to find beginning of line.
                 while i > 0 and text[i - 1] in {"\t", " "}:
                     i -= 1
+                i_next += len(END)
+                comment_ranges.append((i, i_next))
+            i = i_next
+        else:
+            pass
 
-                block = text[i:i_next + len(END)]
+    # Collect variables from code, so we can reference variables from code blocks
+    # without this generating noise from the spell checker.
+    code_ranges = []
+    if not comment_ranges:
+        code_ranges.append((0, len(text)))
+    else:
+        for index in range(len(comment_ranges) + 1):
+            if index == 0:
+                i_prev = 0
+            else:
+                i_prev = comment_ranges[index - 1][1]
+
+            if index == len(comment_ranges):
+                i_next = len(text)
+            else:
+                i_next = comment_ranges[index][0]
+
+            code_ranges.append((i_prev, i_next))
+
+    code_words = set()
+    for i, i_next in code_ranges:
+        for match in re_vars.finditer(text[i:i_next]):
+            code_words.add(match.group(0))
+
+    comments = []
+    for i, i_next in comment_ranges:
+        block = text[i:i_next]
 
         # add whitespace in front of the block (for alignment test)
         ws = []
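
The C side now works in two passes over character offsets: every /* ... */ block becomes a (start, end) pair in comment_ranges, and code_ranges is simply the gaps before, between and after those pairs, which are then scanned for identifier-like words. A toy rerun of that gap logic, using a regex instead of the commit's find() loop to locate the comments (sample text invented):

import re

re_vars = re.compile("[A-Za-z]+")

# Invented sample; the real input is an entire C source file.
text = "int count; /* the count */ float ratio; /* see count */ void init(void);"

# Stand-in for the find()-based loop: one (start, end) pair per comment block.
comment_ranges = [(m.start(), m.end()) for m in re.finditer(r"/\*.*?\*/", text, re.S)]

# The gaps around the comment ranges are the code ranges.
code_ranges = []
prev_end = 0
for start, end in comment_ranges:
    code_ranges.append((prev_end, start))
    prev_end = end
code_ranges.append((prev_end, len(text)))

code_words = set()
for start, end in code_ranges:
    code_words.update(re_vars.findall(text[start:end]))

print(sorted(code_words))
# -> ['count', 'float', 'init', 'int', 'ratio', 'void']
# 'count' is also used in the comments, so it would no longer be reported.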
@@ -276,19 +318,15 @@ def extract_c_comments(filepath):
             else:
                 print(filepath + ":" + str(lineno) + ":")
 
-            i = i_next
-        else:
-            pass
-
-    return comments
+    return comments, code_words
 
 
 def spell_check_comments(filepath):
 
     if filepath.endswith(".py"):
-        comment_list = extract_py_comments(filepath)
+        comment_list, code_words = extract_py_comments(filepath)
     else:
-        comment_list = extract_c_comments(filepath)
+        comment_list, code_words = extract_c_comments(filepath)
 
     for comment in comment_list:
         for w in comment.parse():
@@ -301,6 +339,12 @@ def spell_check_comments(filepath):
             if not dict_spelling.check(w):
+                # Ignore literals that show up in code,
+                # gets rid of a lot of noise from comments that reference variables.
+                if w in code_words:
+                    # print("Skipping", w)
+                    continue
+
                 if ONLY_ONCE:
                     if w_lower in _only_once_ids:
                         continue
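
With this, the new guard in spell_check_comments sits after the dictionary lookup and before the ONLY_ONCE bookkeeping: a word is only reported if the spelling dictionary rejects it and it does not occur anywhere in the file's code. A compact restatement of that ordering with stand-in inputs (the checker and word lists below are hypothetical):

def unknown_words(words, code_words, dict_check):
    # Mirrors the order of the guards: dictionary first, then the code-word skip.
    for w in words:
        if dict_check(w):
            continue  # correctly spelled
        if w in code_words:
            continue  # the comment is naming something that exists in the code
        yield w       # falls through to the ONLY_ONCE / reporting logic

# Hypothetical inputs: pretend only these are real English words.
known = {"use", "the", "pointer", "when", "freeing"}
comment_words = ["use", "vptr", "when", "freing", "the", "pointer"]
print(list(unknown_words(comment_words, {"vptr"}, lambda w: w.lower() in known)))
# -> ['freing']   ('vptr' is skipped because it also appears in the code)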