1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
|
import os
from pathlib import Path
from .stats import Statistics
# Directory names that find_all_files() refuses to descend into.
# Candidates are lower-cased before the membership test, so every entry
# here has to be written in lower case to ever match.
IGNORED_FOLDERS = [
    'venv',
    'build', 'dist',
    'generated', 'generated-src',
]
def find_all_files(folder: Path, verbose: bool = False):
    """Collect every non-hidden file found beneath ``folder``.

    The tree is traversed with ``os.walk``. Directories whose name starts
    with a dot, or whose lower-cased name is listed in ``IGNORED_FOLDERS``,
    are pruned so the walk never enters them. Files whose name starts with
    a dot are left out of the result.

    Args:
        folder: Root directory to scan.
        verbose: When true, print one ``Scanning <path>`` line per kept file
            and one ``Skipping <path>`` line per dot-file.

    Returns:
        A list of ``Path`` objects, one per kept file, each formed by joining
        the directory reported by ``os.walk`` with the file name.
    """
    found = []
    for dirpath, dirnames, names in os.walk(str(folder)):
        # Decide which sub-directories survive. The dot check comes first,
        # exactly as in the combined condition it replaces.
        survivors = []
        for dirname in dirnames:
            if dirname.startswith('.'):
                continue
            if dirname.lower() in IGNORED_FOLDERS:
                continue
            survivors.append(dirname)
        # Slice-assign so the very list object os.walk handed us is edited;
        # a top-down walk only descends into what is left in it.
        dirnames[:] = survivors

        base = Path(dirpath)
        for name in names:
            candidate = base / name
            hidden = name.startswith('.')
            if not hidden:
                found.append(candidate)
            if verbose:
                print('Skipping' if hidden else 'Scanning', candidate)
    return found
def find_stats_for_file(filename: Path, name: str, stats: "Statistics"):
    """Classify the leading whitespace of every line of one file.

    The file is read as text and split on ``'\\n'``. For each line, the run
    of spaces and tabs at its start is inspected and exactly one of the
    following happens:

    * both spaces and tabs present -> ``stats.add_mixed_line(extension=...,
      filename=name)``
    * only spaces present          -> ``stats.add_spaces(extension=...)``
    * only tabs present            -> ``stats.add_tabs(extension=...)``
    * no leading whitespace        -> nothing is recorded

    Lines consisting solely of whitespace are classified like any other.

    Args:
        filename: Path of the file to read.
        name: Value forwarded unchanged as ``filename=`` to
            ``stats.add_mixed_line``.
        stats: Collector receiving the calls listed above.

    Returns:
        None. Files that cannot be opened or decoded are skipped silently
        and record nothing.
    """
    # Text after the last dot of the file name; for a name without any dot
    # this is the whole name (e.g. "Makefile").
    extension = filename.name.split('.')[-1]

    # Only the read is guarded: an unreadable or undecodable file is an
    # expected condition, whereas an error raised by the collector is not
    # and should not be swallowed.
    # NOTE: read_text() is called without an explicit encoding, so decoding
    # follows the interpreter's default text encoding.
    try:
        text = filename.read_text()
    except (UnicodeDecodeError, OSError):
        return

    for line in text.split('\n'):
        # Grab the whole leading run of spaces/tabs in one step instead of
        # peeling the line one character at a time (which re-copies the
        # line for every leading character).
        indent = line[:len(line) - len(line.lstrip(' \t'))]
        has_spaces = ' ' in indent
        has_tabs = '\t' in indent
        if has_spaces and has_tabs:
            stats.add_mixed_line(extension=extension,
                                 filename=name)
        elif has_spaces:
            stats.add_spaces(extension=extension)
        elif has_tabs:
            stats.add_tabs(extension=extension)
def find_stats(folder: Path, verbose: bool = False) -> Statistics:
    """Gather indentation statistics for every file beneath ``folder``.

    The folder is resolved first, then scanned with ``find_all_files``; each
    path that comes back is fed to ``find_stats_for_file`` together with a
    single shared ``Statistics`` instance.

    Args:
        folder: Root directory to analyse.
        verbose: Forwarded to ``find_all_files``.

    Returns:
        The ``Statistics`` instance that every file was recorded into.
    """
    root = folder.resolve()
    paths = find_all_files(root, verbose=verbose)
    totals = Statistics()
    for path in paths:
        # The same path serves as the file to read (joined onto the root)
        # and as the name reported for mixed-indentation lines.
        # NOTE(review): find_all_files appears to return paths already
        # rooted at `root`, in which case the join leaves them unchanged
        # - confirm before simplifying.
        find_stats_for_file(root / path, path, totals)
    return totals
|