aboutsummaryrefslogtreecommitdiff
path: root/lib/find_stats.py
blob: effb5b765e3d3f306b66394371a03785b763042f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os
from pathlib import Path

from .stats import Statistics

# Directory names that the file scan never descends into; find_all_files
# compares them against the lower-cased name of each sub-directory.
IGNORED_FOLDERS = [
    'venv',
]


def find_all_files(folder: Path, verbose: bool = False):
    """Collect every non-hidden file beneath ``folder``.

    Sub-directories whose name starts with a dot, or whose lower-cased name
    is listed in ``IGNORED_FOLDERS``, are pruned and never descended into.
    Files whose name starts with a dot are left out of the result.

    Args:
        folder: Directory to walk.
        verbose: When true, print a ``Scanning`` or ``Skipping`` line for
            every file encountered.

    Returns:
        A list with one ``Path`` per kept file, in ``os.walk`` order.
    """
    found = []
    for root, dirnames, names in os.walk(str(folder)):
        # os.walk only honours pruning when the list it handed out is
        # modified in place, so the filtered names are written back
        # through a slice assignment.
        kept = []
        for dirname in dirnames:
            if dirname.startswith('.'):
                continue
            if dirname.lower() in IGNORED_FOLDERS:
                continue
            kept.append(dirname)
        dirnames[:] = kept

        base = Path(root)
        for name in names:
            candidate = base / name
            hidden = name.startswith('.')
            if not hidden:
                found.append(candidate)
            if verbose:
                print('Skipping' if hidden else 'Scanning', candidate)
    return found


def find_stats_for_file(filename: Path, name: str, stats: Statistics):
    """Record the indentation style of every line of one file in ``stats``.

    Each line is classified by its leading run of spaces and tabs: a run
    containing both is reported through ``stats.add_mixed_line``, a run of
    only spaces through ``stats.add_spaces`` and a run of only tabs through
    ``stats.add_tabs``. Lines without leading whitespace are not reported.

    Files that cannot be read, or that are not valid UTF-8, are skipped
    without reporting anything.

    Args:
        filename: Path of the file to read.
        name: Label handed to ``stats.add_mixed_line`` as ``filename``.
        stats: Collector that receives the per-line results.
    """
    # Text after the last dot; a name without any dot is used whole.
    extension = filename.name.split('.')[-1]

    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # locale's preferred encoding of the machine running the scan.
        text = filename.read_text(encoding='utf-8')
    except (UnicodeDecodeError, OSError):
        # Best effort: binary or unreadable files contribute nothing.
        return

    for line in text.split('\n'):
        # Slice off the leading indentation in one step instead of
        # re-slicing the line once per whitespace character.
        indent = line[:len(line) - len(line.lstrip(' \t'))]
        has_spaces = ' ' in indent
        has_tabs = '\t' in indent
        if has_spaces and has_tabs:
            stats.add_mixed_line(extension=extension,
                                 filename=name)
        elif has_spaces:
            stats.add_spaces(extension=extension)
        elif has_tabs:
            stats.add_tabs(extension=extension)


def find_stats(folder: Path, verbose: bool = False) -> Statistics:
    """Gather indentation statistics for all files found under ``folder``.

    The folder is resolved first, its files are listed with
    ``find_all_files`` and each one is fed to ``find_stats_for_file``, all
    of them sharing a single ``Statistics`` instance.

    Args:
        folder: Directory to analyse.
        verbose: Forwarded to ``find_all_files``.

    Returns:
        The ``Statistics`` instance that every file was recorded into.
    """
    root = folder.resolve()
    paths = find_all_files(root, verbose=verbose)
    collected = Statistics()
    for entry in paths:
        # NOTE(review): the entries appear to be rooted at ``root`` already,
        # which would make this join a no-op and the label a full path
        # object rather than a string - confirm before changing.
        find_stats_for_file(root / entry, entry, collected)
    return collected