diff options
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | lib/argparse/pathtype.py | 63 | ||||
-rw-r--r-- | lib/find_stats.py | 59 | ||||
-rw-r--r-- | lib/print_stats.py | 17 | ||||
-rw-r--r-- | lib/stats.py | 26 | ||||
-rwxr-xr-x | main.py | 37 |
6 files changed, 204 insertions, 0 deletions
# Reconstructed from a flattened diff that added the files listed below.
# The definitions are kept in one module-shaped body, so the cross-file
# ``from lib... import ...`` statements of the original files are not needed:
# every name they imported is defined in this same body.
#
# .gitignore (new file) ignores two entries:
#     report
#     venv/

import argparse
import os
from argparse import ArgumentTypeError as err
from collections import defaultdict
from pathlib import Path
from typing import List, Set


# ---------------------------------------------------------------------------
# lib/argparse/pathtype.py
# ---------------------------------------------------------------------------
# As usual: copied from Stack Overflow
# https://stackoverflow.com/a/33181083

class PathType:
    """Callable for argparse's ``type=`` that validates a path argument."""

    def __init__(self, exists=True, type='file', dash_ok=True):
        """Configure the validator.

        exists:
            True: a path that does exist
            False: a path that does not exist, in a valid parent directory
            None: don't care
        type: 'file', 'dir', 'symlink', None, or a function returning True
            for valid paths
            None: don't care
        dash_ok: whether to allow "-" as stdin/stdout
        """
        assert exists in (True, False, None)
        assert type in ('file', 'dir', 'symlink', None) or callable(type)

        self._exists = exists
        self._type = type
        self._dash_ok = dash_ok

    def __call__(self, string):
        """Validate ``string`` and return it unchanged.

        Raises ``argparse.ArgumentTypeError`` when the path violates the
        configured constraints.
        """
        if string == '-':
            # the special argument "-" means sys.std{in,out}
            if self._type == 'dir':
                raise err('standard input/output (-) not allowed as directory path')
            if self._type == 'symlink':
                raise err('standard input/output (-) not allowed as symlink path')
            if not self._dash_ok:
                raise err('standard input/output (-) not allowed')
            return string

        path_exists = os.path.exists(string)
        if self._exists:
            if not path_exists:
                raise err("path does not exist: '%s'" % string)
            self._check_type(string)
            return string

        # Only an explicit exists=False forbids an existing path; None means
        # "don't care" and must not reject it.
        if self._exists is False and path_exists:
            raise err("path exists: '%s'" % string)

        parent = os.path.dirname(os.path.normpath(string)) or '.'
        # Existence is tested first: a missing parent is never a directory,
        # so the reverse order would make this message unreachable.
        if not os.path.exists(parent):
            raise err("parent directory does not exist: '%s'" % parent)
        if not os.path.isdir(parent):
            raise err("parent path is not a directory: '%s'" % parent)
        return string

    def _check_type(self, string):
        """Raise ArgumentTypeError unless the existing path has the wanted type."""
        if self._type is None:
            return
        if self._type == 'file':
            if not os.path.isfile(string):
                raise err("path is not a file: '%s'" % string)
        elif self._type == 'symlink':
            # os.path.islink is the predicate; os.path.symlink does not exist.
            if not os.path.islink(string):
                raise err("path is not a symlink: '%s'" % string)
        elif self._type == 'dir':
            if not os.path.isdir(string):
                raise err("path is not a directory: '%s'" % string)
        elif not self._type(string):
            raise err("path not valid: '%s'" % string)


# ---------------------------------------------------------------------------
# lib/stats.py
# (placed before its users so the annotations below resolve)
# ---------------------------------------------------------------------------

class Statistics:
    """Counters for lines indented with spaces, tabs, or a mix of both."""

    def __init__(self):
        # Per-extension line counts.
        self.space_dict = defaultdict(int)
        self.tab_dict = defaultdict(int)
        self.mixed_line_dict = defaultdict(int)
        # Names of files containing at least one mixed-indentation line.
        self.mixed_files: Set[str] = set()
        # Totals over all extensions.
        self.all_tabs = 0
        self.all_spaces = 0
        self.all_mixed = 0

    def add_spaces(self, extension='', count=1):
        """Record ``count`` space-indented lines for ``extension``."""
        self.space_dict[extension] += count
        self.all_spaces += count

    def add_tabs(self, extension='', count=1):
        """Record ``count`` tab-indented lines for ``extension``."""
        self.tab_dict[extension] += count
        self.all_tabs += count

    def add_mixed_line(self, extension='', count=1, filename=''):
        """Record ``count`` mixed-indentation lines and remember ``filename``."""
        self.mixed_line_dict[extension] += count
        self.mixed_files.add(filename)
        self.all_mixed += count


# ---------------------------------------------------------------------------
# lib/find_stats.py
# ---------------------------------------------------------------------------

IGNORED_FOLDERS = [
    'venv',
]


def find_all_files(folder: Path, verbose: bool = False) -> List[Path]:
    """Return every non-hidden file below ``folder``.

    Directories whose name starts with '.' or whose lower-cased name is in
    IGNORED_FOLDERS are not descended into. With ``verbose`` each file is
    reported as 'Scanning' or 'Skipping'.
    """
    found = []
    for subpath, subfolders, filenames in os.walk(str(folder)):
        # In-place slice assignment prunes the directories os.walk visits next.
        subfolders[:] = [
            name for name in subfolders
            if not name.startswith('.') and name.lower() not in IGNORED_FOLDERS
        ]
        current_path = Path(subpath)
        for file in filenames:
            file_path = current_path / file
            if file.startswith('.'):
                if verbose:
                    print('Skipping', file_path)
                continue
            if verbose:
                print('Scanning', file_path)
            found.append(file_path)
    return found


def find_stats_for_file(filename: Path, name: str, stats: Statistics):
    """Classify the indentation of each line of ``filename`` into ``stats``.

    Lines without leading whitespace are not counted. Files that cannot be
    read or decoded as text are skipped silently (best effort).
    """
    # Text after the last dot; a name without a dot is used as-is.
    extension = filename.name.split('.')[-1]
    try:
        text = filename.read_text()
    except (UnicodeDecodeError, OSError):
        return

    for line in text.split('\n'):
        # Leading run of spaces/tabs, computed once instead of per character.
        indent = line[:len(line) - len(line.lstrip(' \t'))]
        has_spaces = ' ' in indent
        has_tabs = '\t' in indent
        if has_spaces and has_tabs:
            stats.add_mixed_line(extension=extension, filename=name)
        elif has_spaces:
            stats.add_spaces(extension=extension)
        elif has_tabs:
            stats.add_tabs(extension=extension)


def find_stats(folder: Path, verbose: bool = False) -> Statistics:
    """Collect indentation statistics for all files below ``folder``."""
    folder = folder.resolve()
    stats = Statistics()
    for file in find_all_files(folder, verbose=verbose):
        # Paths from find_all_files already start with the resolved folder.
        # The name is passed as str to match Statistics.mixed_files (Set[str]).
        find_stats_for_file(file, str(file), stats)
    return stats


# ---------------------------------------------------------------------------
# lib/print_stats.py
# ---------------------------------------------------------------------------

def print_stats(stats: Statistics, by_extension: bool):
    """Print the totals, optionally a per-extension breakdown, then the files
    that contain mixed-indentation lines."""
    print('spaces:', stats.all_spaces)
    print('tabs:', stats.all_tabs)
    print('mixed:', stats.all_mixed)
    if by_extension:
        extensions = set(stats.space_dict) | set(stats.tab_dict) | set(stats.mixed_line_dict)
        # Sorted for stable output. .get() avoids inserting missing keys into
        # the defaultdicts as a side effect of printing.
        for ext in sorted(extensions):
            print(ext + ':')
            print(' ', 'spaces:', stats.space_dict.get(ext, 0))
            print(' ', 'tabs:', stats.tab_dict.get(ext, 0))
            print(' ', 'mixed:', stats.mixed_line_dict.get(ext, 0))
    if stats.all_mixed > 0:
        print('files_with_mixed_lines:')
        for file in sorted(stats.mixed_files, key=str):
            print(' -', file)


# ---------------------------------------------------------------------------
# main.py  (original shebang: #!/usr/bin/env python3.6)
# ---------------------------------------------------------------------------

def main():
    """Command-line entry point: parse arguments, scan the folder, print stats."""
    parser = argparse.ArgumentParser(
        prog='tabsvsspaces',
        description='Shows statistics about the usage of tabs and spaces in a given folder'
    )
    parser.add_argument('folder',
                        type=PathType(type='dir', exists=True))
    parser.add_argument('--by-extension', '-e',
                        dest='extension',
                        action='store_true',
                        help='show distribution by file extension')
    parser.add_argument('--verbose', '-v',
                        dest='verbose',
                        action='store_true',
                        help='show debug information')
    ns = parser.parse_args()
    folder: str = ns.folder
    extension: bool = ns.extension
    verbose: bool = ns.verbose
    stats: Statistics = find_stats(Path(folder), verbose=verbose)
    print_stats(stats, extension)


if __name__ == '__main__':
    main()