aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore2
-rw-r--r--lib/argparse/pathtype.py63
-rw-r--r--lib/find_stats.py59
-rw-r--r--lib/print_stats.py17
-rw-r--r--lib/stats.py26
-rwxr-xr-xmain.py37
6 files changed, 204 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e265113
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+report
+venv/
diff --git a/lib/argparse/pathtype.py b/lib/argparse/pathtype.py
new file mode 100644
index 0000000..39128cc
--- /dev/null
+++ b/lib/argparse/pathtype.py
@@ -0,0 +1,63 @@
+# As usual: copied from Stack Overflow
+# https://stackoverflow.com/a/33181083
+
+from argparse import ArgumentTypeError as err
+import os
+
+
class PathType(object):
    """Callable for argparse's ``type=`` that validates a path argument.

    An instance is called with the raw command-line string and either
    returns that string unchanged or raises ``ArgumentTypeError`` (bound to
    ``err`` in this module) describing why the path was rejected.
    """

    def __init__(self, exists=True, type='file', dash_ok=True):
        """Configure the validation rules.

        exists:
            True: the path must exist
            False: the path must not exist, but its parent directory must
            None: don't care (only the parent directory is checked)
        type: 'file', 'dir', 'symlink', None, or a function returning True
            for valid paths; None means don't care.  Only consulted when
            ``exists`` is true.
        dash_ok: whether to allow "-" as stdin/stdout
        """
        assert exists in (True, False, None)
        assert type in ('file', 'dir', 'symlink', None) or callable(type)

        self._exists = exists
        self._type = type
        self._dash_ok = dash_ok

    def __call__(self, string):
        """Validate ``string`` and return it unchanged.

        Raises ``ArgumentTypeError`` when the path violates the configured
        existence or type requirement.
        """
        if string == '-':
            # the special argument "-" means sys.std{in,out}
            if self._type == 'dir':
                raise err('standard input/output (-) not allowed as directory path')
            if self._type == 'symlink':
                raise err('standard input/output (-) not allowed as symlink path')
            if not self._dash_ok:
                raise err('standard input/output (-) not allowed')
            return string

        path_exists = os.path.exists(string)
        if self._exists:
            if not path_exists:
                raise err("path does not exist: '%s'" % string)

            if self._type is None:
                pass
            elif self._type == 'file':
                if not os.path.isfile(string):
                    raise err("path is not a file: '%s'" % string)
            elif self._type == 'symlink':
                # os.path has no ``symlink`` function; ``islink`` is the
                # predicate that tests whether a path is a symbolic link.
                if not os.path.islink(string):
                    raise err("path is not a symlink: '%s'" % string)
            elif self._type == 'dir':
                if not os.path.isdir(string):
                    raise err("path is not a directory: '%s'" % string)
            elif not self._type(string):
                raise err("path not valid: '%s'" % string)
        else:
            # Only an explicit "must not exist" rejects an existing path;
            # exists=None means "don't care" and must let it through.
            if self._exists is not None and path_exists:
                raise err("path exists: '%s'" % string)

            parent = os.path.dirname(os.path.normpath(string)) or '.'
            # Test existence first: a missing parent is never a directory,
            # so the reverse order would hide the more precise message.
            if not os.path.exists(parent):
                raise err("parent directory does not exist: '%s'" % parent)
            if not os.path.isdir(parent):
                raise err("parent path is not a directory: '%s'" % parent)

        return string
diff --git a/lib/find_stats.py b/lib/find_stats.py
new file mode 100644
index 0000000..effb5b7
--- /dev/null
+++ b/lib/find_stats.py
@@ -0,0 +1,59 @@
+import os
+from pathlib import Path
+
+from .stats import Statistics
+
# Directory names that the file scan refuses to descend into.
IGNORED_FOLDERS = ['venv']
+
+
def find_all_files(folder: Path, verbose: bool = False):
    """Return a list with the path of every non-hidden file below ``folder``.

    Directories whose name starts with a dot, or whose lower-cased name is
    listed in ``IGNORED_FOLDERS``, are not descended into.  Files whose name
    starts with a dot are left out of the result.  With ``verbose`` set, one
    'Scanning' or 'Skipping' line is printed per file encountered.
    """
    found = []
    for root, dirnames, basenames in os.walk(str(folder)):
        # Slice-assign so os.walk sees the pruned list and skips those dirs.
        dirnames[:] = [
            dirname for dirname in dirnames
            if not dirname.startswith('.') and dirname.lower() not in IGNORED_FOLDERS
        ]
        root_path = Path(root)
        for basename in basenames:
            candidate = root_path / basename
            if basename.startswith('.'):
                if verbose:
                    print('Skipping', candidate)
                continue
            found.append(candidate)
            if verbose:
                print('Scanning', candidate)
    return found
+
+
def find_stats_for_file(filename: Path, name: str, stats: Statistics):
    """Classify the leading whitespace of every line of one file.

    Each line whose indentation contains both spaces and tabs is reported
    through ``stats.add_mixed_line`` (together with ``name``); a line
    indented with only spaces goes to ``stats.add_spaces`` and one with only
    tabs to ``stats.add_tabs``.  Lines without leading whitespace are not
    counted.  A file that cannot be read or decoded is skipped silently.

    filename: path of the file to read
    name: label recorded for the file when it contains mixed lines
    stats: accumulator receiving the per-line results
    """
    # Text after the last dot; a name without any dot yields the whole name.
    extension = filename.name.split('.')[-1]
    try:
        text = filename.read_text()
    except (UnicodeDecodeError, OSError):
        # Deliberate best-effort: binary or unreadable files are ignored.
        return

    for line in text.split('\n'):
        # Take the whole indentation prefix in one step instead of slicing
        # off one character at a time (which is quadratic in its length).
        indent = line[:len(line) - len(line.lstrip(' \t'))]
        has_spaces = ' ' in indent
        has_tabs = '\t' in indent
        if has_spaces and has_tabs:
            stats.add_mixed_line(extension=extension,
                                 filename=name)
        elif has_spaces:
            stats.add_spaces(extension=extension)
        elif has_tabs:
            stats.add_tabs(extension=extension)
+
def find_stats(folder: Path, verbose: bool = False) -> Statistics:
    """Collect indentation statistics for every scanned file below ``folder``.

    ``folder`` is resolved first, the files are listed with
    ``find_all_files`` and each one is fed to ``find_stats_for_file``.
    ``verbose`` is forwarded to the file listing.  Returns the filled
    ``Statistics`` object.
    """
    folder = folder.resolve()
    files = find_all_files(folder, verbose=verbose)
    stats = Statistics()
    for file in files:
        # The listed paths already start with the resolved folder, so they
        # are used as-is.  The label is passed as text because the
        # parameter (and the set it ends up in) is declared as ``str``.
        find_stats_for_file(file, str(file), stats)
    return stats
diff --git a/lib/print_stats.py b/lib/print_stats.py
new file mode 100644
index 0000000..9616caa
--- /dev/null
+++ b/lib/print_stats.py
@@ -0,0 +1,17 @@
+from lib.stats import Statistics
+
+
def print_stats(stats: Statistics, by_extension: bool):
    """Print the collected indentation statistics to standard output.

    The three overall totals are always printed.  With ``by_extension`` set,
    a breakdown follows for every extension seen in any of the three
    per-extension tables, in sorted order.  When at least one mixed line was
    counted, the files containing mixed lines are listed, also sorted.
    """
    print('spaces:', stats.all_spaces)
    print('tabs:', stats.all_tabs)
    print('mixed:', stats.all_mixed)
    if by_extension:
        extensions = set(stats.space_dict) | set(stats.tab_dict) | set(stats.mixed_line_dict)
        # Sorted so that repeated runs produce the same report.
        for ext in sorted(extensions):
            print(ext + ':')
            # .get() rather than indexing: the tables are defaultdicts, and
            # indexing a missing key would insert it into ``stats``.
            print(' ', 'spaces:', stats.space_dict.get(ext, 0))
            print(' ', 'tabs:', stats.tab_dict.get(ext, 0))
            print(' ', 'mixed:', stats.mixed_line_dict.get(ext, 0))
    # NOTE(review): this section is taken to be independent of
    # ``by_extension``; confirm against the intended report layout.
    if stats.all_mixed > 0:
        print('files_with_mixed_lines:')
        # key=str keeps the ordering well-defined for str and path entries.
        for file in sorted(stats.mixed_files, key=str):
            print(' -', file)
diff --git a/lib/stats.py b/lib/stats.py
new file mode 100644
index 0000000..42e4461
--- /dev/null
+++ b/lib/stats.py
@@ -0,0 +1,26 @@
+from collections import defaultdict
+from typing import Set
+
+
class Statistics:
    """Running tallies of space-, tab- and mixed-indented lines.

    Every category keeps a grand total plus a per-extension table; the
    labels of files that contributed a mixed line are collected as well.
    """

    def __init__(self):
        # Grand totals across all extensions.
        self.all_spaces = 0
        self.all_tabs = 0
        self.all_mixed = 0
        # Per-extension tables; unseen extensions read as 0.
        self.space_dict = defaultdict(int)
        self.tab_dict = defaultdict(int)
        self.mixed_line_dict = defaultdict(int)
        # Labels of the files in which a mixed line was recorded.
        self.mixed_files: Set[str] = set()

    def _bump(self, table, total_name, extension, count):
        """Add ``count`` to ``table[extension]`` and to the named total."""
        table[extension] += count
        setattr(self, total_name, getattr(self, total_name) + count)

    def add_spaces(self, extension='', count=1):
        """Record ``count`` space-indented lines for ``extension``."""
        self._bump(self.space_dict, 'all_spaces', extension, count)

    def add_tabs(self, extension='', count=1):
        """Record ``count`` tab-indented lines for ``extension``."""
        self._bump(self.tab_dict, 'all_tabs', extension, count)

    def add_mixed_line(self, extension='', count=1, filename=''):
        """Record ``count`` mixed lines for ``extension`` and note ``filename``."""
        self.mixed_line_dict[extension] += count
        self.mixed_files.add(filename)
        self.all_mixed += count
diff --git a/main.py b/main.py
new file mode 100755
index 0000000..ff8ad95
--- /dev/null
+++ b/main.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3.6
+
+import argparse
+from pathlib import Path
+
+from lib.argparse.pathtype import PathType
+from lib.find_stats import find_stats
+from lib.print_stats import print_stats
+from lib.stats import Statistics
+
+
def main():
    """Parse the command line, gather the statistics and print the report.

    Expects one positional ``folder`` argument, validated by ``PathType`` as
    an existing directory, plus the optional ``--by-extension``/``-e`` and
    ``--verbose``/``-v`` switches.
    """
    parser = argparse.ArgumentParser(
        prog='tabsvsspaces',
        description='Shows statistics about the usage of tabs and spaces in a given folder',
    )
    parser.add_argument('folder', type=PathType(type='dir', exists=True))
    parser.add_argument(
        '--by-extension', '-e',
        dest='extension',
        action='store_true',
        help='show distribution by file extension',
    )
    parser.add_argument(
        '--verbose', '-v',
        dest='verbose',
        action='store_true',
        help='show debug information',
    )

    args = parser.parse_args()
    collected = find_stats(Path(args.folder), verbose=args.verbose)
    print_stats(collected, args.extension)


if __name__ == '__main__':
    main()