python-2.5.2/win32/Tools/Scripts/byext.py
changeset 0 ae805ac0140d
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python-2.5.2/win32/Tools/Scripts/byext.py	Fri Apr 03 17:19:34 2009 +0100
@@ -0,0 +1,131 @@
+#! /usr/bin/env python
+
+"""Show file statistics by extension."""
+
+import os
+import sys
+
+class Stats:
+
+    def __init__(self):
+        self.stats = {}
+
+    def statargs(self, args):
+        for arg in args:
+            if os.path.isdir(arg):
+                self.statdir(arg)
+            elif os.path.isfile(arg):
+                self.statfile(arg)
+            else:
+                sys.stderr.write("Can't find %s\n" % arg)
+                self.addstats("<???>", "unknown", 1)
+
+    def statdir(self, dir):
+        self.addstats("<dir>", "dirs", 1)
+        try:
+            names = os.listdir(dir)
+        except os.error, err:
+            sys.stderr.write("Can't list %s: %s\n" % (dir, err))
+            self.addstats("<dir>", "unlistable", 1)
+            return
+        names.sort()
+        for name in names:
+            if name.startswith(".#"):
+                continue # Skip CVS temp files
+            if name.endswith("~"):
+                continue# Skip Emacs backup files
+            full = os.path.join(dir, name)
+            if os.path.islink(full):
+                self.addstats("<lnk>", "links", 1)
+            elif os.path.isdir(full):
+                self.statdir(full)
+            else:
+                self.statfile(full)
+
+    def statfile(self, filename):
+        head, ext = os.path.splitext(filename)
+        head, base = os.path.split(filename)
+        if ext == base:
+            ext = "" # E.g. .cvsignore is deemed not to have an extension
+        ext = os.path.normcase(ext)
+        if not ext:
+            ext = "<none>"
+        self.addstats(ext, "files", 1)
+        try:
+            f = open(filename, "rb")
+        except IOError, err:
+            sys.stderr.write("Can't open %s: %s\n" % (filename, err))
+            self.addstats(ext, "unopenable", 1)
+            return
+        data = f.read()
+        f.close()
+        self.addstats(ext, "bytes", len(data))
+        if '\0' in data:
+            self.addstats(ext, "binary", 1)
+            return
+        if not data:
+            self.addstats(ext, "empty", 1)
+        #self.addstats(ext, "chars", len(data))
+        lines = data.splitlines()
+        self.addstats(ext, "lines", len(lines))
+        del lines
+        words = data.split()
+        self.addstats(ext, "words", len(words))
+
+    def addstats(self, ext, key, n):
+        d = self.stats.setdefault(ext, {})
+        d[key] = d.get(key, 0) + n
+
+    def report(self):
+        exts = self.stats.keys()
+        exts.sort()
+        # Get the column keys
+        columns = {}
+        for ext in exts:
+            columns.update(self.stats[ext])
+        cols = columns.keys()
+        cols.sort()
+        colwidth = {}
+        colwidth["ext"] = max([len(ext) for ext in exts])
+        minwidth = 6
+        self.stats["TOTAL"] = {}
+        for col in cols:
+            total = 0
+            cw = max(minwidth, len(col))
+            for ext in exts:
+                value = self.stats[ext].get(col)
+                if value is None:
+                    w = 0
+                else:
+                    w = len("%d" % value)
+                    total += value
+                cw = max(cw, w)
+            cw = max(cw, len(str(total)))
+            colwidth[col] = cw
+            self.stats["TOTAL"][col] = total
+        exts.append("TOTAL")
+        for ext in exts:
+            self.stats[ext]["ext"] = ext
+        cols.insert(0, "ext")
+        def printheader():
+            for col in cols:
+                print "%*s" % (colwidth[col], col),
+            print
+        printheader()
+        for ext in exts:
+            for col in cols:
+                value = self.stats[ext].get(col, "")
+                print "%*s" % (colwidth[col], value),
+            print
+        printheader() # Another header at the bottom
+
+def main():
+    args = sys.argv[1:]
+    if not args:
+        args = [os.curdir]
+    s = Stats()
+    s.statargs(args)
+    s.report()
+
+if __name__ == "__main__":
+    main()