|
1 # Copyright (C) 2009 Google Inc. All rights reserved. |
|
2 # Copyright (C) 2010 Chris Jerdonek (chris.jerdonek@gmail.com) |
|
3 # Copyright (C) 2010 ProFUSION embedded systems |
|
4 # |
|
5 # Redistribution and use in source and binary forms, with or without |
|
6 # modification, are permitted provided that the following conditions are |
|
7 # met: |
|
8 # |
|
9 # * Redistributions of source code must retain the above copyright |
|
10 # notice, this list of conditions and the following disclaimer. |
|
11 # * Redistributions in binary form must reproduce the above |
|
12 # copyright notice, this list of conditions and the following disclaimer |
|
13 # in the documentation and/or other materials provided with the |
|
14 # distribution. |
|
15 # * Neither the name of Google Inc. nor the names of its |
|
16 # contributors may be used to endorse or promote products derived from |
|
17 # this software without specific prior written permission. |
|
18 # |
|
19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
|
23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|
26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|
27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
30 |
|
31 """Supports reading and processing text files.""" |
|
32 |
|
33 import codecs |
|
34 import logging |
|
35 import os |
|
36 import sys |
|
37 |
|
38 |
|
# Module-level logger shared by everything in this file.
_log = logging.getLogger(__name__)


class TextFileReader(object):

    """Supports reading and processing text files.

    Each file is read, decoded and split into lines, and the lines are
    then handed to the processor supplied at construction time.

    Attributes:
      file_count: The total number of files passed to this instance
                  for processing, including non-text files and files
                  that should be skipped.

    """

    def __init__(self, processor):
        """Create an instance.

        Arguments:
          processor: A ProcessorBase instance.  Its should_process()
                     and process() methods are called by process_file().

        """
        self._processor = processor
        self.file_count = 0

    def _read_lines(self, file_path):
        """Read the file at a path, and return its lines.

        The contents are decoded as UTF-8, with undecodable bytes
        replaced instead of raising an error.

        Args:
          file_path: The path of the file to read, or "-" to read from
                     standard input.

        Returns:
          A list of strings obtained by splitting the contents on LF
          characters only.  Lines of a file with CRLF endings therefore
          keep a trailing CR, and contents ending in a newline yield a
          final empty string.

        Raises:
          IOError: If the file does not exist or cannot be read.

        """
        # Support the UNIX convention of using "-" for stdin.
        if file_path == '-':
            # The codecs reader decodes bytes.  On Python 3 the raw bytes
            # of stdin live on sys.stdin.buffer; on Python 2 sys.stdin
            # itself is already a byte stream.
            byte_stream = getattr(sys.stdin, 'buffer', sys.stdin)
            stream = codecs.StreamReaderWriter(byte_stream,
                                               codecs.getreader('utf8'),
                                               codecs.getwriter('utf8'),
                                               'replace')
        else:
            # We do not open the file with universal newline support
            # (codecs does not support it anyway), so the resulting
            # lines contain trailing "\r" characters if we are reading
            # a file with CRLF endings.
            stream = codecs.open(file_path, 'r', 'utf8', 'replace')

        # Leaving the "with" block closes the stream even if read() raises.
        with stream:
            contents = stream.read()

        return contents.split('\n')

    def process_file(self, file_path, **kwargs):
        """Process the given file by calling the processor's process() method.

        Every call increments file_count, whether or not the file ends
        up being processed.  A file that exists but cannot be read is
        logged as a warning and skipped.

        Args:
          file_path: The path of the file to process, or "-" for
                     standard input.
          **kwargs: Any additional keyword parameters that should be passed
                    to the processor's process() method.  The process()
                    method should support these keyword arguments.

        Raises:
          SystemExit: If no file at file_path exists.

        """
        self.file_count += 1

        # "-" denotes stdin, so it is exempt from the existence check.
        if not os.path.exists(file_path) and file_path != "-":
            _log.error("File does not exist: '%s'", file_path)
            sys.exit(1)

        if not self._processor.should_process(file_path):
            _log.debug("Skipping file: '%s'", file_path)
            return
        _log.debug("Processing file: '%s'", file_path)

        try:
            lines = self._read_lines(file_path)
        except IOError as err:
            # Best effort: report the unreadable file and carry on.
            _log.warning("Could not read file. Skipping: '%s'\n %s",
                         file_path, err)
            return

        self._processor.process(lines, file_path, **kwargs)

    def _process_directory(self, directory):
        """Process all files in the given directory, recursively.

        Args:
          directory: A directory path.

        """
        for dir_path, dir_names, file_names in os.walk(directory):
            for file_name in file_names:
                file_path = os.path.join(dir_path, file_name)
                self.process_file(file_path)

    def process_paths(self, paths):
        """Process the given file and directory paths.

        Directories are walked recursively; any other path is handed
        to process_file() directly.

        Args:
          paths: A list of file and directory paths.

        """
        for path in paths:
            if os.path.isdir(path):
                self._process_directory(directory=path)
            else:
                self.process_file(path)