|
1 """mailerdaemon - classes to parse mailer-daemon messages""" |
|
2 |
|
3 import rfc822 |
|
4 import calendar |
|
5 import re |
|
6 import os |
|
7 import sys |
|
8 |
|
class Unparseable(Exception):
    """Raised when a message cannot be parsed as a mailer-daemon report.

    This used to be the string 'mailerdaemon.Unparseable'.  String
    exceptions can no longer be raised or caught, so it is a real
    exception class now.  Code that does ``raise Unparseable`` or
    ``except Unparseable:`` keeps working unchanged.
    """
|
10 |
|
class ErrorMessage(rfc822.Message):
    """An rfc822 message that may be a mailer-daemon error report."""

    def __init__(self, fp):
        """Read the message headers from the open file *fp*."""
        rfc822.Message.__init__(self, fp)
        # Lower-cased subject.  Stays empty until is_warning() has seen a
        # subject that is not a warning.
        self.sub = ''

    def is_warning(self):
        """Return 1 if the Subject header marks a warning, else 0.

        A subject counts as a warning when, lower-cased, it starts with
        'waiting mail' or contains 'warning'.  For any other non-empty
        subject the lower-cased text is remembered in self.sub, which
        get_errors() later hands to the parsers.
        """
        subject = self.getheader('Subject')
        if not subject:
            return 0
        subject = subject.lower()
        if subject.startswith('waiting mail') or 'warning' in subject:
            return 1
        self.sub = subject
        return 0

    def get_errors(self):
        """Return the errors found by the first parser that succeeds.

        Each parser in EMPARSERS is called as parser(self.fp, self.sub)
        after rewinding the body.  A parser signals failure by raising
        Unparseable; if every parser fails, Unparseable is raised.
        """
        for parser in EMPARSERS:
            self.rewindbody()
            try:
                found = parser(self.fp, self.sub)
            except Unparseable:
                continue
            return found
        raise Unparseable
|
34 |
|
# Patterns used to locate the failing email address(es).
# Each entry is either a single pattern or a 2-tuple of patterns.
# A single pattern must contain a group (?P<email>...) for the email
# address and may contain a group (?P<reason>...) for the reason (error
# message).  When no reason is captured, emparse_list_reason is used to
# look for one.
# In a 2-tuple the first pattern finds a location and the second one is
# applied repeatedly to collect multiple email addresses.  The second
# pattern is matched (not searched) where the previous match ended.
# All patterns are compiled with the re module (see the loop below).
# NOTE(review): runs of spaces inside these literals are significant --
# confirm none were collapsed when this file was last reformatted.
emparse_list_list = [
    'error: (?P<reason>unresolvable): (?P<email>.+)',
    ('----- The following addresses had permanent fatal errors -----\n',
     '(?P<email>[^ \n].*)\n( .*\n)?'),
    'remote execution.*\n.*rmail (?P<email>.+)',
    ('The following recipients did not receive your message:\n\n',
     ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
    '------- Failure Reasons --------\n\n(?P<reason>.*)\n(?P<email>.*)',
    '^<(?P<email>.*)>:\n(?P<reason>.*)',
    '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
    '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
    '^Original-Recipient: rfc822;(?P<email>.*)',
    '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
    '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
    '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
    '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
    '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
    ]
# Replace every entry by its compiled form, in place.  Strings become a
# compiled pattern, tuples become a tuple of compiled patterns; everything
# is compiled with re.MULTILINE.
for _pos, _item in enumerate(emparse_list_list):
    if isinstance(_item, str):
        emparse_list_list[_pos] = re.compile(_item, re.MULTILINE)
    else:
        emparse_list_list[_pos] = tuple(
            re.compile(_part, re.MULTILINE) for _part in _item)
# Do not leave the loop variables behind in the module namespace.
del _pos, _item
|
78 |
|
# Patterns used to find a reason (error message) when the address pattern
# did not capture one.  They are tried in order, so order is important.
# A plain string is a per-address template: "<>" is replaced by a copy of
# the (escaped) email address and the result is compiled with
# re.MULTILINE.  An address whose template matches gets that reason and
# is done; the search then simply moves on to the next expression.
# A compiled pattern supplies one reason for all addresses still left;
# after the first compiled pattern that matches, no more expressions are
# searched for.
# The templates are raw strings so that "\." and "\d" reach the re module
# untouched instead of being invalid string escapes.
emparse_list_reason = [
    r'^5\d{2} <>\.\.\. (?P<reason>.*)',
    r'<>\.\.\. (?P<reason>.*)',
    re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE),
    re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
    re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE),
    ]
# Finds the first line starting with "From:" (any case); emparse_list only
# looks for addresses in the text before it.
emparse_list_from = re.compile('^From:', re.IGNORECASE|re.MULTILINE)
|
def emparse_list(fp, sub):
    """Extract 'address: reason' strings from a mailer-daemon message body.

    fp  -- file object positioned at the start of the message body
    sub -- fallback reason, used when the body yields none (a leading
           'returned mail: ' is dropped from it)

    The patterns in emparse_list_list are tried in order against the text
    before the first "From:" line; the first one that matches decides
    which addresses failed.  If that pattern captured no reason, the
    whole body is searched with emparse_list_reason.

    Returns a list of strings of the form 'address: reason' with every
    run of whitespace collapsed to a single space.  Raises Unparseable if
    no address was found.
    """
    text = fp.read()

    # Addresses are only looked for before the first "From:" line.
    from_hit = emparse_list_from.search(text)
    if from_hit is not None:
        limit = from_hit.start(0)
    else:
        limit = len(text)

    errors = []
    emails = []
    reason = None
    for pattern in emparse_list_list:
        if isinstance(pattern, tuple):
            # (locator, repeater): find the spot, then collect addresses
            # by matching the repeater back to back.
            found = pattern[0].search(text, 0, limit)
            if found is None:
                continue
            try:
                reason = found.group('reason')
            except IndexError:
                # The locator has no 'reason' group.
                pass
            found = pattern[1].match(text, found.end(0), limit)
            while found is not None:
                emails.append(found.group('email'))
                found = pattern[1].match(text, found.end(0), limit)
            break
        found = pattern.search(text, 0, limit)
        if found is None:
            continue
        emails.append(found.group('email'))
        try:
            reason = found.group('reason')
        except IndexError:
            # This pattern has no 'reason' group.
            pass
        break

    if not emails:
        raise Unparseable

    if not reason:
        prefix = 'returned mail: '
        reason = sub
        if reason.startswith(prefix):
            reason = reason[len(prefix):]
        for pattern in emparse_list_reason:
            if not isinstance(pattern, str):
                # Compiled pattern: one reason for all remaining addresses.
                found = pattern.search(text)
                if found is not None:
                    reason = found.group('reason')
                    break
                continue
            # String template: look for a reason per address.  Walk the
            # list backwards so that entries can be deleted on the way.
            for pos in reversed(range(len(emails))):
                address = emails[pos]
                specific = re.compile(
                    pattern.replace('<>', re.escape(address)), re.MULTILINE)
                found = specific.search(text)
                if found is None:
                    continue
                line = address.strip() + ': ' + found.group('reason')
                errors.append(' '.join(line.split()))
                del emails[pos]

    # Whatever is left shares the common reason.
    for address in emails:
        line = address.strip() + ': ' + reason
        errors.append(' '.join(line.split()))
    return errors
|
147 |
|
# Parsers tried in order by ErrorMessage.get_errors().  Each one is called
# as parser(fp, sub) and raises Unparseable when it cannot handle the
# message.
EMPARSERS = [emparse_list]
|
149 |
|
def sort_numeric(a, b):
    """Compare a and b by their integer values.

    Returns -1, 0 or 1 when int(a) is less than, equal to or greater than
    int(b).
    """
    left, right = int(a), int(b)
    # Difference of two booleans: exactly -1, 0 or 1.
    return (left > right) - (left < right)
|
156 |
|
def _message_date(m):
    """Return the date of message m as 'Mon DD', or '??????' if unknown."""
    try:
        mm, dd = m.getdate('date')[1:3]
        return '%s %02d' % (calendar.month_abbr[mm], dd)
    except (TypeError, ValueError, IndexError):
        # No usable date could be derived from the message.
        return '??????'


def parsedir(dir, modify):
    """Parse all numerically named message files in dir and print a report.

    The working directory is changed to dir and left there.  Every file
    whose name consists only of digits is read as an ErrorMessage, in
    increasing numeric order.  One status line is printed per file,
    followed by totals and a table of the distinct errors found, each
    with its count and the first and last file (and date) it was seen in.

    dir    -- directory containing the message files
    modify -- if true, files that were parsed successfully or that were
              warnings only are renamed to ',' + name; unparseable files
              are never renamed
    """
    os.chdir(dir)
    pat = re.compile('^[0-9]*$')
    errordict = {}      # error text -> number of occurrences
    errorfirst = {}     # error text -> 'file (date)' of first occurrence
    errorlast = {}      # error text -> 'file (date)' of last occurrence
    nok = nwarn = nbad = 0
    # Output goes through sys.stdout.write, producing exactly the text the
    # former print statements did without depending on print syntax.
    write = sys.stdout.write

    # find all numeric file names and sort them by numeric value
    files = sorted([fn for fn in os.listdir('.') if pat.match(fn) is not None],
                   key=int)

    for fn in files:
        # Lets try to parse the file.
        fp = open(fn)
        try:
            m = ErrorMessage(fp)
            sender = m.getaddr('From')
            write('%s\t%-40s\t' % (fn, sender[1]))

            if m.is_warning():
                write('warning only\n')
                nwarn = nwarn + 1
                handled = 1
            else:
                try:
                    errors = m.get_errors()
                except Unparseable:
                    write('** Not parseable\n')
                    nbad = nbad + 1
                    handled = 0
                else:
                    write('%d errors\n' % len(errors))
                    # Remember them.  The date depends only on the
                    # message, so it is worked out once, not per error.
                    stamp = '%s (%s)' % (fn, _message_date(m))
                    for e in errors:
                        if e not in errordict:
                            errordict[e] = 1
                            errorfirst[e] = stamp
                        else:
                            errordict[e] = errordict[e] + 1
                        errorlast[e] = stamp
                    nok = nok + 1
                    handled = 1
        finally:
            # Close even if parsing fails, and always before renaming.
            fp.close()
        if modify and handled:
            os.rename(fn, ','+fn)

    write('--------------\n')
    write('%s files parsed, %s files warning-only, %s files unparseable\n'
          % (nok, nwarn, nbad))
    write('--------------\n')
    # Sorted by count, then first occurrence, last occurrence, error text.
    report = [(errordict[e], errorfirst[e], errorlast[e], e)
              for e in errordict]
    report.sort()
    for num, first, last, e in report:
        write('%d %s - %s\t%s\n' % (num, first, last, e))
|
224 |
|
def main():
    """Command-line entry point: [-d] [folder ...].

    With -d as the first argument (it is removed from sys.argv), handled
    message files are renamed by parsedir().  Every remaining argument is
    a folder to process; without any, '/ufs/jack/Mail/errorsinbox' is
    used.
    """
    modify = 0
    if len(sys.argv) > 1 and sys.argv[1] == '-d':
        modify = 1
        del sys.argv[1]
    if len(sys.argv) > 1:
        # parsedir() changes the working directory, so resolve all folders
        # before the first call; otherwise a relative second folder would
        # be looked up inside the first one.
        folders = [os.path.abspath(folder) for folder in sys.argv[1:]]
        for folder in folders:
            parsedir(folder, modify)
    else:
        parsedir('/ufs/jack/Mail/errorsinbox', modify)
|
235 |
|
# Run the report when this file is executed as a script, or when the
# program name in sys.argv[0] equals this module's name.
if __name__ == '__main__' or sys.argv[0] == __name__:
    main()