ref: 5883986336a9854a564da025eccebe7ab0076a79
dir: /sys/src/cmd/python/Tools/scripts/mailerdaemon.py/
"""mailerdaemon - classes to parse mailer-daemon messages""" import rfc822 import calendar import re import os import sys Unparseable = 'mailerdaemon.Unparseable' class ErrorMessage(rfc822.Message): def __init__(self, fp): rfc822.Message.__init__(self, fp) self.sub = '' def is_warning(self): sub = self.getheader('Subject') if not sub: return 0 sub = sub.lower() if sub.startswith('waiting mail'): return 1 if 'warning' in sub: return 1 self.sub = sub return 0 def get_errors(self): for p in EMPARSERS: self.rewindbody() try: return p(self.fp, self.sub) except Unparseable: pass raise Unparseable # List of re's or tuples of re's. # If a re, it should contain at least a group (?P<email>...) which # should refer to the email address. The re can also contain a group # (?P<reason>...) which should refer to the reason (error message). # If no reason is present, the emparse_list_reason list is used to # find a reason. # If a tuple, the tuple should contain 2 re's. The first re finds a # location, the second re is repeated one or more times to find # multiple email addresses. The second re is matched (not searched) # where the previous match ended. # The re's are compiled using the re module. emparse_list_list = [ 'error: (?P<reason>unresolvable): (?P<email>.+)', ('----- The following addresses had permanent fatal errors -----\n', '(?P<email>[^ \n].*)\n( .*\n)?'), 'remote execution.*\n.*rmail (?P<email>.+)', ('The following recipients did not receive your message:\n\n', ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'), '------- Failure Reasons --------\n\n(?P<reason>.*)\n(?P<email>.*)', '^<(?P<email>.*)>:\n(?P<reason>.*)', '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)', '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)', '^Original-Recipient: rfc822;(?P<email>.*)', '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)', '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)', '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)', '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)', '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n', ] # compile the re's in the list and store them in-place. for i in range(len(emparse_list_list)): x = emparse_list_list[i] if type(x) is type(''): x = re.compile(x, re.MULTILINE) else: xl = [] for x in x: xl.append(re.compile(x, re.MULTILINE)) x = tuple(xl) del xl emparse_list_list[i] = x del x del i # list of re's used to find reasons (error messages). # if a string, "<>" is replaced by a copy of the email address. # The expressions are searched for in order. After the first match, # no more expressions are searched for. So, order is important. emparse_list_reason = [ r'^5\d{2} <>\.\.\. (?P<reason>.*)', '<>\.\.\. (?P<reason>.*)', re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE), re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'), re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE), ] emparse_list_from = re.compile('^From:', re.IGNORECASE|re.MULTILINE) def emparse_list(fp, sub): data = fp.read() res = emparse_list_from.search(data) if res is None: from_index = len(data) else: from_index = res.start(0) errors = [] emails = [] reason = None for regexp in emparse_list_list: if type(regexp) is type(()): res = regexp[0].search(data, 0, from_index) if res is not None: try: reason = res.group('reason') except IndexError: pass while 1: res = regexp[1].match(data, res.end(0), from_index) if res is None: break emails.append(res.group('email')) break else: res = regexp.search(data, 0, from_index) if res is not None: emails.append(res.group('email')) try: reason = res.group('reason') except IndexError: pass break if not emails: raise Unparseable if not reason: reason = sub if reason[:15] == 'returned mail: ': reason = reason[15:] for regexp in emparse_list_reason: if type(regexp) is type(''): for i in range(len(emails)-1,-1,-1): email = emails[i] exp = re.compile(re.escape(email).join(regexp.split('<>')), re.MULTILINE) res = exp.search(data) if res is not None: errors.append(' '.join((email.strip()+': '+res.group('reason')).split())) del emails[i] continue res = regexp.search(data) if res is not None: reason = res.group('reason') break for email in emails: errors.append(' '.join((email.strip()+': '+reason).split())) return errors EMPARSERS = [emparse_list, ] def sort_numeric(a, b): a = int(a) b = int(b) if a < b: return -1 elif a > b: return 1 else: return 0 def parsedir(dir, modify): os.chdir(dir) pat = re.compile('^[0-9]*$') errordict = {} errorfirst = {} errorlast = {} nok = nwarn = nbad = 0 # find all numeric file names and sort them files = filter(lambda fn, pat=pat: pat.match(fn) is not None, os.listdir('.')) files.sort(sort_numeric) for fn in files: # Lets try to parse the file. fp = open(fn) m = ErrorMessage(fp) sender = m.getaddr('From') print '%s\t%-40s\t'%(fn, sender[1]), if m.is_warning(): fp.close() print 'warning only' nwarn = nwarn + 1 if modify: os.rename(fn, ','+fn) ## os.unlink(fn) continue try: errors = m.get_errors() except Unparseable: print '** Not parseable' nbad = nbad + 1 fp.close() continue print len(errors), 'errors' # Remember them for e in errors: try: mm, dd = m.getdate('date')[1:1+2] date = '%s %02d' % (calendar.month_abbr[mm], dd) except: date = '??????' if not errordict.has_key(e): errordict[e] = 1 errorfirst[e] = '%s (%s)' % (fn, date) else: errordict[e] = errordict[e] + 1 errorlast[e] = '%s (%s)' % (fn, date) fp.close() nok = nok + 1 if modify: os.rename(fn, ','+fn) ## os.unlink(fn) print '--------------' print nok, 'files parsed,',nwarn,'files warning-only,', print nbad,'files unparseable' print '--------------' list = [] for e in errordict.keys(): list.append((errordict[e], errorfirst[e], errorlast[e], e)) list.sort() for num, first, last, e in list: print '%d %s - %s\t%s' % (num, first, last, e) def main(): modify = 0 if len(sys.argv) > 1 and sys.argv[1] == '-d': modify = 1 del sys.argv[1] if len(sys.argv) > 1: for folder in sys.argv[1:]: parsedir(folder, modify) else: parsedir('/ufs/jack/Mail/errorsinbox', modify) if __name__ == '__main__' or sys.argv[0] == __name__: main()