#---------- error_page.py----------#
"""Estimate how likely it is that a page read from stdin is an error report.

Every regular expression in ``err_pats`` that matches the page adds its
weight to a running score.  The score is then translated into one of five
plain-English verdicts, which is printed on stdout.

Written to run unchanged on Python 2 and Python 3.
"""
import re
import sys

# Mapping from patterns to probability contribution of pattern.
#
# NOTE(review): this file arrived whitespace-collapsed, and the pattern table
# looks like it lost angle-bracketed markup on the way:
#   * eight entries were reduced to the bare pattern r'(?is)' (weights 0.80,
#     0.80 and six of 0.10).  As dict keys they collapse into one entry, and
#     an empty pattern matches every page, so they are left out here rather
#     than inflating every score.  TODO: restore them from the original
#     source.
#   * two patterns were split across blank lines; those line breaks are
#     treated as the same damage, not as part of the regex.
# The remaining patterns are reproduced exactly as received.
err_pats = {
    r'(?is).*?(404|403).*?ERROR.*?': 0.95,
    r'(?is).*?ERROR.*?(404|403).*?': 0.95,
    r'(?is)ERROR': 0.30,
    r'(?is).*?ERROR.*?': 0.10,
    r'(?is).*?File Not Found.*?': 0.80,
    r'(?is).*?Not Found.*?': 0.40,
    r'(?is).*?(404|403).*?': 0.15,
    r'(?is).*?not found.*?': 0.15,
    r'(?i)does not exist': 0.10,
}

# (threshold, verdict) pairs, highest threshold first.  A score has to be
# strictly greater than the threshold to earn the verdict.
_VERDICTS = (
    (0.90, 'Page is almost surely an error report'),
    (0.75, 'It is highly likely page is an error report'),
    (0.50, 'Better-than-even odds page is error report'),
    (0.25, 'Fair indication page is an error report'),
)
_DEFAULT_VERDICT = 'Page is probably real content'


def error_score(page, patterns=None):
    """Return the summed weight of the patterns that match *page*.

    page     -- the page text to inspect.
    patterns -- optional mapping of regex -> weight; defaults to ``err_pats``.

    Scanning stops once the score exceeds 0.9: that already earns the
    strongest verdict, so further matches cannot change the outcome.  The
    returned value may therefore be lower than the sum over all matching
    patterns.
    """
    if patterns is None:
        patterns = err_pats
    score = 0
    for pat, prob in patterns.items():
        if score > 0.9:
            break
        if re.search(pat, page):
            score += prob
    return score


def verdict_for(score):
    """Return the verdict sentence for an error score."""
    for threshold, verdict in _VERDICTS:
        if score > threshold:
            return verdict
    return _DEFAULT_VERDICT


def main():
    """Read a page from stdin and print its verdict on stdout."""
    page = sys.stdin.read()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(verdict_for(error_score(page)))


# Guarded so that importing the module does not block on stdin.
if __name__ == '__main__':
    main()