Handle emails with no subject

This commit is contained in:
IamTheFij 2019-11-11 23:53:18 +00:00
parent 9552f9788a
commit b4c690aeb3

View File

@ -1,11 +1,7 @@
from datetime import date
from datetime import datetime from datetime import datetime
from datetime import timedelta from datetime import timedelta
from getpass import getpass
from time import sleep from time import sleep
from imaplib import IMAP4 from imaplib import IMAP4
import email
import json
import os import os
from dateutil import parser from dateutil import parser
@ -14,7 +10,12 @@ from imbox import Imbox
import requests import requests
VALID_CONTENT_TYPES = [ 'text/plain', 'text/html' ] VALID_CONTENT_TYPES = ['text/plain', 'text/html']
def get_message_subject(message):
    """Return the message subject, or a placeholder when there is none.

    Args:
        message: Any object that may expose a ``subject`` attribute.

    Returns:
        The value of ``message.subject`` when the attribute exists and is
        not ``None``; otherwise the literal placeholder ``'NO SUBJECT'``.
    """
    # A missing attribute and an explicit None are both treated as "no
    # subject" so callers never have to format or serialize None.
    subject = getattr(message, 'subject', None)
    if subject is None:
        return 'NO SUBJECT'
    return subject
class MailCrawler(object): class MailCrawler(object):
@ -53,7 +54,7 @@ class MailCrawler(object):
response = requests.post( response = requests.post(
parser_host+'/parse', parser_host+'/parse',
json={ json={
'subject': message.subject, 'subject': get_message_subject(message),
'message': text, 'message': text,
}, },
) )
@ -97,18 +98,21 @@ class MailCrawler(object):
print('Parsed result: ', result) print('Parsed result: ', result)
print('Indexed result: ', self.index_token(result)) print('Indexed result: ', self.index_token(result))
def process_messages(self, server, since_date, last_message=0): def process_messages(self, server, since_date, last_message=0):
for uid, message in server.messages(date__gt=since_date): for uid, message in server.messages(date__gt=since_date):
uid = int(uid) uid = int(uid)
if uid <= last_message: if uid <= last_message:
print('DDB Already seen message with uid {}. Skipping'.format(uid)) print(
'DDB Already seen message with uid {}. Skipping'.format(uid)
)
continue continue
print( print(
'Processing message uid {} message_id {} ' 'Processing message uid {} message_id {} '
'with subject "{}"'.format( 'with subject "{}"'.format(
uid, message.message_id, message.subject uid,
message.message_id,
get_message_subject(message),
) )
) )
self.process_message(message) self.process_message(message)
@ -124,7 +128,6 @@ class MailCrawler(object):
return since_date, last_message return since_date, last_message
def run(self): def run(self):
print('Starting crawler') print('Starting crawler')
# TODO: Put server into some kind of context manager and property # TODO: Put server into some kind of context manager and property