Handle emails with no subject
This commit is contained in:
parent
9552f9788a
commit
b4c690aeb3
@ -1,11 +1,7 @@
|
|||||||
from datetime import date
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from getpass import getpass
|
|
||||||
from time import sleep
|
from time import sleep
|
||||||
from imaplib import IMAP4
|
from imaplib import IMAP4
|
||||||
import email
|
|
||||||
import json
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from dateutil import parser
|
from dateutil import parser
|
||||||
@ -14,7 +10,12 @@ from imbox import Imbox
|
|||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
|
||||||
VALID_CONTENT_TYPES = [ 'text/plain', 'text/html' ]
|
VALID_CONTENT_TYPES = ['text/plain', 'text/html']
|
||||||
|
|
||||||
|
|
||||||
|
def get_message_subject(message):
    """Return the message subject, or a placeholder when there is none.

    Args:
        message: Any object that may expose a ``subject`` attribute.

    Returns:
        The value of ``message.subject`` when it is present and truthy,
        otherwise the literal string ``'NO SUBJECT'``.
    """
    # A missing attribute, ``None`` and an empty string all mean "no
    # subject"; a plain ``getattr`` default would only cover the first.
    subject = getattr(message, 'subject', None)
    return subject if subject else 'NO SUBJECT'
|
||||||
|
|
||||||
|
|
||||||
class MailCrawler(object):
|
class MailCrawler(object):
|
||||||
@ -53,7 +54,7 @@ class MailCrawler(object):
|
|||||||
response = requests.post(
|
response = requests.post(
|
||||||
parser_host+'/parse',
|
parser_host+'/parse',
|
||||||
json={
|
json={
|
||||||
'subject': message.subject,
|
'subject': get_message_subject(message),
|
||||||
'message': text,
|
'message': text,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@ -97,18 +98,21 @@ class MailCrawler(object):
|
|||||||
print('Parsed result: ', result)
|
print('Parsed result: ', result)
|
||||||
print('Indexed result: ', self.index_token(result))
|
print('Indexed result: ', self.index_token(result))
|
||||||
|
|
||||||
|
|
||||||
def process_messages(self, server, since_date, last_message=0):
|
def process_messages(self, server, since_date, last_message=0):
|
||||||
for uid, message in server.messages(date__gt=since_date):
|
for uid, message in server.messages(date__gt=since_date):
|
||||||
uid = int(uid)
|
uid = int(uid)
|
||||||
if uid <= last_message:
|
if uid <= last_message:
|
||||||
print('DDB Already seen message with uid {}. Skipping'.format(uid))
|
print(
|
||||||
|
'DDB Already seen message with uid {}. Skipping'.format(uid)
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
print(
|
print(
|
||||||
'Processing message uid {} message_id {} '
|
'Processing message uid {} message_id {} '
|
||||||
'with subject "{}"'.format(
|
'with subject "{}"'.format(
|
||||||
uid, message.message_id, message.subject
|
uid,
|
||||||
|
message.message_id,
|
||||||
|
get_message_subject(message),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
self.process_message(message)
|
self.process_message(message)
|
||||||
@ -124,7 +128,6 @@ class MailCrawler(object):
|
|||||||
|
|
||||||
return since_date, last_message
|
return since_date, last_message
|
||||||
|
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
print('Starting crawler')
|
print('Starting crawler')
|
||||||
# TODO: Put server into some kind of context manager and property
|
# TODO: Put server into some kind of context manager and property
|
||||||
|
Loading…
Reference in New Issue
Block a user