diff --git a/.gitignore b/.gitignore index 850fa0b..eb0ed0b 100644 --- a/.gitignore +++ b/.gitignore @@ -124,3 +124,4 @@ _testmain.go *.prof .DS_Store +.env diff --git a/crawler/Dockerfile b/crawler/Dockerfile new file mode 100644 index 0000000..2d637ee --- /dev/null +++ b/crawler/Dockerfile @@ -0,0 +1,3 @@ +FROM python:3.6-onbuild + +CMD python -m crawler.main diff --git a/crawler/crawler/main.py b/crawler/crawler/main.py index 905c35b..db01597 100644 --- a/crawler/crawler/main.py +++ b/crawler/crawler/main.py @@ -1,26 +1,28 @@ from getpass import getpass from datetime import date -from ipdb import set_trace import email +import os from imapclient import IMAPClient -class MailCrawler(object): - server_url = 'my.iamthefij.com' - valid_content_types = [ 'text/plain', 'text/html' ] +VALID_CONTENT_TYPES = [ 'text/plain', 'text/html' ] - def get_credentials(self): - password = getpass('Password?') - return ('iamthefij@iamthefij.com', password) + +class MailCrawler(object): + + def __init__(self): + self.imap_url = os.environ['IMAP_URL'] + self.imap_user = os.environ['IMAP_USER'] + self.imap_pass = os.environ['IMAP_PASS'] def get_server(self): - server = IMAPClient(self.server_url, use_uid=True) - server.login(*self.get_credentials()) + server = IMAPClient(self.imap_url, use_uid=True) + server.login(self.imap_user, self.imap_pass) return server def is_valid_content_type(self, message): - return message.get_content_type() in self.valid_content_types + return message.get_content_type() in VALID_CONTENT_TYPES def get_email_text(self, message): if not message.is_multipart(): @@ -31,7 +33,7 @@ class MailCrawler(object): payload.get_content_type(): self.get_email_text(payload) for payload in message.get_payload() } - for content_type in self.valid_content_types: + for content_type in VALID_CONTENT_TYPES: text = content_type_to_payload.get(content_type) if text: return text @@ -44,7 +46,6 @@ class MailCrawler(object): message_ids = server.search(['SINCE', date(2018, 1, 31)]) for msgid, data in server.fetch(message_ids, 'RFC822').items(): email_message = email.message_from_bytes(data[b'RFC822']) - set_trace() print(self.get_email_text(email_message)) diff --git a/crawler/docker-compose.yml b/crawler/docker-compose.yml new file mode 100644 index 0000000..49d63a4 --- /dev/null +++ b/crawler/docker-compose.yml @@ -0,0 +1,10 @@ +version: '2' +services: + main: + build: . + volumes: + - .:/usr/src/app + environment: + IMAP_URL: my.iamthefij.com + IMAP_USER: iamthefij@iamthefij.com + IMAP_PASS: "${IMAP_PASS}" diff --git a/docker-compose.yml b/docker-compose.yml index 7bb5fbd..5df8e42 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,11 @@ version: '2' services: + crawler: + build: ./crawler + environment: + IMAP_URL: my.iamthefij.com + IMAP_USER: iamthefij@iamthefij.com + IMAP_PASS: "${IMAP_PASS}" parser_package_tracking: build: ./parsers/package-tracking ports: diff --git a/parsers/package-tracking/docker-compose.yml b/parsers/package-tracking/docker-compose.yml index f462a47..dbe797e 100644 --- a/parsers/package-tracking/docker-compose.yml +++ b/parsers/package-tracking/docker-compose.yml @@ -2,5 +2,7 @@ version: '2' services: main: build: . + volumes: + - .:/src ports: - "127.0.0.1:8183:3000"