Update docker config for crawler service and parser
This commit is contained in:
parent
df4f53dac4
commit
5882fe6ebb
1
.gitignore
vendored
1
.gitignore
vendored
@ -124,3 +124,4 @@ _testmain.go
|
|||||||
*.prof
|
*.prof
|
||||||
|
|
||||||
.DS_Store
|
.DS_Store
|
||||||
|
.env
|
||||||
|
3
crawler/Dockerfile
Normal file
3
crawler/Dockerfile
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
FROM python:3.6-onbuild
|
||||||
|
|
||||||
|
CMD python -m crawler.main
|
@ -1,26 +1,28 @@
|
|||||||
from getpass import getpass
|
from getpass import getpass
|
||||||
from datetime import date
|
from datetime import date
|
||||||
from ipdb import set_trace
|
|
||||||
import email
|
import email
|
||||||
|
import os
|
||||||
|
|
||||||
from imapclient import IMAPClient
|
from imapclient import IMAPClient
|
||||||
|
|
||||||
|
|
||||||
class MailCrawler(object):
|
VALID_CONTENT_TYPES = [ 'text/plain', 'text/html' ]
|
||||||
server_url = 'my.iamthefij.com'
|
|
||||||
valid_content_types = [ 'text/plain', 'text/html' ]
|
|
||||||
|
|
||||||
def get_credentials(self):
|
|
||||||
password = getpass('Password?')
|
class MailCrawler(object):
|
||||||
return ('iamthefij@iamthefij.com', password)
|
|
||||||
|
def __init__(self):
|
||||||
|
self.imap_url = os.environ['IMAP_URL']
|
||||||
|
self.imap_user = os.environ['IMAP_USER']
|
||||||
|
self.imap_pass = os.environ['IMAP_PASS']
|
||||||
|
|
||||||
def get_server(self):
|
def get_server(self):
|
||||||
server = IMAPClient(self.server_url, use_uid=True)
|
server = IMAPClient(self.imap_url, use_uid=True)
|
||||||
server.login(*self.get_credentials())
|
server.login(self.imap_user, self.imap_pass)
|
||||||
return server
|
return server
|
||||||
|
|
||||||
def is_valid_content_type(self, message):
|
def is_valid_content_type(self, message):
|
||||||
return message.get_content_type() in self.valid_content_types
|
return message.get_content_type() in VALID_CONTENT_TYPES
|
||||||
|
|
||||||
def get_email_text(self, message):
|
def get_email_text(self, message):
|
||||||
if not message.is_multipart():
|
if not message.is_multipart():
|
||||||
@ -31,7 +33,7 @@ class MailCrawler(object):
|
|||||||
payload.get_content_type(): self.get_email_text(payload)
|
payload.get_content_type(): self.get_email_text(payload)
|
||||||
for payload in message.get_payload()
|
for payload in message.get_payload()
|
||||||
}
|
}
|
||||||
for content_type in self.valid_content_types:
|
for content_type in VALID_CONTENT_TYPES:
|
||||||
text = content_type_to_payload.get(content_type)
|
text = content_type_to_payload.get(content_type)
|
||||||
if text:
|
if text:
|
||||||
return text
|
return text
|
||||||
@ -44,7 +46,6 @@ class MailCrawler(object):
|
|||||||
message_ids = server.search(['SINCE', date(2018, 1, 31)])
|
message_ids = server.search(['SINCE', date(2018, 1, 31)])
|
||||||
for msgid, data in server.fetch(message_ids, 'RFC822').items():
|
for msgid, data in server.fetch(message_ids, 'RFC822').items():
|
||||||
email_message = email.message_from_bytes(data[b'RFC822'])
|
email_message = email.message_from_bytes(data[b'RFC822'])
|
||||||
set_trace()
|
|
||||||
print(self.get_email_text(email_message))
|
print(self.get_email_text(email_message))
|
||||||
|
|
||||||
|
|
||||||
|
10
crawler/docker-compose.yml
Normal file
10
crawler/docker-compose.yml
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
version: '2'
|
||||||
|
services:
|
||||||
|
main:
|
||||||
|
build: .
|
||||||
|
volumes:
|
||||||
|
- .:/usr/src/app
|
||||||
|
environment:
|
||||||
|
IMAP_URL: my.iamthefij.com
|
||||||
|
IMAP_USER: iamthefij@iamthefij.com
|
||||||
|
IMAP_PASS: "${IMAP_PASS}"
|
@ -1,5 +1,11 @@
|
|||||||
version: '2'
|
version: '2'
|
||||||
services:
|
services:
|
||||||
|
crawler:
|
||||||
|
build: ./crawler
|
||||||
|
environment:
|
||||||
|
IMAP_URL: my.iamthefij.com
|
||||||
|
IMAP_USER: iamthefij@iamthefij.com
|
||||||
|
IMAP_PASS: "${IMAP_PASS}"
|
||||||
parser_package_tracking:
|
parser_package_tracking:
|
||||||
build: ./parsers/package-tracking
|
build: ./parsers/package-tracking
|
||||||
ports:
|
ports:
|
||||||
|
@ -2,5 +2,7 @@ version: '2'
|
|||||||
services:
|
services:
|
||||||
main:
|
main:
|
||||||
build: .
|
build: .
|
||||||
|
volumes:
|
||||||
|
- .:/src
|
||||||
ports:
|
ports:
|
||||||
- "127.0.0.1:8183:3000"
|
- "127.0.0.1:8183:3000"
|
||||||
|
Loading…
Reference in New Issue
Block a user