Crawler now reads a few emails and prints the body
This commit is contained in:
parent
4d954a879b
commit
df4f53dac4
1
.gitignore
vendored
1
.gitignore
vendored
@ -9,6 +9,7 @@ __pycache__/
|
|||||||
|
|
||||||
# Distribution / packaging
|
# Distribution / packaging
|
||||||
.Python
|
.Python
|
||||||
|
virtualenv_run/
|
||||||
env/
|
env/
|
||||||
build/
|
build/
|
||||||
develop-eggs/
|
develop-eggs/
|
||||||
|
0
crawler/crawler/__init__.py
Normal file
0
crawler/crawler/__init__.py
Normal file
52
crawler/crawler/main.py
Normal file
52
crawler/crawler/main.py
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
from getpass import getpass
|
||||||
|
from datetime import date
|
||||||
|
from ipdb import set_trace
|
||||||
|
import email
|
||||||
|
|
||||||
|
from imapclient import IMAPClient
|
||||||
|
|
||||||
|
|
||||||
|
class MailCrawler(object):
|
||||||
|
server_url = 'my.iamthefij.com'
|
||||||
|
valid_content_types = [ 'text/plain', 'text/html' ]
|
||||||
|
|
||||||
|
def get_credentials(self):
|
||||||
|
password = getpass('Password?')
|
||||||
|
return ('iamthefij@iamthefij.com', password)
|
||||||
|
|
||||||
|
def get_server(self):
|
||||||
|
server = IMAPClient(self.server_url, use_uid=True)
|
||||||
|
server.login(*self.get_credentials())
|
||||||
|
return server
|
||||||
|
|
||||||
|
def is_valid_content_type(self, message):
|
||||||
|
return message.get_content_type() in self.valid_content_types
|
||||||
|
|
||||||
|
def get_email_text(self, message):
|
||||||
|
if not message.is_multipart():
|
||||||
|
if self.is_valid_content_type(message):
|
||||||
|
return message.get_payload(decode=True)
|
||||||
|
else:
|
||||||
|
content_type_to_payload = {
|
||||||
|
payload.get_content_type(): self.get_email_text(payload)
|
||||||
|
for payload in message.get_payload()
|
||||||
|
}
|
||||||
|
for content_type in self.valid_content_types:
|
||||||
|
text = content_type_to_payload.get(content_type)
|
||||||
|
if text:
|
||||||
|
return text
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
server = self.get_server()
|
||||||
|
server.select_folder('INBOX')
|
||||||
|
message_ids = server.search(['SINCE', date(2018, 1, 31)])
|
||||||
|
for msgid, data in server.fetch(message_ids, 'RFC822').items():
|
||||||
|
email_message = email.message_from_bytes(data[b'RFC822'])
|
||||||
|
set_trace()
|
||||||
|
print(self.get_email_text(email_message))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
MailCrawler().run()
|
2
crawler/requirements-dev.txt
Normal file
2
crawler/requirements-dev.txt
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
ipdb
|
||||||
|
ipython
|
1
crawler/requirements.txt
Normal file
1
crawler/requirements.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
imapclient
|
Loading…
Reference in New Issue
Block a user