Run linters

This commit is contained in:
IamTheFij 2019-11-11 16:26:25 -08:00
parent aafb524673
commit 6dc99be7e4
6 changed files with 103 additions and 104 deletions

1
.gitignore vendored
View File

@ -126,3 +126,4 @@ _testmain.go
.DS_Store .DS_Store
.env .env
docker-compose-prod.yml docker-compose-prod.yml
.mypy_cache

View File

@ -13,36 +13,34 @@ from imbox import Imbox
# Configure root logging once at import time: WARNING and above, with each
# record carrying its timestamp, level and logger name.
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(name)s %(message)s",
    level=logging.WARNING,
)
# Attach a no-op handler to this module's own logger.
logging.getLogger(__name__).addHandler(logging.NullHandler())

# Content types accepted as message bodies.
VALID_CONTENT_TYPES = ["text/plain", "text/html"]
def get_message_subject(message):
    """Returns message subject or a placeholder text

    The placeholder ``"NO SUBJECT"`` is returned when the ``subject``
    attribute is missing, and also when it is present but ``None`` or empty,
    so callers always receive a non-empty value.
    """
    return getattr(message, "subject", None) or "NO SUBJECT"
class MailCrawler(object): class MailCrawler(object):
def __init__(self): def __init__(self):
self._logger = logging.getLogger(self.__class__.__name__) self._logger = logging.getLogger(self.__class__.__name__)
self.imap_url = os.environ['IMAP_URL'] self.imap_url = os.environ["IMAP_URL"]
self.imap_user = os.environ['IMAP_USER'] self.imap_user = os.environ["IMAP_USER"]
self.imap_pass = os.environ['IMAP_PASS'] self.imap_pass = os.environ["IMAP_PASS"]
self.parser_hosts = None self.parser_hosts = None
self.indexer_host = os.environ.get('INDEXER') self.indexer_host = os.environ.get("INDEXER")
self.debug_mode = os.environ.get('DEBUG', False) self.debug_mode = os.environ.get("DEBUG", False)
def get_parsers(self): def get_parsers(self):
"""Retrieves a list of parser hosts""" """Retrieves a list of parser hosts"""
if self.parser_hosts is None: if self.parser_hosts is None:
self.parser_hosts = [] self.parser_hosts = []
parser_format = 'PARSER_{}' parser_format = "PARSER_{}"
parser_index = 1 parser_index = 1
parser_host = os.environ.get(parser_format.format(parser_index)) parser_host = os.environ.get(parser_format.format(parser_index))
while parser_host is not None: while parser_host is not None:
@ -56,21 +54,21 @@ class MailCrawler(object):
"""Parses tokens from an email message""" """Parses tokens from an email message"""
text = self.get_email_text(message) text = self.get_email_text(message)
if not text: if not text:
print('No email text returned') print("No email text returned")
return [] return []
results = [] results = []
for parser_host in self.get_parsers(): for parser_host in self.get_parsers():
# print('Parsing email text... ', text) # print('Parsing email text... ', text)
response = requests.post( response = requests.post(
parser_host+'/parse', parser_host + "/parse",
json={ json={
'subject': get_message_subject(message), "subject": get_message_subject(message),
'message': text, "message": text,
}, },
) )
response.raise_for_status() response.raise_for_status()
print('Got response', response.text) print("Got response", response.text)
results += response.json() results += response.json()
return results return results
@ -85,18 +83,18 @@ class MailCrawler(object):
def get_email_text(self, message):
    """Retrieves the text body of an email message

    Looks at ``message.body["plain"]`` first and ``message.body["html"]``
    second. The string parts of the first entry that yields any text are
    joined and returned. If the plain parts contain no usable text, the
    HTML parts are tried instead of returning an empty string.

    Returns:
        The joined text, or ``None`` when neither entry has any.
    """
    for content_key in ("plain", "html"):
        parts = message.body.get(content_key) or []
        # Concat all known body content together since it doesn't really matter
        text = "".join(part for part in parts if isinstance(part, str))
        if text:
            return text
    return None
def index_token(self, message): def index_token(self, message):
"""Sends a token from the parser to the indexer""" """Sends a token from the parser to the indexer"""
if self.indexer_host is None and self.debug_mode: if self.indexer_host is None and self.debug_mode:
print("DDB No indexer host, but OK for debugging") print("DDB No indexer host, but OK for debugging")
response = requests.post( response = requests.post(
self.indexer_host+'/token', self.indexer_host + "/token",
json=message, json=message,
) )
response.raise_for_status() response.raise_for_status()
@ -105,9 +103,11 @@ class MailCrawler(object):
def process_message(self, message):
    """Process a single email message

    Every result returned by ``parse_message`` is tagged with the message
    subject, printed, sent to the indexer through ``index_token``, and the
    indexer's reply is printed as well.
    """
    # Use the same helper as parse_message so a message without a subject
    # gets the placeholder instead of raising AttributeError.
    subject = get_message_subject(message)
    for result in self.parse_message(message):
        result.update({"subject": subject})
        print("Parsed result: ", result)
        print("Indexed result: ", self.index_token(result))
@ -138,14 +138,13 @@ class MailCrawler(object):
message_date = parser.parse(message.date) message_date = parser.parse(message.date)
self._logger.debug( self._logger.debug(
"DDB Processed message. Message date: %s Old date: %s", "DDB Processed message. Message date: %s Old date: %s",
message_date, since_date message_date,
since_date,
) )
try: try:
since_date = max(since_date, message_date) since_date = max(since_date, message_date)
except TypeError: except TypeError:
self._logger.error( self._logger.error("Error comparing dates. We'll just use the last one")
"Error comparing dates. We'll just use the last one"
)
self._logger.debug("DDB Since date is now %s", since_date) self._logger.debug("DDB Since date is now %s", since_date)
last_uid = max(uid, last_uid) last_uid = max(uid, last_uid)
@ -155,16 +154,22 @@ class MailCrawler(object):
"""Parses command line arguments and returns them""" """Parses command line arguments and returns them"""
parser = ArgumentParser(description="Inbox crawler") parser = ArgumentParser(description="Inbox crawler")
parser.add_argument( parser.add_argument(
"--sleep", "-s", "--sleep",
default=10*60, "-s",
help=("Number of seconds to wait between polling IMAP server." default=10 * 60,
"Default 10 min"), help=(
"Number of seconds to wait between polling IMAP server."
"Default 10 min"
),
) )
parser.add_argument( parser.add_argument(
"--verbosity", "-v", "--verbosity",
"-v",
action="count", action="count",
help=("Adjust log verbosity by increasing arg count. Default log", help=(
"level is ERROR. Level increases with each `v`"), "Adjust log verbosity by increasing arg count. Default log",
"level is ERROR. Level increases with each `v`",
),
) )
return parser.parse_args(args) return parser.parse_args(args)
@ -186,7 +191,7 @@ class MailCrawler(object):
if args.verbosity: if args.verbosity:
self._set_log_level(args.verbosity) self._set_log_level(args.verbosity)
self._logger.info('Starting crawler') self._logger.info("Starting crawler")
with self.get_server() as server: with self.get_server() as server:
# TODO: parameterize startup date, maybe relative # TODO: parameterize startup date, maybe relative
since_date = datetime.now(tzutc()) - timedelta(days=16) since_date = datetime.now(tzutc()) - timedelta(days=16)
@ -194,9 +199,7 @@ class MailCrawler(object):
while True: while True:
print("Processing messages") print("Processing messages")
since_date, last_uid = self.process_messages( since_date, last_uid = self.process_messages(
server, server, since_date, last_uid=last_uid
since_date,
last_uid=last_uid
) )
self._logger.info( self._logger.info(
"DDB Processed all. New since_date %s", "DDB Processed all. New since_date %s",
@ -205,10 +208,10 @@ class MailCrawler(object):
sleep(args.sleep) sleep(args.sleep)
if __name__ == "__main__":
    # Keep the crawler alive: an aborted IMAP connection starts a fresh
    # MailCrawler; any other exception propagates and ends the process.
    while True:
        try:
            crawler = MailCrawler()
            crawler.run()
        except IMAP4.abort:
            print("Imap abort. We will try to reconnect")

View File

@ -1,3 +1,3 @@
imbox
python-dateutil python-dateutil
requests requests
git+https://github.com/martinrusev/imbox@fd68b35e22686f43cdb7e3df344efc9b3a26b1e6

View File

@ -2,24 +2,25 @@ import json
import os import os
import sys import sys
import flask
from flask import jsonify from flask import jsonify
from flask import request from flask import request
from flask.ext.sqlalchemy import SQLAlchemy from flask.ext.sqlalchemy import SQLAlchemy
import flask
app = flask.Flask(__name__)
# The database URI comes from the environment and falls back to a SQLite file.
# SQL echo and Flask debug mode are always switched on.
app.config.update(
    SQLALCHEMY_DATABASE_URI=os.environ.get(
        "SQLALCHEMY_DATABASE_URI", "sqlite:///../tokens.db"
    ),
    SQLALCHEMY_ECHO=True,
    DEBUG=True,
)
db = SQLAlchemy(app)
class EmailToken(db.Model): class EmailToken(db.Model):
"""Model to store the indexed tokens""" """Model to store the indexed tokens"""
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
subject = db.Column(db.String(1024)) subject = db.Column(db.String(1024))
token = db.Column(db.String(1024)) token = db.Column(db.String(1024))
@ -34,47 +35,47 @@ class EmailToken(db.Model):
def as_dict(self):
    """Returns this token's fields as a plain dict

    ``token_type`` is exposed under the key ``type`` and the value of
    ``get_token_metadata()`` under ``metadata``.
    """
    return dict(
        id=self.id,
        subject=self.subject,
        token=self.token,
        type=self.token_type,
        metadata=self.get_token_metadata(),
        disabled=self.disabled,
    )
@classmethod
def from_json(cls, data):
    """Builds an instance from a decoded JSON payload

    Reads the keys ``subject``, ``token``, ``type``, ``metadata`` and
    ``disabled`` (default ``False``) from ``data``. The metadata is stored
    as a JSON string.

    If the metadata cannot be serialized, the error is printed to stderr
    and JSON ``null`` is stored, which is the same value a payload without
    metadata produces. The raw object is never passed to the model.
    """
    try:
        metadata = json.dumps(data.get("metadata"))
    except TypeError as err:
        print("Error dumping metadata", err, file=sys.stderr)
        metadata = json.dumps(None)

    return cls(
        subject=data.get("subject"),
        token=data.get("token"),
        token_type=data.get("type"),
        token_metadata=metadata,
        disabled=data.get("disabled", False),
    )
@classmethod
def jsonify_all(cls, token_type=None, desc=False):
    """Returns a JSON response listing stored tokens under the key ``tokens``

    A truthy ``token_type`` restricts the rows to that type. A truthy
    ``desc`` orders them by descending ``id``.
    """
    rows = cls.query
    if token_type:
        print("Filtering query by token type", file=sys.stderr)
        rows = rows.filter_by(token_type=token_type)
    if desc:
        rows = rows.order_by(cls.id.desc())

    serialized = [row.as_dict() for row in rows.all()]
    return jsonify(tokens=serialized)
@app.route("/")
def check():
    """Answers every request to the root path with the body ``OK``"""
    status_body = "OK"
    return status_body
@app.route('/token', methods=['POST']) @app.route("/token", methods=["POST"])
def create_tokens(): def create_tokens():
"""Creates a token from posted JSON request""" """Creates a token from posted JSON request"""
new_token = EmailToken.from_json(request.get_json(force=True)) new_token = EmailToken.from_json(request.get_json(force=True))
@ -85,48 +86,41 @@ def create_tokens():
).first() ).first()
print( print(
'Received token with value {} and type {}'.format( "Received token with value {} and type {}".format(
new_token.token, new_token.token_type new_token.token, new_token.token_type
), file=sys.stderr ),
file=sys.stderr,
) )
print('Existing token? ', existing_token, file=sys.stderr) print("Existing token? ", existing_token, file=sys.stderr)
if not existing_token: if not existing_token:
print('No existing token, creating a new one', file=sys.stderr) print("No existing token, creating a new one", file=sys.stderr)
db.session.add(new_token) db.session.add(new_token)
db.session.commit() db.session.commit()
db.session.refresh(new_token) db.session.refresh(new_token)
return jsonify( return jsonify(success=True, created=True, record=new_token.as_dict())
success=True,
created=True,
record=new_token.as_dict()
)
else: else:
print('Found an existing token', file=sys.stderr) print("Found an existing token", file=sys.stderr)
return jsonify( return jsonify(success=True, created=False, record=existing_token.as_dict())
success=True,
created=False,
record=existing_token.as_dict()
)
@app.route("/token", methods=["GET"])
def list_all_tokens():
    """Lists all tokens with an optional type filter

    Query parameters:
        filter_type: only tokens of this type are returned when given.
        desc: sort by descending id unless absent, empty, or one of
            ``0``/``false``/``no``/``off`` (case-insensitive).
    """
    token_type = request.args.get("filter_type")
    # Query-string values are always strings, so a raw "false" or "0" would
    # be truthy. Only treat explicit negatives as "not descending".
    desc_flag = request.args.get("desc", "").strip().lower()
    desc = desc_flag not in ("", "0", "false", "no", "off")
    print("Asked to filter by ", token_type, file=sys.stderr)
    return EmailToken.jsonify_all(token_type=token_type, desc=desc)
@app.route("/token/<int:token_id>", methods=["GET"])
def get_token(token_id):
    """Gets a token by its primary key id

    Responds with 404 when no token has that id, instead of failing with an
    AttributeError on ``None`` and returning a 500.
    """
    token = EmailToken.query.get_or_404(token_id)
    return jsonify(token.as_dict())
if __name__ == "__main__":
    # Create any missing tables, then serve on all interfaces, port 5000.
    db.create_all()
    app.run(port=5000, host="0.0.0.0")

View File

@ -1,3 +1,3 @@
flask==0.12.2 flask==0.12.2
sqlalchemy==1.2.2
flask-sqlalchemy==2.3.2 flask-sqlalchemy==2.3.2
sqlalchemy==1.2.2

View File

@ -6,45 +6,46 @@ import requests
app = flask.Flask(__name__)
app.config.update(DEBUG=True)

# Base URL of the indexer service; overridable through INDEXER_URL.
indexer_url = os.environ.get("INDEXER_URL", "http://indexer")
@app.route("/healthcheck")
def healthcheck():
    """Answers with the body ``OK``"""
    status_body = "OK"
    return status_body
@app.route("/")
def home():
    """Renders the ``home.html`` template"""
    page = flask.render_template("home.html")
    return page
@app.route("/shipping")
def get_tokens():
    """Renders the shipping page from the indexer's SHIPPING tokens

    Fetches the tokens from the indexer (newest first), then asks the
    package tracking service for details on each one and merges the answer
    into that token's ``metadata``. A failure for one token is printed to
    stderr and the token is still rendered without the extra details.

    Raises:
        requests.HTTPError: if the indexer answers with an error status.
    """
    # Local import: the file's import block is not part of this view.
    from urllib.parse import quote

    resp = requests.get(
        indexer_url + "/token",
        params={
            "filter_type": "SHIPPING",
            "desc": True,
        },
    )
    resp.raise_for_status()
    # A payload without "tokens" yields an empty page rather than a TypeError.
    tokens = resp.json().get("tokens") or []
    for token in tokens:
        try:
            # Token values are extracted from emails, so escape them before
            # placing them in a URL path.
            info_resp = requests.get(
                "http://viewer_package_tracking:3000/info/"
                + quote(token["token"], safe="")
            )
            info_resp.raise_for_status()
            print("Response: ", info_resp.text, file=sys.stderr)
            # Metadata may be missing or null for a token; start from an
            # empty dict in that case so the tracking info is still kept.
            metadata = token.get("metadata") or {}
            metadata.update(info_resp.json())
            token["metadata"] = metadata
        except Exception as e:
            # Deliberately best-effort: one bad tracker must not break the page.
            print("Error", e, file=sys.stderr)
    return flask.render_template("shipping.html", trackers=tokens)
if __name__ == "__main__":
    # Serve on all interfaces, port 5000.
    app.run(port=5000, host="0.0.0.0")