Run linters
This commit is contained in:
parent
aafb524673
commit
6dc99be7e4
1
.gitignore
vendored
1
.gitignore
vendored
@ -126,3 +126,4 @@ _testmain.go
|
||||
.DS_Store
|
||||
.env
|
||||
docker-compose-prod.yml
|
||||
.mypy_cache
|
||||
|
@ -13,36 +13,34 @@ from imbox import Imbox
|
||||
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.WARNING,
|
||||
format='%(asctime)s %(levelname)s %(name)s %(message)s'
|
||||
level=logging.WARNING, format="%(asctime)s %(levelname)s %(name)s %(message)s"
|
||||
)
|
||||
logging.getLogger(__name__).addHandler(logging.NullHandler())
|
||||
|
||||
|
||||
VALID_CONTENT_TYPES = ['text/plain', 'text/html']
|
||||
VALID_CONTENT_TYPES = ["text/plain", "text/html"]
|
||||
|
||||
|
||||
def get_message_subject(message):
    """Returns message subject or a placeholder text

    The placeholder "NO SUBJECT" is used both when the message has no
    ``subject`` attribute and when the attribute is present but empty or
    None, so callers never receive a blank subject.
    """
    return getattr(message, "subject", None) or "NO SUBJECT"
|
||||
|
||||
|
||||
class MailCrawler(object):
|
||||
|
||||
def __init__(self):
|
||||
self._logger = logging.getLogger(self.__class__.__name__)
|
||||
self.imap_url = os.environ['IMAP_URL']
|
||||
self.imap_user = os.environ['IMAP_USER']
|
||||
self.imap_pass = os.environ['IMAP_PASS']
|
||||
self.imap_url = os.environ["IMAP_URL"]
|
||||
self.imap_user = os.environ["IMAP_USER"]
|
||||
self.imap_pass = os.environ["IMAP_PASS"]
|
||||
self.parser_hosts = None
|
||||
self.indexer_host = os.environ.get('INDEXER')
|
||||
self.debug_mode = os.environ.get('DEBUG', False)
|
||||
self.indexer_host = os.environ.get("INDEXER")
|
||||
self.debug_mode = os.environ.get("DEBUG", False)
|
||||
|
||||
def get_parsers(self):
|
||||
"""Retrieves a list of parser hosts"""
|
||||
if self.parser_hosts is None:
|
||||
self.parser_hosts = []
|
||||
parser_format = 'PARSER_{}'
|
||||
parser_format = "PARSER_{}"
|
||||
parser_index = 1
|
||||
parser_host = os.environ.get(parser_format.format(parser_index))
|
||||
while parser_host is not None:
|
||||
@ -56,21 +54,21 @@ class MailCrawler(object):
|
||||
"""Parses tokens from an email message"""
|
||||
text = self.get_email_text(message)
|
||||
if not text:
|
||||
print('No email text returned')
|
||||
print("No email text returned")
|
||||
return []
|
||||
|
||||
results = []
|
||||
for parser_host in self.get_parsers():
|
||||
# print('Parsing email text... ', text)
|
||||
response = requests.post(
|
||||
parser_host+'/parse',
|
||||
parser_host + "/parse",
|
||||
json={
|
||||
'subject': get_message_subject(message),
|
||||
'message': text,
|
||||
"subject": get_message_subject(message),
|
||||
"message": text,
|
||||
},
|
||||
)
|
||||
response.raise_for_status()
|
||||
print('Got response', response.text)
|
||||
print("Got response", response.text)
|
||||
results += response.json()
|
||||
return results
|
||||
|
||||
@ -85,18 +83,18 @@ class MailCrawler(object):
|
||||
|
||||
def get_email_text(self, message):
    """Retrieves the text body of an email message

    Prefers the "plain" body and falls back to "html". Returns None when
    neither holds any content.
    """
    parts = message.body.get("plain")
    if not parts:
        parts = message.body.get("html")
    if not parts:
        return None
    # Only str parts are kept; they are joined with no separator since the
    # order and boundaries of the parts do not matter to the parsers
    return "".join(part for part in parts if isinstance(part, str))
|
||||
|
||||
def index_token(self, message):
|
||||
"""Sends a token from the parser to the indexer"""
|
||||
if self.indexer_host is None and self.debug_mode:
|
||||
print("DDB No indexer host, but OK for debugging")
|
||||
response = requests.post(
|
||||
self.indexer_host+'/token',
|
||||
self.indexer_host + "/token",
|
||||
json=message,
|
||||
)
|
||||
response.raise_for_status()
|
||||
@ -105,9 +103,11 @@ class MailCrawler(object):
|
||||
def process_message(self, message):
    """Process a single email message

    Every result produced by parse_message() is tagged with the message
    subject, printed, and then passed to index_token().
    """
    # Go through the same helper parse_message() uses, so a message that
    # lacks a subject attribute gets the placeholder text here instead of
    # raising AttributeError.
    subject = get_message_subject(message)
    for result in self.parse_message(message):
        result.update({"subject": subject})
        print("Parsed result: ", result)
        print("Indexed result: ", self.index_token(result))
|
||||
|
||||
@ -138,14 +138,13 @@ class MailCrawler(object):
|
||||
message_date = parser.parse(message.date)
|
||||
self._logger.debug(
|
||||
"DDB Processed message. Message date: %s Old date: %s",
|
||||
message_date, since_date
|
||||
message_date,
|
||||
since_date,
|
||||
)
|
||||
try:
|
||||
since_date = max(since_date, message_date)
|
||||
except TypeError:
|
||||
self._logger.error(
|
||||
"Error comparing dates. We'll just use the last one"
|
||||
)
|
||||
self._logger.error("Error comparing dates. We'll just use the last one")
|
||||
self._logger.debug("DDB Since date is now %s", since_date)
|
||||
last_uid = max(uid, last_uid)
|
||||
|
||||
@ -155,16 +154,22 @@ class MailCrawler(object):
|
||||
"""Parses command line arguments and returns them"""
|
||||
parser = ArgumentParser(description="Inbox crawler")
|
||||
parser.add_argument(
|
||||
"--sleep", "-s",
|
||||
"--sleep",
|
||||
"-s",
|
||||
default=10 * 60,
|
||||
help=("Number of seconds to wait between polling IMAP server."
|
||||
"Default 10 min"),
|
||||
help=(
|
||||
"Number of seconds to wait between polling IMAP server."
|
||||
"Default 10 min"
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--verbosity", "-v",
|
||||
"--verbosity",
|
||||
"-v",
|
||||
action="count",
|
||||
help=("Adjust log verbosity by increasing arg count. Default log",
|
||||
"level is ERROR. Level increases with each `v`"),
|
||||
help=(
|
||||
"Adjust log verbosity by increasing arg count. Default log",
|
||||
"level is ERROR. Level increases with each `v`",
|
||||
),
|
||||
)
|
||||
return parser.parse_args(args)
|
||||
|
||||
@ -186,7 +191,7 @@ class MailCrawler(object):
|
||||
if args.verbosity:
|
||||
self._set_log_level(args.verbosity)
|
||||
|
||||
self._logger.info('Starting crawler')
|
||||
self._logger.info("Starting crawler")
|
||||
with self.get_server() as server:
|
||||
# TODO: parameterize startup date, maybe relative
|
||||
since_date = datetime.now(tzutc()) - timedelta(days=16)
|
||||
@ -194,9 +199,7 @@ class MailCrawler(object):
|
||||
while True:
|
||||
print("Processing messages")
|
||||
since_date, last_uid = self.process_messages(
|
||||
server,
|
||||
since_date,
|
||||
last_uid=last_uid
|
||||
server, since_date, last_uid=last_uid
|
||||
)
|
||||
self._logger.info(
|
||||
"DDB Processed all. New since_date %s",
|
||||
@ -205,10 +208,10 @@ class MailCrawler(object):
|
||||
sleep(args.sleep)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Keep the crawler alive: when the IMAP connection aborts, report it
    # and start over with a fresh MailCrawler
    while True:
        try:
            MailCrawler().run()
        except IMAP4.abort:
            print("Imap abort. We will try to reconnect")
|
||||
|
@ -1,3 +1,3 @@
|
||||
imbox
|
||||
python-dateutil
|
||||
requests
|
||||
git+https://github.com/martinrusev/imbox@fd68b35e22686f43cdb7e3df344efc9b3a26b1e6
|
||||
|
@ -2,24 +2,25 @@ import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
import flask
|
||||
from flask import jsonify
|
||||
from flask import request
|
||||
from flask.ext.sqlalchemy import SQLAlchemy
|
||||
import flask
|
||||
|
||||
|
||||
app = flask.Flask(__name__)
|
||||
app.config['SQLALCHEMY_DATABASE_URI'] = os.environ.get(
|
||||
'SQLALCHEMY_DATABASE_URI',
|
||||
'sqlite:///../tokens.db'
|
||||
app.config["SQLALCHEMY_DATABASE_URI"] = os.environ.get(
|
||||
"SQLALCHEMY_DATABASE_URI", "sqlite:///../tokens.db"
|
||||
)
|
||||
app.config['SQLALCHEMY_ECHO'] = True
|
||||
app.config['DEBUG'] = True
|
||||
app.config["SQLALCHEMY_ECHO"] = True
|
||||
app.config["DEBUG"] = True
|
||||
|
||||
db = SQLAlchemy(app)
|
||||
|
||||
|
||||
class EmailToken(db.Model):
|
||||
"""Model to store the indexed tokens"""
|
||||
|
||||
id = db.Column(db.Integer, primary_key=True)
|
||||
subject = db.Column(db.String(1024))
|
||||
token = db.Column(db.String(1024))
|
||||
@ -34,47 +35,47 @@ class EmailToken(db.Model):
|
||||
|
||||
def as_dict(self):
    """Returns the token's fields as a plain dict"""
    fields = dict(id=self.id, subject=self.subject, token=self.token)
    # Exposed under shorter keys than the attribute names
    fields["type"] = self.token_type
    fields["metadata"] = self.get_token_metadata()
    fields["disabled"] = self.disabled
    return fields
|
||||
|
||||
@classmethod
def from_json(cls, data):
    """Builds a token from a decoded JSON payload

    Reads the "subject", "token", "type", "metadata" and "disabled" keys
    of ``data``. "disabled" defaults to False; the others default to None.
    The metadata is serialized to JSON text before being handed to the
    constructor as ``token_metadata``.
    """
    try:
        metadata = json.dumps(data.get("metadata"))
    except (TypeError, ValueError) as err:
        print("Error dumping metadata", err, file=sys.stderr)
        # Do not pass the raw, unserializable object on to the model:
        # fall back to JSON null so token_metadata is always JSON text.
        metadata = json.dumps(None)

    return cls(
        subject=data.get("subject"),
        token=data.get("token"),
        token_type=data.get("type"),
        token_metadata=metadata,
        disabled=data.get("disabled", False),
    )
|
||||
|
||||
@classmethod
def jsonify_all(cls, token_type=None, desc=False):
    """Returns a JSON response listing tokens under the "tokens" key

    A truthy ``token_type`` restricts the listing to that type; a truthy
    ``desc`` orders it by descending id.
    """
    tokens = cls.query
    if token_type:
        print("Filtering query by token type", file=sys.stderr)
        tokens = tokens.filter_by(token_type=token_type)
    if desc:
        tokens = tokens.order_by(cls.id.desc())
    payload = [record.as_dict() for record in tokens.all()]
    return jsonify(tokens=payload)
|
||||
|
||||
|
||||
@app.route("/")
def check():
    """Answers every request with the fixed text OK"""
    status = "OK"
    return status
|
||||
|
||||
|
||||
@app.route('/token', methods=['POST'])
|
||||
@app.route("/token", methods=["POST"])
|
||||
def create_tokens():
|
||||
"""Creates a token from posted JSON request"""
|
||||
new_token = EmailToken.from_json(request.get_json(force=True))
|
||||
@ -85,48 +86,41 @@ def create_tokens():
|
||||
).first()
|
||||
|
||||
print(
|
||||
'Received token with value {} and type {}'.format(
|
||||
"Received token with value {} and type {}".format(
|
||||
new_token.token, new_token.token_type
|
||||
), file=sys.stderr
|
||||
),
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
print('Existing token? ', existing_token, file=sys.stderr)
|
||||
print("Existing token? ", existing_token, file=sys.stderr)
|
||||
|
||||
if not existing_token:
|
||||
print('No existing token, creating a new one', file=sys.stderr)
|
||||
print("No existing token, creating a new one", file=sys.stderr)
|
||||
db.session.add(new_token)
|
||||
db.session.commit()
|
||||
db.session.refresh(new_token)
|
||||
return jsonify(
|
||||
success=True,
|
||||
created=True,
|
||||
record=new_token.as_dict()
|
||||
)
|
||||
return jsonify(success=True, created=True, record=new_token.as_dict())
|
||||
else:
|
||||
print('Found an existing token', file=sys.stderr)
|
||||
return jsonify(
|
||||
success=True,
|
||||
created=False,
|
||||
record=existing_token.as_dict()
|
||||
)
|
||||
print("Found an existing token", file=sys.stderr)
|
||||
return jsonify(success=True, created=False, record=existing_token.as_dict())
|
||||
|
||||
|
||||
@app.route("/token", methods=["GET"])
def list_all_tokens():
    """Lists all tokens with an optional type filter

    Query parameters:
        filter_type: only list tokens of this type.
        desc: order by descending id, unless the value is empty or one of
            0 / false / no / off (case-insensitive).
    """
    token_type = request.args.get("filter_type")
    # Query-string values arrive as text, so "false" or "0" would be
    # truthy if handed to jsonify_all() unchanged.
    desc_arg = request.args.get("desc", "")
    desc = desc_arg.strip().lower() not in ("", "0", "false", "no", "off")
    print("Asked to filter by ", token_type, file=sys.stderr)
    return EmailToken.jsonify_all(token_type=token_type, desc=desc)
|
||||
|
||||
|
||||
@app.route("/token/<int:token_id>", methods=["GET"])
def get_token(token_id):
    """Gets a token by its primary key id

    Responds with status 404 and a JSON error body when no token has the
    requested id.
    """
    token = EmailToken.query.get(token_id)
    if token is None:
        # query.get() gives None for an unknown id; without this guard the
        # as_dict() call below fails and the client sees a 500
        return jsonify(success=False, error="Token not found"), 404
    return jsonify(token.as_dict())
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Create the database tables before the server starts taking requests
    db.create_all()
    # NOTE(review): the app config in this file sets DEBUG = True while the
    # server listens on all interfaces - confirm this entry point is only
    # used for development
    app.run(port=5000, host="0.0.0.0")
|
||||
|
@ -1,3 +1,3 @@
|
||||
flask==0.12.2
|
||||
sqlalchemy==1.2.2
|
||||
flask-sqlalchemy==2.3.2
|
||||
sqlalchemy==1.2.2
|
||||
|
@ -6,45 +6,46 @@ import requests
|
||||
|
||||
|
||||
app = flask.Flask(__name__)
|
||||
app.config['DEBUG'] = True
|
||||
app.config["DEBUG"] = True
|
||||
|
||||
indexer_url = os.environ.get('INDEXER_URL', 'http://indexer')
|
||||
indexer_url = os.environ.get("INDEXER_URL", "http://indexer")
|
||||
|
||||
|
||||
@app.route("/healthcheck")
def healthcheck():
    """Answers every request with the fixed text OK"""
    status = "OK"
    return status
|
||||
|
||||
@app.route("/")
def home():
    """Renders the home.html template"""
    page = flask.render_template("home.html")
    return page
|
||||
|
||||
|
||||
@app.route("/shipping")
def get_tokens():
    """Renders the shipping page with tracking info for each token

    Requests the SHIPPING tokens from the indexer (descending order), then
    asks the package tracking service about each one and merges the answer
    into that token's metadata. A failed lookup is reported on stderr and
    leaves the token as the indexer returned it.
    """
    from urllib.parse import quote

    resp = requests.get(
        indexer_url + "/token",
        params={
            "filter_type": "SHIPPING",
            "desc": True,
        },
    )
    resp.raise_for_status()
    # A response without "tokens" (or with null) must not crash the loop
    tokens = resp.json().get("tokens") or []
    for token in tokens:
        try:
            # Token values originate outside this service, so escape them
            # before placing them in the URL path
            resp = requests.get(
                "http://viewer_package_tracking:3000/info/"
                + quote(token["token"], safe="")
            )
            resp.raise_for_status()
            print("Response: ", resp.text, file=sys.stderr)
            info = resp.json()
            # Metadata may be absent or null; start from an empty dict then
            metadata = token.get("metadata") or {}
            metadata.update(info)
            token["metadata"] = metadata
        except Exception as e:
            # Deliberately broad: tracking info is best-effort and one bad
            # token must not prevent the page from rendering
            print("Error", e, file=sys.stderr)
    return flask.render_template("shipping.html", trackers=tokens)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # NOTE(review): the app config in this file sets DEBUG = True while the
    # server listens on all interfaces - confirm this entry point is only
    # used for development
    app.run(port=5000, host="0.0.0.0")
|
||||
|
Loading…
Reference in New Issue
Block a user