Browse Source

Run linters

master
IamTheFij 2 years ago
parent
commit
6dc99be7e4
  1. 1
      .gitignore
  2. 81
      crawler/crawler/main.py
  3. 2
      crawler/requirements.txt
  4. 84
      indexer/indexer/main.py
  5. 2
      indexer/requirements.txt
  6. 37
      viewers/main/viewer/main.py

1
.gitignore

@ -126,3 +126,4 @@ _testmain.go
.DS_Store
.env
docker-compose-prod.yml
.mypy_cache

81
crawler/crawler/main.py

@ -13,36 +13,34 @@ from imbox import Imbox
logging.basicConfig(
level=logging.WARNING,
format='%(asctime)s %(levelname)s %(name)s %(message)s'
level=logging.WARNING, format="%(asctime)s %(levelname)s %(name)s %(message)s"
)
logging.getLogger(__name__).addHandler(logging.NullHandler())
VALID_CONTENT_TYPES = ['text/plain', 'text/html']
VALID_CONTENT_TYPES = ["text/plain", "text/html"]
def get_message_subject(message):
    """Return the message subject, or a placeholder when it has none.

    Args:
        message: Any object; only its optional ``subject`` attribute is read.

    Returns:
        ``message.subject`` when the attribute exists, otherwise the
        placeholder text ``"NO SUBJECT"``.
    """
    return getattr(message, "subject", "NO SUBJECT")
class MailCrawler(object):
def __init__(self):
self._logger = logging.getLogger(self.__class__.__name__)
self.imap_url = os.environ['IMAP_URL']
self.imap_user = os.environ['IMAP_USER']
self.imap_pass = os.environ['IMAP_PASS']
self.imap_url = os.environ["IMAP_URL"]
self.imap_user = os.environ["IMAP_USER"]
self.imap_pass = os.environ["IMAP_PASS"]
self.parser_hosts = None
self.indexer_host = os.environ.get('INDEXER')
self.debug_mode = os.environ.get('DEBUG', False)
self.indexer_host = os.environ.get("INDEXER")
self.debug_mode = os.environ.get("DEBUG", False)
def get_parsers(self):
"""Retrieves a list of parser hosts"""
if self.parser_hosts is None:
self.parser_hosts = []
parser_format = 'PARSER_{}'
parser_format = "PARSER_{}"
parser_index = 1
parser_host = os.environ.get(parser_format.format(parser_index))
while parser_host is not None:
@ -56,21 +54,21 @@ class MailCrawler(object):
"""Parses tokens from an email message"""
text = self.get_email_text(message)
if not text:
print('No email text returned')
print("No email text returned")
return []
results = []
for parser_host in self.get_parsers():
# print('Parsing email text... ', text)
response = requests.post(
parser_host+'/parse',
parser_host + "/parse",
json={
'subject': get_message_subject(message),
'message': text,
"subject": get_message_subject(message),
"message": text,
},
)
response.raise_for_status()
print('Got response', response.text)
print("Got response", response.text)
results += response.json()
return results
@ -85,18 +83,18 @@ class MailCrawler(object):
def get_email_text(self, message):
    """Return the text body of an email message as one string.

    The plain-text body is preferred; the HTML body is used only when the
    plain-text one is missing or empty.

    Args:
        message: Object whose ``body`` is a mapping with optional ``"plain"``
            and ``"html"`` entries, each an iterable of body parts.

    Returns:
        Every ``str`` part of the chosen body concatenated together, or
        ``None`` when neither body has content.
    """
    body = message.body.get("plain") or message.body.get("html")
    if not body:
        return None
    # Concat all known body content together since it doesn't really matter;
    # parts that are not str are skipped.
    return "".join(part for part in body if isinstance(part, str))
def index_token(self, message):
"""Sends a token from the parser to the indexer"""
if self.indexer_host is None and self.debug_mode:
print("DDB No indexer host, but OK for debugging")
response = requests.post(
self.indexer_host+'/token',
self.indexer_host + "/token",
json=message,
)
response.raise_for_status()
@ -105,9 +103,11 @@ class MailCrawler(object):
def process_message(self, message):
    """Parse a single email message and index every parsed result.

    Each result returned by ``self.parse_message(message)`` gets the
    message subject stored under ``"subject"`` and is then passed to
    ``self.index_token``. Both the parsed result and the indexer's answer
    are printed.

    Args:
        message: The email message to process; its optional ``subject``
            attribute is read.
    """
    # Use the same "NO SUBJECT" placeholder that get_message_subject() sends
    # to the parsers, so a message without a subject does not raise here.
    subject = getattr(message, "subject", "NO SUBJECT")
    for result in self.parse_message(message):
        result["subject"] = subject
        print("Parsed result: ", result)
        print("Indexed result: ", self.index_token(result))
@ -138,14 +138,13 @@ class MailCrawler(object):
message_date = parser.parse(message.date)
self._logger.debug(
"DDB Processed message. Message date: %s Old date: %s",
message_date, since_date
message_date,
since_date,
)
try:
since_date = max(since_date, message_date)
except TypeError:
self._logger.error(
"Error comparing dates. We'll just use the last one"
)
self._logger.error("Error comparing dates. We'll just use the last one")
self._logger.debug("DDB Since date is now %s", since_date)
last_uid = max(uid, last_uid)
@ -155,16 +154,22 @@ class MailCrawler(object):
"""Parses command line arguments and returns them"""
parser = ArgumentParser(description="Inbox crawler")
parser.add_argument(
    "--sleep",
    "-s",
    # Convert command-line values to a number so they can be handed to
    # sleep() exactly like the integer default.
    type=int,
    default=10 * 60,
    help="Number of seconds to wait between polling IMAP server. Default 10 min",
)
parser.add_argument(
    "--verbosity",
    "-v",
    action="count",
    # help must be a single string: a tuple makes argparse raise TypeError
    # as soon as the help text is formatted (e.g. for --help).
    help=(
        "Adjust log verbosity by increasing arg count. Default log "
        "level is ERROR. Level increases with each `v`"
    ),
)
return parser.parse_args(args)
@ -186,7 +191,7 @@ class MailCrawler(object):
if args.verbosity:
self._set_log_level(args.verbosity)
self._logger.info('Starting crawler')
self._logger.info("Starting crawler")
with self.get_server() as server:
# TODO: parameterize startup date, maybe relative
since_date = datetime.now(tzutc()) - timedelta(days=16)
@ -194,9 +199,7 @@ class MailCrawler(object):
while True:
print("Processing messages")
since_date, last_uid = self.process_messages(
server,
since_date,
last_uid=last_uid
server, since_date, last_uid=last_uid
)
self._logger.info(
"DDB Processed all. New since_date %s",
@ -205,10 +208,10 @@ class MailCrawler(object):
sleep(args.sleep)
if __name__ == "__main__":
    # Restart the crawler whenever the IMAP connection is aborted; any other
    # exception still propagates and ends the process.
    while True:
        try:
            MailCrawler().run()
        except IMAP4.abort:
            print("Imap abort. We will try to reconnect")

2
crawler/requirements.txt

@ -1,3 +1,3 @@
imbox
python-dateutil
requests
git+https://github.com/martinrusev/imbox@fd68b35e22686f43cdb7e3df344efc9b3a26b1e6

84
indexer/indexer/main.py

@ -2,24 +2,25 @@ import json
import os
import sys
import flask
from flask import jsonify
from flask import request
from flask.ext.sqlalchemy import SQLAlchemy
import flask
app = flask.Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = os.environ.get(
'SQLALCHEMY_DATABASE_URI',
'sqlite:///../tokens.db'
app.config["SQLALCHEMY_DATABASE_URI"] = os.environ.get(
"SQLALCHEMY_DATABASE_URI", "sqlite:///../tokens.db"
)
app.config['SQLALCHEMY_ECHO'] = True
app.config['DEBUG'] = True
app.config["SQLALCHEMY_ECHO"] = True
app.config["DEBUG"] = True
db = SQLAlchemy(app)
class EmailToken(db.Model):
"""Model to store the indexed tokens"""
id = db.Column(db.Integer, primary_key=True)
subject = db.Column(db.String(1024))
token = db.Column(db.String(1024))
@ -34,47 +35,47 @@ class EmailToken(db.Model):
def as_dict(self):
    """Return this token as a plain dictionary.

    The ``token_type`` attribute is exposed under the ``"type"`` key and
    the result of ``get_token_metadata()`` under ``"metadata"``.
    """
    return {
        "id": self.id,
        "subject": self.subject,
        "token": self.token,
        "type": self.token_type,
        "metadata": self.get_token_metadata(),
        "disabled": self.disabled,
    }
@classmethod
def from_json(cls, data):
    """Build an instance from a decoded JSON payload.

    Args:
        data: Mapping with optional ``subject``, ``token``, ``type``,
            ``metadata`` and ``disabled`` keys.

    Returns:
        ``cls(...)`` constructed from the payload. ``metadata`` is passed as
        its JSON text (``token_metadata``); ``disabled`` defaults to
        ``False``.
    """
    metadata = data.get("metadata")
    try:
        metadata = json.dumps(metadata)
    except TypeError as err:
        # Best effort: report the problem and pass the value on unchanged.
        print("Error dumping metadata", err, file=sys.stderr)
    return cls(
        subject=data.get("subject"),
        token=data.get("token"),
        token_type=data.get("type"),
        token_metadata=metadata,
        disabled=data.get("disabled", False),
    )
@classmethod
def jsonify_all(cls, token_type=None, desc=False):
    """Return a JSON response listing the stored tokens.

    Args:
        token_type: When truthy, only tokens with this ``token_type`` are
            included.
        desc: When truthy, tokens are ordered by ``id`` descending.

    Returns:
        The result of ``jsonify(tokens=[...])``, one ``as_dict()`` entry
        per matching token.
    """
    query = cls.query
    if token_type:
        print("Filtering query by token type", file=sys.stderr)
        query = query.filter_by(token_type=token_type)
    if desc:
        query = query.order_by(cls.id.desc())
    return jsonify(tokens=[token.as_dict() for token in query.all()])
@app.route("/")
def check():
    """Health-check endpoint: always answers with the text ``OK``."""
    return "OK"
@app.route('/token', methods=['POST'])
@app.route("/token", methods=["POST"])
def create_tokens():
"""Creates a token from posted JSON request"""
new_token = EmailToken.from_json(request.get_json(force=True))
@ -85,48 +86,41 @@ def create_tokens():
).first()
print(
'Received token with value {} and type {}'.format(
new_token.token, new_token.token_type
), file=sys.stderr
"Received token with value {} and type {}".format(
new_token.token, new_token.token_type
),
file=sys.stderr,
)
print('Existing token? ', existing_token, file=sys.stderr)
print("Existing token? ", existing_token, file=sys.stderr)
if not existing_token:
print('No existing token, creating a new one', file=sys.stderr)
print("No existing token, creating a new one", file=sys.stderr)
db.session.add(new_token)
db.session.commit()
db.session.refresh(new_token)
return jsonify(
success=True,
created=True,
record=new_token.as_dict()
)
return jsonify(success=True, created=True, record=new_token.as_dict())
else:
print('Found an existing token', file=sys.stderr)
return jsonify(
success=True,
created=False,
record=existing_token.as_dict()
)
print("Found an existing token", file=sys.stderr)
return jsonify(success=True, created=False, record=existing_token.as_dict())
@app.route("/token", methods=["GET"])
def list_all_tokens():
    """Lists all tokens with an optional type filter.

    Query parameters:
        filter_type: Only return tokens of this type.
        desc: Order by id descending when set to a true-like value
            (``1``, ``true``, ``yes`` or ``on``, case-insensitive).
    """
    token_type = request.args.get("filter_type")
    # Query-string values arrive as strings, so "false" or "0" would count as
    # truthy if the raw value were used as the flag.
    desc_flag = request.args.get("desc", "").strip().lower()
    desc = desc_flag in ("1", "true", "yes", "on")
    print("Asked to filter by ", token_type, file=sys.stderr)
    return EmailToken.jsonify_all(token_type=token_type, desc=desc)
@app.route("/token/<int:token_id>", methods=["GET"])
def get_token(token_id):
    """Gets a token by its primary key id.

    Responds with 404 when no token has that id.
    """
    token = EmailToken.query.get(token_id)
    if token is None:
        # query.get() yields None for an unknown id; answer 404 instead of
        # failing on None.as_dict() with a 500.
        flask.abort(404)
    return jsonify(token.as_dict())
if __name__ == "__main__":
    # Create the database tables before starting to serve requests.
    db.create_all()
    app.run(host="0.0.0.0", port=5000)

2
indexer/requirements.txt

@ -1,3 +1,3 @@
flask==0.12.2
sqlalchemy==1.2.2
flask-sqlalchemy==2.3.2
sqlalchemy==1.2.2

37
viewers/main/viewer/main.py

@ -6,45 +6,46 @@ import requests
app = flask.Flask(__name__)
app.config['DEBUG'] = True
app.config["DEBUG"] = True
indexer_url = os.environ.get('INDEXER_URL', 'http://indexer')
indexer_url = os.environ.get("INDEXER_URL", "http://indexer")
@app.route("/healthcheck")
def healthcheck():
    """Health-check endpoint: always answers with the text ``OK``."""
    return "OK"
@app.route("/")
def home():
    """Render the ``home.html`` template."""
    return flask.render_template("home.html")
@app.route("/shipping")
def get_tokens():
    """Render the shipping page with tracking info for each SHIPPING token.

    Requests the SHIPPING tokens from the indexer (descending order), then
    asks the package-tracking viewer for details on each token and merges
    the answer into that token's ``metadata``. A failed lookup is reported
    on stderr and the token is rendered without the extra details.

    Raises:
        requests.HTTPError: If the indexer answers with an error status.
    """
    resp = requests.get(
        indexer_url + "/token",
        params={
            "filter_type": "SHIPPING",
            "desc": True,
        },
    )
    resp.raise_for_status()
    # A missing or null "tokens" entry is treated as "no tokens" rather than
    # failing on iteration.
    tokens = resp.json().get("tokens") or []
    for token in tokens:
        try:
            info_resp = requests.get(
                "http://viewer_package_tracking:3000/info/" + token["token"]
            )
            info_resp.raise_for_status()
            print("Response: ", info_resp.text, file=sys.stderr)
            token["metadata"].update(info_resp.json())
        except Exception as e:
            # Deliberately best effort: one failing lookup must not break the
            # whole page, so report it and move on to the next token.
            print("Error", e, file=sys.stderr)
    return flask.render_template("shipping.html", trackers=tokens)
if __name__ == "__main__":
    # Serve on all interfaces, port 5000, when run as a script.
    app.run(host="0.0.0.0", port=5000)

Loading…
Cancel
Save