Add API functionality

This commit is contained in:
Patrick Jentsch 2022-09-02 13:24:14 +02:00
parent dedccad70a
commit ceef272d06
11 changed files with 583 additions and 54 deletions

View File

@ -1,3 +1,4 @@
from apifairy import APIFairy
from config import Config
from docker import DockerClient
from flask import Flask
@ -5,6 +6,7 @@ from flask_apscheduler import APScheduler
from flask_assets import Environment
from flask_login import LoginManager
from flask_mail import Mail
from flask_marshmallow import Marshmallow
from flask_migrate import Migrate
from flask_paranoid import Paranoid
from flask_socketio import SocketIO
@ -12,6 +14,7 @@ from flask_sqlalchemy import SQLAlchemy
from flask_hashids import Hashids
apifairy = APIFairy()
assets = Environment()
db = SQLAlchemy()
docker_client = DockerClient()
@ -19,6 +22,7 @@ hashids = Hashids()
login = LoginManager()
login.login_view = 'auth.login'
login.login_message = 'Please log in to access this page.'
ma = Marshmallow()
mail = Mail()
migrate = Migrate()
paranoid = Paranoid()
@ -38,10 +42,12 @@ def create_app(config: Config = Config) -> Flask:
registry=app.config['NOPAQUE_DOCKER_REGISTRY']
)
apifairy.init_app(app)
assets.init_app(app)
db.init_app(app)
hashids.init_app(app)
login.init_app(app)
ma.init_app(app)
mail.init_app(app)
migrate.init_app(app, db)
paranoid.init_app(app)
@ -51,6 +57,9 @@ def create_app(config: Config = Config) -> Flask:
from .admin import bp as admin_blueprint
app.register_blueprint(admin_blueprint, url_prefix='/admin')
from .api import bp as api_blueprint
app.register_blueprint(api_blueprint, url_prefix='/api')
from .auth import bp as auth_blueprint
app.register_blueprint(auth_blueprint, url_prefix='/auth')

View File

@ -1,25 +1,14 @@
from flask import Blueprint
from flask_restx import Api
from .tokens import ns as tokens_ns
bp = Blueprint('api', __name__)
authorizations = {
'basicAuth': {
'type': 'basic'
},
'apiKey': {
'type': 'apiKey',
'in': 'header',
'name': 'Authorization'
}
}
api = Api(
bp,
authorizations=authorizations,
description='An API to interact with nopaque',
title='nopaque API',
version='1.0'
)
api.add_namespace(tokens_ns)
from .tokens import bp as tokens_blueprint
bp.register_blueprint(tokens_blueprint, url_prefix='/tokens')
from .users import bp as users_blueprint
bp.register_blueprint(users_blueprint, url_prefix='/users')
from .jobs import bp as jobs_blueprint
bp.register_blueprint(jobs_blueprint, url_prefix='/jobs')

View File

@ -1,34 +1,49 @@
from app.models import User
from flask import current_app
from flask_httpauth import HTTPBasicAuth, HTTPTokenAuth
from sqlalchemy import or_
from werkzeug.http import HTTP_STATUS_CODES
from werkzeug.exceptions import Forbidden, Unauthorized
from app.models import User
basic_auth = HTTPBasicAuth()
token_auth = HTTPTokenAuth()
auth_error_responses = {
Unauthorized.code: Unauthorized.description,
Forbidden.code: Forbidden.description
}
@basic_auth.verify_password
def verify_password(email_or_username, password):
user = User.query.filter(
or_(
User.username == email_or_username,
User.email == email_or_username.lower()
)
).first()
if user and user.verify_password(password):
user = User.query.filter((User.email == email_or_username.lower()) | (User.username == email_or_username)).first()
if user is not None and user.verify_password(password):
return user
@basic_auth.error_handler
def basic_auth_error(status):
return {'error': HTTP_STATUS_CODES.get(status, 'Unknown error')}, status
error = (Forbidden if status == 403 else Unauthorized)()
return {
'code': error.code,
'message': error.name,
'description': error.description,
}, error.code, {'WWW-Authenticate': 'Form'}
@token_auth.verify_token
def verify_token(token):
return User.check_token(token) if token else None
return User.verify_access_token(token) if token else None
@token_auth.error_handler
def token_auth_error(status):
return {'error': HTTP_STATUS_CODES.get(status, 'Unknown error')}, status
error = (Forbidden if status == 403 else Unauthorized)()
return {
'code': error.code,
'message': error.name,
'description': error.description,
}, error.code
@basic_auth.get_user_roles
@token_auth.get_user_roles
def get_user_roles(user):
    """Return the role names of ``user`` as a one-element list.

    Registered as the role callback of both the basic and the token
    authenticator, so both compare against the user's single role name.
    """
    role_name = user.role.name
    return [role_name]

102
app/api/jobs.py Normal file
View File

@ -0,0 +1,102 @@
from apifairy import authenticate, response
from apifairy.decorators import body, other_responses
from flask import abort, Blueprint
from werkzeug.exceptions import InternalServerError
from app import db, hashids
from app.models import Job, JobInput, JobStatus, TesseractOCRModel
from .schemas import EmptySchema, JobSchema, SpaCyNLPPipelineJobSchema, TesseractOCRPipelineJobSchema, TesseractOCRModelSchema
from .auth import auth_error_responses, token_auth
bp = Blueprint('jobs', __name__)
job_schema = JobSchema()
jobs_schema = JobSchema(many=True)
spacy_nlp_pipeline_job_schema = SpaCyNLPPipelineJobSchema()
tesseract_ocr_pipeline_job_schema = TesseractOCRPipelineJobSchema()
tesseract_ocr_model_schema = TesseractOCRModelSchema()
tesseract_ocr_models_schema = TesseractOCRModelSchema(many=True)
@bp.route('', methods=['GET'])
@authenticate(token_auth, role='Administrator')
@response(jobs_schema)
@other_responses(auth_error_responses)
def get_jobs():
    """Get all jobs"""
    # Restricted to the 'Administrator' role by the @authenticate decorator;
    # the result is serialized with the many=True job schema.
    every_job = Job.query.all()
    return every_job
@bp.route('/tesseract-ocr-pipeline', methods=['POST'])
@authenticate(token_auth)
@body(tesseract_ocr_pipeline_job_schema, location='form')
@response(job_schema)
@other_responses({**auth_error_responses, InternalServerError.code: InternalServerError.description})
def create_tesseract_ocr_pipeline_job(args):
    """Create a new Tesseract OCR Pipeline job"""
    # ``args`` is the form payload loaded through
    # tesseract_ocr_pipeline_job_schema by the @body decorator.
    owner = token_auth.current_user()
    try:
        # Both creation steps answer an OSError with a 500, so they share
        # one handler.
        job = Job.create(
            title=args['title'],
            description=args['description'],
            service='tesseract-ocr-pipeline',
            service_args={
                # The client sends the model's hashid; decode it here.
                'model': hashids.decode(args['model_id']),
                'binarization': args['binarization'],
            },
            service_version=args['service_version'],
            user=owner,
        )
        JobInput.create(args['pdf'], job=job)
    except OSError:
        abort(500)
    # Mark the job as submitted only once its input has been stored.
    job.status = JobStatus.SUBMITTED
    db.session.commit()
    return job, 201
@bp.route('/tesseract-ocr-pipeline/models', methods=['GET'])
@authenticate(token_auth)
@response(tesseract_ocr_models_schema)
@other_responses(auth_error_responses)
def get_tesseract_ocr_models():
    """Get all Tesseract OCR Models"""
    # Any token-authenticated user may list the models; no role is required.
    all_models = TesseractOCRModel.query.all()
    return all_models
@bp.route('/<hashid:job_id>', methods=['DELETE'])
@authenticate(token_auth)
@response(EmptySchema, status_code=204)
@other_responses(auth_error_responses)
def delete_job(job_id):
    """Delete a job by id"""
    requester = token_auth.current_user()
    job = Job.query.get(job_id)
    if job is None:
        abort(404)
    # Only the job's owner or an administrator may delete it.
    is_owner = job.user == requester
    if not is_owner and not requester.is_administrator():
        abort(403)
    try:
        job.delete()
    except OSError:
        abort(500)
    db.session.commit()
    return {}, 204
@bp.route('/<hashid:job_id>', methods=['GET'])
@authenticate(token_auth)
@response(job_schema)
@other_responses(auth_error_responses)
def get_job(job_id):
    """Get a job by id"""
    requester = token_auth.current_user()
    job = Job.query.get(job_id)
    if job is None:
        abort(404)
    # Jobs are visible to their owner and to administrators only.
    is_owner = job.user == requester
    if not is_owner and not requester.is_administrator():
        abort(403)
    return job

165
app/api/schemas.py Normal file
View File

@ -0,0 +1,165 @@
from apifairy.fields import FileField
from marshmallow import validate, validates, ValidationError
from marshmallow.decorators import post_dump
from app import ma
from app.auth import USERNAME_REGEX
from app.models import Job, JobStatus, TesseractOCRModel, Token, User, UserSettingJobStatusMailNotificationLevel
from app.services import SERVICES
class EmptySchema(ma.Schema):
    """Schema without fields, used for responses that carry no body."""
class TokenSchema(ma.SQLAlchemySchema):
    """Access/refresh token pair backed by the ``Token`` model."""

    class Meta:
        model = Token
        ordered = True

    # The access token is mandatory on load; the refresh token is optional.
    access_token = ma.String(required=True)
    refresh_token = ma.String()
class TesseractOCRModelSchema(ma.SQLAlchemySchema):
    """Schema for ``TesseractOCRModel`` records.

    Identifiers are exposed as hashids under the keys ``id`` and ``user_id``
    and are dump-only; the remaining fields carry length/URL validators.
    """

    class Meta:
        model = TesseractOCRModel
        ordered = True

    # Dump-only identifiers
    hashid = ma.String(data_key='id', dump_only=True)
    user_hashid = ma.String(data_key='user_id', dump_only=True)

    # Descriptive metadata
    title = ma.auto_field(required=True, validate=validate.Length(min=1, max=64))
    description = ma.auto_field(required=True, validate=validate.Length(min=1, max=255))
    version = ma.String(required=True, validate=validate.Length(min=1, max=16))
    compatible_service_versions = ma.List(
        ma.String(required=True, validate=validate.Length(min=1, max=16)),
        required=True,
        validate=validate.Length(min=1, max=255),
    )

    # Publication details; only ``publisher_url`` is optional
    publisher = ma.String(required=True, validate=validate.Length(min=1, max=128))
    publisher_url = ma.String(
        validate=[validate.URL(), validate.Length(min=1, max=512)],
    )
    publishing_url = ma.String(
        required=True,
        validate=[validate.URL(), validate.Length(min=1, max=512)],
    )
    publishing_year = ma.Int(required=True)

    shared = ma.Boolean(required=True)
class JobSchema(ma.SQLAlchemySchema):
    """Schema for ``Job`` records.

    Only ``title`` and ``description`` are loadable; every other field is
    dump-only. The status is emitted as the enum member's name.
    """

    class Meta:
        model = Job
        ordered = True

    # Dump-only identifiers, exposed as ``id`` / ``user_id``
    hashid = ma.String(data_key='id', dump_only=True)
    user_hashid = ma.String(data_key='user_id', dump_only=True)

    # Client-supplied fields
    title = ma.auto_field(required=True, validate=validate.Length(min=1, max=32))
    description = ma.auto_field(required=True, validate=validate.Length(min=1, max=255))

    # Read-only bookkeeping
    creation_date = ma.auto_field(dump_only=True)
    end_date = ma.auto_field(dump_only=True)
    service = ma.String(dump_only=True, validate=validate.OneOf(SERVICES.keys()))
    service_args = ma.Dict(dump_only=True)
    service_version = ma.String(dump_only=True)
    status = ma.String(
        dump_only=True,
        validate=validate.OneOf(list(JobStatus.__members__.keys())),
    )

    # The decorator is resolved from the module-level import before this
    # method's name is bound in the class body.
    @post_dump(pass_original=True)
    def post_dump(self, serialized_job, job, **kwargs):
        """Replace the dumped status with the enum member's name."""
        status_name = job.status.name
        serialized_job['status'] = status_name
        return serialized_job
class TesseractOCRPipelineJobSchema(JobSchema):
    """Form payload for creating a Tesseract OCR Pipeline job.

    Extends ``JobSchema`` with the load-only pipeline options, a loadable
    ``service_version`` limited to the versions listed for the service, and
    the PDF upload.
    """

    binarization = ma.Boolean(load_only=True, missing=False)
    model_id = ma.String(required=True, load_only=True)
    service_version = ma.auto_field(
        required=True,
        validate=[
            validate.Length(min=1, max=16),
            validate.OneOf(list(SERVICES['tesseract-ocr-pipeline']['versions'].keys())),
        ],
    )
    # The upload is mandatory: the job creation view reads args['pdf']
    # unconditionally, so an absent file has to fail validation rather than
    # surface as a KeyError. This also matches the ``txt`` field of the
    # spaCy schema, which is already required.
    pdf = FileField(required=True)

    @validates('pdf')
    def validate_pdf(self, value):
        """Reject uploads whose MIME type is not ``application/pdf``.

        Raises:
            ValidationError: if the uploaded file is not a PDF.
        """
        if value.mimetype != 'application/pdf':
            raise ValidationError('PDF files only!')
class SpaCyNLPPipelineJobSchema(JobSchema):
    """Form payload for creating a spaCy NLP Pipeline job.

    Extends ``JobSchema`` with the load-only pipeline options, a loadable
    ``service_version`` and the plain-text upload.
    """

    # NOTE(review): ``binarization`` looks like a leftover from the Tesseract
    # schema; it is kept so payloads that already send it still load.
    binarization = ma.Boolean(load_only=True, missing=False)
    model_id = ma.String(required=True, load_only=True)
    # Validate against the spaCy service's own versions. The previous code
    # checked the 'tesseract-ocr-pipeline' version list, which accepts OCR
    # versions and rejects valid spaCy ones.
    # NOTE(review): assumes SERVICES has a 'spacy-nlp-pipeline' entry with a
    # 'versions' mapping, like the Tesseract entry — confirm in app.services.
    service_version = ma.auto_field(
        required=True,
        validate=[
            validate.Length(min=1, max=16),
            validate.OneOf(list(SERVICES['spacy-nlp-pipeline']['versions'].keys())),
        ],
    )
    txt = FileField(required=True)

    @validates('txt')
    def validate_txt(self, value):
        """Reject uploads whose MIME type is not ``text/plain``.

        Raises:
            ValidationError: if the uploaded file is not plain text.
        """
        if value.mimetype != 'text/plain':
            raise ValidationError('Plain text files only!')
class UserSchema(ma.SQLAlchemySchema):
    """Schema for ``User`` records.

    The hashid is exposed as ``id``. ``password`` is load-only; the
    timestamps are dump-only. Email and username are checked for uniqueness
    on load, and the notification level is dumped as the enum member's name.
    """

    class Meta:
        model = User
        ordered = True

    hashid = ma.String(data_key='id', dump_only=True)
    username = ma.auto_field(
        validate=[
            validate.Length(min=1, max=64),
            validate.Regexp(USERNAME_REGEX, error='Usernames must have only letters, numbers, dots or underscores'),
        ]
    )
    email = ma.auto_field(validate=validate.Email())
    member_since = ma.auto_field(dump_only=True)
    # Declared once; the duplicate declaration that followed ``password``
    # was removed.
    last_seen = ma.auto_field(dump_only=True)
    password = ma.String(load_only=True)
    setting_dark_mode = ma.auto_field()
    setting_job_status_mail_notification_level = ma.String(
        validate=validate.OneOf(list(UserSettingJobStatusMailNotificationLevel.__members__.keys()))
    )

    @validates('email')
    def validate_email(self, email):
        """Reject an email address that is already registered.

        The user creation view stores addresses lower-cased, so the lookup
        is done on the lower-cased input; otherwise a differently cased
        duplicate would pass validation.

        Raises:
            ValidationError: if a user with this address exists.
        """
        if User.query.filter(User.email == email.lower()).first():
            raise ValidationError('Email already registered')

    @validates('username')
    def validate_username(self, username):
        """Reject a username that is already taken.

        Raises:
            ValidationError: if a user with this username exists.
        """
        if User.query.filter(User.username == username).first():
            raise ValidationError('Username already in use')

    # The decorator is resolved from the module-level import before this
    # method's name is bound in the class body.
    @post_dump(pass_original=True)
    def post_dump(self, serialized_user, user, **kwargs):
        """Replace the dumped notification level with the enum member's name."""
        serialized_user['setting_job_status_mail_notification_level'] = \
            user.setting_job_status_mail_notification_level.name
        return serialized_user

View File

@ -1,27 +1,58 @@
from apifairy import authenticate, body, response, other_responses
from flask import Blueprint, request, abort
from app import db
from flask_restx import Namespace, Resource
from .auth import basic_auth, token_auth
from app.models import Token, User
from .auth import basic_auth
from .schemas import EmptySchema, TokenSchema
ns = Namespace('tokens', description='Token operations')
bp = Blueprint('tokens', __name__)
token_schema = TokenSchema()
@ns.route('')
class API_Tokens(Resource):
'''Get or revoke a user authentication token'''
@bp.route('', methods=['DELETE'])
@response(EmptySchema, status_code=204, description='Token revoked')
@other_responses({401: 'Invalid access token'})
def delete_token():
    """Revoke an access token"""
    # The token is the second whitespace-separated part of the Authorization
    # header. A missing header or one without a second part is answered with
    # the documented 401 instead of a KeyError/IndexError.
    header_parts = request.headers.get('Authorization', '').split()
    if len(header_parts) < 2:
        abort(401)
    access_token = header_parts[1]
    token = Token.query.filter(Token.access_token == access_token).first()
    if token is None:  # pragma: no cover
        abort(401)
    # Revocation expires the token pair rather than deleting the row.
    token.expire()
    db.session.commit()
    return {}
@ns.doc(security='basicAuth')
@basic_auth.login_required
def post(self):
'''Get a user token'''
token = basic_auth.current_user().get_token()
db.session.commit()
return {'token': 'Bearer ' + token}
@ns.doc(security='apiKey')
@token_auth.login_required
def delete(self):
'''Revoke a user token'''
token_auth.current_user().revoke_token()
db.session.commit()
return '', 204
@bp.route('', methods=['POST'])
@authenticate(basic_auth)
@response(token_schema)
@other_responses({401: 'Invalid username or password'})
def create_token():
    """Create new access and refresh tokens"""
    # The caller was authenticated by the basic authenticator.
    issued_token = basic_auth.current_user().generate_auth_token()
    db.session.add(issued_token)
    # Use the occasion to purge long-expired tokens from the table.
    Token.clean()
    db.session.commit()
    return issued_token, 200
@bp.route('', methods=['PUT'])
@body(token_schema)
@response(token_schema, description='Newly issued access and refresh tokens')
@other_responses({401: 'Invalid access or refresh token'})
def refresh_token(args):
    """Refresh an access token"""
    presented_access = args.get('access_token')
    presented_refresh = args.get('refresh_token')
    # Both halves of the pair are needed to look up the stored token.
    if presented_access is None or presented_refresh is None:
        abort(401)
    old_token = User.verify_refresh_token(presented_refresh, presented_access)
    if old_token is None:
        abort(401)
    # Expire the presented pair and hand out a fresh one for the same user.
    old_token.expire()
    replacement = old_token.user.generate_auth_token()
    db.session.add_all([old_token, replacement])
    db.session.commit()
    return replacement, 200

99
app/api/users.py Normal file
View File

@ -0,0 +1,99 @@
from apifairy import authenticate, body, response
from apifairy.decorators import other_responses
from flask import abort, Blueprint, current_app
from werkzeug.exceptions import InternalServerError
from app import db
from app.email import create_message, send
from app.models import User
from .schemas import EmptySchema, UserSchema
from .auth import auth_error_responses, token_auth
bp = Blueprint('users', __name__)
user_schema = UserSchema()
users_schema = UserSchema(many=True)
@bp.route('', methods=['GET'])
@authenticate(token_auth, role='Administrator')
@response(users_schema)
@other_responses(auth_error_responses)
def get_users():
    """Get all users"""
    # Restricted to the 'Administrator' role by the @authenticate decorator.
    every_user = User.query.all()
    return every_user
@bp.route('', methods=['POST'])
@body(user_schema)
@response(user_schema, 201)
@other_responses({InternalServerError.code: InternalServerError.description})
def create_user(args):
    """Create a new user"""
    # The schema declaration does not mark these fields as required
    # (``password`` is a plain load-only string), so an incomplete payload
    # can reach this point. Answer it with a 400 instead of letting the
    # lookups below raise a KeyError that surfaces as a 500.
    if any(args.get(field) is None for field in ('email', 'password', 'username')):
        abort(400)
    try:
        user = User.create(
            # Addresses are stored lower-cased.
            email=args['email'].lower(),
            password=args['password'],
            username=args['username']
        )
    except OSError:
        abort(500)
    # Send the account confirmation mail carrying a fresh confirm token.
    msg = create_message(
        user.email,
        'Confirm Your Account',
        'auth/email/confirm',
        token=user.generate_confirm_token(),
        user=user
    )
    send(msg)
    db.session.commit()
    return user, 201
@bp.route('/<hashid:user_id>', methods=['DELETE'])
@authenticate(token_auth)
@response(EmptySchema, status_code=204)
@other_responses(auth_error_responses)
def delete_user(user_id):
    """Delete a user by id"""
    requester = token_auth.current_user()
    target = User.query.get(user_id)
    if target is None:
        abort(404)
    # Users may delete themselves; administrators may delete anyone.
    is_self = target == requester
    if not is_self and not requester.is_administrator():
        abort(403)
    target.delete()
    db.session.commit()
    return {}, 204
@bp.route('/<hashid:user_id>', methods=['GET'])
@authenticate(token_auth)
@response(user_schema)
@other_responses(auth_error_responses)
@other_responses({404: 'User not found'})
def get_user(user_id):
    """Retrieve a user by id"""
    requester = token_auth.current_user()
    target = User.query.get(user_id)
    if target is None:
        abort(404)
    # A user record is visible to that user and to administrators only.
    is_self = target == requester
    if not is_self and not requester.is_administrator():
        abort(403)
    return target
@bp.route('/<username>', methods=['GET'])
@authenticate(token_auth)
@response(user_schema)
@other_responses(auth_error_responses)
@other_responses({404: 'User not found'})
def get_user_by_username(username):
    """Retrieve a user by username"""
    requester = token_auth.current_user()
    target = User.query.filter(User.username == username).first()
    if target is None:
        abort(404)
    # A user record is visible to that user and to administrators only.
    is_self = target == requester
    if not is_self and not requester.is_administrator():
        abort(403)
    return target

View File

@ -11,6 +11,7 @@ import json
import jwt
import os
import requests
import secrets
import shutil
import xml.etree.ElementTree as ET
import yaml
@ -209,6 +210,30 @@ class Role(HashidMixin, db.Model):
db.session.commit()
class Token(db.Model):
    """Access/refresh token pair belonging to a user.

    Each half of the pair has its own expiration timestamp.
    """

    __tablename__ = 'tokens'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    access_token = db.Column(db.String(64), index=True)
    access_expiration = db.Column(db.DateTime)
    refresh_token = db.Column(db.String(64), index=True)
    refresh_expiration = db.Column(db.DateTime)
    # Backrefs: user: User

    def expire(self):
        """Set both expiration timestamps to the current UTC time."""
        self.access_expiration = datetime.utcnow()
        self.refresh_expiration = datetime.utcnow()

    @staticmethod
    def clean():
        """Remove any tokens that have been expired for more than a day."""
        cutoff = datetime.utcnow() - timedelta(days=1)
        stale_tokens = Token.query.filter(Token.refresh_expiration < cutoff)
        stale_tokens.delete()
class User(HashidMixin, UserMixin, db.Model):
__tablename__ = 'users'
# Primary key
@ -253,6 +278,12 @@ class User(HashidMixin, UserMixin, db.Model):
cascade='all, delete-orphan',
lazy='dynamic'
)
tokens = db.relationship(
'Token',
backref='user',
cascade='all, delete-orphan',
lazy='dynamic'
)
def __init__(self, **kwargs):
super().__init__(**kwargs)
@ -337,6 +368,27 @@ class User(HashidMixin, UserMixin, db.Model):
db.session.add(user)
return True
@staticmethod
def verify_access_token(access_token, refresh_token=None):
# Look up the stored token whose access_token matches and return its user;
# falls through (implicitly returning None) when nothing qualifies.
# NOTE(review): refresh_token is accepted but never read in this body.
# NOTE(review): indentation is not visible in this view; the checks below
# are presumably nested (found -> not expired -> not a system user) —
# confirm against the real file before touching this logic.
token = Token.query.filter(Token.access_token == access_token).first()
if token is not None:
if token.access_expiration > datetime.utcnow():
# Update the user's last_seen timestamp and persist it.
token.user.ping()
db.session.commit()
# Users with the 'System user' role are never returned.
if token.user.role.name != 'System user':
return token.user
@staticmethod
def verify_refresh_token(refresh_token, access_token):
# Look up the stored token matching BOTH the refresh and the access token
# and return it while its refresh expiration lies in the future.
# NOTE(review): indentation is not visible in this view; the revocation
# below presumably runs only when a token was found but has expired —
# confirm against the real file.
token = Token.query.filter((Token.refresh_token == refresh_token) & (Token.access_token == access_token)).first()
if token is not None:
if token.refresh_expiration > datetime.utcnow():
return token
# someone tried to refresh with an expired token
# revoke all tokens from this user as a precaution
token.user.revoke_auth_tokens()
db.session.commit()
def can(self, permission):
return self.role.has_permission(permission)
@ -364,6 +416,15 @@ class User(HashidMixin, UserMixin, db.Model):
shutil.rmtree(self.path, ignore_errors=True)
db.session.delete(self)
def generate_auth_token(self):
    """Build a new ``Token`` for this user and return it.

    The access token expires 15 minutes and the refresh token 7 days after
    creation; both token strings come from ``secrets.token_urlsafe()``.
    """
    access_lifetime = timedelta(minutes=15)
    refresh_lifetime = timedelta(days=7)
    new_token = Token(
        access_token=secrets.token_urlsafe(),
        access_expiration=datetime.utcnow() + access_lifetime,
        refresh_token=secrets.token_urlsafe(),
        refresh_expiration=datetime.utcnow() + refresh_lifetime,
        user=self,
    )
    return new_token
def generate_confirm_token(self, expiration=3600):
now = datetime.utcnow()
payload = {
@ -400,6 +461,10 @@ class User(HashidMixin, UserMixin, db.Model):
def ping(self):
self.last_seen = datetime.utcnow()
def revoke_auth_tokens(self):
    """Mark every token of this user for deletion in the current session.

    Nothing is committed here; that is left to the caller.
    """
    for auth_token in self.tokens:
        db.session.delete(auth_token)
def verify_password(self, password):
if self.role.name == 'System user':
return False

View File

@ -31,6 +31,9 @@
{% if current_user.can(Permission.ADMINISTRATE) %}
<li><a href="{{ url_for('admin.index') }}"><i class="material-icons">admin_panel_settings</i>Administration</a></li>
{% endif %}
{% if current_user.can(Permission.USE_API) %}
<li><a href="{{ url_for('apifairy.docs') }}"><i class="material-icons">api</i>API</a></li>
{% endif %}
{% if current_user.can(Permission.CONTRIBUTE) %}
<li><a href="{{ url_for('contributions.contributions') }}"><i class="material-icons">new_label</i>Contribute</a></li>
{% endif %}

View File

@ -11,6 +11,13 @@ load_dotenv(os.path.join(basedir, '.env'))
class Config:
''' APIFairy '''
APIFAIRY_TITLE = 'nopaque'
APIFAIRY_VERSION = '0.0.1'
APIFAIRY_UI = 'swagger_ui'
APIFAIRY_APISPEC_PATH = '/api/apispec.json'
APIFAIRY_UI_PATH = '/api'
''' # Flask # '''
PREFERRED_URL_SCHEME = os.environ.get('PREFERRED_URL_SCHEME', 'http')
SECRET_KEY = os.environ.get('SECRET_KEY', 'hard to guess string')
@ -58,6 +65,9 @@ class Config:
NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI = \
os.environ.get('NOPAQUE_SOCKETIO_MESSAGE_QUEUE_URI')
NOPAQUE_JOB_EXPIRATION_ENABLED = os.environ.get('NOPAQUE_JOB_EXPIRATION_ENABLED', 'true').lower() == 'true'
NOPAQUE_JOB_EXPIRATION_TIME = int(os.environ.get('NOPAQUE_JOB_EXPIRATION_TIME', '120'))
NOPAQUE_DOCKER_REGISTRY = 'gitlab.ub.uni-bielefeld.de:4567'
NOPAQUE_DOCKER_IMAGE_PREFIX = f'{NOPAQUE_DOCKER_REGISTRY}/sfb1288inf/'
NOPAQUE_DOCKER_REGISTRY_USERNAME = \

View File

@ -0,0 +1,41 @@
"""Add API authentication token table
Revision ID: 116b4ab3ef9c
Revises: f9070ff1fa4a
Create Date: 2022-09-02 11:12:01.995451
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '116b4ab3ef9c'
down_revision = 'f9070ff1fa4a'
branch_labels = None
depends_on = None
def upgrade():
    """Create the ``tokens`` table and its two token indexes."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        'tokens',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('user_id', sa.Integer(), nullable=True),
        sa.Column('access_token', sa.String(length=64), nullable=True),
        sa.Column('access_expiration', sa.DateTime(), nullable=True),
        sa.Column('refresh_token', sa.String(length=64), nullable=True),
        sa.Column('refresh_expiration', sa.DateTime(), nullable=True),
        sa.ForeignKeyConstraint(['user_id'], ['users.id'], ),
        sa.PrimaryKeyConstraint('id'),
    )
    # Non-unique indexes, created in the same order as before:
    # access_token first, then refresh_token.
    for column_name in ('access_token', 'refresh_token'):
        op.create_index(
            op.f('ix_tokens_' + column_name),
            'tokens',
            [column_name],
            unique=False,
        )
    # ### end Alembic commands ###
def downgrade():
    """Drop the two token indexes and then the ``tokens`` table."""
    # ### commands auto generated by Alembic - please adjust! ###
    # Reverse of upgrade(): refresh_token index first, then access_token.
    for column_name in ('refresh_token', 'access_token'):
        op.drop_index(op.f('ix_tokens_' + column_name), table_name='tokens')
    op.drop_table('tokens')
    # ### end Alembic commands ###