mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2024-11-15 01:05:42 +00:00
166 lines
5.2 KiB
Python
166 lines
5.2 KiB
Python
from apifairy.fields import FileField
|
|
from marshmallow import validate, validates, ValidationError
|
|
from marshmallow.decorators import post_dump
|
|
from app import ma
|
|
from app.auth import USERNAME_REGEX
|
|
from app.models import Job, JobStatus, TesseractOCRModel, Token, User, UserSettingJobStatusMailNotificationLevel
|
|
from app.services import SERVICES
|
|
|
|
|
|
|
|
class EmptySchema(ma.Schema):
    """Schema that declares no fields of its own."""
|
|
|
|
|
|
class TokenSchema(ma.SQLAlchemySchema):
    """Schema for ``Token`` objects: an access token and a refresh token."""

    class Meta:
        # Bound to the Token model; fields keep their declaration order.
        ordered = True
        model = Token

    # The access token is mandatory, the refresh token may be omitted.
    access_token = ma.String(required=True)
    refresh_token = ma.String()
|
|
|
|
|
|
class TesseractOCRModelSchema(ma.SQLAlchemySchema):
    """Schema for ``TesseractOCRModel`` objects.

    The two hash id fields are dump-only and are exposed under the keys
    ``id`` and ``user_id``. All remaining fields carry length (and, for the
    URL fields, URL) validators that are applied when data is loaded.
    """

    class Meta:
        # Bound to the TesseractOCRModel model; fields keep declaration order.
        ordered = True
        model = TesseractOCRModel

    # Identifiers: serialized only, never accepted as input.
    hashid = ma.String(dump_only=True, data_key='id')
    user_hashid = ma.String(dump_only=True, data_key='user_id')

    # Descriptive metadata; title and description are derived from the model
    # columns via auto_field.
    title = ma.auto_field(
        validate=validate.Length(min=1, max=64),
        required=True,
    )
    description = ma.auto_field(
        validate=validate.Length(min=1, max=255),
        required=True,
    )
    version = ma.String(
        validate=validate.Length(min=1, max=16),
        required=True,
    )

    # A list of 1-255 version strings, each 1-16 characters long.
    compatible_service_versions = ma.List(
        ma.String(validate=validate.Length(min=1, max=16), required=True),
        validate=validate.Length(min=1, max=255),
        required=True,
    )

    # Publication details. Only publisher_url is optional.
    publisher = ma.String(
        validate=validate.Length(min=1, max=128),
        required=True,
    )
    publisher_url = ma.String(
        validate=[validate.URL(), validate.Length(min=1, max=512)],
    )
    publishing_url = ma.String(
        validate=[validate.URL(), validate.Length(min=1, max=512)],
        required=True,
    )
    publishing_year = ma.Int(required=True)

    shared = ma.Boolean(required=True)
|
|
|
|
|
|
class JobSchema(ma.SQLAlchemySchema):
    """Base schema for ``Job`` objects.

    Only ``title`` and ``description`` can be loaded; every other field is
    dump-only. After dumping, ``status`` is replaced by the name of the
    job's status member (see ``post_dump``).
    """

    class Meta:
        # Bound to the Job model; fields keep their declaration order.
        ordered = True
        model = Job

    # Identifiers: serialized as "id" / "user_id", never accepted as input.
    hashid = ma.String(dump_only=True, data_key='id')
    user_hashid = ma.String(dump_only=True, data_key='user_id')

    # The only loadable fields of the base schema.
    title = ma.auto_field(
        validate=validate.Length(min=1, max=32),
        required=True,
    )
    description = ma.auto_field(
        validate=validate.Length(min=1, max=255),
        required=True,
    )

    # Read-only job state.
    creation_date = ma.auto_field(dump_only=True)
    end_date = ma.auto_field(dump_only=True)
    service = ma.String(
        validate=validate.OneOf(SERVICES.keys()),
        dump_only=True,
    )
    service_args = ma.Dict(dump_only=True)
    service_version = ma.String(dump_only=True)
    status = ma.String(
        validate=validate.OneOf(list(JobStatus.__members__)),
        dump_only=True,
    )

    @post_dump(pass_original=True)
    def post_dump(self, serialized_job, job, **kwargs):
        """Overwrite the dumped ``status`` with ``job.status.name``.

        ``serialized_job`` is the already serialized mapping and ``job`` the
        original object it was produced from. The mapping is modified in
        place and returned.
        """
        serialized_job.update(status=job.status.name)
        return serialized_job
|
|
|
|
|
|
class TesseractOCRPipelineJobSchema(JobSchema):
    """``JobSchema`` extended with the inputs of the Tesseract OCR pipeline.

    Adds the load-only options ``binarization`` and ``model_id``, makes
    ``service_version`` a required, validated input (the base schema only
    dumps it) and expects the PDF to process as a file upload.
    """

    # Optional flag; defaults to False when it is absent from the input.
    binarization = ma.Boolean(load_only=True, missing=False)
    model_id = ma.String(required=True, load_only=True)
    # Must be one of the versions listed for this service in SERVICES.
    service_version = ma.auto_field(
        required=True,
        validate=[
            validate.Length(min=1, max=16),
            validate.OneOf(
                list(SERVICES['tesseract-ocr-pipeline']['versions'].keys())
            ),
        ],
    )
    # Required so that a request without a file is rejected during
    # validation. Field validators are skipped for missing values, so
    # validate_pdf alone cannot catch an absent upload.
    pdf = FileField(required=True)

    @validates('pdf')
    def validate_pdf(self, value):
        """Reject uploads whose MIME type is not ``application/pdf``.

        Raises:
            ValidationError: if ``value.mimetype`` is anything else.
        """
        if value.mimetype != 'application/pdf':
            raise ValidationError('PDF files only!')
|
|
|
|
|
|
class SpaCyNLPPipelineJobSchema(JobSchema):
    """``JobSchema`` extended with the inputs of the spaCy NLP pipeline.

    Adds the load-only options ``binarization`` and ``model_id``, makes
    ``service_version`` a required, validated input (the base schema only
    dumps it) and requires the plain text to process as a file upload.
    """

    # NOTE(review): this option mirrors the OCR schema and may be a
    # copy-paste leftover; kept so that existing clients sending it are
    # not rejected. Confirm whether the NLP pipeline uses it.
    binarization = ma.Boolean(load_only=True, missing=False)
    model_id = ma.String(required=True, load_only=True)
    # Validate against this pipeline's own versions; the check previously
    # used the Tesseract OCR pipeline's version list.
    # NOTE(review): assumes SERVICES has a 'spacy-nlp-pipeline' entry with
    # a 'versions' mapping -- confirm against the services definition.
    service_version = ma.auto_field(
        required=True,
        validate=[
            validate.Length(min=1, max=16),
            validate.OneOf(
                list(SERVICES['spacy-nlp-pipeline']['versions'].keys())
            ),
        ],
    )
    txt = FileField(required=True)

    @validates('txt')
    def validate_txt(self, value):
        """Reject uploads whose MIME type is not ``text/plain``.

        Raises:
            ValidationError: if ``value.mimetype`` is anything else.
        """
        if value.mimetype != 'text/plain':
            raise ValidationError('Plain text files only!')
|
|
|
|
|
|
class UserSchema(ma.SQLAlchemySchema):
    """Schema for ``User`` objects.

    ``hashid`` (exposed as ``id``), ``member_since`` and ``last_seen`` are
    dump-only; ``password`` is load-only. On load, ``username`` and
    ``email`` are checked for format and for uniqueness against the
    existing users. After dumping, the mail notification level is replaced
    by the name of the user's setting (see ``post_dump``).
    """

    class Meta:
        # Bound to the User model; fields keep their declaration order.
        model = User
        ordered = True

    hashid = ma.String(data_key='id', dump_only=True)
    username = ma.auto_field(
        validate=[
            validate.Length(min=1, max=64),
            validate.Regexp(
                USERNAME_REGEX,
                error='Usernames must have only letters, numbers, dots or underscores'
            ),
        ]
    )
    email = ma.auto_field(validate=validate.Email())
    member_since = ma.auto_field(dump_only=True)
    # Declared once; an identical second declaration was removed.
    last_seen = ma.auto_field(dump_only=True)
    # Accepted on load only, never serialized.
    password = ma.String(load_only=True)
    setting_dark_mode = ma.auto_field()
    setting_job_status_mail_notification_level = ma.String(
        validate=validate.OneOf(
            list(UserSettingJobStatusMailNotificationLevel.__members__.keys())
        )
    )

    @validates('email')
    def validate_email(self, email):
        """Reject an email address that already belongs to a user.

        Raises:
            ValidationError: if a ``User`` row with this email exists.
        """
        # NOTE(review): any existing row counts, including the user's own --
        # verify this is intended if the schema is also used for updates.
        if User.query.filter(User.email == email).first():
            raise ValidationError('Email already registered')

    @validates('username')
    def validate_username(self, username):
        """Reject a username that already belongs to a user.

        Raises:
            ValidationError: if a ``User`` row with this username exists.
        """
        if User.query.filter(User.username == username).first():
            raise ValidationError('Username already in use')

    @post_dump(pass_original=True)
    def post_dump(self, serialized_user, user, **kwargs):
        """Overwrite the dumped notification level with its member name.

        ``serialized_user`` is the already serialized mapping and ``user``
        the original object it was produced from. The mapping is modified
        in place and returned.
        """
        serialized_user['setting_job_status_mail_notification_level'] = \
            user.setting_job_status_mail_notification_level.name
        return serialized_user
|