"""Marshmallow schemas used to validate and serialize API payloads."""

from apifairy.fields import FileField
from marshmallow import validate, validates, ValidationError
from marshmallow.decorators import post_dump

from app import ma
from app.models import (
    Job,
    JobStatus,
    TesseractOCRPipelineModel,
    Token,
    User,
    UserSettingJobStatusMailNotificationLevel
)
from app.blueprints.services import SERVICES


def _service_versions(service, fallback=None):
    """Return the version identifiers registered for *service*.

    Looks up ``SERVICES[service]['versions']`` and returns its keys as a list.

    :param service: Key of the service inside ``SERVICES``.
    :param fallback: Optional service key to use instead when *service* (or
        its ``'versions'`` entry) is missing from ``SERVICES``.
    :raises KeyError: If the lookup fails and no *fallback* was given, or if
        the fallback lookup fails as well.
    """
    try:
        return list(SERVICES[service]['versions'])
    except KeyError:
        if fallback is None:
            raise
        return list(SERVICES[fallback]['versions'])


class EmptySchema(ma.Schema):
    """Schema without any fields."""


class TokenSchema(ma.SQLAlchemySchema):
    """Schema for ``Token``: an access token and an optional refresh token."""

    class Meta:
        model = Token
        ordered = True

    access_token = ma.String(required=True)
    refresh_token = ma.String()


class TesseractOCRPipelineModelSchema(ma.SQLAlchemySchema):
    """Schema for ``TesseractOCRPipelineModel`` records."""

    class Meta:
        model = TesseractOCRPipelineModel
        ordered = True

    # Hash ids are output-only and are published as "id" / "user_id".
    hashid = ma.String(data_key='id', dump_only=True)
    user_hashid = ma.String(data_key='user_id', dump_only=True)
    title = ma.auto_field(
        required=True,
        validate=validate.Length(min=1, max=64)
    )
    description = ma.auto_field(
        required=True,
        validate=validate.Length(min=1, max=255)
    )
    version = ma.String(
        required=True,
        validate=validate.Length(min=1, max=16)
    )
    # Both the list itself (1-255 entries) and each entry (1-16 characters)
    # are length-checked.
    compatible_service_versions = ma.List(
        ma.String(required=True, validate=validate.Length(min=1, max=16)),
        required=True,
        validate=validate.Length(min=1, max=255)
    )
    publisher = ma.String(
        required=True,
        validate=validate.Length(min=1, max=128)
    )
    # Unlike publishing_url, the publisher URL is optional.
    publisher_url = ma.String(
        validate=[validate.URL(), validate.Length(min=1, max=512)]
    )
    publishing_url = ma.String(
        required=True,
        validate=[validate.URL(), validate.Length(min=1, max=512)]
    )
    publishing_year = ma.Int(required=True)
    is_public = ma.Boolean(required=True)


class JobSchema(ma.SQLAlchemySchema):
    """Base schema for ``Job`` records.

    Only ``title`` and ``description`` are loadable here; every other field
    is ``dump_only``.
    """

    class Meta:
        model = Job
        ordered = True

    # Hash ids are output-only and are published as "id" / "user_id".
    hashid = ma.String(data_key='id', dump_only=True)
    user_hashid = ma.String(data_key='user_id', dump_only=True)
    title = ma.auto_field(
        required=True,
        validate=validate.Length(min=1, max=32)
    )
    description = ma.auto_field(
        required=True,
        validate=validate.Length(min=1, max=255)
    )
    creation_date = ma.auto_field(dump_only=True)
    end_date = ma.auto_field(dump_only=True)
    service = ma.String(
        dump_only=True,
        validate=validate.OneOf(SERVICES.keys())
    )
    service_args = ma.Dict(dump_only=True)
    service_version = ma.String(dump_only=True)
    status = ma.String(
        dump_only=True,
        validate=validate.OneOf(list(JobStatus.__members__))
    )

    # NOTE: this method name rebinds ``post_dump`` inside the class body, so
    # it has to stay the last decorated hook of the class.
    @post_dump(pass_original=True)
    def post_dump(self, serialized_job, job, **kwargs):
        """Replace the dumped ``status`` with the member name of ``job.status``.

        :param serialized_job: Mapping produced by the regular dump step.
        :param job: Original object that was dumped.
        :returns: ``serialized_job`` with ``'status'`` overwritten.
        """
        serialized_job['status'] = job.status.name
        return serialized_job


class TesseractOCRPipelineJobSchema(JobSchema):
    """Job schema for the ``tesseract-ocr-pipeline`` service."""

    binarization = ma.Boolean(load_only=True, missing=False)
    model_id = ma.String(required=True, load_only=True)
    # Overrides the dump-only field of JobSchema: here the version must be
    # supplied and must be one of the versions registered for the service.
    service_version = ma.auto_field(
        required=True,
        validate=[
            validate.Length(min=1, max=16),
            validate.OneOf(_service_versions('tesseract-ocr-pipeline'))
        ]
    )
    # NOTE(review): unlike ``txt`` on the spaCy schema this upload is not
    # marked required, so the validator below only runs when a file is sent.
    # Confirm whether that is intended.
    pdf = FileField()

    @validates('pdf')
    def validate_pdf(self, value):
        """Reject uploads whose mimetype is not ``application/pdf``.

        :raises ValidationError: If the mimetype differs.
        """
        if value.mimetype != 'application/pdf':
            raise ValidationError('PDF files only!')


class SpaCyNLPPipelineJobSchema(JobSchema):
    """Job schema for the spaCy NLP pipeline service."""

    # NOTE(review): ``binarization`` looks copied from the OCR schema; it is
    # kept so that existing payloads containing it still load.
    binarization = ma.Boolean(load_only=True, missing=False)
    model_id = ma.String(required=True, load_only=True)
    # The original validated against the Tesseract OCR version list, which
    # looks like a copy-paste slip. Prefer the spaCy entry and fall back to
    # the previous lookup when the registry has no such key.
    # NOTE(review): confirm 'spacy-nlp-pipeline' is the key used in SERVICES.
    service_version = ma.auto_field(
        required=True,
        validate=[
            validate.Length(min=1, max=16),
            validate.OneOf(
                _service_versions(
                    'spacy-nlp-pipeline',
                    fallback='tesseract-ocr-pipeline'
                )
            )
        ]
    )
    txt = FileField(required=True)

    @validates('txt')
    def validate_txt(self, value):
        """Reject uploads whose mimetype is not ``text/plain``.

        :raises ValidationError: If the mimetype differs.
        """
        if value.mimetype != 'text/plain':
            raise ValidationError('Plain text files only!')


class UserSchema(ma.SQLAlchemySchema):
    """Schema for ``User`` records."""

    class Meta:
        model = User
        ordered = True

    # Hash id is output-only and is published as "id".
    hashid = ma.String(data_key='id', dump_only=True)
    username = ma.auto_field(
        validate=[
            validate.Length(min=1, max=64),
            validate.Regexp(
                User.username_pattern,
                error='Usernames must have only letters, numbers, dots or underscores'
            )
        ]
    )
    email = ma.auto_field(validate=validate.Email())
    member_since = ma.auto_field(dump_only=True)
    last_seen = ma.auto_field(dump_only=True)
    # Accepted on load only, never included in dumped output.
    password = ma.String(load_only=True)
    setting_job_status_mail_notification_level = ma.String(
        validate=validate.OneOf(
            list(UserSettingJobStatusMailNotificationLevel.__members__)
        )
    )

    # NOTE(review): both uniqueness checks match any existing row, so they
    # presumably also reject a user re-submitting their own current value on
    # update. Verify against the callers.
    @validates('email')
    def validate_email(self, email):
        """Reject an email address that some ``User`` row already has.

        :raises ValidationError: If a matching user exists.
        """
        if User.query.filter(User.email == email).first():
            raise ValidationError('Email already registered')

    @validates('username')
    def validate_username(self, username):
        """Reject a username that some ``User`` row already has.

        :raises ValidationError: If a matching user exists.
        """
        if User.query.filter(User.username == username).first():
            raise ValidationError('Username already in use')

    # NOTE: this method name rebinds ``post_dump`` inside the class body, so
    # it has to stay the last decorated hook of the class.
    @post_dump(pass_original=True)
    def post_dump(self, serialized_user, user, **kwargs):
        """Dump the notification level as the member name of the user's value.

        :param serialized_user: Mapping produced by the regular dump step.
        :param user: Original object that was dumped.
        :returns: ``serialized_user`` with the notification level overwritten.
        """
        serialized_user['setting_job_status_mail_notification_level'] = \
            user.setting_job_status_mail_notification_level.name
        return serialized_user