diff --git a/web/app/models.py b/web/app/models.py index fdade759..6cd94698 100644 --- a/web/app/models.py +++ b/web/app/models.py @@ -362,8 +362,6 @@ class Job(db.Model): creation_date = db.Column(db.DateTime(), default=datetime.utcnow) description = db.Column(db.String(255)) end_date = db.Column(db.DateTime()) - mem_mb = db.Column(db.Integer) - n_cores = db.Column(db.Integer) service = db.Column(db.String(64)) ''' ' Service specific arguments as string list. diff --git a/web/app/services/__init__.py b/web/app/services/__init__.py index 03836993..f374b18b 100644 --- a/web/app/services/__init__.py +++ b/web/app/services/__init__.py @@ -25,7 +25,14 @@ SERVICES = { 'latest': '1.0.0', '1.0.0': { 'check_encoding': True, - 'models': {}, + 'models': { + 'de': 'German', + 'en': 'English', + 'it': 'Italian', + 'nl': 'Dutch', + 'pl': 'Polish', + 'zh': 'Chinese' + }, 'publishing_data': { 'date': None, 'title': 'nopaque NLP service', @@ -42,14 +49,19 @@ SERVICES = { '1.0.0': { 'binarization': True, 'models': { + 'ara': 'Arabic', + 'chi_tra': 'Chinese - Traditional', + 'dan': 'Danish', 'eng': 'English', 'enm': 'English, Middle 1100-1500', 'fra': 'French', 'frm': 'French, Middle ca. 
1400-1600', 'deu': 'German', 'frk': 'German Fraktur', + 'ell': 'Greek, Modern (1453-)', 'ita': 'Italian', 'por': 'Portuguese', + 'rus': 'Russian', 'spa': 'Spanish; Castilian', }, 'publishing_data': { diff --git a/web/app/services/forms.py b/web/app/services/forms.py index cea741de..5d9f5ede 100644 --- a/web/app/services/forms.py +++ b/web/app/services/forms.py @@ -10,16 +10,18 @@ class AddJobForm(FlaskForm): validators=[DataRequired(), Length(1, 255)]) submit = SubmitField() title = StringField('Title', validators=[DataRequired(), Length(1, 32)]) + version = SelectField('Version', validators=[DataRequired()]) class AddNLPJobForm(AddJobForm): - files = MultipleFileField('Files', validators=[DataRequired()]) - model = SelectField('Model', validators=[DataRequired()]) - version = SelectField('Version', - choices=[(x, x) for x in SERVICES['nlp']['versions'] if x != 'latest'], # noqa - default=SERVICES['nlp']['versions']['latest'], - validators=[DataRequired()]) check_encoding = BooleanField('Check encoding') + files = MultipleFileField('Files', validators=[DataRequired()]) + language = SelectField('Language', choices=[('', 'Choose your option')], + default='', validators=[DataRequired()]) + + def validate_check_encoding(self, field): + if field.data and 'check_encoding' not in SERVICES['nlp']['versions'][self.version.data]: # noqa + raise ValidationError('Check encoding is not available in this version') # noqa def validate_files(form, field): for file in field.data: @@ -27,37 +29,44 @@ class AddNLPJobForm(AddJobForm): raise ValidationError('File does not have an approved ' 'extension: .txt') + def __init__(self, *args, **kwargs): + version = kwargs.pop('version', SERVICES['nlp']['versions']['latest']) + super().__init__(*args, **kwargs) + if 'check_encoding' not in SERVICES['nlp']['versions'][version]: + self.check_encoding.render_kw = {'disabled': True} + self.language.choices += [(x, y) for x, y in SERVICES['nlp']['versions'][version]['models'].items()] # noqa + 
self.version.choices = [(x, x) for x in SERVICES['nlp']['versions'] if x != 'latest'] # noqa + self.version.default = version + class AddOCRJobForm(AddJobForm): binarization = BooleanField('Binarazation') files = MultipleFileField('Files', validators=[DataRequired()]) - model = SelectField('Model', validators=[DataRequired()]) - version = SelectField('Version', - choices=[(x, x) for x in SERVICES['ocr']['versions'] if x != 'latest'], # noqa - default=SERVICES['ocr']['versions']['latest'], - validators=[DataRequired()]) + language = SelectField('Language', choices=[('', 'Choose your option')], + default='', validators=[DataRequired()]) - def validate_binarization(form, field): - if field.data and 'binarization' not in SERVICES['ocr'][form.version.data]: # noqa + def validate_binarization(self, field): + if field.data and 'binarization' not in SERVICES['ocr']['versions'][self.version.data]: # noqa raise ValidationError('Binarization is not available in this version') # noqa - def validate_files(form, field): + def validate_files(self, field): for file in field.data: if not file.filename.lower().endswith('.pdf'): raise ValidationError('File does not have an approved ' 'extension: .pdf') - def validate_model(form, field): - if field.data not in SERVICES['ocr'][form.versiondata]['models']: - raise ValidationError('Model is not available in this version') + def __init__(self, *args, **kwargs): + version = kwargs.pop('version', SERVICES['ocr']['versions']['latest']) + super().__init__(*args, **kwargs) + if 'binarization' not in SERVICES['ocr']['versions'][version]: + self.binarization.render_kw = {'disabled': True} + self.language.choices += [(x, y) for x, y in SERVICES['ocr']['versions'][version]['models'].items()] # noqa + self.version.choices = [(x, x) for x in SERVICES['ocr']['versions'] if x != 'latest'] # noqa + self.version.default = version class AddFileSetupJobForm(AddJobForm): files = MultipleFileField('Files', validators=[DataRequired()]) - version = 
SelectField('Version', - choices=[(x, x) for x in SERVICES['file-setup']['versions'] if x != 'latest'], # noqa - default=SERVICES['file-setup']['versions']['latest'], - validators=[DataRequired()]) def validate_files(form, field): for file in field.data: @@ -66,3 +75,9 @@ class AddFileSetupJobForm(AddJobForm): raise ValidationError('File does not have an approved ' 'extension: .jpeg | .jpg | .png | .tiff ' '| .tif') + + def __init__(self, *args, **kwargs): + version = kwargs.pop('version', SERVICES['file-setup']['versions']['latest']) + super().__init__(*args, **kwargs) + self.version.choices = [(x, x) for x in SERVICES['file-setup']['versions'] if x != 'latest'] # noqa + self.version.default = version diff --git a/web/app/services/views.py b/web/app/services/views.py index 4436d1ff..4aaac006 100644 --- a/web/app/services/views.py +++ b/web/app/services/views.py @@ -23,19 +23,30 @@ def service(service): elif service == 'file-setup': form = AddFileSetupJobForm(prefix='add-file-setup-job-form') elif service == 'nlp': - form = AddNLPJobForm(prefix='add-nlp-job-form') + version = request.args.get('version') + if version is None or version not in SERVICES['nlp']['versions']: + form = AddNLPJobForm(prefix='add-nlp-job-form') + else: + form = AddNLPJobForm(prefix='add-nlp-job-form', version=version) + form.version.data = version elif service == 'ocr': - form = AddOCRJobForm(prefix='add-ocr-job-form') + version = request.args.get('version') + if version is None or version not in SERVICES['ocr']['versions']: + form = AddOCRJobForm(prefix='add-ocr-job-form') + else: + form = AddOCRJobForm(prefix='add-ocr-job-form', version=version) + form.version.data = version if form.is_submitted(): if not form.validate(): + logging.error(form.errors) return make_response(form.errors, 400) service_args = [] if service == 'nlp': - service_args.append('-l {}'.format(form.model.data)) + service_args.append('-l {}'.format(form.language.data)) if form.check_encoding.data: 
service_args.append('--check-encoding') if service == 'ocr': - service_args.append('-l {}'.format(form.model.data)) + service_args.append('-l {}'.format(form.language.data)) if form.binarization.data: service_args.append('--binarize') job = Job(creator=current_user, @@ -70,4 +81,5 @@ def service(service): return make_response( {'redirect_url': url_for('jobs.job', job_id=job.id)}, 201) return render_template('services/{}.html.j2'.format(service), - form=form, title=SERVICES[service]['name']) + form=form, title=SERVICES[service]['name'], + versions=SERVICES[service]['versions']) diff --git a/web/app/tasks/job_utils.py b/web/app/tasks/job_utils.py index 9baadde7..cdd0160d 100644 --- a/web/app/tasks/job_utils.py +++ b/web/app/tasks/job_utils.py @@ -9,13 +9,31 @@ import json import os +service_settings = { + 'file-setup': { + 'ressources': docker.types.Resources(mem_reservation=1024 * (10 ** 6), + cpu_reservation=1 * (10 ** 9)) + }, + 'nlp': { + 'default_args': ' --n-cores 2 --mem-mb 2048', + 'ressources': docker.types.Resources(mem_reservation=2048 * (10 ** 6), + cpu_reservation=2 * (10 ** 9)) + }, + 'ocr': { + 'default_args': ' --n-cores 4 --mem-mb 4096', + 'ressources': docker.types.Resources(mem_reservation=4096 * (10 ** 6), + cpu_reservation=4 * (10 ** 9)) + } +} + + class CheckJobsMixin: def check_jobs(self): jobs = Job.query.all() - canceling_jobs = list(filter(lambda job: job.status == 'canceling', jobs)) + canceling_jobs = list(filter(lambda job: job.status == 'canceling', jobs)) # noqa queued_jobs = list(filter(lambda job: job.status == 'queued', jobs)) running_jobs = list(filter(lambda job: job.status == 'running', jobs)) - submitted_jobs = list(filter(lambda job: job.status == 'submitted', jobs)) + submitted_jobs = list(filter(lambda job: job.status == 'submitted', jobs)) # noqa for job in submitted_jobs: self.create_job_service(job) for job in queued_jobs + running_jobs: @@ -25,26 +43,24 @@ class CheckJobsMixin: def create_job_service(self, job): cmd = '{} 
-i /files -o /files/output'.format(job.service) + if 'default_args' in service_settings[job.service]: + cmd += service_settings[job.service]['default_args'] if job.service == 'file-setup': cmd += ' -f {}'.format(secure_filename(job.title)) - ressources = docker.types.Resources(cpu_reservation=4 * (10 ** 9), mem_reservation=4096 * (10 ** 6)) # noqa - elif job.service == 'nlp': - ressources = docker.types.Resources(cpu_reservation=2 * (10 ** 9), mem_reservation=4096 * (10 ** 6)) # noqa - elif job.service == 'ocr': - ressources = docker.types.Resources(cpu_reservation=4 * (10 ** 9), mem_reservation=8192 * (10 ** 6)) # noqa cmd += ' --log-dir /files' cmd += ' --zip [{}]_{}'.format(job.service, secure_filename(job.title)) cmd += ' ' + ' '.join(json.loads(job.service_args)) + ressources = service_settings[job.service]['ressources'] service_kwargs = {'command': cmd, 'constraints': ['node.role==worker'], 'labels': {'origin': 'nopaque', - 'type': 'service.{}'.format(job.service), + 'type': 'job', 'job_id': str(job.id)}, 'mounts': [job.path + ':/files:rw'], 'name': 'job_{}'.format(job.id), 'resources': ressources, 'restart_policy': docker.types.RestartPolicy()} - service_image = 'gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/{}:{}'.format(job.service, job.service_version) # noqa + service_image = 'gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/{}:latest'.format(job.service) # noqa try: self.docker.services.create(service_image, **service_kwargs) except docker.errors.APIError as e: diff --git a/web/app/templates/main/news.html.j2 b/web/app/templates/main/news.html.j2 index 7b90f497..6e00ed09 100644 --- a/web/app/templates/main/news.html.j2 +++ b/web/app/templates/main/news.html.j2 @@ -11,6 +11,17 @@

{{ title }}

+
+
+
+ Natural Language Processing removed language support +

Dear users

+
+

Not all language models support all the features we utilize in our NLP service. That's why we had to drop them; as soon as they meet our requirements, we will add them back!

+
+
+
+
diff --git a/web/app/templates/services/file-setup.html.j2 b/web/app/templates/services/file-setup.html.j2 index b39fec38..ffb0efb0 100644 --- a/web/app/templates/services/file-setup.html.j2 +++ b/web/app/templates/services/file-setup.html.j2 @@ -56,10 +56,10 @@
{{ wtf.render_field(form.description, data_length='255', material_icon='description') }}
-
+
{{ wtf.render_field(form.files, accept='image/jpeg, image/png, image/tiff', placeholder='Choose your .jpeg, .png or .tiff files') }}
-
+
{{ wtf.render_field(form.version, material_icon='apps') }}
diff --git a/web/app/templates/services/nlp.html.j2 b/web/app/templates/services/nlp.html.j2 index 4c5018bc..6e805f3e 100644 --- a/web/app/templates/services/nlp.html.j2 +++ b/web/app/templates/services/nlp.html.j2 @@ -23,7 +23,7 @@

 

 

- +
@@ -127,3 +127,16 @@
{% endblock %} + + +{% block scripts %} +{{ super() }} + +{% endblock %} diff --git a/web/app/templates/services/ocr.html.j2 b/web/app/templates/services/ocr.html.j2 index 5cb520bf..af8205a0 100644 --- a/web/app/templates/services/ocr.html.j2 +++ b/web/app/templates/services/ocr.html.j2 @@ -23,7 +23,7 @@

 

 

- + @@ -60,7 +60,7 @@ {{ wtf.render_field(form.files, accept='application/pdf', color=ocr_color_darken, placeholder='Choose your .pdf files') }}
- {{ wtf.render_field(form.model, material_icon='language') }} + {{ wtf.render_field(form.language, material_icon='language') }}
{{ wtf.render_field(form.version, material_icon='apps') }} @@ -154,3 +154,16 @@
{% endblock %} + + +{% block scripts %} +{{ super() }} + +{% endblock %}