Update service pages and how version data is gathered.

This commit is contained in:
Patrick Jentsch 2021-03-26 13:10:42 +01:00
parent 55d94ea329
commit 2f9ecf8048
9 changed files with 133 additions and 43 deletions

View File

@ -362,8 +362,6 @@ class Job(db.Model):
creation_date = db.Column(db.DateTime(), default=datetime.utcnow)
description = db.Column(db.String(255))
end_date = db.Column(db.DateTime())
mem_mb = db.Column(db.Integer)
n_cores = db.Column(db.Integer)
service = db.Column(db.String(64))
'''
' Service specific arguments as string list.

View File

@ -25,7 +25,14 @@ SERVICES = {
'latest': '1.0.0',
'1.0.0': {
'check_encoding': True,
'models': {},
'models': {
'de': 'German',
'en': 'English',
'it': 'Italian',
'nl': 'Dutch',
'pl': 'Polish',
'zh': 'Chinese'
},
'publishing_data': {
'date': None,
'title': 'nopaque NLP service',
@ -42,14 +49,19 @@ SERVICES = {
'1.0.0': {
'binarization': True,
'models': {
'ara': 'Arabic',
'chi_tra': 'Chinese - Traditional',
'dan': 'Danish',
'eng': 'English',
'enm': 'English, Middle 1100-1500',
'fra': 'French',
'frm': 'French, Middle ca. 1400-1600',
'deu': 'German',
'frk': 'German Fraktur',
'ell': 'Greek, Modern (1453-)',
'ita': 'Italian',
'por': 'Portuguese',
'rus': 'Russian',
'spa': 'Spanish; Castilian',
},
'publishing_data': {

View File

@ -10,16 +10,18 @@ class AddJobForm(FlaskForm):
validators=[DataRequired(), Length(1, 255)])
submit = SubmitField()
title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
version = SelectField('Version', validators=[DataRequired()])
class AddNLPJobForm(AddJobForm):
files = MultipleFileField('Files', validators=[DataRequired()])
model = SelectField('Model', validators=[DataRequired()])
version = SelectField('Version',
choices=[(x, x) for x in SERVICES['nlp']['versions'] if x != 'latest'], # noqa
default=SERVICES['nlp']['versions']['latest'],
validators=[DataRequired()])
check_encoding = BooleanField('Check encoding')
files = MultipleFileField('Files', validators=[DataRequired()])
language = SelectField('Language', choices=[('', 'Choose your option')],
default='', validators=[DataRequired()])
def validate_check_encoding(self, field):
if field.data and 'check_encoding' not in SERVICES['nlp']['versions'][self.version.data]: # noqa
raise ValidationError('Check encoding is not available in this version') # noqa
def validate_files(form, field):
for file in field.data:
@ -27,37 +29,44 @@ class AddNLPJobForm(AddJobForm):
raise ValidationError('File does not have an approved '
'extension: .txt')
def __init__(self, *args, **kwargs):
version = kwargs.pop('version', SERVICES['nlp']['versions']['latest'])
super().__init__(*args, **kwargs)
if 'check_encoding' not in SERVICES['nlp']['versions'][version]:
self.check_encoding.render_kw = {'disabled': True}
self.language.choices += [(x, y) for x, y in SERVICES['nlp']['versions'][version]['models'].items()] # noqa
self.version.choices = [(x, x) for x in SERVICES['nlp']['versions'] if x != 'latest'] # noqa
self.version.default = version
class AddOCRJobForm(AddJobForm):
binarization = BooleanField('Binarazation')
files = MultipleFileField('Files', validators=[DataRequired()])
model = SelectField('Model', validators=[DataRequired()])
version = SelectField('Version',
choices=[(x, x) for x in SERVICES['ocr']['versions'] if x != 'latest'], # noqa
default=SERVICES['ocr']['versions']['latest'],
validators=[DataRequired()])
language = SelectField('Language', choices=[('', 'Choose your option')],
default='', validators=[DataRequired()])
def validate_binarization(form, field):
if field.data and 'binarization' not in SERVICES['ocr'][form.version.data]: # noqa
def validate_binarization(self, field):
if field.data and 'binarization' not in SERVICES['ocr']['versions'][self.version.data]: # noqa
raise ValidationError('Binarization is not available in this version') # noqa
def validate_files(form, field):
def validate_files(self, field):
for file in field.data:
if not file.filename.lower().endswith('.pdf'):
raise ValidationError('File does not have an approved '
'extension: .pdf')
def validate_model(form, field):
if field.data not in SERVICES['ocr'][form.versiondata]['models']:
raise ValidationError('Model is not available in this version')
def __init__(self, *args, **kwargs):
version = kwargs.pop('version', SERVICES['ocr']['versions']['latest'])
super().__init__(*args, **kwargs)
if 'binarization' not in SERVICES['ocr']['versions'][version]:
self.binarization.render_kw = {'disabled': True}
self.language.choices += [(x, y) for x, y in SERVICES['ocr']['versions'][version]['models'].items()] # noqa
self.version.choices = [(x, x) for x in SERVICES['ocr']['versions'] if x != 'latest'] # noqa
self.version.default = version
class AddFileSetupJobForm(AddJobForm):
files = MultipleFileField('Files', validators=[DataRequired()])
version = SelectField('Version',
choices=[(x, x) for x in SERVICES['file-setup']['versions'] if x != 'latest'], # noqa
default=SERVICES['file-setup']['versions']['latest'],
validators=[DataRequired()])
def validate_files(form, field):
for file in field.data:
@ -66,3 +75,9 @@ class AddFileSetupJobForm(AddJobForm):
raise ValidationError('File does not have an approved '
'extension: .jpeg | .jpg | .png | .tiff '
'| .tif')
def __init__(self, *args, **kwargs):
version = kwargs.pop('version', SERVICES['file-setup']['versions']['latest'])
super().__init__(*args, **kwargs)
self.version.choices = [(x, x) for x in SERVICES['file-setup']['versions'] if x != 'latest'] # noqa
self.version.default = version

View File

@ -23,19 +23,30 @@ def service(service):
elif service == 'file-setup':
form = AddFileSetupJobForm(prefix='add-file-setup-job-form')
elif service == 'nlp':
form = AddNLPJobForm(prefix='add-nlp-job-form')
version = request.args.get('version')
if version is None or version not in SERVICES['nlp']['versions']:
form = AddNLPJobForm(prefix='add-nlp-job-form')
else:
form = AddNLPJobForm(prefix='add-nlp-job-form', version=version)
form.version.data = version
elif service == 'ocr':
form = AddOCRJobForm(prefix='add-ocr-job-form')
version = request.args.get('version')
if version is None or version not in SERVICES['ocr']['versions']:
form = AddOCRJobForm(prefix='add-ocr-job-form')
else:
form = AddOCRJobForm(prefix='add-ocr-job-form', version=version)
form.version.data = version
if form.is_submitted():
if not form.validate():
logging.error(form.errors)
return make_response(form.errors, 400)
service_args = []
if service == 'nlp':
service_args.append('-l {}'.format(form.model.data))
service_args.append('-l {}'.format(form.language.data))
if form.check_encoding.data:
service_args.append('--check-encoding')
if service == 'ocr':
service_args.append('-l {}'.format(form.model.data))
service_args.append('-l {}'.format(form.language.data))
if form.binarization.data:
service_args.append('--binarize')
job = Job(creator=current_user,
@ -70,4 +81,5 @@ def service(service):
return make_response(
{'redirect_url': url_for('jobs.job', job_id=job.id)}, 201)
return render_template('services/{}.html.j2'.format(service),
form=form, title=SERVICES[service]['name'])
form=form, title=SERVICES[service]['name'],
versions=SERVICES[service]['versions'])

View File

@ -9,13 +9,31 @@ import json
import os
# Per-service settings, keyed by the service name stored on a job.
#   'default_args' (optional): text appended verbatim to the job's command.
#   'ressources': reservation object passed to Docker as the service's
#                 `resources` when the job service is created.
# The multipliers mirror the --mem-mb / --n-cores values in 'default_args':
# memory is given as <megabytes> * 10**6 and CPU as <cores> * 10**9.
# NOTE(review): presumably bytes and nano-CPUs - confirm against the Docker
# SDK documentation for docker.types.Resources.
_MEM_MB = 10 ** 6
_CPU_CORE = 10 ** 9

service_settings = {
    'file-setup': {
        'ressources': docker.types.Resources(
            cpu_reservation=1 * _CPU_CORE,
            mem_reservation=1024 * _MEM_MB,
        ),
    },
    'nlp': {
        'default_args': ' --n-cores 2 --mem-mb 2048',
        'ressources': docker.types.Resources(
            cpu_reservation=2 * _CPU_CORE,
            mem_reservation=2048 * _MEM_MB,
        ),
    },
    'ocr': {
        'default_args': ' --n-cores 4 --mem-mb 4096',
        'ressources': docker.types.Resources(
            cpu_reservation=4 * _CPU_CORE,
            mem_reservation=4096 * _MEM_MB,
        ),
    },
}
class CheckJobsMixin:
def check_jobs(self):
jobs = Job.query.all()
canceling_jobs = list(filter(lambda job: job.status == 'canceling', jobs))
canceling_jobs = list(filter(lambda job: job.status == 'canceling', jobs)) # noqa
queued_jobs = list(filter(lambda job: job.status == 'queued', jobs))
running_jobs = list(filter(lambda job: job.status == 'running', jobs))
submitted_jobs = list(filter(lambda job: job.status == 'submitted', jobs))
submitted_jobs = list(filter(lambda job: job.status == 'submitted', jobs)) # noqa
for job in submitted_jobs:
self.create_job_service(job)
for job in queued_jobs + running_jobs:
@ -25,26 +43,24 @@ class CheckJobsMixin:
def create_job_service(self, job):
cmd = '{} -i /files -o /files/output'.format(job.service)
if 'default_args' in service_settings[job.service]:
cmd += service_settings[job.service]['default_args']
if job.service == 'file-setup':
cmd += ' -f {}'.format(secure_filename(job.title))
ressources = docker.types.Resources(cpu_reservation=4 * (10 ** 9), mem_reservation=4096 * (10 ** 6)) # noqa
elif job.service == 'nlp':
ressources = docker.types.Resources(cpu_reservation=2 * (10 ** 9), mem_reservation=4096 * (10 ** 6)) # noqa
elif job.service == 'ocr':
ressources = docker.types.Resources(cpu_reservation=4 * (10 ** 9), mem_reservation=8192 * (10 ** 6)) # noqa
cmd += ' --log-dir /files'
cmd += ' --zip [{}]_{}'.format(job.service, secure_filename(job.title))
cmd += ' ' + ' '.join(json.loads(job.service_args))
ressources = service_settings[job.service]['ressources']
service_kwargs = {'command': cmd,
'constraints': ['node.role==worker'],
'labels': {'origin': 'nopaque',
'type': 'service.{}'.format(job.service),
'type': 'job',
'job_id': str(job.id)},
'mounts': [job.path + ':/files:rw'],
'name': 'job_{}'.format(job.id),
'resources': ressources,
'restart_policy': docker.types.RestartPolicy()}
service_image = 'gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/{}:{}'.format(job.service, job.service_version) # noqa
service_image = 'gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/{}:latest'.format(job.service) # noqa
try:
self.docker.services.create(service_image, **service_kwargs)
except docker.errors.APIError as e:

View File

@ -11,6 +11,17 @@
<h1 id="title">{{ title }}</h1>
</div>
<div class="col s12">
<div class="card" id="nlp-removed-language-support">
<div class="card-content">
<span class="card-title">Natural Language Processing removed language support</span>
<p>Dear users</p>
<br>
<p>Not all language models support all the features we utilize in our NLP service. That's why we had to drop them. As soon as they meet our requirements, we will add them back!</p>
</div>
</div>
</div>
<div class="col s12">
<div class="card" id="beta-launch">
<div class="card-content">

View File

@ -56,10 +56,10 @@
<div class="col s12 l8">
{{ wtf.render_field(form.description, data_length='255', material_icon='description') }}
</div>
<div class="col s12">
<div class="col s12 l9">
{{ wtf.render_field(form.files, accept='image/jpeg, image/png, image/tiff', placeholder='Choose your .jpeg, .png or .tiff files') }}
</div>
<div class="col s12 hide">
<div class="col s12 l3">
{{ wtf.render_field(form.version, material_icon='apps') }}
</div>
</div>

View File

@ -23,7 +23,7 @@
<p class="hide-on-small-only">&nbsp;</p>
<p class="hide-on-small-only">&nbsp;</p>
<a class="btn-floating btn-large waves-effect waves-light" style="transform: scale(2);">
<i class="material-icons service" data-service="nlp"></i>
<i class="nopaque-icons service" data-service="nlp"></i>
</a>
</div>
</div>
@ -127,3 +127,16 @@
</div>
</div>
{% endblock %}
{% block scripts %}
{{ super() }}
<script>
// Reload the page with the selected version in the query string; the server
// reads the `version` query argument to build the NLP form for that version.
var versionField = document.querySelector('#add-nlp-job-form-version');
// querySelector returns null when the field is not rendered; skip the wiring
// instead of throwing a TypeError that would abort the rest of the script.
if (versionField !== null) {
  versionField.addEventListener('change', (event) => {
    let url = new URL(window.location.href);
    // Replace the whole query (as before), but let URLSearchParams encode the
    // value so characters such as '&' or '=' cannot inject extra parameters.
    url.search = new URLSearchParams({version: event.target.value}).toString();
    window.location.href = url.toString();
  });
}
</script>
{% endblock %}

View File

@ -23,7 +23,7 @@
<p class="hide-on-small-only">&nbsp;</p>
<p class="hide-on-small-only">&nbsp;</p>
<a class="btn-floating btn-large waves-effect waves-light" style="transform: scale(2);">
<i class="material-icons service" data-service="ocr"></i>
<i class="nopaque-icons service" data-service="ocr"></i>
</a>
</div>
</div>
@ -60,7 +60,7 @@
{{ wtf.render_field(form.files, accept='application/pdf', color=ocr_color_darken, placeholder='Choose your .pdf files') }}
</div>
<div class="col s12 l4">
{{ wtf.render_field(form.model, material_icon='language') }}
{{ wtf.render_field(form.language, material_icon='language') }}
</div>
<div class="col s12 l3">
{{ wtf.render_field(form.version, material_icon='apps') }}
@ -154,3 +154,16 @@
</div>
</div>
{% endblock %}
{% block scripts %}
{{ super() }}
<script>
// Reload the page with the selected version in the query string; the server
// reads the `version` query argument to build the OCR form for that version.
var versionField = document.querySelector('#add-ocr-job-form-version');
// querySelector returns null when the field is not rendered; skip the wiring
// instead of throwing a TypeError that would abort the rest of the script.
if (versionField !== null) {
  versionField.addEventListener('change', (event) => {
    let url = new URL(window.location.href);
    // Replace the whole query (as before), but let URLSearchParams encode the
    // value so characters such as '&' or '=' cannot inject extra parameters.
    url.search = new URLSearchParams({version: event.target.value}).toString();
    window.location.href = url.toString();
  });
}
</script>
{% endblock %}