mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2024-11-15 01:05:42 +00:00
Update service pages and how version data is gathered.
This commit is contained in:
parent
55d94ea329
commit
2f9ecf8048
@ -362,8 +362,6 @@ class Job(db.Model):
|
|||||||
creation_date = db.Column(db.DateTime(), default=datetime.utcnow)
|
creation_date = db.Column(db.DateTime(), default=datetime.utcnow)
|
||||||
description = db.Column(db.String(255))
|
description = db.Column(db.String(255))
|
||||||
end_date = db.Column(db.DateTime())
|
end_date = db.Column(db.DateTime())
|
||||||
mem_mb = db.Column(db.Integer)
|
|
||||||
n_cores = db.Column(db.Integer)
|
|
||||||
service = db.Column(db.String(64))
|
service = db.Column(db.String(64))
|
||||||
'''
|
'''
|
||||||
' Service specific arguments as string list.
|
' Service specific arguments as string list.
|
||||||
|
@ -25,7 +25,14 @@ SERVICES = {
|
|||||||
'latest': '1.0.0',
|
'latest': '1.0.0',
|
||||||
'1.0.0': {
|
'1.0.0': {
|
||||||
'check_encoding': True,
|
'check_encoding': True,
|
||||||
'models': {},
|
'models': {
|
||||||
|
'de': 'German',
|
||||||
|
'en': 'English',
|
||||||
|
'it': 'Italian',
|
||||||
|
'nl': 'Dutch',
|
||||||
|
'pl': 'Polish',
|
||||||
|
'zh': 'Chinese'
|
||||||
|
},
|
||||||
'publishing_data': {
|
'publishing_data': {
|
||||||
'date': None,
|
'date': None,
|
||||||
'title': 'nopaque NLP service',
|
'title': 'nopaque NLP service',
|
||||||
@ -42,14 +49,19 @@ SERVICES = {
|
|||||||
'1.0.0': {
|
'1.0.0': {
|
||||||
'binarization': True,
|
'binarization': True,
|
||||||
'models': {
|
'models': {
|
||||||
|
'ara': 'Arabic',
|
||||||
|
'chi_tra': 'Chinese - Traditional',
|
||||||
|
'dan': 'Danish',
|
||||||
'eng': 'English',
|
'eng': 'English',
|
||||||
'enm': 'English, Middle 1100-1500',
|
'enm': 'English, Middle 1100-1500',
|
||||||
'fra': 'French',
|
'fra': 'French',
|
||||||
'frm': 'French, Middle ca. 1400-1600',
|
'frm': 'French, Middle ca. 1400-1600',
|
||||||
'deu': 'German',
|
'deu': 'German',
|
||||||
'frk': 'German Fraktur',
|
'frk': 'German Fraktur',
|
||||||
|
'ell': 'Greek, Modern (1453-)',
|
||||||
'ita': 'Italian',
|
'ita': 'Italian',
|
||||||
'por': 'Portuguese',
|
'por': 'Portuguese',
|
||||||
|
'rus': 'Russian',
|
||||||
'spa': 'Spanish; Castilian',
|
'spa': 'Spanish; Castilian',
|
||||||
},
|
},
|
||||||
'publishing_data': {
|
'publishing_data': {
|
||||||
|
@ -10,16 +10,18 @@ class AddJobForm(FlaskForm):
|
|||||||
validators=[DataRequired(), Length(1, 255)])
|
validators=[DataRequired(), Length(1, 255)])
|
||||||
submit = SubmitField()
|
submit = SubmitField()
|
||||||
title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
|
title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
|
||||||
|
version = SelectField('Version', validators=[DataRequired()])
|
||||||
|
|
||||||
|
|
||||||
class AddNLPJobForm(AddJobForm):
|
class AddNLPJobForm(AddJobForm):
|
||||||
files = MultipleFileField('Files', validators=[DataRequired()])
|
|
||||||
model = SelectField('Model', validators=[DataRequired()])
|
|
||||||
version = SelectField('Version',
|
|
||||||
choices=[(x, x) for x in SERVICES['nlp']['versions'] if x != 'latest'], # noqa
|
|
||||||
default=SERVICES['nlp']['versions']['latest'],
|
|
||||||
validators=[DataRequired()])
|
|
||||||
check_encoding = BooleanField('Check encoding')
|
check_encoding = BooleanField('Check encoding')
|
||||||
|
files = MultipleFileField('Files', validators=[DataRequired()])
|
||||||
|
language = SelectField('Language', choices=[('', 'Choose your option')],
|
||||||
|
default='', validators=[DataRequired()])
|
||||||
|
|
||||||
|
def validate_check_encoding(self, field):
|
||||||
|
if field.data and 'check_encoding' not in SERVICES['nlp']['versions'][self.version.data]: # noqa
|
||||||
|
raise ValidationError('Check encoding is not available in this version') # noqa
|
||||||
|
|
||||||
def validate_files(form, field):
|
def validate_files(form, field):
|
||||||
for file in field.data:
|
for file in field.data:
|
||||||
@ -27,37 +29,44 @@ class AddNLPJobForm(AddJobForm):
|
|||||||
raise ValidationError('File does not have an approved '
|
raise ValidationError('File does not have an approved '
|
||||||
'extension: .txt')
|
'extension: .txt')
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
version = kwargs.pop('version', SERVICES['nlp']['versions']['latest'])
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
if 'check_encoding' not in SERVICES['nlp']['versions'][version]:
|
||||||
|
self.check_encoding.render_kw = {'disabled': True}
|
||||||
|
self.language.choices += [(x, y) for x, y in SERVICES['nlp']['versions'][version]['models'].items()] # noqa
|
||||||
|
self.version.choices = [(x, x) for x in SERVICES['nlp']['versions'] if x != 'latest'] # noqa
|
||||||
|
self.version.default = version
|
||||||
|
|
||||||
|
|
||||||
class AddOCRJobForm(AddJobForm):
|
class AddOCRJobForm(AddJobForm):
|
||||||
binarization = BooleanField('Binarazation')
|
binarization = BooleanField('Binarazation')
|
||||||
files = MultipleFileField('Files', validators=[DataRequired()])
|
files = MultipleFileField('Files', validators=[DataRequired()])
|
||||||
model = SelectField('Model', validators=[DataRequired()])
|
language = SelectField('Language', choices=[('', 'Choose your option')],
|
||||||
version = SelectField('Version',
|
default='', validators=[DataRequired()])
|
||||||
choices=[(x, x) for x in SERVICES['ocr']['versions'] if x != 'latest'], # noqa
|
|
||||||
default=SERVICES['ocr']['versions']['latest'],
|
|
||||||
validators=[DataRequired()])
|
|
||||||
|
|
||||||
def validate_binarization(form, field):
|
def validate_binarization(self, field):
|
||||||
if field.data and 'binarization' not in SERVICES['ocr'][form.version.data]: # noqa
|
if field.data and 'binarization' not in SERVICES['ocr']['versions'][self.version.data]: # noqa
|
||||||
raise ValidationError('Binarization is not available in this version') # noqa
|
raise ValidationError('Binarization is not available in this version') # noqa
|
||||||
|
|
||||||
def validate_files(form, field):
|
def validate_files(self, field):
|
||||||
for file in field.data:
|
for file in field.data:
|
||||||
if not file.filename.lower().endswith('.pdf'):
|
if not file.filename.lower().endswith('.pdf'):
|
||||||
raise ValidationError('File does not have an approved '
|
raise ValidationError('File does not have an approved '
|
||||||
'extension: .pdf')
|
'extension: .pdf')
|
||||||
|
|
||||||
def validate_model(form, field):
|
def __init__(self, *args, **kwargs):
|
||||||
if field.data not in SERVICES['ocr'][form.versiondata]['models']:
|
version = kwargs.pop('version', SERVICES['ocr']['versions']['latest'])
|
||||||
raise ValidationError('Model is not available in this version')
|
super().__init__(*args, **kwargs)
|
||||||
|
if 'binarization' not in SERVICES['ocr']['versions'][version]:
|
||||||
|
self.binarization.render_kw = {'disabled': True}
|
||||||
|
self.language.choices += [(x, y) for x, y in SERVICES['ocr']['versions'][version]['models'].items()] # noqa
|
||||||
|
self.version.choices = [(x, x) for x in SERVICES['ocr']['versions'] if x != 'latest'] # noqa
|
||||||
|
self.version.default = version
|
||||||
|
|
||||||
|
|
||||||
class AddFileSetupJobForm(AddJobForm):
|
class AddFileSetupJobForm(AddJobForm):
|
||||||
files = MultipleFileField('Files', validators=[DataRequired()])
|
files = MultipleFileField('Files', validators=[DataRequired()])
|
||||||
version = SelectField('Version',
|
|
||||||
choices=[(x, x) for x in SERVICES['file-setup']['versions'] if x != 'latest'], # noqa
|
|
||||||
default=SERVICES['file-setup']['versions']['latest'],
|
|
||||||
validators=[DataRequired()])
|
|
||||||
|
|
||||||
def validate_files(form, field):
|
def validate_files(form, field):
|
||||||
for file in field.data:
|
for file in field.data:
|
||||||
@ -66,3 +75,9 @@ class AddFileSetupJobForm(AddJobForm):
|
|||||||
raise ValidationError('File does not have an approved '
|
raise ValidationError('File does not have an approved '
|
||||||
'extension: .jpeg | .jpg | .png | .tiff '
|
'extension: .jpeg | .jpg | .png | .tiff '
|
||||||
'| .tif')
|
'| .tif')
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
version = kwargs.pop('version', SERVICES['file-setup']['versions']['latest'])
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
self.version.choices = [(x, x) for x in SERVICES['file-setup']['versions'] if x != 'latest'] # noqa
|
||||||
|
self.version.default = version
|
||||||
|
@ -23,19 +23,30 @@ def service(service):
|
|||||||
elif service == 'file-setup':
|
elif service == 'file-setup':
|
||||||
form = AddFileSetupJobForm(prefix='add-file-setup-job-form')
|
form = AddFileSetupJobForm(prefix='add-file-setup-job-form')
|
||||||
elif service == 'nlp':
|
elif service == 'nlp':
|
||||||
form = AddNLPJobForm(prefix='add-nlp-job-form')
|
version = request.args.get('version')
|
||||||
|
if version is None or version not in SERVICES['nlp']['versions']:
|
||||||
|
form = AddNLPJobForm(prefix='add-nlp-job-form')
|
||||||
|
else:
|
||||||
|
form = AddNLPJobForm(prefix='add-nlp-job-form', version=version)
|
||||||
|
form.version.data = version
|
||||||
elif service == 'ocr':
|
elif service == 'ocr':
|
||||||
form = AddOCRJobForm(prefix='add-ocr-job-form')
|
version = request.args.get('version')
|
||||||
|
if version is None or version not in SERVICES['ocr']['versions']:
|
||||||
|
form = AddOCRJobForm(prefix='add-ocr-job-form')
|
||||||
|
else:
|
||||||
|
form = AddOCRJobForm(prefix='add-ocr-job-form', version=version)
|
||||||
|
form.version.data = version
|
||||||
if form.is_submitted():
|
if form.is_submitted():
|
||||||
if not form.validate():
|
if not form.validate():
|
||||||
|
logging.error(form.errors)
|
||||||
return make_response(form.errors, 400)
|
return make_response(form.errors, 400)
|
||||||
service_args = []
|
service_args = []
|
||||||
if service == 'nlp':
|
if service == 'nlp':
|
||||||
service_args.append('-l {}'.format(form.model.data))
|
service_args.append('-l {}'.format(form.language.data))
|
||||||
if form.check_encoding.data:
|
if form.check_encoding.data:
|
||||||
service_args.append('--check-encoding')
|
service_args.append('--check-encoding')
|
||||||
if service == 'ocr':
|
if service == 'ocr':
|
||||||
service_args.append('-l {}'.format(form.model.data))
|
service_args.append('-l {}'.format(form.language.data))
|
||||||
if form.binarization.data:
|
if form.binarization.data:
|
||||||
service_args.append('--binarize')
|
service_args.append('--binarize')
|
||||||
job = Job(creator=current_user,
|
job = Job(creator=current_user,
|
||||||
@ -70,4 +81,5 @@ def service(service):
|
|||||||
return make_response(
|
return make_response(
|
||||||
{'redirect_url': url_for('jobs.job', job_id=job.id)}, 201)
|
{'redirect_url': url_for('jobs.job', job_id=job.id)}, 201)
|
||||||
return render_template('services/{}.html.j2'.format(service),
|
return render_template('services/{}.html.j2'.format(service),
|
||||||
form=form, title=SERVICES[service]['name'])
|
form=form, title=SERVICES[service]['name'],
|
||||||
|
versions=SERVICES[service]['versions'])
|
||||||
|
@ -9,13 +9,31 @@ import json
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
service_settings = {
|
||||||
|
'file-setup': {
|
||||||
|
'ressources': docker.types.Resources(mem_reservation=1024 * (10 ** 6),
|
||||||
|
cpu_reservation=1 * (10 ** 9))
|
||||||
|
},
|
||||||
|
'nlp': {
|
||||||
|
'default_args': ' --n-cores 2 --mem-mb 2048',
|
||||||
|
'ressources': docker.types.Resources(mem_reservation=2048 * (10 ** 6),
|
||||||
|
cpu_reservation=2 * (10 ** 9))
|
||||||
|
},
|
||||||
|
'ocr': {
|
||||||
|
'default_args': ' --n-cores 4 --mem-mb 4096',
|
||||||
|
'ressources': docker.types.Resources(mem_reservation=4096 * (10 ** 6),
|
||||||
|
cpu_reservation=4 * (10 ** 9))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class CheckJobsMixin:
|
class CheckJobsMixin:
|
||||||
def check_jobs(self):
|
def check_jobs(self):
|
||||||
jobs = Job.query.all()
|
jobs = Job.query.all()
|
||||||
canceling_jobs = list(filter(lambda job: job.status == 'canceling', jobs))
|
canceling_jobs = list(filter(lambda job: job.status == 'canceling', jobs)) # noqa
|
||||||
queued_jobs = list(filter(lambda job: job.status == 'queued', jobs))
|
queued_jobs = list(filter(lambda job: job.status == 'queued', jobs))
|
||||||
running_jobs = list(filter(lambda job: job.status == 'running', jobs))
|
running_jobs = list(filter(lambda job: job.status == 'running', jobs))
|
||||||
submitted_jobs = list(filter(lambda job: job.status == 'submitted', jobs))
|
submitted_jobs = list(filter(lambda job: job.status == 'submitted', jobs)) # noqa
|
||||||
for job in submitted_jobs:
|
for job in submitted_jobs:
|
||||||
self.create_job_service(job)
|
self.create_job_service(job)
|
||||||
for job in queued_jobs + running_jobs:
|
for job in queued_jobs + running_jobs:
|
||||||
@ -25,26 +43,24 @@ class CheckJobsMixin:
|
|||||||
|
|
||||||
def create_job_service(self, job):
|
def create_job_service(self, job):
|
||||||
cmd = '{} -i /files -o /files/output'.format(job.service)
|
cmd = '{} -i /files -o /files/output'.format(job.service)
|
||||||
|
if 'default_args' in service_settings[job.service]:
|
||||||
|
cmd += service_settings[job.service]['default_args']
|
||||||
if job.service == 'file-setup':
|
if job.service == 'file-setup':
|
||||||
cmd += ' -f {}'.format(secure_filename(job.title))
|
cmd += ' -f {}'.format(secure_filename(job.title))
|
||||||
ressources = docker.types.Resources(cpu_reservation=4 * (10 ** 9), mem_reservation=4096 * (10 ** 6)) # noqa
|
|
||||||
elif job.service == 'nlp':
|
|
||||||
ressources = docker.types.Resources(cpu_reservation=2 * (10 ** 9), mem_reservation=4096 * (10 ** 6)) # noqa
|
|
||||||
elif job.service == 'ocr':
|
|
||||||
ressources = docker.types.Resources(cpu_reservation=4 * (10 ** 9), mem_reservation=8192 * (10 ** 6)) # noqa
|
|
||||||
cmd += ' --log-dir /files'
|
cmd += ' --log-dir /files'
|
||||||
cmd += ' --zip [{}]_{}'.format(job.service, secure_filename(job.title))
|
cmd += ' --zip [{}]_{}'.format(job.service, secure_filename(job.title))
|
||||||
cmd += ' ' + ' '.join(json.loads(job.service_args))
|
cmd += ' ' + ' '.join(json.loads(job.service_args))
|
||||||
|
ressources = service_settings[job.service]['ressources']
|
||||||
service_kwargs = {'command': cmd,
|
service_kwargs = {'command': cmd,
|
||||||
'constraints': ['node.role==worker'],
|
'constraints': ['node.role==worker'],
|
||||||
'labels': {'origin': 'nopaque',
|
'labels': {'origin': 'nopaque',
|
||||||
'type': 'service.{}'.format(job.service),
|
'type': 'job',
|
||||||
'job_id': str(job.id)},
|
'job_id': str(job.id)},
|
||||||
'mounts': [job.path + ':/files:rw'],
|
'mounts': [job.path + ':/files:rw'],
|
||||||
'name': 'job_{}'.format(job.id),
|
'name': 'job_{}'.format(job.id),
|
||||||
'resources': ressources,
|
'resources': ressources,
|
||||||
'restart_policy': docker.types.RestartPolicy()}
|
'restart_policy': docker.types.RestartPolicy()}
|
||||||
service_image = 'gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/{}:{}'.format(job.service, job.service_version) # noqa
|
service_image = 'gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/{}:latest'.format(job.service) # noqa
|
||||||
try:
|
try:
|
||||||
self.docker.services.create(service_image, **service_kwargs)
|
self.docker.services.create(service_image, **service_kwargs)
|
||||||
except docker.errors.APIError as e:
|
except docker.errors.APIError as e:
|
||||||
|
@ -11,6 +11,17 @@
|
|||||||
<h1 id="title">{{ title }}</h1>
|
<h1 id="title">{{ title }}</h1>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div class="col s12">
|
||||||
|
<div class="card" id="nlp-removed-language-support">
|
||||||
|
<div class="card-content">
|
||||||
|
<span class="card-title">Natural Language Processing removed language support</span>
|
||||||
|
<p>Dear users,</p>
|
||||||
|
<br>
|
||||||
|
<p>Not all language models support all of the features we utilize in our NLP service. That's why we had to drop them; as soon as they meet our requirements, we will add them back!</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div class="col s12">
|
<div class="col s12">
|
||||||
<div class="card" id="beta-launch">
|
<div class="card" id="beta-launch">
|
||||||
<div class="card-content">
|
<div class="card-content">
|
||||||
|
@ -56,10 +56,10 @@
|
|||||||
<div class="col s12 l8">
|
<div class="col s12 l8">
|
||||||
{{ wtf.render_field(form.description, data_length='255', material_icon='description') }}
|
{{ wtf.render_field(form.description, data_length='255', material_icon='description') }}
|
||||||
</div>
|
</div>
|
||||||
<div class="col s12">
|
<div class="col s12 l9">
|
||||||
{{ wtf.render_field(form.files, accept='image/jpeg, image/png, image/tiff', placeholder='Choose your .jpeg, .png or .tiff files') }}
|
{{ wtf.render_field(form.files, accept='image/jpeg, image/png, image/tiff', placeholder='Choose your .jpeg, .png or .tiff files') }}
|
||||||
</div>
|
</div>
|
||||||
<div class="col s12 hide">
|
<div class="col s12 l3">
|
||||||
{{ wtf.render_field(form.version, material_icon='apps') }}
|
{{ wtf.render_field(form.version, material_icon='apps') }}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
@ -23,7 +23,7 @@
|
|||||||
<p class="hide-on-small-only"> </p>
|
<p class="hide-on-small-only"> </p>
|
||||||
<p class="hide-on-small-only"> </p>
|
<p class="hide-on-small-only"> </p>
|
||||||
<a class="btn-floating btn-large waves-effect waves-light" style="transform: scale(2);">
|
<a class="btn-floating btn-large waves-effect waves-light" style="transform: scale(2);">
|
||||||
<i class="material-icons service" data-service="nlp"></i>
|
<i class="nopaque-icons service" data-service="nlp"></i>
|
||||||
</a>
|
</a>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@ -127,3 +127,16 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|
||||||
|
|
||||||
|
{% block scripts %}
|
||||||
|
{{ super() }}
|
||||||
|
<script>
|
||||||
|
var versionField = document.querySelector('#add-nlp-job-form-version');
|
||||||
|
versionField.addEventListener('change', (event) => {
|
||||||
|
let url = new URL(window.location.href);
|
||||||
|
url.search = `?version=${event.target.value}`;
|
||||||
|
window.location.href = url.toString();
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
{% endblock %}
|
||||||
|
@ -23,7 +23,7 @@
|
|||||||
<p class="hide-on-small-only"> </p>
|
<p class="hide-on-small-only"> </p>
|
||||||
<p class="hide-on-small-only"> </p>
|
<p class="hide-on-small-only"> </p>
|
||||||
<a class="btn-floating btn-large waves-effect waves-light" style="transform: scale(2);">
|
<a class="btn-floating btn-large waves-effect waves-light" style="transform: scale(2);">
|
||||||
<i class="material-icons service" data-service="ocr"></i>
|
<i class="nopaque-icons service" data-service="ocr"></i>
|
||||||
</a>
|
</a>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@ -60,7 +60,7 @@
|
|||||||
{{ wtf.render_field(form.files, accept='application/pdf', color=ocr_color_darken, placeholder='Choose your .pdf files') }}
|
{{ wtf.render_field(form.files, accept='application/pdf', color=ocr_color_darken, placeholder='Choose your .pdf files') }}
|
||||||
</div>
|
</div>
|
||||||
<div class="col s12 l4">
|
<div class="col s12 l4">
|
||||||
{{ wtf.render_field(form.model, material_icon='language') }}
|
{{ wtf.render_field(form.language, material_icon='language') }}
|
||||||
</div>
|
</div>
|
||||||
<div class="col s12 l3">
|
<div class="col s12 l3">
|
||||||
{{ wtf.render_field(form.version, material_icon='apps') }}
|
{{ wtf.render_field(form.version, material_icon='apps') }}
|
||||||
@ -154,3 +154,16 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|
||||||
|
|
||||||
|
{% block scripts %}
|
||||||
|
{{ super() }}
|
||||||
|
<script>
|
||||||
|
var versionField = document.querySelector('#add-ocr-job-form-version');
|
||||||
|
versionField.addEventListener('change', (event) => {
|
||||||
|
let url = new URL(window.location.href);
|
||||||
|
url.search = `?version=${event.target.value}`;
|
||||||
|
window.location.href = url.toString();
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
{% endblock %}
|
||||||
|
Loading…
Reference in New Issue
Block a user