mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-06-12 09:00:40 +00:00
Change job model.
This commit is contained in:
@ -3,10 +3,50 @@ from wtforms import MultipleFileField, SelectField, StringField, SubmitField, Va
|
||||
from wtforms.validators import DataRequired, Length
|
||||
|
||||
|
||||
class NewNLPJobForm(FlaskForm):
    """Form for submitting a new NLP job.

    Collects a description, the uploaded input files, the language, a
    title and the service version. Every input field carries a
    ``DataRequired`` validator.
    """

    # Fields are declared in the order: description, files, language,
    # submit, title, version.
    description = StringField(
        'Description',
        validators=[DataRequired(), Length(1, 255)],
    )
    files = MultipleFileField('Files', validators=[DataRequired()])
    language = SelectField(
        'Language',
        choices=[
            # Placeholder entry with an empty value.
            ('', 'Choose your option'),
            ('en', 'English'),
            ('fr', 'French'),
            ('de', 'German'),
            ('it', 'Italian'),
            ('pt', 'Portuguese'),
            ('es', 'Spanish'),
        ],
        validators=[DataRequired()],
    )
    submit = SubmitField('Submit')
    title = StringField(
        'Title',
        validators=[DataRequired(), Length(1, 32)],
    )
    version = SelectField(
        'Version',
        choices=[
            # Placeholder entry with an empty value.
            ('', 'Choose your option'),
            ('latest', 'Latest'),
        ],
        validators=[DataRequired()],
    )

    def validate_files(form, field):
        """Reject the upload when any file name does not end in ``.txt``.

        The comparison is case-insensitive. The name follows WTForms'
        ``validate_<fieldname>`` inline-validator convention for ``files``.

        Raises:
            ValidationError: if at least one uploaded file has another
                extension.
        """
        has_unapproved_file = any(
            not upload.filename.lower().endswith('.txt')
            for upload in field.data
        )
        if has_unapproved_file:
            raise ValidationError(
                'File does not have an approved extension: '
                '.txt'
            )
|
||||
|
||||
|
||||
class NewOCRJobForm(FlaskForm):
|
||||
description = StringField(
|
||||
'Description',
|
||||
validators=[DataRequired(), Length(1, 64)]
|
||||
validators=[DataRequired(), Length(1, 255)]
|
||||
)
|
||||
files = MultipleFileField('Files', validators=[DataRequired()])
|
||||
language = SelectField(
|
||||
@ -44,43 +84,3 @@ class NewOCRJobForm(FlaskForm):
|
||||
'File does not have an approved extension: '
|
||||
'.pdf | .tif | .tiff'
|
||||
)
|
||||
|
||||
|
||||
class NewNLPJobForm(FlaskForm):
    """Input form for creating an NLP job.

    Declares, in this order, the fields description, files, language,
    submit, title and version. All fields except the submit button are
    required.
    """

    description = StringField(
        'Description', validators=[DataRequired(), Length(1, 64)]
    )
    files = MultipleFileField('Files', validators=[DataRequired()])
    language = SelectField(
        'Language',
        choices=[
            ('', 'Choose your option'),  # placeholder with empty value
            ('en', 'English'),
            ('fr', 'French'),
            ('de', 'German'),
            ('it', 'Italian'),
            ('pt', 'Portuguese'),
            ('es', 'Spanish'),
        ],
        validators=[DataRequired()],
    )
    submit = SubmitField('Submit')
    title = StringField(
        'Title', validators=[DataRequired(), Length(1, 32)]
    )
    version = SelectField(
        'Version',
        choices=[
            ('', 'Choose your option'),  # placeholder with empty value
            ('latest', 'Latest'),
        ],
        validators=[DataRequired()],
    )

    def validate_files(form, field):
        """Check that every uploaded file name ends in ``.txt``.

        File names are lower-cased before the check, so the extension
        is matched case-insensitively.

        Raises:
            ValidationError: on the first file with another extension.
        """
        lowered_names = (upload.filename.lower() for upload in field.data)
        for name in lowered_names:
            if name.endswith('.txt'):
                continue
            raise ValidationError(
                'File does not have an approved extension: '
                '.txt'
            )
|
||||
|
@ -6,61 +6,8 @@ from ..models import Job
|
||||
from ..import swarm
|
||||
from .. import db
|
||||
from threading import Thread
|
||||
import os
|
||||
import json
|
||||
|
||||
|
||||
@services.route('/ocr', methods=['GET', 'POST'])
@login_required
def ocr():
    """Render the OCR page and create an OCR job from a valid submission.

    On a validated submission a ``Job`` row is committed, the directory
    ``<OPAQUE_STORAGE>/<user_id>/jobs/<job_id>`` is created, the uploaded
    files are saved into it and ``swarm.run`` is started in a thread.
    If the directory cannot be created, the job row is deleted again.

    Returns:
        A redirect to ``services.ocr`` after a validated submission,
        otherwise the rendered ``services/ocr.html.j2`` template.
    """
    new_ocr_job_form = NewOCRJobForm()
    if new_ocr_job_form.validate_on_submit():
        ocr_job = Job(creator=current_user._get_current_object(),
                      description=new_ocr_job_form.description.data,
                      service="ocr",
                      ressources=json.dumps({"n_cores": 4,
                                             "mem_mb": 8192}),
                      service_args=json.dumps({
                          "args": ["--keep-intermediates",
                                   "--skip-binarisation"],
                          "lang": new_ocr_job_form.language.data,
                          "version": new_ocr_job_form.version.data}),
                      status="pending",
                      title=new_ocr_job_form.title.data)

        # The directory name below uses ocr_job.id, so the row is
        # committed first (presumably that is what assigns the id).
        db.session.add(ocr_job)
        db.session.commit()

        # Named job_dir rather than dir to avoid shadowing the builtin.
        job_dir = os.path.join(current_app.config['OPAQUE_STORAGE'],
                               str(ocr_job.user_id),
                               'jobs',
                               str(ocr_job.id))

        try:
            os.makedirs(job_dir)
        except OSError:
            flash('OSError!')
            # Session.delete() marks the row for deletion. The previous
            # db.session.remove(ocr_job) called scoped_session.remove(),
            # which accepts no argument and does not delete anything.
            db.session.delete(ocr_job)
            db.session.commit()
        else:
            for file in new_ocr_job_form.files.data:
                # The file name is supplied by the client; keep only its
                # last path component so it cannot escape job_dir.
                # werkzeug.utils.secure_filename would be stricter.
                safe_name = os.path.basename(file.filename)
                file.save(os.path.join(job_dir, safe_name))
            # TODO: Let the scheduler run this job in the background.
            #
            # NOTE: Using self created threads is just for testing
            #       purpose as there is no scheduler available.
            db.session.expunge(ocr_job)
            thread = Thread(target=swarm.run, args=(ocr_job,))
            thread.start()
            flash('Job created!')
        # NOTE(review): nesting was reconstructed from an unindented
        # view; the redirect is assumed to follow both outcomes - confirm.
        return redirect(url_for('services.ocr'))

    return render_template(
        'services/ocr.html.j2',
        title='Optical Character Recognition',
        new_ocr_job_form=new_ocr_job_form
    )
|
||||
import os
|
||||
|
||||
|
||||
@services.route('/nlp', methods=['GET', 'POST'])
|
||||
@ -68,15 +15,14 @@ def ocr():
|
||||
def nlp():
|
||||
new_nlp_job_form = NewNLPJobForm()
|
||||
if new_nlp_job_form.validate_on_submit():
|
||||
nlp_job = Job(creator=current_user._get_current_object(),
|
||||
nlp_job = Job(creator=current_user,
|
||||
description=new_nlp_job_form.description.data,
|
||||
service="nlp",
|
||||
ressources=json.dumps({"n_cores": 2,
|
||||
"mem_mb": 4096}),
|
||||
service_args=json.dumps({"args": [],
|
||||
"lang": new_nlp_job_form.language.data,
|
||||
"version": new_nlp_job_form.version.data}),
|
||||
status="pending",
|
||||
mem_mb=4096,
|
||||
n_cores=2,
|
||||
service='nlp',
|
||||
service_args=json.dumps(['-l {}'.format(new_nlp_job_form.language.data)]),
|
||||
service_version=new_nlp_job_form.version.data,
|
||||
status='submitted',
|
||||
title=new_nlp_job_form.title.data)
|
||||
|
||||
db.session.add(nlp_job)
|
||||
@ -113,3 +59,57 @@ def nlp():
|
||||
title='Natrual Language Processing',
|
||||
new_nlp_job_form=new_nlp_job_form
|
||||
)
|
||||
|
||||
|
||||
@services.route('/ocr', methods=['GET', 'POST'])
@login_required
def ocr():
    """Render the OCR page and create an OCR job from a valid submission.

    On a validated submission a ``Job`` row with status ``'submitted'``
    is committed, the directory
    ``<OPAQUE_STORAGE>/<user_id>/jobs/<job_id>`` is created, the uploaded
    files are saved into it and ``swarm.run`` is started in a thread.
    If the directory cannot be created, the job row is deleted again.

    Returns:
        A redirect to ``services.ocr`` after a validated submission,
        otherwise the rendered ``services/ocr.html.j2`` template.
    """
    new_ocr_job_form = NewOCRJobForm()
    if new_ocr_job_form.validate_on_submit():
        ocr_job = Job(creator=current_user,
                      description=new_ocr_job_form.description.data,
                      mem_mb=8192,
                      n_cores=4,
                      service='ocr',
                      service_args=json.dumps([
                          '-l {}'.format(new_ocr_job_form.language.data),
                          '--keep-intermediates',
                          '--skip-binarisation']),
                      service_version=new_ocr_job_form.version.data,
                      status='submitted',
                      title=new_ocr_job_form.title.data)

        # The directory name below uses ocr_job.id, so the row is
        # committed first (presumably that is what assigns the id).
        db.session.add(ocr_job)
        db.session.commit()

        # Named job_dir rather than dir to avoid shadowing the builtin.
        job_dir = os.path.join(current_app.config['OPAQUE_STORAGE'],
                               str(ocr_job.user_id),
                               'jobs',
                               str(ocr_job.id))

        try:
            os.makedirs(job_dir)
        except OSError:
            flash('OSError!')
            # Session.delete() marks the row for deletion. The previous
            # db.session.remove(ocr_job) called scoped_session.remove(),
            # which accepts no argument and does not delete anything.
            db.session.delete(ocr_job)
            db.session.commit()
        else:
            for file in new_ocr_job_form.files.data:
                # The file name is supplied by the client; keep only its
                # last path component so it cannot escape job_dir.
                # werkzeug.utils.secure_filename would be stricter.
                safe_name = os.path.basename(file.filename)
                file.save(os.path.join(job_dir, safe_name))
            # TODO: Let the scheduler run this job in the background.
            #
            # NOTE: Using self created threads is just for testing
            #       purpose as there is no scheduler available.
            db.session.expunge(ocr_job)
            thread = Thread(target=swarm.run, args=(ocr_job,))
            thread.start()
            flash('Job created!')
        # NOTE(review): nesting was reconstructed from an unindented
        # view; the redirect is assumed to follow both outcomes - confirm.
        return redirect(url_for('services.ocr'))

    return render_template(
        'services/ocr.html.j2',
        title='Optical Character Recognition',
        new_ocr_job_form=new_ocr_job_form
    )
|
||||
|
Reference in New Issue
Block a user