diff --git a/app/models.py b/app/models.py index e7db7174..ba0859e8 100644 --- a/app/models.py +++ b/app/models.py @@ -221,24 +221,17 @@ class Job(db.Model): # Primary key id = db.Column(db.Integer, primary_key=True) creation_date = db.Column(db.DateTime(), default=datetime.utcnow) - description = db.Column(db.String(64)) - ''' - ' Requested ressources. - ' Example: {"n_cores": 2, - ' "mem_mb": 4096 - ' } - ''' - ressources = db.Column(db.String(255)) + description = db.Column(db.String(255)) + mem_mb = db.Column(db.Integer) + n_cores = db.Column(db.Integer) service = db.Column(db.String(64)) ''' - ' Service specific arguments in JSON format. - ' Example: {"args": ["--keep-intermediates", "skip-binarization"], - ' "lang": "eng", - ' "version": "latest" - ' } + ' Service specific arguments as string list. + ' Example: ["-l eng", "--keep-intermediates", "--skip-binarization"] ''' service_args = db.Column(db.String(255)) - status = db.Column(db.String(8)) + service_version = db.Column(db.String(16)) + status = db.Column(db.String(16)) title = db.Column(db.String(32)) user_id = db.Column(db.Integer, db.ForeignKey('users.id')) diff --git a/app/services/forms.py b/app/services/forms.py index 1c1aac02..5a79f8bb 100644 --- a/app/services/forms.py +++ b/app/services/forms.py @@ -3,10 +3,50 @@ from wtforms import MultipleFileField, SelectField, StringField, SubmitField, Va from wtforms.validators import DataRequired, Length +class NewNLPJobForm(FlaskForm): + description = StringField( + 'Description', + validators=[DataRequired(), Length(1, 255)] + ) + files = MultipleFileField('Files', validators=[DataRequired()]) + language = SelectField( + 'Language', + choices=[('', 'Choose your option'), + ('en', 'English'), + ('fr', 'French'), + ('de', 'German'), + ('it', 'Italian'), + ('pt', 'Portuguese'), + ('es', 'Spanish') + ], + validators=[DataRequired()] + ) + submit = SubmitField('Submit') + title = StringField( + 'Title', + validators=[DataRequired(), Length(1, 32)] + ) + version = SelectField( + 'Version', + choices=[('', 'Choose your option'), + ('latest', 'Latest'), + ], + validators=[DataRequired()] + ) + + def validate_files(form, field): + for file in field.data: + if not file.filename.lower().endswith('.txt'): + raise ValidationError( + 'File does not have an approved extension: ' + '.txt' + ) + + class NewOCRJobForm(FlaskForm): description = StringField( 'Description', - validators=[DataRequired(), Length(1, 64)] + validators=[DataRequired(), Length(1, 255)] ) files = MultipleFileField('Files', validators=[DataRequired()]) language = SelectField( @@ -44,43 +84,3 @@ class NewOCRJobForm(FlaskForm): 'File does not have an approved extension: ' '.pdf | .tif | .tiff' ) - - -class NewNLPJobForm(FlaskForm): - description = StringField( - 'Description', - validators=[DataRequired(), Length(1, 64)] - ) - files = MultipleFileField('Files', validators=[DataRequired()]) - language = SelectField( - 'Language', - choices=[('', 'Choose your option'), - ('en', 'English'), - ('fr', 'French'), - ('de', 'German'), - ('it', 'Italian'), - ('pt', 'Portuguese'), - ('es', 'Spanish') - ], - validators=[DataRequired()] - ) - submit = SubmitField('Submit') - title = StringField( - 'Title', - validators=[DataRequired(), Length(1, 32)] - ) - version = SelectField( - 'Version', - choices=[('', 'Choose your option'), - ('latest', 'Latest'), - ], - validators=[DataRequired()] - ) - - def validate_files(form, field): - for file in field.data: - if not file.filename.lower().endswith('.txt'): - raise ValidationError( - 'File does not have an approved extension: ' - '.txt' - ) diff --git a/app/services/views.py b/app/services/views.py index 20af79fa..6d493008 100644 --- a/app/services/views.py +++ b/app/services/views.py @@ -6,61 +6,8 @@ from ..models import Job from ..import swarm from .. import db from threading import Thread -import os import json - - -@services.route('/ocr', methods=['GET', 'POST']) -@login_required -def ocr(): - new_ocr_job_form = NewOCRJobForm() - if new_ocr_job_form.validate_on_submit(): - ocr_job = Job(creator=current_user._get_current_object(), - description=new_ocr_job_form.description.data, - service="ocr", - ressources=json.dumps({"n_cores": 4, - "mem_mb": 8192}), - service_args=json.dumps({"args": ["--keep-intermediates", - "--skip-binarisation"], - "lang": new_ocr_job_form.language.data, - "version": new_ocr_job_form.version.data}), - status="pending", - title=new_ocr_job_form.title.data) - - db.session.add(ocr_job) - db.session.commit() - - dir = os.path.join(current_app.config['OPAQUE_STORAGE'], - str(ocr_job.user_id), - 'jobs', - str(ocr_job.id)) - - try: - os.makedirs(dir) - except OSError: - flash('OSError!') - db.session.remove(ocr_job) - db.session.commit() - else: - for file in new_ocr_job_form.files.data: - file.save(os.path.join(dir, file.filename)) - ''' - ' TODO: Let the scheduler run this job in the background. - ' - ' NOTE: Using self created threads is just for testing purpose as - ' there is no scheduler available. - ''' - db.session.expunge(ocr_job) - thread = Thread(target=swarm.run, args=(ocr_job,)) - thread.start() - flash('Job created!') - return redirect(url_for('services.ocr')) - - return render_template( - 'services/ocr.html.j2', - title='Optical Character Recognition', - new_ocr_job_form=new_ocr_job_form - ) +import os @services.route('/nlp', methods=['GET', 'POST']) @@ -68,15 +15,14 @@ def ocr(): def nlp(): new_nlp_job_form = NewNLPJobForm() if new_nlp_job_form.validate_on_submit(): - nlp_job = Job(creator=current_user._get_current_object(), + nlp_job = Job(creator=current_user, description=new_nlp_job_form.description.data, - service="nlp", - ressources=json.dumps({"n_cores": 2, - "mem_mb": 4096}), - service_args=json.dumps({"args": [], - "lang": new_nlp_job_form.language.data, - "version": new_nlp_job_form.version.data}), - status="pending", + mem_mb=4096, + n_cores=2, + service='nlp', + service_args=json.dumps(['-l {}'.format(new_nlp_job_form.language.data)]), + service_version=new_nlp_job_form.version.data, + status='submitted', title=new_nlp_job_form.title.data) db.session.add(nlp_job) @@ -113,3 +59,57 @@ def nlp(): title='Natrual Language Processing', new_nlp_job_form=new_nlp_job_form ) + + +@services.route('/ocr', methods=['GET', 'POST']) +@login_required +def ocr(): + new_ocr_job_form = NewOCRJobForm() + if new_ocr_job_form.validate_on_submit(): + ocr_job = Job(creator=current_user, + description=new_ocr_job_form.description.data, + mem_mb=8192, + n_cores=4, + service='ocr', + service_args=json.dumps([ + '-l {}'.format(new_ocr_job_form.language.data), + '--keep-intermediates', + '--skip-binarisation']), + service_version=new_ocr_job_form.version.data, + status='submitted', + title=new_ocr_job_form.title.data) + + db.session.add(ocr_job) + db.session.commit() + + dir = os.path.join(current_app.config['OPAQUE_STORAGE'], + str(ocr_job.user_id), + 'jobs', + str(ocr_job.id)) + + try: + os.makedirs(dir) + except OSError: + flash('OSError!') + db.session.remove(ocr_job) + db.session.commit() + else: + for file in new_ocr_job_form.files.data: + file.save(os.path.join(dir, file.filename)) + ''' + ' TODO: Let the scheduler run this job in the background. + ' + ' NOTE: Using self created threads is just for testing purpose as + ' there is no scheduler available. + ''' + db.session.expunge(ocr_job) + thread = Thread(target=swarm.run, args=(ocr_job,)) + thread.start() + flash('Job created!') + return redirect(url_for('services.ocr')) + + return render_template( + 'services/ocr.html.j2', + title='Optical Character Recognition', + new_ocr_job_form=new_ocr_job_form + ) diff --git a/app/swarm.py b/app/swarm.py index 748e4822..3ee6596d 100644 --- a/app/swarm.py +++ b/app/swarm.py @@ -35,16 +35,14 @@ class Swarm: ''' # Prepare argument values needed for the service creation. service_args = json.loads(job.service_args) - ressources = json.loads(job.ressources) _command = (job.service + ' -i /files' - + ' -l {}'.format(service_args['lang']) + ' -o /files/output' - + ' ' + ' '.join(service_args['args'])) + + ' ' + ' '.join(service_args)) _constraints = ['node.role==worker'] _image = 'gitlab.ub.uni-bielefeld.de:4567/sfb1288inf/{}:{}'.format( job.service, - service_args['version'] + job.service_version ) _labels = {'service': job.service} _mounts = [os.path.join('/home/compute/mnt/opaque', @@ -64,8 +62,8 @@ class Swarm: ' in megabytes, it is also necessary to convert the value. ''' _resources = docker.types.Resources( - cpu_reservation=ressources['n_cores'] * (10 ** 9), - mem_reservation=ressources['mem_mb'] * (10 ** 6) + cpu_reservation=job.n_cores * (10 ** 9), + mem_reservation=job.mem_mb * (10 ** 6) ) _restart_policy = docker.types.RestartPolicy(condition='none') '''