mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-10-31 02:32:45 +00:00 
			
		
		
		
	Change job model.
This commit is contained in:
		| @@ -3,10 +3,50 @@ from wtforms import MultipleFileField, SelectField, StringField, SubmitField, Va | ||||
| from wtforms.validators import DataRequired, Length | ||||
|  | ||||
|  | ||||
class NewNLPJobForm(FlaskForm):
    """Form used to submit a new NLP job.

    Collects a description (1-255 characters), the uploaded files, the
    language, a title (1-32 characters) and the service version. Uploaded
    files are additionally checked by ``validate_files``.
    """

    description = StringField('Description',
                              validators=[DataRequired(), Length(1, 255)])
    files = MultipleFileField('Files', validators=[DataRequired()])
    language = SelectField(
        'Language',
        validators=[DataRequired()],
        choices=[
            # The empty value is the placeholder entry; DataRequired
            # rejects it on submit.
            ('', 'Choose your option'),
            ('en', 'English'),
            ('fr', 'French'),
            ('de', 'German'),
            ('it', 'Italian'),
            ('pt', 'Portuguese'),
            ('es', 'Spanish'),
        ],
    )
    submit = SubmitField('Submit')
    title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
    version = SelectField(
        'Version',
        validators=[DataRequired()],
        choices=[
            ('', 'Choose your option'),
            ('latest', 'Latest'),
        ],
    )

    def validate_files(form, field):
        """Raise ``ValidationError`` if any uploaded file is not ``.txt``.

        The extension check is case-insensitive and stops at the first
        offending file.
        """
        has_foreign_file = any(
            not upload.filename.lower().endswith('.txt')
            for upload in field.data
        )
        if has_foreign_file:
            raise ValidationError(
                'File does not have an approved extension: '
                '.txt'
            )
|  | ||||
|  | ||||
| class NewOCRJobForm(FlaskForm): | ||||
|     description = StringField( | ||||
|         'Description', | ||||
|         validators=[DataRequired(), Length(1, 64)] | ||||
|         validators=[DataRequired(), Length(1, 255)] | ||||
|     ) | ||||
|     files = MultipleFileField('Files', validators=[DataRequired()]) | ||||
|     language = SelectField( | ||||
| @@ -44,43 +84,3 @@ class NewOCRJobForm(FlaskForm): | ||||
|                     'File does not have an approved extension: ' | ||||
|                     '.pdf | .tif | .tiff' | ||||
|                 ) | ||||
|  | ||||
|  | ||||
class NewNLPJobForm(FlaskForm):
    """Form used to submit a new NLP job.

    Collects a description (1-64 characters), the uploaded files, the
    language, a title (1-32 characters) and the service version. Uploaded
    files are additionally checked by ``validate_files``.
    """

    description = StringField('Description',
                              validators=[DataRequired(), Length(1, 64)])
    files = MultipleFileField('Files', validators=[DataRequired()])
    language = SelectField(
        'Language',
        validators=[DataRequired()],
        choices=[
            # The empty value is the placeholder entry; DataRequired
            # rejects it on submit.
            ('', 'Choose your option'),
            ('en', 'English'),
            ('fr', 'French'),
            ('de', 'German'),
            ('it', 'Italian'),
            ('pt', 'Portuguese'),
            ('es', 'Spanish'),
        ],
    )
    submit = SubmitField('Submit')
    title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
    version = SelectField(
        'Version',
        validators=[DataRequired()],
        choices=[
            ('', 'Choose your option'),
            ('latest', 'Latest'),
        ],
    )

    def validate_files(form, field):
        """Raise ``ValidationError`` unless every uploaded file is ``.txt``.

        The extension check is case-insensitive and stops at the first
        offending file.
        """
        only_text_files = all(
            upload.filename.lower().endswith('.txt')
            for upload in field.data
        )
        if not only_text_files:
            raise ValidationError(
                'File does not have an approved extension: '
                '.txt'
            )
|   | ||||
| @@ -6,61 +6,8 @@ from ..models import Job | ||||
| from ..import swarm | ||||
| from .. import db | ||||
| from threading import Thread | ||||
| import os | ||||
| import json | ||||
|  | ||||
|  | ||||
@services.route('/ocr', methods=['GET', 'POST'])
@login_required
def ocr():
    """Show the OCR page and handle the submission of a new OCR job.

    On a valid form submission a ``Job`` row is committed, a job directory
    is created below ``OPAQUE_STORAGE/<user_id>/jobs/<job_id>``, the
    uploaded files are stored in it and the job is passed to ``swarm.run``
    in a separate thread. If the directory cannot be created, the job row
    is deleted again.

    Returns:
        A redirect to this view after a submission was processed,
        otherwise the rendered ``services/ocr.html.j2`` template.
    """
    new_ocr_job_form = NewOCRJobForm()
    if new_ocr_job_form.validate_on_submit():
        ocr_job = Job(creator=current_user._get_current_object(),
                      description=new_ocr_job_form.description.data,
                      service="ocr",
                      ressources=json.dumps({"n_cores": 4,
                                             "mem_mb": 8192}),
                      service_args=json.dumps({"args": ["--keep-intermediates",
                                                        "--skip-binarisation"],
                                               "lang": new_ocr_job_form.language.data,
                                               "version": new_ocr_job_form.version.data}),
                      status="pending",
                      title=new_ocr_job_form.title.data)

        # Commit first: the job id is part of the storage path below.
        db.session.add(ocr_job)
        db.session.commit()

        job_dir = os.path.join(current_app.config['OPAQUE_STORAGE'],
                               str(ocr_job.user_id),
                               'jobs',
                               str(ocr_job.id))

        try:
            os.makedirs(job_dir)
        except OSError:
            flash('OSError!')
            # Roll the job back. ``delete`` marks the row for removal;
            # ``session.remove()`` takes no instance and would raise here.
            db.session.delete(ocr_job)
            db.session.commit()
        else:
            for file in new_ocr_job_form.files.data:
                # The file name is client supplied. Keep only its last path
                # component so it cannot escape ``job_dir``.
                file.save(os.path.join(job_dir,
                                       os.path.basename(file.filename)))
            # TODO: Let the scheduler run this job in the background.
            #
            # NOTE: Using self created threads is just for testing purpose
            #       as there is no scheduler available.
            db.session.expunge(ocr_job)
            thread = Thread(target=swarm.run, args=(ocr_job,))
            thread.start()
            flash('Job created!')
        return redirect(url_for('services.ocr'))

    return render_template(
        'services/ocr.html.j2',
        title='Optical Character Recognition',
        new_ocr_job_form=new_ocr_job_form
    )
| import os | ||||
|  | ||||
|  | ||||
| @services.route('/nlp', methods=['GET', 'POST']) | ||||
| @@ -68,15 +15,14 @@ def ocr(): | ||||
| def nlp(): | ||||
|     new_nlp_job_form = NewNLPJobForm() | ||||
|     if new_nlp_job_form.validate_on_submit(): | ||||
|         nlp_job = Job(creator=current_user._get_current_object(), | ||||
|         nlp_job = Job(creator=current_user, | ||||
|                       description=new_nlp_job_form.description.data, | ||||
|                       service="nlp", | ||||
|                       ressources=json.dumps({"n_cores": 2, | ||||
|                                              "mem_mb": 4096}), | ||||
|                       service_args=json.dumps({"args": [], | ||||
|                                                "lang": new_nlp_job_form.language.data, | ||||
|                                                "version": new_nlp_job_form.version.data}), | ||||
|                       status="pending", | ||||
|                       mem_mb=4096, | ||||
|                       n_cores=2, | ||||
|                       service='nlp', | ||||
|                       service_args=json.dumps(['-l {}'.format(new_nlp_job_form.language.data)]), | ||||
|                       service_version=new_nlp_job_form.version.data, | ||||
|                       status='submitted', | ||||
|                       title=new_nlp_job_form.title.data) | ||||
|  | ||||
|         db.session.add(nlp_job) | ||||
| @@ -113,3 +59,57 @@ def nlp(): | ||||
|         title='Natrual Language Processing', | ||||
|         new_nlp_job_form=new_nlp_job_form | ||||
|     ) | ||||
|  | ||||
|  | ||||
@services.route('/ocr', methods=['GET', 'POST'])
@login_required
def ocr():
    """Show the OCR page and handle the submission of a new OCR job.

    On a valid form submission a ``Job`` row is committed, a job directory
    is created below ``OPAQUE_STORAGE/<user_id>/jobs/<job_id>``, the
    uploaded files are stored in it and the job is passed to ``swarm.run``
    in a separate thread. If the directory cannot be created, the job row
    is deleted again.

    Returns:
        A redirect to this view after a submission was processed,
        otherwise the rendered ``services/ocr.html.j2`` template.
    """
    new_ocr_job_form = NewOCRJobForm()
    if new_ocr_job_form.validate_on_submit():
        ocr_job = Job(creator=current_user,
                      description=new_ocr_job_form.description.data,
                      mem_mb=8192,
                      n_cores=4,
                      service='ocr',
                      service_args=json.dumps([
                        '-l {}'.format(new_ocr_job_form.language.data),
                        '--keep-intermediates',
                        '--skip-binarisation']),
                      service_version=new_ocr_job_form.version.data,
                      status='submitted',
                      title=new_ocr_job_form.title.data)

        # Commit first: the job id is part of the storage path below.
        db.session.add(ocr_job)
        db.session.commit()

        job_dir = os.path.join(current_app.config['OPAQUE_STORAGE'],
                               str(ocr_job.user_id),
                               'jobs',
                               str(ocr_job.id))

        try:
            os.makedirs(job_dir)
        except OSError:
            flash('OSError!')
            # Roll the job back. ``delete`` marks the row for removal;
            # ``session.remove()`` takes no instance and would raise here.
            db.session.delete(ocr_job)
            db.session.commit()
        else:
            for file in new_ocr_job_form.files.data:
                # The file name is client supplied. Keep only its last path
                # component so it cannot escape ``job_dir``.
                file.save(os.path.join(job_dir,
                                       os.path.basename(file.filename)))
            # TODO: Let the scheduler run this job in the background.
            #
            # NOTE: Using self created threads is just for testing purpose
            #       as there is no scheduler available.
            db.session.expunge(ocr_job)
            thread = Thread(target=swarm.run, args=(ocr_job,))
            thread.start()
            flash('Job created!')
        return redirect(url_for('services.ocr'))

    return render_template(
        'services/ocr.html.j2',
        title='Optical Character Recognition',
        new_ocr_job_form=new_ocr_job_form
    )
|   | ||||
		Reference in New Issue
	
	Block a user