Add check_encoding to nlp jobs

This commit is contained in:
Stephan Porada 2020-02-13 14:41:02 +01:00
parent bb5f456e6e
commit 81402d2d6a
3 changed files with 19 additions and 0 deletions

View File

@ -25,6 +25,7 @@ class AddNLPJobForm(FlaskForm):
choices=[('2.2.0', 'Latest (2.2.0)'),
('2.2.0', '2.2.0')],
validators=[DataRequired()])
check_encoding = BooleanField('Check encoding')
def validate_files(form, field):
for file in field.data:

View File

@ -8,6 +8,7 @@ from werkzeug.utils import secure_filename
from . import services
import json
import os
from app import logger
SERVICES = {'corpus_analysis': {'name': 'Corpus analysis'},
@ -36,7 +37,12 @@ def service(service):
return make_response(add_job_form.errors, 400)
service_args = []
if service == 'nlp':
logger.warning(add_job_form.check_encoding)
service_args.append('-l {}'.format(add_job_form.language.data))
logger.warning("Service args: {}".format(service_args))
if add_job_form.check_encoding.data:
service_args.append('--check-encoding')
logger.warning("Service args: {}".format(service_args))
if service == 'ocr':
service_args.append('-l {}'.format(add_job_form.language.data))
if not add_job_form.binarization.data:

View File

@ -101,6 +101,18 @@
</div>
</div>
</div>
<div class="row">
<div class="col s12 m6">
<span class="card-title">Check Encoding</span>
<p>If the input files were not created with the nopaque OCR service and you do not know whether your text files are UTF-8 encoded, check this switch. We will try to automatically determine the right encoding for your texts.</p>
<div class="switch">
<label>
{{ add_job_form.check_encoding() }}
<span class="lever"></span>
</label>
</div>
</div>
</div>
</div>
<div class="card-action right-align">
<button class="btn waves-effect waves-light" id="submit" name="submit" type="submit">Submit<i class="material-icons right">send</i></button>