mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2024-11-15 09:15:41 +00:00
169 lines
7.3 KiB
Python
169 lines
7.3 KiB
Python
from flask import (abort, current_app, flash, make_response, render_template,
|
|
url_for)
|
|
from flask_login import current_user, login_required
|
|
from .forms import ImportResultsForm
|
|
from werkzeug.utils import secure_filename
|
|
from . import services
|
|
from .. import db
|
|
from ..jobs.forms import AddFileSetupJobForm, AddNLPJobForm, AddOCRJobForm
|
|
from ..models import Job, JobInput, Result, ResultFile, User
|
|
from .tables import ResultTable, ResultItem
|
|
import json
|
|
import os
|
|
import html
|
|
from datetime import datetime
|
|
|
|
|
|
# Registry of the services reachable through the `service` view, keyed by the
# name used in the URL. Every entry has a display 'name'. All entries except
# 'corpus_analysis' additionally carry the 'resources' copied onto a new Job
# (mem_mb, n_cores) and the 'add_job_form' class used to submit such a job.
SERVICES = {
    'corpus_analysis': {
        'name': 'Corpus analysis',
    },
    'file-setup': {
        'name': 'File setup',
        'resources': {'mem_mb': 4096, 'n_cores': 4},
        'add_job_form': AddFileSetupJobForm,
    },
    'nlp': {
        'name': 'Natural Language Processing',
        'resources': {'mem_mb': 4096, 'n_cores': 2},
        'add_job_form': AddNLPJobForm,
    },
    'ocr': {
        'name': 'Optical Character Recognition',
        'resources': {'mem_mb': 8192, 'n_cores': 4},
        'add_job_form': AddOCRJobForm,
    },
}
|
|
|
|
|
|
@services.route('/<service>', methods=['GET', 'POST'])
@login_required
def service(service):
    '''
    View for a single service page; also accepts new job submissions.

    Unknown service names abort with 404. 'corpus_analysis' only renders its
    template. Every other service renders its template together with the
    service's add-job form. When that form is submitted the response is a
    dict payload instead of a page:

    - 400 with the form errors if validation fails,
    - 500 with a 'redirect_url' back to the service page if the job
      directory cannot be created (the job is deleted again),
    - 201 with a 'redirect_url' to the new job on success.
    '''
    if service not in SERVICES:
        abort(404)
    template = 'services/{}.html.j2'.format(service)
    if service == 'corpus_analysis':
        return render_template(template, title=SERVICES[service]['name'])
    add_job_form = SERVICES[service]['add_job_form'](prefix='add-job-form')
    if add_job_form.is_submitted():
        if not add_job_form.validate():
            return make_response(add_job_form.errors, 400)
        # Command line arguments handed to the service, stored as JSON.
        service_args = []
        if service == 'nlp':
            service_args.append('-l {}'.format(add_job_form.language.data))
            if add_job_form.check_encoding.data:
                service_args.append('--check-encoding')
        if service == 'ocr':
            service_args.append('-l {}'.format(add_job_form.language.data))
            if add_job_form.binarization.data:
                service_args.append('--binarize')
        job = Job(creator=current_user,
                  description=add_job_form.description.data,
                  mem_mb=SERVICES[service]['resources']['mem_mb'],
                  n_cores=SERVICES[service]['resources']['n_cores'],
                  service=service, service_args=json.dumps(service_args),
                  service_version=add_job_form.version.data,
                  status='preparing', title=add_job_form.title.data)
        # 'corpus_analysis' returned above, so every job created here needs a
        # secure filename; no extra service check is required.
        job.create_secure_filename()
        db.session.add(job)
        # Commit first: the job id is part of the storage path below.
        db.session.commit()
        relative_dir = os.path.join(str(job.user_id), 'jobs', str(job.id))
        absolut_dir = os.path.join(current_app.config['NOPAQUE_STORAGE'],
                                   relative_dir)
        try:
            os.makedirs(absolut_dir)
        except OSError:
            # Without a storage directory the job is useless; remove it again.
            job.delete()
            flash('Internal Server Error', 'job')
            return make_response(
                {'redirect_url': url_for('services.service',
                                         service=service)},
                500)
        for upload in add_job_form.files.data:
            # Never trust the client supplied filename for a path on disk.
            filename = secure_filename(upload.filename)
            upload.save(os.path.join(absolut_dir, filename))
            job_input = JobInput(dir=relative_dir, filename=filename, job=job)
            db.session.add(job_input)
        job.status = 'submitted'
        db.session.commit()
        url = url_for('jobs.job', job_id=job.id)
        # The flash message contains markup, so the user supplied title has
        # to be escaped before it is embedded in it.
        flash('[<a href="{}">{}</a>] added'.format(url,
                                                   html.escape(job.title)),
              'job')
        return make_response({'redirect_url': url}, 201)
    return render_template(template,
                           title=SERVICES[service]['name'],
                           add_job_form=add_job_form)
|
|
|
|
|
|
def _discard_failed_import(result, abs_dir=None):
    '''
    Undo a partially completed import: remove the files stored under abs_dir
    (if given) and delete the Result row from the database.
    '''
    if abs_dir is not None:
        import shutil  # local import: only needed on this error path

        shutil.rmtree(abs_dir, ignore_errors=True)
    db.session.delete(result)
    db.session.commit()


@services.route('/import_results', methods=['GET', 'POST'])
@login_required
def import_results():
    '''
    View to import one json result file. Uses the ImportResultsForm.

    GET renders the import page. A submitted form is answered with a dict
    payload instead of a page:

    - 400 with the form errors if validation fails, if the filename is empty
      after sanitizing, or if the upload is not valid JSON containing the
      keys 'matches' and 'cpos_lookup',
    - 201 with a 'redirect_url' to the results overview on success.

    If the upload cannot be stored (OSError) the Result created for it is
    removed again and the error is re-raised.
    '''
    # TODO: Check the remaining metadata keys of the uploaded json as well.
    # Add the possibility to add several result files at once.
    import_results_form = ImportResultsForm(prefix='add-result-file-form')
    if import_results_form.is_submitted():
        if not import_results_form.validate():
            return make_response(import_results_form.errors, 400)
        upload = import_results_form.file.data
        # Never trust the client supplied filename for a path on disk.
        filename = secure_filename(upload.filename)
        if not filename:
            return make_response({'file': ['Invalid filename.']}, 400)
        # Save the file
        # result creation only happens on file save to avoid creating a result
        # object in the db everytime by just visiting the import_results page
        result = Result(user_id=current_user.id)
        db.session.add(result)
        # Commit first: the result id is part of the storage path below.
        db.session.commit()
        if not (result.creator == current_user
                or current_user.is_administrator()):
            abort(403)
        relative_dir = os.path.join(str(result.user_id),
                                    'results',
                                    'corpus_analysis_results',
                                    str(result.id))
        abs_dir = os.path.join(current_app.config['NOPAQUE_STORAGE'],
                               relative_dir)
        abs_file_path = os.path.join(abs_dir, filename)
        try:
            os.makedirs(abs_dir)
        except OSError:
            # Nothing of ours was written yet; only drop the database row.
            _discard_failed_import(result)
            raise
        try:
            upload.save(abs_file_path)
            # Saves all needed metadata entries in one json field. The bulky
            # 'matches' and 'cpos_lookup' entries stay in the file only.
            with open(abs_file_path, 'r', encoding='utf-8') as f:
                corpus_metadata = json.load(f)
            del corpus_metadata['matches']
            del corpus_metadata['cpos_lookup']
        except OSError:
            _discard_failed_import(result, abs_dir)
            raise
        except (ValueError, KeyError, TypeError):
            # Not JSON, not a JSON object, or the expected keys are missing.
            # A Result without metadata must not be left behind.
            _discard_failed_import(result, abs_dir)
            return make_response(
                {'file': ['The uploaded file is not a valid result file.']},
                400)
        result_file = ResultFile(result_id=result.id,
                                 dir=relative_dir,
                                 filename=filename)
        result.corpus_metadata = corpus_metadata
        db.session.add(result_file)
        db.session.commit()
        flash('Result file added!', 'result')
        return make_response(
            {'redirect_url': url_for('services.results')},
            201)
    return render_template('services/import_results.html.j2',
                           import_results_form=import_results_form,
                           title='Add corpus file')
|
|
|
|
|
|
@services.route('/results')
@login_required
def results():
    '''
    Shows an overview of imported results.

    Builds one table row per result of the current user from the result's
    stored corpus metadata and renders the table into the results template.
    Results without stored metadata are left out of the table.
    '''
    def parse_time(time_str):
        # Timestamps are expected as e.g. '2020-01-31T12:00:00.000000'.
        return datetime.strptime(time_str, '%Y-%m-%dT%H:%M:%S.%f')

    # get all results of current user
    user_results = User.query.get(current_user.id).results
    # create table row for every result; a result whose metadata was never
    # stored has nothing to display and would otherwise break the whole page
    items = [ResultItem(r.corpus_metadata['query'],
                        r.corpus_metadata['match_count'],
                        r.corpus_metadata['corpus_name'],
                        parse_time(r.corpus_metadata['corpus_creation_date']),
                        parse_time(r.corpus_metadata['corpus_analysis_date']),
                        r.corpus_metadata['corpus_type'],
                        r.id)
             for r in user_results if r.corpus_metadata]
    # create table with items and save it as html
    # NOTE(review): unescaping turns escaped entities in the cell values back
    # into markup, which may include the user supplied 'query' - confirm that
    # ResultTable or the template keeps this safe.
    table = html.unescape(ResultTable(items).__html__())
    # add class=list to table body with string replacement
    table = table.replace('tbody', 'tbody class=list', 1)
    return render_template('services/results.html.j2',
                           title='Imported Results',
                           table=table)
|