mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-01-25 00:50:35 +00:00
418 lines
18 KiB
Python
418 lines
18 KiB
Python
from flask import (abort, flash, make_response, redirect, request,
|
|
render_template, url_for, send_from_directory)
|
|
from flask_login import current_user, login_required
|
|
from . import corpora
|
|
from . import tasks
|
|
from .forms import (AddCorpusFileForm, AddCorpusForm, AddQueryResultForm,
|
|
EditCorpusFileForm, QueryDownloadForm, QueryForm,
|
|
DisplayOptionsForm, InspectDisplayOptionsForm,
|
|
ImportCorpusForm)
|
|
from jsonschema import validate
|
|
from .. import db
|
|
from ..models import Corpus, CorpusFile, QueryResult
|
|
import json
|
|
import logging
|
|
import os
|
|
import shutil
|
|
import glob
|
|
import xml.etree.ElementTree as ET
|
|
from zipfile import ZipFile
|
|
from .import_corpus import check_zip_contents
|
|
|
|
|
|
@corpora.route('/add', methods=['GET', 'POST'])
@login_required
def add_corpus():
    '''
    View to create a new corpus for the logged in user.

    Renders the add corpus form until a valid form is submitted. A valid
    submission stores a new Corpus in the database and creates its directory
    (corpus.path). If the directory can not be created, the database entry is
    removed again and the request is aborted with status 500.
    '''
    form = AddCorpusForm()
    if not form.validate_on_submit():
        # Nothing (valid) submitted yet, just show the form
        return render_template('corpora/add_corpus.html.j2', form=form,
                               title='Add corpus')
    new_corpus = Corpus(creator=current_user,
                        description=form.description.data,
                        title=form.title.data)
    db.session.add(new_corpus)
    # Commit first, the directory is created below from the stored corpus
    db.session.commit()
    try:
        os.makedirs(new_corpus.path)
    except OSError:
        message = 'Make dir {} led to an OSError!'.format(new_corpus.path)
        logging.error(message)
        # Undo the database entry, it has no directory to work with
        db.session.delete(new_corpus)
        db.session.commit()
        abort(500)
    flash('Corpus "{}" added!'.format(new_corpus.title), 'corpus')
    target_url = url_for('.corpus', corpus_id=new_corpus.id)
    return redirect(target_url)
|
|
|
|
|
|
@corpora.route('/import', methods=['GET', 'POST'])
@login_required
def import_corpus():
    '''
    View to import a corpus from an uploaded zip archive.

    A GET request renders the import form. A submitted form that does not
    validate is answered with the form errors and status 400. Otherwise a
    Corpus is created for the current user, the uploaded archive is saved
    into the corpus directory and its member names are checked against
    check_zip_contents.

    A valid archive is unpacked, every *.vrt file found in the corpus
    directory is registered as a CorpusFile (metadata is read from the
    attributes of its 'text' element) and the corpus status is set to
    'prepared'. For an invalid archive (missing members or not a zip file at
    all) the corpus is handed to tasks.delete_corpus.

    Responses to a valid submission are dicts with a 'redirect_url' entry.
    '''
    # Function scope import, only this view needs the exception
    from zipfile import BadZipFile

    form = ImportCorpusForm()
    if form.is_submitted():
        if not form.validate():
            return make_response(form.errors, 400)
        corpus = Corpus(creator=current_user,
                        description=form.description.data,
                        title=form.title.data)
        db.session.add(corpus)
        db.session.commit()
        try:
            os.makedirs(corpus.path)
        except OSError:
            logging.error('Make dir {} led to an OSError!'.format(corpus.path))
            db.session.delete(corpus)
            db.session.commit()
            flash('Internal Server Error', 'error')
            return make_response(
                {'redirect_url': url_for('.import_corpus')}, 500)
        # Upload zip
        # The filename is sent by the client, so strip any directory part to
        # keep the archive inside of the corpus directory.
        archive_name = os.path.basename(form.file.data.filename)
        archive_file = os.path.join(corpus.path, archive_name)
        form.file.data.save(archive_file)
        # Some checks to verify it is a valid exported corpus
        try:
            with ZipFile(archive_file, 'r') as archive:
                contents = archive.namelist()
        except BadZipFile:
            # Not a zip file at all, handled like an incomplete archive
            contents = None
        if contents is not None and set(check_zip_contents).issubset(contents):
            # Unzip
            shutil.unpack_archive(archive_file, corpus.path)
            # Register vrt files to corpus
            for vrt_path in glob.glob(os.path.join(corpus.path, '*.vrt')):
                # NOTE(review): find() returns None if the root element has
                # no 'text' child, this is not handled here - confirm that
                # exported vrt files always contain one.
                text_node = ET.parse(vrt_path).find('text')
                corpus_file = CorpusFile(
                    address=text_node.get('address', 'NULL'),
                    author=text_node.get('author', 'NULL'),
                    booktitle=text_node.get('booktitle', 'NULL'),
                    chapter=text_node.get('chapter', 'NULL'),
                    corpus=corpus,
                    editor=text_node.get('editor', 'NULL'),
                    filename=os.path.basename(vrt_path),
                    institution=text_node.get('institution', 'NULL'),
                    journal=text_node.get('journal', 'NULL'),
                    pages=text_node.get('pages', 'NULL'),
                    publisher=text_node.get('publisher', 'NULL'),
                    publishing_year=text_node.get('publishing_year', ''),
                    school=text_node.get('school', 'NULL'),
                    title=text_node.get('title', 'NULL')
                )
                db.session.add(corpus_file)
            # finish import and redirect to imported corpus
            corpus.status = 'prepared'
            db.session.commit()
            os.remove(archive_file)
            flash('Corpus "{}" imported!'.format(corpus.title), 'corpus')
            return make_response(
                {'redirect_url': url_for('.corpus', corpus_id=corpus.id)}, 201)
        else:
            # If imported zip is not valid delete corpus and give feedback
            flash('Can not import corpus "{}": Invalid archive file!'.format(corpus.title), 'error')  # noqa
            tasks.delete_corpus(corpus.id)
            # NOTE(review): status 201 is kept for this failure case because
            # the client side handling of the response is not visible here.
            return make_response(
                {'redirect_url': url_for('.import_corpus')}, 201)
    return render_template('corpora/import_corpus.html.j2', form=form,
                           title='Import Corpus')
|
|
|
|
|
|
@corpora.route('/<int:corpus_id>')
@login_required
def corpus(corpus_id):
    '''
    View to show a single corpus together with its files.

    Responds with 404 if the corpus does not exist and with 403 if the
    current user is neither its creator nor an administrator.
    '''
    requested_corpus = Corpus.query.get_or_404(corpus_id)
    is_creator = requested_corpus.creator == current_user
    if not (is_creator or current_user.is_administrator()):
        abort(403)
    # The template gets the files as plain dicts
    file_dicts = []
    for entry in requested_corpus.files:
        file_dicts.append(entry.to_dict())
    return render_template('corpora/corpus.html.j2',
                           corpus=requested_corpus,
                           corpus_files=file_dicts,
                           title='Corpus')
|
|
|
|
|
|
@corpora.route('/<int:corpus_id>/download')
@login_required
def download_corpus(corpus_id):
    '''
    View to download the archive file of a corpus as an attachment.

    Responds with 404 if the corpus does not exist and with 403 if the
    current user is neither its creator nor an administrator.
    '''
    corpus = Corpus.query.get_or_404(corpus_id)
    if not (corpus.creator == current_user or current_user.is_administrator()):
        abort(403)
    # TODO: Check what happens here
    archive_dir, archive_name = os.path.split(corpus.archive_file)
    # Directory and file name are passed positionally because the name of the
    # second parameter differs between Flask versions (filename/path).
    return send_from_directory(archive_dir, archive_name,
                               as_attachment=True,
                               mimetype='application/zip')
|
|
|
|
|
|
@corpora.route('/<int:corpus_id>/analyse')
@login_required
def analyse_corpus(corpus_id):
    '''
    View to render the corpus analysis page of a corpus.

    Responds with 404 if the corpus does not exist and with 403 if the
    current user is neither its creator nor an administrator. A corpus with
    the status 'prepared' is switched to 'start analysis'.

    The display options and the query form are prefilled from the query
    string arguments 'context' (default 20), 'results_per_page' (default 30)
    and 'query'.
    '''
    corpus = Corpus.query.get_or_404(corpus_id)
    # Same access rule as all other corpus views. This view changes the
    # corpus status, so it must not be reachable for foreign corpora.
    if not (corpus.creator == current_user or current_user.is_administrator()):
        abort(403)
    if corpus.status == 'prepared':
        corpus.status = 'start analysis'
        db.session.commit()
    display_options_form = DisplayOptionsForm(
        prefix='display-options-form',
        result_context=request.args.get('context', 20),
        results_per_page=request.args.get('results_per_page', 30)
    )
    query_form = QueryForm(prefix='query-form',
                           query=request.args.get('query'))
    query_download_form = QueryDownloadForm(prefix='query-download-form')
    inspect_display_options_form = InspectDisplayOptionsForm(
        prefix='inspect-display-options-form')
    return render_template(
        'corpora/analyse_corpus.html.j2',
        corpus=corpus,
        display_options_form=display_options_form,
        inspect_display_options_form=inspect_display_options_form,
        query_form=query_form,
        query_download_form=query_download_form,
        title='Corpus analysis'
    )
|
|
|
|
|
|
@corpora.route('/<int:corpus_id>/delete')
@login_required
def delete_corpus(corpus_id):
    '''
    View to mark a corpus for deletion.

    Responds with 404 if the corpus does not exist and with 403 if the
    current user is neither its creator nor an administrator. The deletion
    itself is handed to tasks.delete_corpus, afterwards the user is
    redirected to the dashboard.
    '''
    doomed_corpus = Corpus.query.get_or_404(corpus_id)
    is_creator = doomed_corpus.creator == current_user
    if not (is_creator or current_user.is_administrator()):
        abort(403)
    message = 'Corpus "{}" marked for deletion!'.format(doomed_corpus.title)
    flash(message, 'corpus')
    tasks.delete_corpus(corpus_id)
    dashboard_url = url_for('main.dashboard')
    return redirect(dashboard_url)
|
|
|
|
|
|
@corpora.route('/<int:corpus_id>/files/add', methods=['GET', 'POST'])
@login_required
def add_corpus_file(corpus_id):
    '''
    View to add a file to a corpus.

    Responds with 404 if the corpus does not exist and with 403 if the
    current user is neither its creator nor an administrator. A GET request
    renders the form. A submitted form that does not validate is answered
    with the form errors and status 400. Otherwise the uploaded file is saved
    into the corpus directory, a CorpusFile with the submitted metadata is
    stored, the corpus status is reset to 'unprepared' and a dict with a
    'redirect_url' entry is returned with status 201.
    '''
    corpus = Corpus.query.get_or_404(corpus_id)
    if not (corpus.creator == current_user or current_user.is_administrator()):
        abort(403)
    form = AddCorpusFileForm(corpus, prefix='add-corpus-file-form')
    if form.is_submitted():
        if not form.validate():
            return make_response(form.errors, 400)
        # The filename is sent by the client, so strip any directory part to
        # keep the file inside of the corpus directory. The same name is
        # used for the database entry so that both always match.
        filename = os.path.basename(form.file.data.filename)
        # Save the file
        form.file.data.save(os.path.join(corpus.path, filename))
        corpus_file = CorpusFile(address=form.address.data,
                                 author=form.author.data,
                                 booktitle=form.booktitle.data,
                                 chapter=form.chapter.data,
                                 corpus=corpus,
                                 editor=form.editor.data,
                                 filename=filename,
                                 institution=form.institution.data,
                                 journal=form.journal.data,
                                 pages=form.pages.data,
                                 publisher=form.publisher.data,
                                 publishing_year=form.publishing_year.data,
                                 school=form.school.data,
                                 title=form.title.data)
        db.session.add(corpus_file)
        # A new file means the corpus has to be built again
        corpus.status = 'unprepared'
        db.session.commit()
        flash('Corpus file "{}" added!'.format(corpus_file.filename), 'corpus')
        return make_response({'redirect_url': url_for('.corpus', corpus_id=corpus.id)}, 201)  # noqa
    return render_template('corpora/add_corpus_file.html.j2', corpus=corpus,
                           form=form, title='Add corpus file')
|
|
|
|
|
|
@corpora.route('/<int:corpus_id>/files/<int:corpus_file_id>/delete')
@login_required
def delete_corpus_file(corpus_id, corpus_file_id):
    '''
    View to mark a corpus file for deletion.

    Responds with 404 if the file does not exist or does not belong to the
    corpus given in the URL and with 403 if the current user is neither the
    creator of the corpus nor an administrator. The deletion itself is handed
    to tasks.delete_corpus_file, afterwards the user is redirected to the
    corpus page.
    '''
    doomed_file = CorpusFile.query.get_or_404(corpus_file_id)
    belongs_to_corpus = doomed_file.corpus_id == corpus_id
    if not belongs_to_corpus:
        abort(404)
    is_creator = doomed_file.corpus.creator == current_user
    if not (is_creator or current_user.is_administrator()):
        abort(403)
    message = 'Corpus file "{}" marked for deletion!'.format(
        doomed_file.filename)
    flash(message, 'corpus')
    tasks.delete_corpus_file(corpus_file_id)
    corpus_url = url_for('.corpus', corpus_id=corpus_id)
    return redirect(corpus_url)
|
|
|
|
|
|
@corpora.route('/<int:corpus_id>/files/<int:corpus_file_id>/download')
@login_required
def download_corpus_file(corpus_id, corpus_file_id):
    '''
    View to download a corpus file as an attachment.

    Responds with 404 if the file does not exist or does not belong to the
    corpus given in the URL and with 403 if the current user is neither the
    creator of the corpus nor an administrator.
    '''
    corpus_file = CorpusFile.query.get_or_404(corpus_file_id)
    if not corpus_file.corpus_id == corpus_id:
        abort(404)
    if not (corpus_file.corpus.creator == current_user
            or current_user.is_administrator()):
        abort(403)
    # Directory and file name are passed positionally because the name of the
    # second parameter differs between Flask versions (filename/path).
    return send_from_directory(os.path.dirname(corpus_file.path),
                               corpus_file.filename,
                               as_attachment=True)
|
|
|
|
|
|
@corpora.route('/<int:corpus_id>/files/<int:corpus_file_id>',
               methods=['GET', 'POST'])
@login_required
def corpus_file(corpus_id, corpus_file_id):
    '''
    View to edit the metadata of a corpus file.

    Responds with 404 if the corpus or the file does not exist or if the file
    does not belong to the corpus, and with 403 if the current user is
    neither the creator of the corpus nor an administrator. A valid
    submission writes the form values to the file, resets the corpus status
    to 'unprepared' and redirects to the corpus page. In every other case the
    form is rendered, filled with the currently stored values.
    '''
    # Names shared by the form fields and the CorpusFile attributes
    metadata_fields = ('address', 'author', 'booktitle', 'chapter', 'editor',
                       'institution', 'journal', 'pages', 'publisher',
                       'publishing_year', 'school', 'title')
    parent_corpus = Corpus.query.get_or_404(corpus_id)
    is_creator = parent_corpus.creator == current_user
    if not (is_creator or current_user.is_administrator()):
        abort(403)
    edited_file = CorpusFile.query.get_or_404(corpus_file_id)
    if edited_file.corpus != parent_corpus:
        abort(404)
    form = EditCorpusFileForm(prefix='edit-corpus-file-form')
    if form.validate_on_submit():
        for field_name in metadata_fields:
            setattr(edited_file, field_name, getattr(form, field_name).data)
        # Changed metadata means the corpus has to be built again
        parent_corpus.status = 'unprepared'
        db.session.commit()
        message = 'Corpus file "{}" edited!'.format(edited_file.filename)
        flash(message, 'corpus')
        return redirect(url_for('.corpus', corpus_id=corpus_id))
    # If no form is submitted or valid, fill out fields with current values
    for field_name in metadata_fields:
        getattr(form, field_name).data = getattr(edited_file, field_name)
    return render_template('corpora/corpus_file.html.j2',
                           corpus=parent_corpus,
                           corpus_file=edited_file,
                           form=form,
                           title='Edit corpus file')
|
|
|
|
|
|
@corpora.route('/<int:corpus_id>/prepare')
@login_required
def prepare_corpus(corpus_id):
    '''
    View to mark a corpus to get built.

    Responds with 404 if the corpus does not exist and with 403 if the
    current user is neither its creator nor an administrator. The build is
    handed to tasks.build_corpus, but only if the corpus has at least one
    file. In both cases the user is redirected to the corpus page.
    '''
    target_corpus = Corpus.query.get_or_404(corpus_id)
    is_creator = target_corpus.creator == current_user
    if not (is_creator or current_user.is_administrator()):
        abort(403)
    if not target_corpus.files.all():
        # Nothing to build from
        message = 'Can not build corpus "{}": No corpus file(s)!'.format(
            target_corpus.title)
        flash(message, 'error')
    else:
        tasks.build_corpus(corpus_id)
        message = 'Corpus "{}" has been marked to get build!'.format(
            target_corpus.title)
        flash(message, 'corpus')
    corpus_url = url_for('.corpus', corpus_id=corpus_id)
    return redirect(corpus_url)
|
|
|
|
|
|
# The following view functions add, view, inspect, delete and download
# exported query results.
|
|
@corpora.route('/result/add', methods=['GET', 'POST'])
@login_required
def add_query_result():
    '''
    View to import a result as a json file.

    A GET request renders the form. A submitted form that does not validate
    is answered with the form errors and status 400. Otherwise a QueryResult
    is stored for the current user, its directory is created and the uploaded
    file is saved into it. The file has to be valid JSON that matches the
    nopaque_cqi_py_results_schema, otherwise the query result is handed to
    tasks.delete_query_result. For a valid file the content without 'matches'
    and 'cpos_lookup' is stored as query_metadata.

    Responses to a valid submission are dicts with a 'redirect_url' entry.
    '''
    form = AddQueryResultForm(prefix='add-query-result-form')
    if form.is_submitted():
        if not form.validate():
            return make_response(form.errors, 400)
        # The filename is sent by the client, so strip any directory part to
        # keep the file inside of the query result directory.
        filename = os.path.basename(form.file.data.filename)
        query_result = QueryResult(creator=current_user,
                                   description=form.description.data,
                                   filename=filename,
                                   title=form.title.data)
        db.session.add(query_result)
        db.session.commit()
        # NOTE(review): query_result.path is used as a directory here, while
        # inspect_query_result opens it as a file - confirm against the
        # QueryResult model which one is intended.
        try:
            os.makedirs(query_result.path)
        except OSError:
            logging.error('Make dir {} led to an OSError!'.format(query_result.path))  # noqa
            db.session.delete(query_result)
            db.session.commit()
            flash('Internal Server Error', 'error')
            return make_response(
                {'redirect_url': url_for('.add_query_result')}, 500)
        # save the uploaded file
        query_result_file_path = os.path.join(query_result.path,
                                              query_result.filename)
        form.file.data.save(query_result_file_path)
        # parse json schema
        with open('app/static/json_schema/nopaque_cqi_py_results_schema.json', 'r') as file:  # noqa
            schema = json.load(file)
        try:
            # parse json from file, an upload that is not JSON at all is
            # handled like one that does not match the schema
            with open(query_result_file_path, 'r') as file:
                query_result_file_content = json.load(file)
            # validate imported json file
            validate(instance=query_result_file_content, schema=schema)
        except Exception:
            tasks.delete_query_result(query_result.id)
            flash('Uploaded file is invalid', 'result')
            # NOTE(review): status 201 is kept for this failure case because
            # the client side handling of the response is not visible here.
            return make_response(
                {'redirect_url': url_for('.add_query_result')}, 201)
        # Only the metadata is stored in the database, the default keeps
        # the pops from failing if a key is absent
        query_result_file_content.pop('matches', None)
        query_result_file_content.pop('cpos_lookup', None)
        query_result.query_metadata = query_result_file_content
        db.session.commit()
        flash('Query result added!', 'result')
        return make_response({'redirect_url': url_for('.query_result', query_result_id=query_result.id)}, 201)  # noqa
    return render_template('corpora/query_results/add_query_result.html.j2',
                           form=form, title='Add query result')
|
|
|
|
|
|
@corpora.route('/result/<int:query_result_id>')
@login_required
def query_result(query_result_id):
    '''
    View to show a single query result.

    Responds with 404 if the query result does not exist and with 403 if the
    current user is neither its creator nor an administrator.
    '''
    requested_result = QueryResult.query.get_or_404(query_result_id)
    is_creator = requested_result.creator == current_user
    if not (is_creator or current_user.is_administrator()):
        abort(403)
    template = 'corpora/query_results/query_result.html.j2'
    return render_template(template,
                           query_result=requested_result,
                           title='Query result')
|
|
|
|
|
|
@corpora.route('/result/<int:query_result_id>/inspect')
@login_required
def inspect_query_result(query_result_id):
    '''
    View to inspect imported result file in a corpus analysis like interface

    Responds with 404 if the query result does not exist and with 403 if the
    current user is neither its creator nor an administrator. The display
    options are prefilled from the query string arguments 'results_per_page'
    (default 30) and 'context' (default 20).
    '''
    inspected_result = QueryResult.query.get_or_404(query_result_id)
    metadata = inspected_result.query_metadata
    is_creator = inspected_result.creator == current_user
    if not (is_creator or current_user.is_administrator()):
        abort(403)
    per_page = request.args.get('results_per_page', 30)
    context_size = request.args.get('context', 20)
    options_form = DisplayOptionsForm(prefix='display-options-form',
                                      results_per_page=per_page,
                                      result_context=context_size)
    inspect_options_form = InspectDisplayOptionsForm(
        prefix='inspect-display-options-form')
    # NOTE(review): the path is opened as a file here - confirm that
    # QueryResult.path includes the filename.
    with open(inspected_result.path, 'r') as result_file:
        file_content = json.load(result_file)
    template_context = {
        'query_result': inspected_result,
        'display_options_form': options_form,
        'inspect_display_options_form': inspect_options_form,
        'query_result_file_content': file_content,
        'query_metadata': metadata,
        'title': 'Inspect query result',
    }
    return render_template('corpora/query_results/inspect.html.j2',
                           **template_context)
|
|
|
|
|
|
@corpora.route('/result/<int:query_result_id>/delete')
@login_required
def delete_query_result(query_result_id):
    '''
    View to mark a query result for deletion.

    Responds with 404 if the query result does not exist and with 403 if the
    current user is neither its creator nor an administrator. The deletion
    itself is handed to tasks.delete_query_result, afterwards the user is
    redirected to the corpus analysis service page.
    '''
    query_result = QueryResult.query.get_or_404(query_result_id)
    if not (query_result.creator == current_user
            or current_user.is_administrator()):
        abort(403)
    # Show the title like the other flash messages do, not the object itself
    flash('Query result "{}" has been marked for deletion!'.format(query_result.title), 'result')  # noqa
    tasks.delete_query_result(query_result_id)
    return redirect(url_for('services.service', service="corpus_analysis"))
|
|
|
|
|
|
@corpora.route('/result/<int:query_result_id>/download')
@login_required
def download_query_result(query_result_id):
    '''
    View to download the file of a query result as an attachment.

    Responds with 404 if the query result does not exist and with 403 if the
    current user is neither its creator nor an administrator.
    '''
    query_result = QueryResult.query.get_or_404(query_result_id)
    if not (query_result.creator == current_user
            or current_user.is_administrator()):
        abort(403)
    # Directory and file name are passed positionally because the name of the
    # second parameter differs between Flask versions (filename/path).
    return send_from_directory(os.path.dirname(query_result.path),
                               query_result.filename,
                               as_attachment=True)
|