More exception handling. Remove unused database models. New common view structure!

This commit is contained in:
Patrick Jentsch
2020-11-13 10:01:51 +01:00
parent cb9da5c7dd
commit 5a06a6b241
45 changed files with 692 additions and 1005 deletions

View File

@ -1,4 +1,4 @@
from flask import (abort, current_app, flash, make_response, redirect, request,
from flask import (abort, flash, make_response, redirect, request,
render_template, url_for, send_from_directory)
from flask_login import current_user, login_required
from . import corpora
@ -11,6 +11,7 @@ from jsonschema import validate
from .. import db
from ..models import Corpus, CorpusFile, QueryResult
import json
import logging
import os
import shutil
import glob
@ -22,106 +23,92 @@ from .import_corpus import check_zip_contents
@corpora.route('/add', methods=['GET', 'POST'])
@login_required
def add_corpus():
    """Render the "add corpus" form and create a corpus on valid submit.

    On a valid submission a ``Corpus`` row owned by the current user is
    committed and the directory ``corpus.path`` is created. If the directory
    cannot be created, the row is removed again and the request is aborted
    with HTTP 500. On success the user is redirected to the corpus view.
    """
    form = AddCorpusForm()
    if not form.validate_on_submit():
        # Nothing submitted (or validation failed): show the form.
        return render_template('corpora/add_corpus.html.j2', form=form,
                               title='Add corpus')
    corpus = Corpus(creator=current_user,
                    description=form.description.data,
                    title=form.title.data)
    db.session.add(corpus)
    # Commit before touching the file system; corpus.path is read afterwards.
    db.session.commit()
    try:
        os.makedirs(corpus.path)
    except OSError:
        logging.error('Make dir {} led to an OSError!'.format(corpus.path))
        # Roll back the database row so no corpus exists without a directory.
        db.session.delete(corpus)
        db.session.commit()
        abort(500)
    flash('Corpus "{}" added!'.format(corpus.title), 'corpus')
    return redirect(url_for('.corpus', corpus_id=corpus.id))
@corpora.route('/import', methods=['GET', 'POST'])
@login_required
def import_corpus():
    """Create a corpus from an uploaded, previously exported archive.

    On a submitted and valid form a ``Corpus`` row owned by the current user
    is committed, ``corpus.path`` is created and the uploaded archive is
    saved into it. If the archive lists every name in
    ``check_zip_contents`` it is unpacked, one ``CorpusFile`` is registered
    per ``*.vrt`` file found in ``corpus.path``, the corpus status is set to
    ``'prepared'`` and the archive file is removed. Otherwise (including an
    upload that is not a zip file at all) the corpus is handed to
    ``tasks.delete_corpus`` and an error message is flashed.

    Returns:
        The rendered form when nothing was submitted; otherwise a response
        carrying either the form errors (400) or a ``redirect_url`` entry
        (201, or 500 if the corpus directory could not be created).
    """
    # Local import: only this view needs the exception type.
    from zipfile import BadZipFile

    form = ImportCorpusForm()
    if form.is_submitted():
        if not form.validate():
            return make_response(form.errors, 400)
        corpus = Corpus(creator=current_user,
                        description=form.description.data,
                        title=form.title.data)
        db.session.add(corpus)
        db.session.commit()
        try:
            os.makedirs(corpus.path)
        except OSError:
            logging.error('Make dir {} led to an OSError!'.format(corpus.path))
            db.session.delete(corpus)
            db.session.commit()
            flash('Internal Server Error', 'error')
            return make_response(
                {'redirect_url': url_for('.import_corpus')}, 500)
        # Upload zip. Only the final path component of the client-supplied
        # file name is used, so directory parts cannot steer the write.
        # NOTE(review): consider werkzeug's secure_filename here.
        archive_name = os.path.basename(form.file.data.filename)
        archive_file = os.path.join(corpus.path, archive_name)
        form.file.data.save(archive_file)
        # Some checks to verify it is a valid exported corpus
        try:
            with ZipFile(archive_file, 'r') as archive:
                contents = archive.namelist()
        except BadZipFile:
            # Not a zip file at all: treat it like any other invalid archive
            # instead of failing with an unhandled exception.
            contents = None
        if (contents is not None
                and set(check_zip_contents).issubset(contents)):
            # Unzip
            # NOTE(review): member paths of the archive are not checked
            # before unpacking.
            shutil.unpack_archive(archive_file, corpus.path)
            # Register vrt files to corpus
            for vrt_path in glob.glob(os.path.join(corpus.path, '*.vrt')):
                element_tree = ET.parse(vrt_path)
                text_node = element_tree.find('text')
                corpus_file = CorpusFile(
                    address=text_node.get('address', 'NULL'),
                    author=text_node.get('author', 'NULL'),
                    booktitle=text_node.get('booktitle', 'NULL'),
                    chapter=text_node.get('chapter', 'NULL'),
                    corpus=corpus,
                    editor=text_node.get('editor', 'NULL'),
                    filename=os.path.basename(vrt_path),
                    institution=text_node.get('institution', 'NULL'),
                    journal=text_node.get('journal', 'NULL'),
                    pages=text_node.get('pages', 'NULL'),
                    publisher=text_node.get('publisher', 'NULL'),
                    publishing_year=text_node.get('publishing_year', ''),
                    school=text_node.get('school', 'NULL'),
                    title=text_node.get('title', 'NULL')
                )
                db.session.add(corpus_file)
            # finish import and redirect to imported corpus
            corpus.status = 'prepared'
            db.session.commit()
            os.remove(archive_file)
            flash('Corpus "{}" imported!'.format(corpus.title), 'corpus')
            return make_response(
                {'redirect_url': url_for('.corpus', corpus_id=corpus.id)}, 201)
        # If imported zip is not valid delete corpus and give feedback.
        # The title is formatted into the message before the corpus is
        # handed over for deletion.
        flash('Can not import corpus "{}": Invalid archive file!'
              .format(corpus.title), 'error')
        tasks.delete_corpus(corpus.id)
        # Status code kept at 201 as before.
        # NOTE(review): presumably the client follows redirect_url only on
        # this status -- confirm before switching to a 4xx code.
        return make_response(
            {'redirect_url': url_for('.import_corpus')}, 201)
    return render_template('corpora/import_corpus.html.j2', form=form,
                           title='Import Corpus')
@ -131,17 +118,9 @@ def corpus(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.creator == current_user or current_user.is_administrator()):
abort(403)
corpus_files = [dict(filename=corpus_file.filename,
author=corpus_file.author,
title=corpus_file.title,
publishing_year=corpus_file.publishing_year,
corpus_id=corpus.id,
id=corpus_file.id)
for corpus_file in corpus.files]
return render_template('corpora/corpus.html.j2',
corpus=corpus,
corpus_files=corpus_files,
title='Corpus')
corpus_files = [corpus_file.to_dict() for corpus_file in corpus.files]
return render_template('corpora/corpus.html.j2', corpus=corpus,
corpus_files=corpus_files, title='Corpus')
@corpora.route('/<int:corpus_id>/export')
@ -150,12 +129,11 @@ def export_corpus(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.creator == current_user or current_user.is_administrator()):
abort(403)
# TODO: Check what happens here
dir = os.path.dirname(corpus.archive_file)
filename = os.path.basename(corpus.archive_file)
return send_from_directory(directory=dir,
filename=filename,
mimetype='zip',
as_attachment=True)
return send_from_directory(as_attachment=True, directory=dir,
filename=filename, mimetype='zip')
@corpora.route('/<int:corpus_id>/analyse')
@ -168,7 +146,8 @@ def analyse_corpus(corpus_id):
display_options_form = DisplayOptionsForm(
prefix='display-options-form',
result_context=request.args.get('context', 20),
results_per_page=request.args.get('results_per_page', 30))
results_per_page=request.args.get('results_per_page', 30)
)
query_form = QueryForm(prefix='query-form',
query=request.args.get('query'))
query_download_form = QueryDownloadForm(prefix='query-download-form')
@ -177,12 +156,12 @@ def analyse_corpus(corpus_id):
return render_template(
'corpora/analyse_corpus.html.j2',
corpus=corpus,
corpus_id=corpus_id,
display_options_form=display_options_form,
inspect_display_options_form=inspect_display_options_form,
query_form=query_form,
query_download_form=query_download_form,
inspect_display_options_form=inspect_display_options_form,
title='Corpus analysis')
title='Corpus analysis'
)
@corpora.route('/<int:corpus_id>/delete')
@ -191,8 +170,8 @@ def delete_corpus(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.creator == current_user or current_user.is_administrator()):
abort(403)
flash('Corpus "{}" marked for deletion!'.format(corpus.title), 'corpus')
tasks.delete_corpus(corpus_id)
flash('Corpus deleted!', 'corpus')
return redirect(url_for('main.dashboard'))
@ -202,43 +181,33 @@ def add_corpus_file(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.creator == current_user or current_user.is_administrator()):
abort(403)
add_corpus_file_form = AddCorpusFileForm(corpus,
prefix='add-corpus-file-form')
if add_corpus_file_form.is_submitted():
if not add_corpus_file_form.validate():
return make_response(add_corpus_file_form.errors, 400)
form = AddCorpusFileForm(corpus, prefix='add-corpus-file-form')
if form.is_submitted():
if not form.validate():
return make_response(form.errors, 400)
# Save the file
dir = os.path.join(str(corpus.user_id), 'corpora', str(corpus.id))
add_corpus_file_form.file.data.save(
os.path.join(current_app.config['DATA_DIR'], dir,
add_corpus_file_form.file.data.filename))
corpus_file = CorpusFile(
address=add_corpus_file_form.address.data,
author=add_corpus_file_form.author.data,
booktitle=add_corpus_file_form.booktitle.data,
chapter=add_corpus_file_form.chapter.data,
corpus=corpus,
dir=dir,
editor=add_corpus_file_form.editor.data,
filename=add_corpus_file_form.file.data.filename,
institution=add_corpus_file_form.institution.data,
journal=add_corpus_file_form.journal.data,
pages=add_corpus_file_form.pages.data,
publisher=add_corpus_file_form.publisher.data,
publishing_year=add_corpus_file_form.publishing_year.data,
school=add_corpus_file_form.school.data,
title=add_corpus_file_form.title.data)
form.file.data.save(os.path.join(corpus.path, form.file.data.filename))
corpus_file = CorpusFile(address=form.address.data,
author=form.author.data,
booktitle=form.booktitle.data,
chapter=form.chapter.data,
corpus=corpus,
editor=form.editor.data,
filename=form.file.data.filename,
institution=form.institution.data,
journal=form.journal.data,
pages=form.pages.data,
publisher=form.publisher.data,
publishing_year=form.publishing_year.data,
school=form.school.data,
title=form.title.data)
db.session.add(corpus_file)
corpus.status = 'unprepared'
db.session.commit()
flash('Corpus file added!', 'corpus')
return make_response(
{'redirect_url': url_for('corpora.corpus', corpus_id=corpus.id)},
201)
return render_template('corpora/add_corpus_file.html.j2',
corpus=corpus,
add_corpus_file_form=add_corpus_file_form,
title='Add corpus file')
flash('Corpus file "{}" added!'.format(corpus_file.filename), 'corpus')
return make_response({'redirect_url': url_for('.corpus', corpus_id=corpus.id)}, 201) # noqa
return render_template('corpora/add_corpus_file.html.j2', corpus=corpus,
form=form, title='Add corpus file')
@corpora.route('/<int:corpus_id>/files/<int:corpus_file_id>/delete')
@ -250,9 +219,9 @@ def delete_corpus_file(corpus_id, corpus_file_id):
if not (corpus_file.corpus.creator == current_user
or current_user.is_administrator()):
abort(403)
flash('Corpus file "{}" marked for deletion!'.format(corpus_file.filename), 'corpus') # noqa
tasks.delete_corpus_file(corpus_file_id)
flash('Corpus file deleted!', 'corpus')
return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
return redirect(url_for('.corpus', corpus_id=corpus_id))
@corpora.route('/<int:corpus_id>/files/<int:corpus_file_id>/download')
@ -264,9 +233,8 @@ def download_corpus_file(corpus_id, corpus_file_id):
if not (corpus_file.corpus.creator == current_user
or current_user.is_administrator()):
abort(403)
dir = os.path.join(current_app.config['DATA_DIR'],
corpus_file.dir)
return send_from_directory(as_attachment=True, directory=dir,
return send_from_directory(as_attachment=True,
directory=corpus_file.corpus.path,
filename=corpus_file.filename)
@ -274,48 +242,45 @@ def download_corpus_file(corpus_id, corpus_file_id):
methods=['GET', 'POST'])
@login_required
def corpus_file(corpus_id, corpus_file_id):
corpus = Corpus.query.get_or_404(corpus_id)
corpus_file = CorpusFile.query.get_or_404(corpus_file_id)
if not corpus_file.corpus_id == corpus_id:
if corpus_file.corpus_id != corpus_id:
abort(404)
if not (corpus_file.corpus.creator == current_user
or current_user.is_administrator()):
abort(403)
edit_corpus_file_form = EditCorpusFileForm(prefix='edit-corpus-file-form')
if edit_corpus_file_form.validate_on_submit():
corpus_file.address = edit_corpus_file_form.address.data
corpus_file.author = edit_corpus_file_form.author.data
corpus_file.booktitle = edit_corpus_file_form.booktitle.data
corpus_file.chapter = edit_corpus_file_form.chapter.data
corpus_file.editor = edit_corpus_file_form.editor.data
corpus_file.institution = edit_corpus_file_form.institution.data
corpus_file.journal = edit_corpus_file_form.journal.data
corpus_file.pages = edit_corpus_file_form.pages.data
corpus_file.publisher = edit_corpus_file_form.publisher.data
corpus_file.publishing_year = \
edit_corpus_file_form.publishing_year.data
corpus_file.school = edit_corpus_file_form.school.data
corpus_file.title = edit_corpus_file_form.title.data
form = EditCorpusFileForm(prefix='edit-corpus-file-form')
if form.validate_on_submit():
corpus_file.address = form.address.data
corpus_file.author = form.author.data
corpus_file.booktitle = form.booktitle.data
corpus_file.chapter = form.chapter.data
corpus_file.editor = form.editor.data
corpus_file.institution = form.institution.data
corpus_file.journal = form.journal.data
corpus_file.pages = form.pages.data
corpus_file.publisher = form.publisher.data
corpus_file.publishing_year = form.publishing_year.data
corpus_file.school = form.school.data
corpus_file.title = form.title.data
corpus.status = 'unprepared'
db.session.commit()
flash('Corpus file edited!', 'corpus')
return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
flash('Corpus file "{}" edited!'.format(corpus_file.filename), 'corpus') # noqa
return redirect(url_for('.corpus', corpus_id=corpus_id))
# If no form is submitted or valid, fill out fields with current values
edit_corpus_file_form.address.data = corpus_file.address
edit_corpus_file_form.author.data = corpus_file.author
edit_corpus_file_form.booktitle.data = corpus_file.booktitle
edit_corpus_file_form.chapter.data = corpus_file.chapter
edit_corpus_file_form.editor.data = corpus_file.editor
edit_corpus_file_form.institution.data = corpus_file.institution
edit_corpus_file_form.journal.data = corpus_file.journal
edit_corpus_file_form.pages.data = corpus_file.pages
edit_corpus_file_form.publisher.data = corpus_file.publisher
edit_corpus_file_form.publishing_year.data = corpus_file.publishing_year
edit_corpus_file_form.school.data = corpus_file.school
edit_corpus_file_form.title.data = corpus_file.title
return render_template('corpora/corpus_file.html.j2',
corpus_file=corpus_file, corpus=corpus,
edit_corpus_file_form=edit_corpus_file_form,
form.address.data = corpus_file.address
form.author.data = corpus_file.author
form.booktitle.data = corpus_file.booktitle
form.chapter.data = corpus_file.chapter
form.editor.data = corpus_file.editor
form.institution.data = corpus_file.institution
form.journal.data = corpus_file.journal
form.pages.data = corpus_file.pages
form.publisher.data = corpus_file.publisher
form.publishing_year.data = corpus_file.publishing_year
form.school.data = corpus_file.school
form.title.data = corpus_file.title
return render_template('corpora/corpus_file.html.j2', corpus=corpus,
corpus_file=corpus_file, form=form,
title='Edit corpus file')
@ -327,10 +292,10 @@ def prepare_corpus(corpus_id):
abort(403)
if corpus.files.all():
tasks.build_corpus(corpus_id)
flash('Building Corpus...', 'corpus')
flash('Corpus "{}" has been marked to get build!', 'corpus')
else:
flash('Can not build corpus, please add corpus file(s).', 'corpus')
return redirect(url_for('corpora.corpus', corpus_id=corpus_id))
flash('Can not build corpus "{}": No corpus file(s)!', 'error')
return redirect(url_for('.corpus', corpus_id=corpus_id))
# The following view functions add, show, inspect, delete and download exported query results.
@ -340,35 +305,29 @@ def add_query_result():
'''
View to import a result as a json file.
'''
add_query_result_form = AddQueryResultForm(prefix='add-query-result-form')
if add_query_result_form.is_submitted():
if not add_query_result_form.validate():
return make_response(add_query_result_form.errors, 400)
query_result = QueryResult(
creator=current_user,
description=add_query_result_form.description.data,
filename=add_query_result_form.file.data.filename,
title=add_query_result_form.title.data
)
form = AddQueryResultForm(prefix='add-query-result-form')
if form.is_submitted():
if not form.validate():
return make_response(form.errors, 400)
query_result = QueryResult(creator=current_user,
description=form.description.data,
filename=form.file.data.filename,
title=form.title.data)
db.session.add(query_result)
db.session.commit()
# create paths to save the uploaded json file
query_result_dir = os.path.join(current_app.config['DATA_DIR'],
str(current_user.id),
'query_results',
str(query_result.id))
try:
os.makedirs(query_result_dir)
except Exception:
os.makedirs(query_result.path)
except OSError:
logging.error('Make dir {} led to an OSError!'.format(query_result.path)) # noqa
db.session.delete(query_result)
db.session.commit()
flash('Internal Server Error', 'error')
redirect_url = url_for('corpora.add_query_result')
return make_response({'redirect_url': redirect_url}, 500)
return make_response(
{'redirect_url': url_for('.add_query_result')}, 500)
# save the uploaded file
query_result_file_path = os.path.join(query_result_dir,
query_result_file_path = os.path.join(query_result.path,
query_result.filename)
add_query_result_form.file.data.save(query_result_file_path)
form.file.data.save(query_result_file_path)
# parse json from file
with open(query_result_file_path, 'r') as file:
query_result_file_content = json.load(file)
@ -381,19 +340,16 @@ def add_query_result():
except Exception:
tasks.delete_query_result(query_result.id)
flash('Uploaded file is invalid', 'result')
redirect_url = url_for('corpora.add_query_result')
return make_response({'redirect_url': redirect_url}, 201)
return make_response(
{'redirect_url': url_for('.add_query_result')}, 201)
query_result_file_content.pop('matches')
query_result_file_content.pop('cpos_lookup')
query_result.query_metadata = query_result_file_content
db.session.commit()
flash('Query result added!', 'result')
redirect_url = url_for('corpora.query_result',
query_result_id=query_result.id)
return make_response({'redirect_url': redirect_url}, 201)
return make_response({'redirect_url': url_for('.query_result', query_result_id=query_result.id)}, 201) # noqa
return render_template('corpora/query_results/add_query_result.html.j2',
add_query_result_form=add_query_result_form,
title='Add query result')
form=form, title='Add query result')
@corpora.route('/result/<int:query_result_id>')
@ -404,8 +360,7 @@ def query_result(query_result_id):
or current_user.is_administrator()):
abort(403)
return render_template('corpora/query_results/query_result.html.j2',
query_result=query_result,
title='Query result')
query_result=query_result, title='Query result')
@corpora.route('/result/<int:query_result_id>/inspect')
@ -427,13 +382,7 @@ def inspect_query_result(query_result_id):
inspect_display_options_form = InspectDisplayOptionsForm(
prefix='inspect-display-options-form'
)
query_result_file_path = os.path.join(
current_app.config['DATA_DIR'],
str(current_user.id),
'query_results',
str(query_result.id),
query_result.filename
)
query_result_file_path = os.path.join(query_result.path, query_result.filename) # noqa
with open(query_result_file_path, 'r') as query_result_file:
query_result_file_content = json.load(query_result_file)
return render_template('corpora/query_results/inspect.html.j2',
@ -452,8 +401,8 @@ def delete_query_result(query_result_id):
if not (query_result.creator == current_user
or current_user.is_administrator()):
abort(403)
flash('Query result "{}" has been marked for deletion!'.format(query_result), 'result') # noqa
tasks.delete_query_result(query_result_id)
flash('Query result deleted!', 'result')
return redirect(url_for('services.service', service="corpus_analysis"))
@ -464,10 +413,5 @@ def download_query_result(query_result_id):
if not (query_result.creator == current_user
or current_user.is_administrator()):
abort(403)
query_result_dir = os.path.join(current_app.config['DATA_DIR'],
str(current_user.id),
'query_results',
str(query_result.id))
return send_from_directory(as_attachment=True,
directory=query_result_dir,
return send_from_directory(as_attachment=True, directory=query_result.path,
filename=query_result.filename)