nopaque/app/corpora/routes.py
2021-11-30 16:22:16 +01:00

288 lines
11 KiB
Python

from flask import (abort, current_app, flash, make_response, redirect,
render_template, url_for, send_from_directory)
from flask_login import current_user, login_required
from . import bp
from . import tasks
from .forms import (AddCorpusFileForm, AddCorpusForm, EditCorpusFileForm,
ImportCorpusForm)
from .. import db
from ..models import Corpus, CorpusFile
import os
import shutil
import glob
import xml.etree.ElementTree as ET
from zipfile import ZipFile
from .import_corpus import check_zip_contents
@bp.route('/add', methods=['GET', 'POST'])
@login_required
def add_corpus():
form = AddCorpusForm(prefix='add-corpus-form')
if form.validate_on_submit():
corpus = Corpus(
user=current_user,
description=form.description.data,
title=form.title.data
)
db.session.add(corpus)
db.session.flush()
db.session.refresh(corpus)
try:
os.makedirs(corpus.path)
except OSError as e:
current_app.logger.error(f'Could not add corpus: {e}')
db.session.rollback()
flash('Internal Server Error', 'error')
abort(500)
else:
db.session.commit()
flash('Corpus "{}" added!'.format(corpus.title), 'corpus')
return redirect(url_for('.corpus', corpus_id=corpus.id))
return render_template('corpora/add_corpus.html.j2', form=form,
title='Add corpus')
@bp.route('/import', methods=['GET', 'POST'])
@login_required
def import_corpus():
abort(503)
form = ImportCorpusForm()
if form.is_submitted():
if not form.validate():
return make_response(form.errors, 400)
corpus = Corpus(
user=current_user,
description=form.description.data,
title=form.title.data
)
db.session.add(corpus)
db.session.flush()
db.session.refresh(corpus)
try:
os.makedirs(corpus.path)
except OSError as e:
current_app.logger.error(f'Could not import corpus: {e}')
db.session.rollback()
flash('Internal Server Error', 'error')
return make_response(
{'redirect_url': url_for('.import_corpus')}, 500)
# Upload zip
archive_file = os.path.join(corpus.path, form.file.data.filename)
form.file.data.save(archive_file)
# Some checks to verify it is a valid exported corpus
with ZipFile(archive_file, 'r') as zip:
contents = zip.namelist()
if set(check_zip_contents).issubset(contents):
# Unzip
shutil.unpack_archive(archive_file, corpus.path)
# Register vrt files to corpus
vrts = glob.glob(corpus.path + '/*.vrt')
for file in vrts:
element_tree = ET.parse(file)
text_node = element_tree.find('text')
corpus_file = CorpusFile(
address=text_node.get('address', 'NULL'),
author=text_node.get('author', 'NULL'),
booktitle=text_node.get('booktitle', 'NULL'),
chapter=text_node.get('chapter', 'NULL'),
corpus=corpus,
editor=text_node.get('editor', 'NULL'),
filename=os.path.basename(file),
institution=text_node.get('institution', 'NULL'),
journal=text_node.get('journal', 'NULL'),
pages=text_node.get('pages', 'NULL'),
publisher=text_node.get('publisher', 'NULL'),
publishing_year=text_node.get('publishing_year', ''),
school=text_node.get('school', 'NULL'),
title=text_node.get('title', 'NULL')
)
db.session.add(corpus_file)
# finish import and redirect to imported corpus
corpus.status = 'prepared'
db.session.commit()
os.remove(archive_file)
flash('Corpus "{}" imported!'.format(corpus.title), 'corpus')
return make_response(
{'redirect_url': url_for('.corpus', corpus_id=corpus.id)}, 201)
else:
# If imported zip is not valid delete corpus and give feedback
flash('Can not import corpus "{}" not imported: Invalid archive file!', 'error') # noqa
tasks.delete_corpus(corpus.id)
return make_response(
{'redirect_url': url_for('.import_corpus')}, 201)
return render_template('corpora/import_corpus.html.j2', form=form,
title='Import Corpus')
@bp.route('/<hashid:corpus_id>')
@login_required
def corpus(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.user == current_user or current_user.is_administrator()):
abort(403)
corpus_files = [corpus_file.to_dict() for corpus_file in corpus.files]
return render_template('corpora/corpus.html.j2', corpus=corpus,
corpus_files=corpus_files, title='Corpus')
@bp.route('/<hashid:corpus_id>/analyse')
@login_required
def analyse_corpus(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
return render_template(
'corpora/analyse_corpus.html.j2',
corpus=corpus,
title=f'Analyse Corpus {corpus.title}'
)
@bp.route('/<hashid:corpus_id>/download')
@login_required
def download_corpus(corpus_id):
abort(503)
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.user == current_user or current_user.is_administrator()):
abort(403)
return send_from_directory(
as_attachment=True,
directory=os.path.join(corpus.user.path, 'corpora'),
filename=corpus.archive_file,
mimetype='zip'
)
@bp.route('/<hashid:corpus_id>/delete')
@login_required
def delete_corpus(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.user == current_user or current_user.is_administrator()):
abort(403)
flash('Corpus "{}" marked for deletion!'.format(corpus.title), 'corpus')
tasks.delete_corpus(corpus_id)
return redirect(url_for('main.dashboard'))
@bp.route('/<hashid:corpus_id>/files/add', methods=['GET', 'POST'])
@login_required
def add_corpus_file(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.user == current_user or current_user.is_administrator()):
abort(403)
form = AddCorpusFileForm(corpus, prefix='add-corpus-file-form')
if form.is_submitted():
if not form.validate():
return make_response(form.errors, 400)
# Save the file
form.file.data.save(os.path.join(corpus.path, form.file.data.filename))
corpus_file = CorpusFile(
address=form.address.data,
author=form.author.data,
booktitle=form.booktitle.data,
chapter=form.chapter.data,
corpus=corpus,
editor=form.editor.data,
filename=form.file.data.filename,
institution=form.institution.data,
journal=form.journal.data,
pages=form.pages.data,
publisher=form.publisher.data,
publishing_year=form.publishing_year.data,
school=form.school.data,
title=form.title.data
)
db.session.add(corpus_file)
corpus.status = 'unprepared'
db.session.commit()
flash('Corpus file "{}" added!'.format(corpus_file.filename), 'corpus')
return make_response({'redirect_url': url_for('.corpus', corpus_id=corpus.id)}, 201) # noqa
return render_template('corpora/add_corpus_file.html.j2', corpus=corpus,
form=form, title='Add corpus file')
@bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>/delete')
@login_required
def delete_corpus_file(corpus_id, corpus_file_id):
corpus_file = CorpusFile.query.get_or_404(corpus_file_id)
if not corpus_file.corpus_id == corpus_id:
abort(404)
if not (corpus_file.corpus.user == current_user
or current_user.is_administrator()):
abort(403)
flash('Corpus file "{}" marked for deletion!'.format(corpus_file.filename), 'corpus') # noqa
tasks.delete_corpus_file(corpus_file_id)
return redirect(url_for('.corpus', corpus_id=corpus_id))
@bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>/download')
@login_required
def download_corpus_file(corpus_id, corpus_file_id):
corpus_file = CorpusFile.query.get_or_404(corpus_file_id)
if not corpus_file.corpus_id == corpus_id:
abort(404)
if not (corpus_file.corpus.user == current_user
or current_user.is_administrator()):
abort(403)
return send_from_directory(as_attachment=True,
directory=os.path.dirname(corpus_file.path),
filename=corpus_file.filename)
@bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>', methods=['GET', 'POST'])
@login_required
def corpus_file(corpus_id, corpus_file_id):
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.user == current_user or current_user.is_administrator()):
abort(403)
corpus_file = CorpusFile.query.get_or_404(corpus_file_id)
if corpus_file.corpus != corpus:
abort(404)
form = EditCorpusFileForm(prefix='edit-corpus-file-form')
if form.validate_on_submit():
corpus_file.address = form.address.data
corpus_file.author = form.author.data
corpus_file.booktitle = form.booktitle.data
corpus_file.chapter = form.chapter.data
corpus_file.editor = form.editor.data
corpus_file.institution = form.institution.data
corpus_file.journal = form.journal.data
corpus_file.pages = form.pages.data
corpus_file.publisher = form.publisher.data
corpus_file.publishing_year = form.publishing_year.data
corpus_file.school = form.school.data
corpus_file.title = form.title.data
corpus.status = 'unprepared'
db.session.commit()
flash('Corpus file "{}" edited!'.format(corpus_file.filename), 'corpus') # noqa
return redirect(url_for('.corpus', corpus_id=corpus_id))
# If no form is submitted or valid, fill out fields with current values
form.address.data = corpus_file.address
form.author.data = corpus_file.author
form.booktitle.data = corpus_file.booktitle
form.chapter.data = corpus_file.chapter
form.editor.data = corpus_file.editor
form.institution.data = corpus_file.institution
form.journal.data = corpus_file.journal
form.pages.data = corpus_file.pages
form.publisher.data = corpus_file.publisher
form.publishing_year.data = corpus_file.publishing_year
form.school.data = corpus_file.school
form.title.data = corpus_file.title
return render_template('corpora/corpus_file.html.j2', corpus=corpus,
corpus_file=corpus_file, form=form,
title='Edit corpus file')
@bp.route('/<hashid:corpus_id>/prepare')
@login_required
def prepare_corpus(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.user == current_user or current_user.is_administrator()):
abort(403)
if corpus.files.all():
tasks.build_corpus(corpus_id)
flash('Corpus "{}" has been marked to get build!'.format(corpus.title), 'corpus') # noqa
else:
flash('Can not build corpus "{}": No corpus file(s)!'.format(corpus.title), 'error') # noqa
return redirect(url_for('.corpus', corpus_id=corpus_id))