"""Views of the corpora blueprint.

Covers creating, importing, viewing, analysing, downloading, preparing
(building) and deleting corpora as well as adding, editing, downloading and
deleting the files that belong to a corpus.
"""

import glob
import os
import shutil
import xml.etree.ElementTree as ET
from zipfile import ZipFile

from flask import (abort, current_app, flash, make_response, redirect,
                   render_template, url_for, send_from_directory)
from flask_login import current_user, login_required

from . import bp
from . import tasks
from .forms import (AddCorpusFileForm, AddCorpusForm, EditCorpusFileForm,
                    ImportCorpusForm)
from .. import db
from ..models import Corpus, CorpusFile
from .import_corpus import check_zip_contents


def _abort_unless_permitted(corpus):
    """Abort with 403 unless the current user created *corpus* or is admin."""
    if not (corpus.creator == current_user
            or current_user.is_administrator()):
        abort(403)


@bp.route('/add', methods=['GET', 'POST'])
@login_required
def add_corpus():
    """Render the "add corpus" form and create a corpus on valid submission.

    The database row is flushed (not committed) first so that ``corpus.path``
    can be created on disk; the row is only committed when the directory
    exists, otherwise the transaction is rolled back and the request is
    aborted with 500.
    """
    form = AddCorpusForm(prefix='add-corpus-form')
    if form.validate_on_submit():
        corpus = Corpus(
            creator=current_user,
            description=form.description.data,
            title=form.title.data
        )
        db.session.add(corpus)
        db.session.flush()
        db.session.refresh(corpus)
        try:
            os.makedirs(corpus.path)
        except OSError as e:
            current_app.logger.error(f'Could not add corpus: {e}')
            db.session.rollback()
            flash('Internal Server Error', 'error')
            abort(500)
        else:
            db.session.commit()
            flash('Corpus "{}" added!'.format(corpus.title), 'corpus')
            return redirect(url_for('.corpus', corpus_id=corpus.id))
    return render_template('corpora/add_corpus.html.j2', form=form,
                           title='Add corpus')


@bp.route('/import', methods=['GET', 'POST'])
@login_required
def import_corpus():
    """Create a corpus from an uploaded archive of an exported corpus.

    NOTE: the view is currently disabled by the unconditional ``abort(503)``
    below; everything after it is unreachable until that line is removed.

    On submission the response is a JSON body (``form.errors`` with status
    400, or a ``redirect_url`` the client is expected to follow).
    """
    abort(503)
    form = ImportCorpusForm()
    if form.is_submitted():
        if not form.validate():
            return make_response(form.errors, 400)
        corpus = Corpus(
            creator=current_user,
            description=form.description.data,
            title=form.title.data
        )
        db.session.add(corpus)
        db.session.flush()
        db.session.refresh(corpus)
        try:
            os.makedirs(corpus.path)
        except OSError as e:
            current_app.logger.error(f'Could not import corpus: {e}')
            db.session.rollback()
            flash('Internal Server Error', 'error')
            return make_response(
                {'redirect_url': url_for('.import_corpus')}, 500)
        # Upload zip. The filename is supplied by the client, so only its
        # final path component is used to keep the file inside corpus.path.
        archive_name = os.path.basename(form.file.data.filename)
        archive_file = os.path.join(corpus.path, archive_name)
        form.file.data.save(archive_file)
        # Some checks to verify it is a valid exported corpus
        with ZipFile(archive_file, 'r') as archive:
            contents = archive.namelist()
        if set(check_zip_contents).issubset(contents):
            # Unzip
            shutil.unpack_archive(archive_file, corpus.path)
            # Register vrt files to corpus
            for vrt_path in glob.glob(os.path.join(corpus.path, '*.vrt')):
                text_node = ET.parse(vrt_path).find('text')
                corpus_file = CorpusFile(
                    address=text_node.get('address', 'NULL'),
                    author=text_node.get('author', 'NULL'),
                    booktitle=text_node.get('booktitle', 'NULL'),
                    chapter=text_node.get('chapter', 'NULL'),
                    corpus=corpus,
                    editor=text_node.get('editor', 'NULL'),
                    filename=os.path.basename(vrt_path),
                    institution=text_node.get('institution', 'NULL'),
                    journal=text_node.get('journal', 'NULL'),
                    pages=text_node.get('pages', 'NULL'),
                    publisher=text_node.get('publisher', 'NULL'),
                    publishing_year=text_node.get('publishing_year', ''),
                    school=text_node.get('school', 'NULL'),
                    title=text_node.get('title', 'NULL')
                )
                db.session.add(corpus_file)
            # finish import and redirect to imported corpus
            corpus.status = 'prepared'
            db.session.commit()
            os.remove(archive_file)
            flash('Corpus "{}" imported!'.format(corpus.title), 'corpus')
            return make_response(
                {'redirect_url': url_for('.corpus', corpus_id=corpus.id)},
                201)
        else:
            # If imported zip is not valid delete corpus and give feedback
            flash('Can not import corpus "{}": Invalid archive file!'
                  .format(corpus.title), 'error')
            tasks.delete_corpus(corpus.id)
            # NOTE(review): 201 is kept from the original for this failure
            # path because the client may rely on it - confirm and consider
            # a 4xx status instead.
            return make_response(
                {'redirect_url': url_for('.import_corpus')}, 201)
    return render_template('corpora/import_corpus.html.j2', form=form,
                           title='Import Corpus')


@bp.route('/<int:corpus_id>')
@login_required
def corpus(corpus_id):
    """Show a corpus and its files; 404 if unknown, 403 if not permitted."""
    corpus = Corpus.query.get_or_404(corpus_id)
    _abort_unless_permitted(corpus)
    corpus_files = [corpus_file.to_dict() for corpus_file in corpus.files]
    return render_template('corpora/corpus.html.j2', corpus=corpus,
                           corpus_files=corpus_files, title='Corpus')


@bp.route('/<int:corpus_id>/analyse')
@login_required
def analyse_corpus(corpus_id):
    """Render the analysis page of a corpus.

    Access is restricted to the creator and administrators, in line with
    the other corpus views in this module.
    """
    corpus = Corpus.query.get_or_404(corpus_id)
    _abort_unless_permitted(corpus)
    return render_template(
        'corpora/analyse_corpus.html.j2',
        corpus=corpus,
        title=f'Analyse Corpus {corpus.title}'
    )


@bp.route('/<int:corpus_id>/download')
@login_required
def download_corpus(corpus_id):
    """Send the archive file of a corpus as an attachment.

    NOTE: the view is currently disabled by the unconditional ``abort(503)``
    below; everything after it is unreachable until that line is removed.
    """
    abort(503)
    corpus = Corpus.query.get_or_404(corpus_id)
    _abort_unless_permitted(corpus)
    # Directory and file name are passed positionally because the keyword
    # for the file name differs between Flask versions.
    return send_from_directory(
        os.path.join(corpus.creator.path, 'corpora'),
        corpus.archive_file,
        as_attachment=True,
        mimetype='application/zip'
    )


@bp.route('/<int:corpus_id>/delete')
@login_required
def delete_corpus(corpus_id):
    """Hand a corpus over to ``tasks.delete_corpus`` and go to the dashboard.
    """
    corpus = Corpus.query.get_or_404(corpus_id)
    _abort_unless_permitted(corpus)
    flash('Corpus "{}" marked for deletion!'.format(corpus.title), 'corpus')
    tasks.delete_corpus(corpus_id)
    return redirect(url_for('main.dashboard'))


@bp.route('/<int:corpus_id>/files/add', methods=['GET', 'POST'])
@login_required
def add_corpus_file(corpus_id):
    """Render the "add corpus file" form and store a submitted file.

    On submission the response is a JSON body: ``form.errors`` with status
    400 when validation fails, otherwise a ``redirect_url`` with status 201.
    Adding a file resets the corpus status to ``'unprepared'``.
    """
    corpus = Corpus.query.get_or_404(corpus_id)
    _abort_unless_permitted(corpus)
    form = AddCorpusFileForm(corpus, prefix='add-corpus-file-form')
    if form.is_submitted():
        if not form.validate():
            return make_response(form.errors, 400)
        # The filename is supplied by the client, so only its final path
        # component is used to keep the file inside corpus.path.
        filename = os.path.basename(form.file.data.filename)
        # Save the file
        form.file.data.save(os.path.join(corpus.path, filename))
        corpus_file = CorpusFile(
            address=form.address.data,
            author=form.author.data,
            booktitle=form.booktitle.data,
            chapter=form.chapter.data,
            corpus=corpus,
            editor=form.editor.data,
            filename=filename,
            institution=form.institution.data,
            journal=form.journal.data,
            pages=form.pages.data,
            publisher=form.publisher.data,
            publishing_year=form.publishing_year.data,
            school=form.school.data,
            title=form.title.data
        )
        db.session.add(corpus_file)
        corpus.status = 'unprepared'
        db.session.commit()
        flash('Corpus file "{}" added!'.format(corpus_file.filename),
              'corpus')
        return make_response(
            {'redirect_url': url_for('.corpus', corpus_id=corpus.id)}, 201)
    return render_template('corpora/add_corpus_file.html.j2', corpus=corpus,
                           form=form, title='Add corpus file')


@bp.route('/<int:corpus_id>/files/<int:corpus_file_id>/delete')
@login_required
def delete_corpus_file(corpus_id, corpus_file_id):
    """Hand a corpus file over to ``tasks.delete_corpus_file``.

    Responds with 404 when the file does not exist or does not belong to the
    corpus named in the URL, and with 403 when the user is not permitted.
    """
    corpus_file = CorpusFile.query.get_or_404(corpus_file_id)
    if corpus_file.corpus_id != corpus_id:
        abort(404)
    _abort_unless_permitted(corpus_file.corpus)
    flash('Corpus file "{}" marked for deletion!'
          .format(corpus_file.filename), 'corpus')
    tasks.delete_corpus_file(corpus_file_id)
    return redirect(url_for('.corpus', corpus_id=corpus_id))


@bp.route('/<int:corpus_id>/files/<int:corpus_file_id>/download')
@login_required
def download_corpus_file(corpus_id, corpus_file_id):
    """Send a corpus file as an attachment.

    Responds with 404 when the file does not exist or does not belong to the
    corpus named in the URL, and with 403 when the user is not permitted.
    """
    corpus_file = CorpusFile.query.get_or_404(corpus_file_id)
    if corpus_file.corpus_id != corpus_id:
        abort(404)
    _abort_unless_permitted(corpus_file.corpus)
    # Directory and file name are passed positionally because the keyword
    # for the file name differs between Flask versions.
    return send_from_directory(os.path.dirname(corpus_file.path),
                               corpus_file.filename,
                               as_attachment=True)


@bp.route('/<int:corpus_id>/files/<int:corpus_file_id>',
          methods=['GET', 'POST'])
@login_required
def corpus_file(corpus_id, corpus_file_id):
    """Show and edit the metadata of a corpus file.

    A valid submission writes the form values to the corpus file, resets the
    corpus status to ``'unprepared'`` and redirects to the corpus page.
    Otherwise the form is rendered, pre-filled with the stored values.
    """
    corpus = Corpus.query.get_or_404(corpus_id)
    _abort_unless_permitted(corpus)
    corpus_file = CorpusFile.query.get_or_404(corpus_file_id)
    if corpus_file.corpus != corpus:
        abort(404)
    form = EditCorpusFileForm(prefix='edit-corpus-file-form')
    if form.validate_on_submit():
        corpus_file.address = form.address.data
        corpus_file.author = form.author.data
        corpus_file.booktitle = form.booktitle.data
        corpus_file.chapter = form.chapter.data
        corpus_file.editor = form.editor.data
        corpus_file.institution = form.institution.data
        corpus_file.journal = form.journal.data
        corpus_file.pages = form.pages.data
        corpus_file.publisher = form.publisher.data
        corpus_file.publishing_year = form.publishing_year.data
        corpus_file.school = form.school.data
        corpus_file.title = form.title.data
        corpus.status = 'unprepared'
        db.session.commit()
        flash('Corpus file "{}" edited!'.format(corpus_file.filename),
              'corpus')
        return redirect(url_for('.corpus', corpus_id=corpus_id))
    # If no form is submitted or valid, fill out fields with current values
    form.address.data = corpus_file.address
    form.author.data = corpus_file.author
    form.booktitle.data = corpus_file.booktitle
    form.chapter.data = corpus_file.chapter
    form.editor.data = corpus_file.editor
    form.institution.data = corpus_file.institution
    form.journal.data = corpus_file.journal
    form.pages.data = corpus_file.pages
    form.publisher.data = corpus_file.publisher
    form.publishing_year.data = corpus_file.publishing_year
    form.school.data = corpus_file.school
    form.title.data = corpus_file.title
    return render_template('corpora/corpus_file.html.j2', corpus=corpus,
                           corpus_file=corpus_file, form=form,
                           title='Edit corpus file')


@bp.route('/<int:corpus_id>/prepare')
@login_required
def prepare_corpus(corpus_id):
    """Trigger ``tasks.build_corpus`` for a corpus that has at least one file.

    A corpus without files is not built; an error message is flashed
    instead. Either way the user is redirected to the corpus page.
    """
    corpus = Corpus.query.get_or_404(corpus_id)
    _abort_unless_permitted(corpus)
    if corpus.files.all():
        tasks.build_corpus(corpus_id)
        flash('Corpus "{}" has been marked to get build!'
              .format(corpus.title), 'corpus')
    else:
        flash('Can not build corpus "{}": No corpus file(s)!'
              .format(corpus.title), 'error')
    return redirect(url_for('.corpus', corpus_id=corpus_id))