"""Flask views for creating, importing, exporting and managing corpora.

All views are registered on the blueprint ``bp`` and require a logged-in
user. Views that operate on an existing corpus or corpus file additionally
require the current user to be its owner or an administrator.
"""
from app import db
from app.models import Corpus, CorpusFile, CorpusStatus
from flask import (
    abort,
    current_app,
    flash,
    make_response,
    redirect,
    render_template,
    url_for,
    send_from_directory
)
from flask_login import current_user, login_required
from werkzeug.utils import secure_filename
from zipfile import BadZipFile, ZipFile
from . import bp
from . import tasks
from .forms import (
    AddCorpusFileForm,
    AddCorpusForm,
    EditCorpusFileForm,
    ImportCorpusForm
)
from .import_corpus import check_zip_contents
import os
import shutil
import glob
import xml.etree.ElementTree as ET


def _ensure_owner_or_admin(owner):
    """Abort with 403 unless the current user is *owner* or an admin."""
    if not (owner == current_user or current_user.is_administrator()):
        abort(403)


def _get_corpus_file_or_404(corpus_id, corpus_file_id):
    """Return the corpus file with this id inside this corpus, or 404."""
    return CorpusFile.query.filter(
        CorpusFile.corpus_id == corpus_id,
        CorpusFile.id == corpus_file_id
    ).first_or_404()


@bp.route('/add', methods=['GET', 'POST'])
@login_required
def add_corpus():
    """Show the "add corpus" form and create a corpus on valid submit.

    The corpus row is flushed first so that its directories can be
    created before the transaction is committed. If creating the
    directories fails, the transaction is rolled back and the request is
    aborted with 500.
    """
    form = AddCorpusForm(prefix='add-corpus-form')
    if form.validate_on_submit():
        corpus = Corpus(
            user=current_user,
            description=form.description.data,
            title=form.title.data
        )
        db.session.add(corpus)
        db.session.flush()
        db.session.refresh(corpus)
        try:
            corpus.makedirs()
        except OSError as e:
            current_app.logger.error(e)
            db.session.rollback()
            flash('Internal Server Error', category='error')
            abort(500)
        db.session.commit()
        flash(f'Corpus "{corpus.title}" added', category='corpus')
        return redirect(url_for('.corpus', corpus_id=corpus.id))
    return render_template(
        'corpora/add_corpus.html.j2',
        form=form,
        title='Add corpus'
    )


@bp.route('/<int:corpus_id>/export')
@login_required
def export_corpus(corpus_id):
    """Send the corpus archive as a download (currently disabled).

    The view aborts with 503 unconditionally; the code below the abort is
    kept for when the feature is re-enabled.
    """
    abort(503)
    corpus = Corpus.query.get_or_404(corpus_id)
    _ensure_owner_or_admin(corpus.user)
    return send_from_directory(
        as_attachment=True,
        directory=os.path.join(corpus.user.path, 'corpora'),
        filename=corpus.archive_file,
        mimetype='application/zip'
    )


@bp.route('/import', methods=['GET', 'POST'])
@login_required
def import_corpus():
    """Create a corpus from an uploaded archive (currently disabled).

    The view aborts with 503 unconditionally; the code below the abort is
    kept for when the feature is re-enabled. On submit it answers with a
    JSON body: the form errors (400) or a ``redirect_url`` for the client
    to follow.
    """
    abort(503)
    form = ImportCorpusForm()
    if form.is_submitted():
        if not form.validate():
            return make_response(form.errors, 400)
        corpus = Corpus(
            user=current_user,
            description=form.description.data,
            title=form.title.data
        )
        db.session.add(corpus)
        db.session.flush()
        db.session.refresh(corpus)
        try:
            os.makedirs(corpus.path)
        except OSError as e:
            current_app.logger.error(e)
            db.session.rollback()
            flash('Internal Server Error', category='error')
            return make_response(
                {'redirect_url': url_for('.import_corpus')},
                500
            )
        # Upload zip. The client-supplied name is sanitised so it cannot
        # point outside of the corpus directory; the fallback covers names
        # that are empty after sanitising.
        archive_name = (
            secure_filename(form.file.data.filename) or 'archive.zip'
        )
        archive_file = os.path.join(corpus.path, archive_name)
        form.file.data.save(archive_file)
        # Some checks to verify it is a valid exported corpus. A file that
        # is not a zip archive at all is handled like an archive with
        # missing contents.
        try:
            with ZipFile(archive_file, 'r') as archive:
                contents = archive.namelist()
        except BadZipFile:
            contents = []
        if set(check_zip_contents).issubset(contents):
            # Unzip
            # NOTE(review): member paths of the archive are not validated
            # before extraction - confirm this is acceptable for
            # user-supplied archives.
            shutil.unpack_archive(archive_file, corpus.path)
            # Register vrt files to corpus
            for vrt_path in glob.glob(corpus.path + '/*.vrt'):
                text_node = ET.parse(vrt_path).find('text')
                corpus_file = CorpusFile(
                    address=text_node.get('address', 'NULL'),
                    author=text_node.get('author', 'NULL'),
                    booktitle=text_node.get('booktitle', 'NULL'),
                    chapter=text_node.get('chapter', 'NULL'),
                    corpus=corpus,
                    editor=text_node.get('editor', 'NULL'),
                    filename=os.path.basename(vrt_path),
                    institution=text_node.get('institution', 'NULL'),
                    journal=text_node.get('journal', 'NULL'),
                    pages=text_node.get('pages', 'NULL'),
                    publisher=text_node.get('publisher', 'NULL'),
                    publishing_year=text_node.get('publishing_year', ''),
                    school=text_node.get('school', 'NULL'),
                    title=text_node.get('title', 'NULL')
                )
                db.session.add(corpus_file)
            # finish import and redirect to imported corpus
            corpus.status = CorpusStatus.BUILT
            db.session.commit()
            os.remove(archive_file)
            flash(f'Corpus "{corpus.title}" imported', 'corpus')
            return make_response(
                {'redirect_url': url_for('.corpus', corpus_id=corpus.id)},
                201
            )
        # If imported zip is not valid delete corpus and give feedback
        flash(
            f'Can\'t import corpus "{corpus.title}": Invalid archive file',
            category='error'
        )
        tasks.delete_corpus(corpus.id)
        return make_response(
            {'redirect_url': url_for('.import_corpus')},
            201
        )
    return render_template(
        'corpora/import_corpus.html.j2',
        form=form,
        title='Import Corpus'
    )


@bp.route('/<int:corpus_id>')
@login_required
def corpus(corpus_id):
    """Render the overview page of a corpus (owner or admin only)."""
    corpus = Corpus.query.get_or_404(corpus_id)
    _ensure_owner_or_admin(corpus.user)
    return render_template(
        'corpora/corpus.html.j2',
        corpus=corpus,
        title='Corpus'
    )


@bp.route('/<int:corpus_id>/analyse')
@login_required
def analyse_corpus(corpus_id):
    """Render the analysis page of a corpus (owner or admin only)."""
    corpus = Corpus.query.get_or_404(corpus_id)
    # Same access rule as every other corpus view in this module.
    _ensure_owner_or_admin(corpus.user)
    return render_template(
        'corpora/analyse_corpus.html.j2',
        corpus=corpus,
        title=f'Analyse Corpus {corpus.title}'
    )


@bp.route('/<int:corpus_id>/delete')
@login_required
def delete_corpus(corpus_id):
    """Hand a corpus over to the delete task and go to the dashboard."""
    corpus = Corpus.query.get_or_404(corpus_id)
    _ensure_owner_or_admin(corpus.user)
    flash(f'Corpus "{corpus.title}" marked for deletion', 'corpus')
    tasks.delete_corpus(corpus_id)
    return redirect(url_for('main.dashboard'))


@bp.route('/<int:corpus_id>/files/add', methods=['GET', 'POST'])
@login_required
def add_corpus_file(corpus_id):
    """Show the upload form and add the uploaded file to a corpus.

    On submit the view answers with a JSON body: the form errors (400)
    or a ``redirect_url`` for the client to follow. Adding a file resets
    the corpus status to ``UNPREPARED``.
    """
    corpus = Corpus.query.get_or_404(corpus_id)
    _ensure_owner_or_admin(corpus.user)
    form = AddCorpusFileForm(corpus, prefix='add-corpus-file-form')
    if form.is_submitted():
        if not form.validate():
            return make_response(form.errors, 400)
        # Save the file
        filename = secure_filename(form.file.data.filename)
        corpus_file = CorpusFile(
            address=form.address.data,
            author=form.author.data,
            booktitle=form.booktitle.data,
            chapter=form.chapter.data,
            corpus=corpus,
            editor=form.editor.data,
            filename=filename,
            institution=form.institution.data,
            journal=form.journal.data,
            mimetype='application/vrt+xml',
            pages=form.pages.data,
            publisher=form.publisher.data,
            publishing_year=form.publishing_year.data,
            school=form.school.data,
            title=form.title.data
        )
        db.session.add(corpus_file)
        # Flush and refresh before saving, because the target location is
        # read from corpus_file.path.
        db.session.flush(objects=[corpus_file])
        db.session.refresh(corpus_file)
        try:
            form.file.data.save(corpus_file.path)
        except OSError as e:
            current_app.logger.error(e)
            db.session.rollback()
            flash('Internal Server Error', category='error')
            return make_response(
                {
                    'redirect_url': url_for(
                        '.add_corpus_file',
                        corpus_id=corpus.id
                    )
                },
                500
            )
        corpus.status = CorpusStatus.UNPREPARED
        db.session.commit()
        flash(
            f'Corpus file "{corpus_file.filename}" added',
            category='corpus'
        )
        return make_response(
            {'redirect_url': url_for('.corpus', corpus_id=corpus.id)},
            201
        )
    return render_template(
        'corpora/add_corpus_file.html.j2',
        corpus=corpus,
        form=form,
        title='Add corpus file'
    )


@bp.route('/<int:corpus_id>/files/<int:corpus_file_id>/delete')
@login_required
def delete_corpus_file(corpus_id, corpus_file_id):
    """Hand a corpus file over to the delete task and show its corpus."""
    corpus_file = _get_corpus_file_or_404(corpus_id, corpus_file_id)
    _ensure_owner_or_admin(corpus_file.corpus.user)
    flash(
        f'Corpus file "{corpus_file.filename}" marked for deletion',
        category='corpus'
    )
    tasks.delete_corpus_file(corpus_file_id)
    return redirect(url_for('.corpus', corpus_id=corpus_id))


@bp.route('/<int:corpus_id>/files/<int:corpus_file_id>/download')
@login_required
def download_corpus_file(corpus_id, corpus_file_id):
    """Send a corpus file as an attachment (owner or admin only)."""
    corpus_file = _get_corpus_file_or_404(corpus_id, corpus_file_id)
    _ensure_owner_or_admin(corpus_file.corpus.user)
    return send_from_directory(
        as_attachment=True,
        directory=os.path.dirname(corpus_file.path),
        filename=corpus_file.filename
    )


@bp.route('/<int:corpus_id>/files/<int:corpus_file_id>', methods=['GET', 'POST'])  # noqa
@login_required
def corpus_file(corpus_id, corpus_file_id):
    """Show and process the form for editing a corpus file's metadata.

    A valid submit stores the new metadata and resets the status of the
    owning corpus to ``UNPREPARED``.
    """
    corpus_file = _get_corpus_file_or_404(corpus_id, corpus_file_id)
    _ensure_owner_or_admin(corpus_file.corpus.user)
    form = EditCorpusFileForm(prefix='edit-corpus-file-form')
    if form.validate_on_submit():
        corpus_file.address = form.address.data
        corpus_file.author = form.author.data
        corpus_file.booktitle = form.booktitle.data
        corpus_file.chapter = form.chapter.data
        corpus_file.editor = form.editor.data
        corpus_file.institution = form.institution.data
        corpus_file.journal = form.journal.data
        corpus_file.pages = form.pages.data
        corpus_file.publisher = form.publisher.data
        corpus_file.publishing_year = form.publishing_year.data
        corpus_file.school = form.school.data
        corpus_file.title = form.title.data
        corpus_file.corpus.status = CorpusStatus.UNPREPARED
        db.session.commit()
        flash(
            f'Corpus file "{corpus_file.filename}" edited',
            category='corpus'
        )
        return redirect(url_for('.corpus', corpus_id=corpus_id))
    # If no form is submitted or valid, fill out fields with current values
    form.address.data = corpus_file.address
    form.author.data = corpus_file.author
    form.booktitle.data = corpus_file.booktitle
    form.chapter.data = corpus_file.chapter
    form.editor.data = corpus_file.editor
    form.institution.data = corpus_file.institution
    form.journal.data = corpus_file.journal
    form.pages.data = corpus_file.pages
    form.publisher.data = corpus_file.publisher
    form.publishing_year.data = corpus_file.publishing_year
    form.school.data = corpus_file.school
    form.title.data = corpus_file.title
    return render_template(
        'corpora/corpus_file.html.j2',
        # Pass the owning corpus object. There is no local ``corpus`` in
        # this view, so the bare name would be the ``corpus`` view
        # function defined in this module.
        corpus=corpus_file.corpus,
        corpus_file=corpus_file,
        form=form,
        title='Edit corpus file'
    )


@bp.route('/<int:corpus_id>/build')
@login_required
def build_corpus(corpus_id):
    """Hand a corpus over to the build task if it has at least one file."""
    corpus = Corpus.query.get_or_404(corpus_id)
    _ensure_owner_or_admin(corpus.user)
    if corpus.files.all():
        tasks.build_corpus(corpus_id)
        flash(
            f'Corpus "{corpus.title}" marked for building',
            category='corpus'
        )
    else:
        flash(
            f'Can\'t build corpus "{corpus.title}": No corpus file(s)',
            category='error'
        )
    return redirect(url_for('.corpus', corpus_id=corpus_id))