from app import db
from app.models import Corpus, CorpusFile, CorpusStatus
from flask import (
    abort,
    current_app,
    flash,
    make_response,
    redirect,
    render_template,
    url_for,
    send_from_directory
)
from flask_login import current_user, login_required
from werkzeug.utils import secure_filename
from zipfile import BadZipFile, ZipFile
from . import bp
from . import tasks
from .forms import (
    AddCorpusFileForm,
    AddCorpusForm,
    EditCorpusFileForm,
    ImportCorpusForm
)
import os
import shutil
import tempfile
import glob
import xml.etree.ElementTree as ET


# Attributes of a VRT <text> element that are copied onto a CorpusFile only
# when they are actually present in the file.
_OPTIONAL_VRT_ATTRIBUTES = (
    'address',
    'booktitle',
    'chapter',
    'editor',
    'institution',
    'journal',
    'pages',
    'publisher',
    'school'
)

# Metadata fields that the corpus file forms and the CorpusFile model share
# under the same name.
_CORPUS_FILE_METADATA_FIELDS = (
    'address',
    'author',
    'booktitle',
    'chapter',
    'editor',
    'institution',
    'journal',
    'pages',
    'publisher',
    'publishing_year',
    'school',
    'title'
)


def _require_corpus_access(corpus):
    """Abort with 403 unless the current user owns ``corpus`` or is admin."""
    if not (corpus.user == current_user or current_user.is_administrator()):
        abort(403)


def _get_corpus_file_or_404(corpus_id, corpus_file_id):
    """Return the corpus file with the given ids.

    Aborts with 404 when no file with ``corpus_file_id`` belongs to the
    corpus ``corpus_id`` and with 403 when the current user may not access
    the owning corpus.
    """
    corpus_file = CorpusFile.query.filter(
        CorpusFile.corpus_id == corpus_id,
        CorpusFile.id == corpus_file_id
    ).first_or_404()
    _require_corpus_access(corpus_file.corpus)
    return corpus_file


def _corpus_file_from_vrt(corpus, vrt_file):
    """Build a CorpusFile for ``corpus`` from the metadata in ``vrt_file``.

    The metadata is read from the attributes of the first ``text`` element
    directly below the document root.

    Raises:
        xml.etree.ElementTree.ParseError: the file is not well-formed XML.
        ValueError: the ``text`` element or its ``publishing_year`` attribute
            is missing, or ``publishing_year`` is not an integer.
    """
    text_node = ET.parse(vrt_file).find('text')
    if text_node is None:
        raise ValueError('no <text> element found')
    publishing_year = text_node.get('publishing_year')
    if publishing_year is None:
        raise ValueError('<text> element has no publishing_year attribute')
    # Only pass optional attributes that exist, so absent ones keep the
    # model's defaults.
    optional_metadata = {
        name: text_node.attrib[name]
        for name in _OPTIONAL_VRT_ATTRIBUTES
        if name in text_node.attrib
    }
    return CorpusFile(
        author=text_node.get('author'),
        corpus=corpus,
        filename=os.path.basename(vrt_file),
        mimetype='application/vrt+xml',
        publishing_year=int(publishing_year),
        title=text_node.get('title'),
        **optional_metadata
    )


@bp.route('/add', methods=['GET', 'POST'])
@login_required
def add_corpus():
    """Show the "add corpus" form and create the corpus on submission."""
    form = AddCorpusForm(prefix='add-corpus-form')
    if form.validate_on_submit():
        corpus = Corpus(
            user=current_user,
            description=form.description.data,
            title=form.title.data
        )
        db.session.add(corpus)
        db.session.flush()
        db.session.refresh(corpus)
        try:
            corpus.makedirs()
        except OSError as e:
            current_app.logger.error(e)
            db.session.rollback()
            flash('Internal Server Error', category='error')
            abort(500)
        db.session.commit()
        flash(f'Corpus "{corpus.title}" added', category='corpus')
        return redirect(url_for('.corpus', corpus_id=corpus.id))
    return render_template(
        'corpora/add_corpus.html.j2',
        form=form,
        title='Add corpus'
    )


@bp.route('/import', methods=['GET', 'POST'])
@login_required
def import_corpus():
    """Show the import form or create a corpus from an uploaded archive.

    On submission the archive is unpacked into a temporary directory and
    every top-level ``*.vrt`` file becomes a CorpusFile of the new corpus.
    Responses to a submission are JSON: form errors (400), a
    ``redirect_url`` after a server-side failure (500) or after success
    (201).
    """
    form = ImportCorpusForm(prefix='import-corpus-form')
    if not form.is_submitted():
        return render_template(
            'corpora/import_corpus.html.j2',
            form=form,
            title='Import Corpus'
        )
    if not form.validate():
        return make_response(form.errors, 400)
    corpus = Corpus(
        user=current_user,
        description=form.description.data,
        title=form.title.data
    )
    db.session.add(corpus)
    db.session.flush(objects=[corpus])
    db.session.refresh(corpus)
    retry_response = {'redirect_url': url_for('.import_corpus')}
    try:
        corpus.makedirs()
    except OSError as e:
        current_app.logger.error(e)
        db.session.rollback()
        flash('Internal Server Error', category='error')
        return make_response(retry_response, 500)
    # NOTE(review): directories created by corpus.makedirs() and files copied
    # below are not removed when a later step fails - confirm whether a
    # cleanup task handles them.
    # Save the uploaded zip file in a temporary directory
    tmp_dir_base = os.path.join(current_app.config['NOPAQUE_DATA_DIR'], 'tmp')
    os.makedirs(tmp_dir_base, exist_ok=True)
    with tempfile.TemporaryDirectory(dir=tmp_dir_base) as tmp_dir:
        archive_file = os.path.join(tmp_dir, 'corpus.zip')
        try:
            form.archive.data.save(archive_file)
        except OSError as e:
            current_app.logger.error(e)
            db.session.rollback()
            flash('Internal Server Error1', category='error')
            return make_response(retry_response, 500)
        try:
            shutil.unpack_archive(archive_file, extract_dir=tmp_dir)
        except (shutil.ReadError, BadZipFile) as e:
            # The upload is not a readable zip archive: report it like a
            # form validation error instead of crashing.
            current_app.logger.error(e)
            db.session.rollback()
            return make_response(
                {'archive': ['The uploaded file is not a valid zip archive']},
                400
            )
        vrt_filenames = [
            x for x in os.listdir(tmp_dir) if x.endswith('.vrt')
        ]
        for vrt_filename in vrt_filenames:
            vrt_file = os.path.join(tmp_dir, vrt_filename)
            try:
                corpus_file = _corpus_file_from_vrt(corpus, vrt_file)
            except (ET.ParseError, ValueError) as e:
                current_app.logger.error(e)
                db.session.rollback()
                return make_response(
                    {'archive': [f'Invalid file "{vrt_filename}": {e}']},
                    400
                )
            db.session.add(corpus_file)
            db.session.flush(objects=[corpus_file])
            db.session.refresh(corpus)
            current_app.logger.debug(
                'Copying %s to %s', vrt_file, corpus_file.path
            )
            try:
                shutil.copy2(vrt_file, corpus_file.path)
            except OSError as e:
                current_app.logger.error(e)
                db.session.rollback()
                flash('Internal Server Error2', category='error')
                return make_response(retry_response, 500)
    db.session.commit()
    flash(f'Corpus "{corpus.title}" imported', 'corpus')
    return make_response(
        {'redirect_url': url_for('.corpus', corpus_id=corpus.id)},
        201
    )


@bp.route('/<int:corpus_id>')
@login_required
def corpus(corpus_id):
    """Render the detail page of a corpus for its owner or an admin."""
    corpus = Corpus.query.get_or_404(corpus_id)
    _require_corpus_access(corpus)
    return render_template(
        'corpora/corpus.html.j2',
        corpus=corpus,
        title='Corpus'
    )


@bp.route('/<int:corpus_id>/analyse')
@login_required
def analyse_corpus(corpus_id):
    """Render the analysis page of a corpus for its owner or an admin."""
    corpus = Corpus.query.get_or_404(corpus_id)
    # Same ownership rule as every other corpus view.
    _require_corpus_access(corpus)
    return render_template(
        'corpora/analyse_corpus.html.j2',
        corpus=corpus,
        title=f'Analyse Corpus {corpus.title}'
    )


@bp.route('/<int:corpus_id>/build')
@login_required
def build_corpus(corpus_id):
    """Hand the corpus to the build task if it has at least one file."""
    corpus = Corpus.query.get_or_404(corpus_id)
    _require_corpus_access(corpus)
    if corpus.files.all():
        tasks.build_corpus(corpus_id)
        flash(
            f'Corpus "{corpus.title}" marked for building',
            category='corpus'
        )
    else:
        flash(
            f'Can\'t build corpus "{corpus.title}": No corpus file(s)',
            category='error'
        )
    return redirect(url_for('.corpus', corpus_id=corpus_id))


@bp.route('/<int:corpus_id>/delete')
@login_required
def delete_corpus(corpus_id):
    """Hand the corpus to the delete task and return to the dashboard."""
    corpus = Corpus.query.get_or_404(corpus_id)
    _require_corpus_access(corpus)
    flash(f'Corpus "{corpus.title}" marked for deletion', 'corpus')
    tasks.delete_corpus(corpus_id)
    return redirect(url_for('main.dashboard'))


@bp.route('/<int:corpus_id>/export')
@login_required
def export_corpus(corpus_id):
    """Send the corpus archive as a download (currently disabled)."""
    # The export is switched off: every request is answered with 503 and the
    # code below is never reached.
    abort(503)
    corpus = Corpus.query.get_or_404(corpus_id)
    _require_corpus_access(corpus)
    return send_from_directory(
        as_attachment=True,
        directory=os.path.join(corpus.user.path, 'corpora'),
        filename=corpus.archive_file,
        mimetype='zip'
    )


@bp.route(
    '/<int:corpus_id>/files/<int:corpus_file_id>',
    methods=['GET', 'POST']
)
@login_required
def corpus_file(corpus_id, corpus_file_id):
    """Show the edit form of a corpus file and store submitted changes.

    A successful edit resets the corpus status to UNPREPARED.
    """
    corpus_file = _get_corpus_file_or_404(corpus_id, corpus_file_id)
    form = EditCorpusFileForm(prefix='edit-corpus-file-form')
    if form.validate_on_submit():
        for name in _CORPUS_FILE_METADATA_FIELDS:
            setattr(corpus_file, name, getattr(form, name).data)
        corpus_file.corpus.status = CorpusStatus.UNPREPARED
        db.session.commit()
        flash(
            f'Corpus file "{corpus_file.filename}" edited',
            category='corpus'
        )
        return redirect(url_for('.corpus', corpus_id=corpus_id))
    # If no form is submitted or valid, fill out fields with current values
    for name in _CORPUS_FILE_METADATA_FIELDS:
        getattr(form, name).data = getattr(corpus_file, name)
    return render_template(
        'corpora/corpus_file.html.j2',
        corpus=corpus_file.corpus,
        corpus_file=corpus_file,
        form=form,
        title='Edit corpus file'
    )


@bp.route('/<int:corpus_id>/files/add', methods=['GET', 'POST'])
@login_required
def add_corpus_file(corpus_id):
    """Show the upload form or add an uploaded VRT file to the corpus.

    Responses to a submission are JSON: form errors (400), a
    ``redirect_url`` after a failed save (500) or after success (201).
    A successful upload resets the corpus status to UNPREPARED.
    """
    corpus = Corpus.query.get_or_404(corpus_id)
    _require_corpus_access(corpus)
    form = AddCorpusFileForm(prefix='add-corpus-file-form')
    if not form.is_submitted():
        return render_template(
            'corpora/add_corpus_file.html.j2',
            corpus=corpus,
            form=form,
            title='Add corpus file'
        )
    if not form.validate():
        return make_response(form.errors, 400)
    # Save the file
    metadata = {
        name: getattr(form, name).data
        for name in _CORPUS_FILE_METADATA_FIELDS
    }
    corpus_file = CorpusFile(
        corpus=corpus,
        filename=form.vrt.data.filename,
        mimetype='application/vrt+xml',
        **metadata
    )
    db.session.add(corpus_file)
    db.session.flush(objects=[corpus_file])
    db.session.refresh(corpus_file)
    try:
        form.vrt.data.save(corpus_file.path)
    except OSError as e:
        current_app.logger.error(e)
        db.session.rollback()
        flash('Internal Server Error', category='error')
        return make_response(
            {'redirect_url': url_for('.add_corpus_file', corpus_id=corpus.id)},
            500
        )
    corpus.status = CorpusStatus.UNPREPARED
    db.session.commit()
    flash(f'Corpus file "{corpus_file.filename}" added', category='corpus')
    return make_response(
        {'redirect_url': url_for('.corpus', corpus_id=corpus.id)},
        201
    )


@bp.route('/<int:corpus_id>/files/<int:corpus_file_id>/delete')
@login_required
def delete_corpus_file(corpus_id, corpus_file_id):
    """Hand the corpus file to the delete task and return to the corpus."""
    corpus_file = _get_corpus_file_or_404(corpus_id, corpus_file_id)
    flash(
        f'Corpus file "{corpus_file.filename}" marked for deletion',
        category='corpus'
    )
    tasks.delete_corpus_file(corpus_file_id)
    return redirect(url_for('.corpus', corpus_id=corpus_id))


@bp.route('/<int:corpus_id>/files/<int:corpus_file_id>/download')
@login_required
def download_corpus_file(corpus_id, corpus_file_id):
    """Send the stored corpus file as a download named like the upload."""
    corpus_file = _get_corpus_file_or_404(corpus_id, corpus_file_id)
    return send_from_directory(
        as_attachment=True,
        attachment_filename=corpus_file.filename,
        directory=os.path.dirname(corpus_file.path),
        filename=os.path.basename(corpus_file.path)
    )