'''
View functions of the ``corpora`` blueprint.

The views cover creating, importing, downloading, analysing, preparing and
deleting corpora, managing the files of a corpus and managing imported query
results.
'''
import glob
import json
import logging
import os
import shutil
import xml.etree.ElementTree as ET
from zipfile import BadZipFile, ZipFile

from flask import (abort, flash, make_response, redirect, request,
                   render_template, url_for, send_from_directory)
from flask_login import current_user, login_required
from jsonschema import validate

from . import corpora
from . import tasks
from .forms import (AddCorpusFileForm, AddCorpusForm, AddQueryResultForm,
                    EditCorpusFileForm, QueryDownloadForm, QueryForm,
                    DisplayOptionsForm, InspectDisplayOptionsForm,
                    ImportCorpusForm)
from .. import db
from ..models import Corpus, CorpusFile, QueryResult
from .import_corpus import check_zip_contents


# Metadata attributes that the add, edit and import views copy between forms,
# VRT files and CorpusFile objects.
_CORPUS_FILE_METADATA_FIELDS = (
    'address', 'author', 'booktitle', 'chapter', 'editor', 'institution',
    'journal', 'pages', 'publisher', 'publishing_year', 'school', 'title'
)

# Schema used to validate uploaded query result files. The path is relative
# to the working directory of the application process.
_QUERY_RESULT_SCHEMA_PATH = \
    'app/static/json_schema/nopaque_cqi_py_results_schema.json'


def _abort_unless_owner_or_admin(owner):
    ''' Abort with 403 unless the current user is owner or administrator. '''
    if not (owner == current_user or current_user.is_administrator()):
        abort(403)


def _uploaded_filename(file_storage):
    '''
    Return the client supplied filename reduced to its last path component.

    The name is joined to a server side directory, so directory parts such as
    "../" must not survive.
    NOTE(review): werkzeug.utils.secure_filename is the stricter choice if
    renaming uploads is acceptable.
    '''
    return os.path.basename(file_storage.filename)


def _is_exported_corpus_archive(archive_file):
    '''
    Tell whether archive_file is a zip archive that contains every member
    listed in check_zip_contents.
    '''
    try:
        with ZipFile(archive_file, 'r') as archive:
            contents = archive.namelist()
    except BadZipFile:
        # The upload is not a zip archive at all.
        return False
    return set(check_zip_contents).issubset(contents)


def _register_imported_corpus_files(corpus, archive_file):
    '''
    Unpack archive_file into corpus.path and add a CorpusFile to the database
    session for every unpacked "*.vrt" file.

    Returns True on success and False if the archive is not a valid exported
    corpus or one of its VRT files can not be read. Nothing is added to the
    session when False is returned.
    '''
    if not _is_exported_corpus_archive(archive_file):
        return False
    # The archive was just opened as zip, so do not let shutil guess the
    # format from the extension of the uploaded filename.
    shutil.unpack_archive(archive_file, corpus.path, format='zip')
    # Parse every file before creating any database object, so a broken file
    # can not leave a partially registered corpus behind.
    parsed_files = []
    for vrt_path in glob.glob(corpus.path + '/*.vrt'):
        try:
            text_node = ET.parse(vrt_path).find('text')
        except ET.ParseError:
            return False
        if text_node is None:
            return False
        parsed_files.append((os.path.basename(vrt_path), text_node))
    for filename, text_node in parsed_files:
        # Missing attributes fall back to 'NULL', except for the publishing
        # year which falls back to an empty string.
        metadata = {
            name: text_node.get(
                name, '' if name == 'publishing_year' else 'NULL')
            for name in _CORPUS_FILE_METADATA_FIELDS
        }
        db.session.add(
            CorpusFile(corpus=corpus, filename=filename, **metadata))
    return True


@corpora.route('/add', methods=['GET', 'POST'])
@login_required
def add_corpus():
    ''' View to create an empty corpus and its directory. '''
    form = AddCorpusForm()
    if form.validate_on_submit():
        corpus = Corpus(creator=current_user,
                        description=form.description.data,
                        title=form.title.data)
        db.session.add(corpus)
        db.session.commit()
        try:
            os.makedirs(corpus.path)
        except OSError:
            logging.error(
                'Make dir {} led to an OSError!'.format(corpus.path))
            # Without a directory the database entry is useless.
            db.session.delete(corpus)
            db.session.commit()
            abort(500)
        flash('Corpus "{}" added!'.format(corpus.title), 'corpus')
        return redirect(url_for('.corpus', corpus_id=corpus.id))
    return render_template('corpora/add_corpus.html.j2', form=form,
                           title='Add corpus')


@corpora.route('/import', methods=['GET', 'POST'])
@login_required
def import_corpus():
    '''
    View to create a corpus from an uploaded zip archive.

    Submitted forms are answered with a JSON body: either the form errors
    (400) or a "redirect_url" entry.
    '''
    form = ImportCorpusForm()
    if form.is_submitted():
        if not form.validate():
            return make_response(form.errors, 400)
        corpus = Corpus(creator=current_user,
                        description=form.description.data,
                        title=form.title.data)
        db.session.add(corpus)
        db.session.commit()
        try:
            os.makedirs(corpus.path)
        except OSError:
            logging.error(
                'Make dir {} led to an OSError!'.format(corpus.path))
            db.session.delete(corpus)
            db.session.commit()
            flash('Internal Server Error', 'error')
            return make_response(
                {'redirect_url': url_for('.import_corpus')}, 500)
        # Upload zip
        archive_file = os.path.join(corpus.path,
                                    _uploaded_filename(form.file.data))
        form.file.data.save(archive_file)
        if not _register_imported_corpus_files(corpus, archive_file):
            # If imported zip is not valid delete corpus and give feedback
            flash('Can not import corpus "{}": Invalid archive file!'
                  .format(corpus.title), 'error')
            tasks.delete_corpus(corpus.id)
            # NOTE(review): 201 is kept from the original implementation,
            # the client script may depend on it - confirm.
            return make_response(
                {'redirect_url': url_for('.import_corpus')}, 201)
        # finish import and redirect to imported corpus
        corpus.status = 'prepared'
        db.session.commit()
        os.remove(archive_file)
        flash('Corpus "{}" imported!'.format(corpus.title), 'corpus')
        return make_response(
            {'redirect_url': url_for('.corpus', corpus_id=corpus.id)}, 201)
    return render_template('corpora/import_corpus.html.j2', form=form,
                           title='Import Corpus')


@corpora.route('/<int:corpus_id>')
@login_required
def corpus(corpus_id):
    ''' View to show a corpus together with its files. '''
    corpus = Corpus.query.get_or_404(corpus_id)
    _abort_unless_owner_or_admin(corpus.creator)
    corpus_files = [corpus_file.to_dict() for corpus_file in corpus.files]
    return render_template('corpora/corpus.html.j2', corpus=corpus,
                           corpus_files=corpus_files, title='Corpus')


@corpora.route('/<int:corpus_id>/download')
@login_required
def download_corpus(corpus_id):
    ''' View to send the archive file of a corpus as attachment. '''
    corpus = Corpus.query.get_or_404(corpus_id)
    _abort_unless_owner_or_admin(corpus.creator)
    # TODO: Check what happens here
    directory = os.path.dirname(corpus.archive_file)
    filename = os.path.basename(corpus.archive_file)
    return send_from_directory(as_attachment=True, directory=directory,
                               filename=filename,
                               mimetype='application/zip')


@corpora.route('/<int:corpus_id>/analyse')
@login_required
def analyse_corpus(corpus_id):
    '''
    View to render the corpus analysis interface.

    A corpus with the status "prepared" is set to "start analysis". The query
    string arguments "context", "results_per_page" and "query" prefill the
    forms.
    '''
    corpus = Corpus.query.get_or_404(corpus_id)
    _abort_unless_owner_or_admin(corpus.creator)
    if corpus.status == 'prepared':
        corpus.status = 'start analysis'
        db.session.commit()
    display_options_form = DisplayOptionsForm(
        prefix='display-options-form',
        result_context=request.args.get('context', 20),
        results_per_page=request.args.get('results_per_page', 30)
    )
    query_form = QueryForm(prefix='query-form',
                           query=request.args.get('query'))
    query_download_form = QueryDownloadForm(prefix='query-download-form')
    inspect_display_options_form = InspectDisplayOptionsForm(
        prefix='inspect-display-options-form')
    return render_template(
        'corpora/analyse_corpus.html.j2',
        corpus=corpus,
        display_options_form=display_options_form,
        inspect_display_options_form=inspect_display_options_form,
        query_form=query_form,
        query_download_form=query_download_form,
        title='Corpus analysis'
    )


@corpora.route('/<int:corpus_id>/delete')
@login_required
def delete_corpus(corpus_id):
    ''' View to hand a corpus over to the deletion task. '''
    corpus = Corpus.query.get_or_404(corpus_id)
    _abort_unless_owner_or_admin(corpus.creator)
    flash('Corpus "{}" marked for deletion!'.format(corpus.title), 'corpus')
    tasks.delete_corpus(corpus_id)
    return redirect(url_for('main.dashboard'))


@corpora.route('/<int:corpus_id>/files/add', methods=['GET', 'POST'])
@login_required
def add_corpus_file(corpus_id):
    '''
    View to upload a file into a corpus.

    Submitted forms are answered with a JSON body: either the form errors
    (400) or a "redirect_url" entry (201). The corpus status is reset to
    "unprepared".
    '''
    corpus = Corpus.query.get_or_404(corpus_id)
    _abort_unless_owner_or_admin(corpus.creator)
    form = AddCorpusFileForm(corpus, prefix='add-corpus-file-form')
    if form.is_submitted():
        if not form.validate():
            return make_response(form.errors, 400)
        # Save the file
        filename = _uploaded_filename(form.file.data)
        form.file.data.save(os.path.join(corpus.path, filename))
        metadata = {name: getattr(form, name).data
                    for name in _CORPUS_FILE_METADATA_FIELDS}
        corpus_file = CorpusFile(corpus=corpus, filename=filename,
                                 **metadata)
        db.session.add(corpus_file)
        corpus.status = 'unprepared'
        db.session.commit()
        flash('Corpus file "{}" added!'.format(corpus_file.filename),
              'corpus')
        return make_response(
            {'redirect_url': url_for('.corpus', corpus_id=corpus.id)}, 201)
    return render_template('corpora/add_corpus_file.html.j2', corpus=corpus,
                           form=form, title='Add corpus file')


@corpora.route('/<int:corpus_id>/files/<int:corpus_file_id>/delete')
@login_required
def delete_corpus_file(corpus_id, corpus_file_id):
    ''' View to hand a corpus file over to the deletion task. '''
    corpus_file = CorpusFile.query.get_or_404(corpus_file_id)
    # The file has to belong to the corpus named in the URL.
    if not corpus_file.corpus_id == corpus_id:
        abort(404)
    _abort_unless_owner_or_admin(corpus_file.corpus.creator)
    flash('Corpus file "{}" marked for deletion!'
          .format(corpus_file.filename), 'corpus')
    tasks.delete_corpus_file(corpus_file_id)
    return redirect(url_for('.corpus', corpus_id=corpus_id))


@corpora.route('/<int:corpus_id>/files/<int:corpus_file_id>/download')
@login_required
def download_corpus_file(corpus_id, corpus_file_id):
    ''' View to send a corpus file as attachment. '''
    corpus_file = CorpusFile.query.get_or_404(corpus_file_id)
    # The file has to belong to the corpus named in the URL.
    if not corpus_file.corpus_id == corpus_id:
        abort(404)
    _abort_unless_owner_or_admin(corpus_file.corpus.creator)
    return send_from_directory(as_attachment=True,
                               directory=os.path.dirname(corpus_file.path),
                               filename=corpus_file.filename)


@corpora.route('/<int:corpus_id>/files/<int:corpus_file_id>',
               methods=['GET', 'POST'])
@login_required
def corpus_file(corpus_id, corpus_file_id):
    '''
    View to edit the metadata of a corpus file.

    A valid submission stores the metadata and resets the corpus status to
    "unprepared".
    '''
    corpus = Corpus.query.get_or_404(corpus_id)
    _abort_unless_owner_or_admin(corpus.creator)
    corpus_file = CorpusFile.query.get_or_404(corpus_file_id)
    if corpus_file.corpus != corpus:
        abort(404)
    form = EditCorpusFileForm(prefix='edit-corpus-file-form')
    if form.validate_on_submit():
        for name in _CORPUS_FILE_METADATA_FIELDS:
            setattr(corpus_file, name, getattr(form, name).data)
        corpus.status = 'unprepared'
        db.session.commit()
        flash('Corpus file "{}" edited!'.format(corpus_file.filename),
              'corpus')
        return redirect(url_for('.corpus', corpus_id=corpus_id))
    # If no form is submitted or valid, fill out fields with current values
    for name in _CORPUS_FILE_METADATA_FIELDS:
        getattr(form, name).data = getattr(corpus_file, name)
    return render_template('corpora/corpus_file.html.j2', corpus=corpus,
                           corpus_file=corpus_file, form=form,
                           title='Edit corpus file')


@corpora.route('/<int:corpus_id>/prepare')
@login_required
def prepare_corpus(corpus_id):
    ''' View to hand a corpus with at least one file to the build task. '''
    corpus = Corpus.query.get_or_404(corpus_id)
    _abort_unless_owner_or_admin(corpus.creator)
    if corpus.files.all():
        tasks.build_corpus(corpus_id)
        flash('Corpus "{}" has been marked to get build!'
              .format(corpus.title), 'corpus')
    else:
        flash('Can not build corpus "{}": No corpus file(s)!'
              .format(corpus.title), 'error')
    return redirect(url_for('.corpus', corpus_id=corpus_id))


# Following are view functions to add, view etc. exported results.
@corpora.route('/result/add', methods=['GET', 'POST'])
@login_required
def add_query_result():
    '''
    View to import a result as a json file.

    Submitted forms are answered with a JSON body: either the form errors
    (400) or a "redirect_url" entry.
    '''
    form = AddQueryResultForm(prefix='add-query-result-form')
    if form.is_submitted():
        if not form.validate():
            return make_response(form.errors, 400)
        query_result = QueryResult(
            creator=current_user,
            description=form.description.data,
            filename=_uploaded_filename(form.file.data),
            title=form.title.data
        )
        db.session.add(query_result)
        db.session.commit()
        try:
            os.makedirs(query_result.path)
        except OSError:
            logging.error('Make dir {} led to an OSError!'
                          .format(query_result.path))
            db.session.delete(query_result)
            db.session.commit()
            flash('Internal Server Error', 'error')
            return make_response(
                {'redirect_url': url_for('.add_query_result')}, 500)
        # save the uploaded file
        query_result_file_path = os.path.join(query_result.path,
                                              query_result.filename)
        form.file.data.save(query_result_file_path)
        # parse json schema
        with open(_QUERY_RESULT_SCHEMA_PATH, 'r') as schema_file:
            schema = json.load(schema_file)
        try:
            # parse and validate imported json file; a file that is not json
            # at all is rejected the same way as one that violates the schema
            with open(query_result_file_path, 'r') as query_result_file:
                query_result_file_content = json.load(query_result_file)
            validate(instance=query_result_file_content, schema=schema)
        except Exception:
            logging.exception('Rejected uploaded query result file')
            tasks.delete_query_result(query_result.id)
            flash('Uploaded file is invalid', 'result')
            # NOTE(review): 201 is kept from the original implementation,
            # the client script may depend on it - confirm.
            return make_response(
                {'redirect_url': url_for('.add_query_result')}, 201)
        # Only the metadata is stored in the database, the bulky parts stay
        # in the saved file.
        query_result_file_content.pop('matches', None)
        query_result_file_content.pop('cpos_lookup', None)
        query_result.query_metadata = query_result_file_content
        db.session.commit()
        flash('Query result added!', 'result')
        return make_response(
            {'redirect_url': url_for('.query_result',
                                     query_result_id=query_result.id)},
            201)
    return render_template('corpora/query_results/add_query_result.html.j2',
                           form=form, title='Add query result')


@corpora.route('/result/<int:query_result_id>')
@login_required
def query_result(query_result_id):
    ''' View to show an imported query result. '''
    query_result = QueryResult.query.get_or_404(query_result_id)
    _abort_unless_owner_or_admin(query_result.creator)
    return render_template('corpora/query_results/query_result.html.j2',
                           query_result=query_result, title='Query result')


@corpora.route('/result/<int:query_result_id>/inspect')
@login_required
def inspect_query_result(query_result_id):
    '''
    View to inspect imported result file in a corpus analysis like interface
    '''
    query_result = QueryResult.query.get_or_404(query_result_id)
    _abort_unless_owner_or_admin(query_result.creator)
    query_metadata = query_result.query_metadata
    display_options_form = DisplayOptionsForm(
        prefix='display-options-form',
        results_per_page=request.args.get('results_per_page', 30),
        result_context=request.args.get('context', 20)
    )
    inspect_display_options_form = InspectDisplayOptionsForm(
        prefix='inspect-display-options-form'
    )
    # add_query_result creates query_result.path as a directory and stores
    # the uploaded file inside of it.
    query_result_file_path = os.path.join(query_result.path,
                                          query_result.filename)
    with open(query_result_file_path, 'r') as query_result_file:
        query_result_file_content = json.load(query_result_file)
    return render_template(
        'corpora/query_results/inspect.html.j2',
        query_result=query_result,
        display_options_form=display_options_form,
        inspect_display_options_form=inspect_display_options_form,
        query_result_file_content=query_result_file_content,
        query_metadata=query_metadata,
        title='Inspect query result'
    )


@corpora.route('/result/<int:query_result_id>/delete')
@login_required
def delete_query_result(query_result_id):
    ''' View to hand a query result over to the deletion task. '''
    query_result = QueryResult.query.get_or_404(query_result_id)
    _abort_unless_owner_or_admin(query_result.creator)
    flash('Query result "{}" has been marked for deletion!'
          .format(query_result.title), 'result')
    tasks.delete_query_result(query_result_id)
    return redirect(url_for('services.service', service="corpus_analysis"))


@corpora.route('/result/<int:query_result_id>/download')
@login_required
def download_query_result(query_result_id):
    ''' View to send the file of a query result as attachment. '''
    query_result = QueryResult.query.get_or_404(query_result_id)
    _abort_unless_owner_or_admin(query_result.creator)
    # add_query_result creates query_result.path as a directory and stores
    # the uploaded file inside of it.
    return send_from_directory(as_attachment=True,
                               directory=query_result.path,
                               filename=query_result.filename)