Restructure project

This commit is contained in:
Patrick Jentsch
2022-09-02 13:07:30 +02:00
parent ec9225b881
commit 8e1d94bb5d
73 changed files with 2105 additions and 2468 deletions

View File

@ -1,80 +1,67 @@
from flask_wtf import FlaskForm
from flask_wtf.file import FileField, FileRequired
from werkzeug.utils import secure_filename
from wtforms import (
StringField,
SubmitField,
ValidationError,
IntegerField
)
from wtforms.validators import DataRequired, InputRequired, Length
from wtforms import StringField, SubmitField, ValidationError, IntegerField
from wtforms.validators import InputRequired, Length
class AddCorpusFileForm(FlaskForm):
    '''
    Form to upload a .vrt file into the current corpus together with its
    bibliographic metadata.
    '''
    # Mandatory: core bibliographic data and the upload itself
    author = StringField(
        'Author',
        validators=[InputRequired(), Length(min=1, max=255)]
    )
    publishing_year = IntegerField(
        'Publishing year',
        validators=[InputRequired()]
    )
    title = StringField(
        'Title',
        validators=[InputRequired(), Length(min=1, max=255)]
    )
    vrt = FileField('File', validators=[FileRequired()])
    # Optional: may stay empty, each capped at 255 characters
    # NOTE(review): the label below is spelled 'Adress'; kept as is because
    # it is user-facing text.
    address = StringField(
        'Adress',
        validators=[Length(min=0, max=255)]
    )
    booktitle = StringField(
        'Booktitle',
        validators=[Length(min=0, max=255)]
    )
    chapter = StringField(
        'Chapter',
        validators=[Length(min=0, max=255)]
    )
    editor = StringField(
        'Editor',
        validators=[Length(min=0, max=255)]
    )
    institution = StringField(
        'Institution',
        validators=[Length(min=0, max=255)]
    )
    journal = StringField(
        'Journal',
        validators=[Length(min=0, max=255)]
    )
    pages = StringField(
        'Pages',
        validators=[Length(min=0, max=255)]
    )
    publisher = StringField(
        'Publisher',
        validators=[Length(min=0, max=255)]
    )
    school = StringField(
        'School',
        validators=[Length(min=0, max=255)]
    )
class CreateCorpusForm(FlaskForm):
    '''
    Form to create a new corpus from a title and a description.
    '''
    # Both text fields are mandatory; Length only caps the upper bound.
    description = StringField(
        label='Description',
        validators=[InputRequired(), Length(max=255)]
    )
    title = StringField(
        label='Title',
        validators=[InputRequired(), Length(max=32)]
    )
    submit = SubmitField()
class CorpusFileBaseForm(FlaskForm):
    '''
    Common metadata fields of a corpus file, shared by the forms that
    subclass this one.
    '''
    # Mandatory fields
    author = StringField(
        label='Author',
        validators=[InputRequired(), Length(max=255)]
    )
    publishing_year = IntegerField(
        label='Publishing year',
        validators=[InputRequired()]
    )
    title = StringField(
        label='Title',
        validators=[InputRequired(), Length(max=255)]
    )
    # Optional fields, each limited to 255 characters
    # NOTE(review): the label below is spelled 'Adress'; kept as is because
    # it is user-facing text.
    address = StringField(label='Adress', validators=[Length(max=255)])
    booktitle = StringField(label='Booktitle', validators=[Length(max=255)])
    chapter = StringField(label='Chapter', validators=[Length(max=255)])
    editor = StringField(label='Editor', validators=[Length(max=255)])
    institution = StringField(
        label='Institution',
        validators=[Length(max=255)]
    )
    journal = StringField(label='Journal', validators=[Length(max=255)])
    pages = StringField(label='Pages', validators=[Length(max=255)])
    publisher = StringField(label='Publisher', validators=[Length(max=255)])
    school = StringField(label='School', validators=[Length(max=255)])
    submit = SubmitField()
class CreateCorpusFileForm(CorpusFileBaseForm):
    '''
    Corpus file metadata form extended by a mandatory .vrt file upload.
    '''
    vrt = FileField('File', validators=[FileRequired()])

    def validate_vrt(self, field):
        ''' Reject uploads whose filename does not end in .vrt '''
        # The comparison is case-insensitive, so "TEXT.VRT" is accepted too.
        uploaded_name = field.data.filename.lower()
        if uploaded_name.endswith('.vrt'):
            return
        raise ValidationError('VRT files only!')
class EditCorpusFileForm(FlaskForm):
    '''
    Form to edit the metadata of a single corpus file.
    '''
    # Mandatory fields
    author = StringField(
        'Author',
        validators=[InputRequired(), Length(min=1, max=255)]
    )
    publishing_year = IntegerField(
        'Publishing year',
        validators=[InputRequired()]
    )
    title = StringField(
        'Title',
        validators=[InputRequired(), Length(min=1, max=255)]
    )
    # Optional fields: may stay empty, each capped at 255 characters
    # NOTE(review): the label below is spelled 'Adress'; kept as is because
    # it is user-facing text.
    address = StringField(
        'Adress',
        validators=[Length(min=0, max=255)]
    )
    booktitle = StringField(
        'Booktitle',
        validators=[Length(min=0, max=255)]
    )
    chapter = StringField(
        'Chapter',
        validators=[Length(min=0, max=255)]
    )
    editor = StringField(
        'Editor',
        validators=[Length(min=0, max=255)]
    )
    institution = StringField(
        'Institution',
        validators=[Length(min=0, max=255)]
    )
    journal = StringField(
        'Journal',
        validators=[Length(min=0, max=255)]
    )
    pages = StringField(
        'Pages',
        validators=[Length(min=0, max=255)]
    )
    publisher = StringField(
        'Publisher',
        validators=[Length(min=0, max=255)]
    )
    school = StringField(
        'School',
        validators=[Length(min=0, max=255)]
    )
    submit = SubmitField()
class AddCorpusForm(FlaskForm):
    '''
    Form to add a new corpus.
    '''
    # Both text fields are mandatory and length-limited.
    description = StringField(
        'Description',
        validators=[InputRequired(), Length(min=1, max=255)]
    )
    title = StringField(
        'Title',
        validators=[InputRequired(), Length(min=1, max=32)]
    )
    submit = SubmitField()
class EditCorpusFileForm(CorpusFileBaseForm):
    '''
    Form to edit the metadata of an existing corpus file.
    '''

    def prefill(self, corpus_file):
        ''' Pre-fill the form with data of an existing corpus file '''
        # Every listed name is both a form field and an attribute read from
        # corpus_file; the value is copied into the field's data.
        mirrored_names = (
            'address',
            'author',
            'booktitle',
            'chapter',
            'editor',
            'institution',
            'journal',
            'pages',
            'publisher',
            'publishing_year',
            'school',
            'title',
        )
        for name in mirrored_names:
            getattr(self, name).data = getattr(corpus_file, name)
class ImportCorpusForm(FlaskForm):
    '''
    Form to import a corpus from an uploaded archive.
    '''
    description = StringField(
        'Description',
        validators=[InputRequired(), Length(min=1, max=255)]
    )
    archive = FileField('File', validators=[FileRequired()])
    title = StringField(
        'Title',
        validators=[InputRequired(), Length(min=1, max=32)]
    )
    submit = SubmitField()

    def validate_archive(self, field):
        ''' Accept the upload only if its mimetype is a known ZIP type '''
        zip_mimetypes = (
            'application/zip',
            'application/x-zip',
            'application/x-zip-compressed'
        )
        if field.data.mimetype in zip_mimetypes:
            return
        raise ValidationError('ZIP files only!')

    pass

View File

@ -1,140 +1,44 @@
from app import db
from app.models import Corpus, CorpusFile, CorpusStatus
from flask import (
abort,
current_app,
flash,
make_response,
Markup,
redirect,
render_template,
url_for,
send_from_directory
)
from flask_login import current_user, login_required
from . import bp
from . import tasks
from .forms import (
AddCorpusFileForm,
AddCorpusForm,
EditCorpusFileForm,
ImportCorpusForm
)
from threading import Thread
import os
import shutil
import tempfile
import xml.etree.ElementTree as ET
from app import db
from app.models import Corpus, CorpusFile, CorpusStatus
from . import bp
from .forms import CreateCorpusFileForm, CreateCorpusForm, EditCorpusFileForm
@bp.route('/add', methods=['GET', 'POST'])
@bp.route('/create', methods=['GET', 'POST'])
@login_required
def add_corpus():
form = AddCorpusForm(prefix='add-corpus-form')
def create_corpus():
form = CreateCorpusForm(prefix='create-corpus-form')
if form.validate_on_submit():
corpus = Corpus(
user=current_user,
description=form.description.data,
title=form.title.data
)
db.session.add(corpus)
db.session.flush()
db.session.refresh(corpus)
try:
corpus.makedirs()
except OSError as e:
current_app.logger.error(e)
db.session.rollback()
flash('Internal Server Error', category='error')
corpus = Corpus.create(
title=form.title.data,
description=form.description.data,
user=current_user
)
except OSError:
abort(500)
db.session.commit()
flash(f'Corpus "{corpus.title}" added', category='corpus')
return redirect(url_for('.corpus', corpus_id=corpus.id))
return render_template(
'corpora/add_corpus.html.j2',
form=form,
title='Add corpus'
)
@bp.route('/import', methods=['GET', 'POST'])
@login_required
def import_corpus():
form = ImportCorpusForm(prefix='import-corpus-form')
if form.is_submitted():
if not form.validate():
return make_response(form.errors, 400)
corpus = Corpus(
user=current_user,
description=form.description.data,
title=form.title.data
message = Markup(
f'Corpus "<a href="{corpus.url}">{corpus.title}</a>" created'
)
db.session.add(corpus)
db.session.flush(objects=[corpus])
db.session.refresh(corpus)
try:
corpus.makedirs()
except OSError as e:
current_app.logger.error(e)
db.session.rollback()
flash('Internal Server Error', category='error')
return make_response({'redirect_url': url_for('.import_corpus')}, 500) # noqa
# Save the uploaded zip file in a temporary directory
tmp_dir_base = os.path.join(current_app.config['NOPAQUE_DATA_DIR'], 'tmp') # noqa
with tempfile.TemporaryDirectory(dir=tmp_dir_base) as tmp_dir:
archive_file = os.path.join(tmp_dir, 'corpus.zip')
try:
form.archive.data.save(archive_file)
except OSError as e:
current_app.logger.error(e)
db.session.rollback()
flash('Internal Server Error1', category='error')
return make_response({'redirect_url': url_for('.import_corpus')}, 500) # noqa
shutil.unpack_archive(archive_file, extract_dir=tmp_dir)
for vrt_filename in [x for x in os.listdir(tmp_dir) if x.endswith('.vrt')]:
vrt_file = os.path.join(tmp_dir, vrt_filename)
element_tree = ET.parse(vrt_file)
text_node = element_tree.find('text')
corpus_file = CorpusFile(
author=text_node.get('author'),
corpus=corpus,
filename=vrt_filename,
mimetype='application/vrt+xml',
publishing_year=int(text_node.get('publishing_year')),
title=text_node.get('title')
)
if 'address' not in text_node.attrib:
corpus_file.address = text_node.get('address')
if 'booktitle' not in text_node.attrib:
corpus_file.booktitle = text_node.get('booktitle')
if 'chapter' not in text_node.attrib:
corpus_file.chapter = text_node.get('chapter')
if 'editor' not in text_node.attrib:
corpus_file.editor = text_node.get('editor')
if 'institution' not in text_node.attrib:
corpus_file.institution = text_node.get('institution')
if 'journal' not in text_node.attrib:
corpus_file.journal = text_node.get('journal')
if 'pages' not in text_node.attrib:
corpus_file.pages = text_node.get('pages')
if 'publisher' not in text_node.attrib:
corpus_file.publisher = text_node.get('publisher')
if 'school' not in text_node.attrib:
corpus_file.school = text_node.get('school')
db.session.add(corpus_file)
db.session.flush(objects=[corpus_file])
db.session.refresh(corpus)
try:
shutil.copy2(vrt_file, corpus_file.path)
except Exception as e:
db.session.rollback()
flash('Internal Server Error2', category='error')
return make_response({'redirect_url': url_for('.import_corpus')}, 500) # noqa
db.session.commit()
flash(f'Corpus "{corpus.title}" imported', 'corpus')
return make_response({'redirect_url': url_for('.corpus', corpus_id=corpus.id)}, 201)
flash(message, 'corpus')
return redirect(corpus.url)
return render_template(
'corpora/import_corpus.html.j2',
'corpora/create_corpus.html.j2',
form=form,
title='Import Corpus'
title='Create corpus'
)
@ -151,6 +55,26 @@ def corpus(corpus_id):
)
@bp.route('/<hashid:corpus_id>', methods=['DELETE'])
@login_required
def delete_corpus(corpus_id):
    '''
    Start the deletion of a corpus in a background thread.

    Responds with 404 if the corpus does not exist and with 403 if the
    current user is neither its owner nor an administrator. Otherwise the
    deletion is handed to a thread and an empty body with status 202 is
    returned without waiting for it.
    '''
    def _delete_corpus(app, corpus_id):
        # The thread runs outside the request, so it needs its own
        # application context for the database access.
        with app.app_context():
            corpus = Corpus.query.get(corpus_id)
            if corpus is None:
                # The corpus can vanish between the check in the request
                # and this thread running (e.g. two DELETE requests).
                app.logger.warning('Corpus %s not found', corpus_id)
                return
            corpus.delete()
            db.session.commit()

    corpus = Corpus.query.get_or_404(corpus_id)
    if not (corpus.user == current_user or current_user.is_administrator()):
        abort(403)
    thread = Thread(
        target=_delete_corpus,
        # Pass the real application object, not the context-bound proxy.
        args=(current_app._get_current_object(), corpus_id)
    )
    thread.start()
    return {}, 202
@bp.route('/<hashid:corpus_id>/analyse')
@login_required
def analyse_corpus(corpus_id):
@ -162,95 +86,132 @@ def analyse_corpus(corpus_id):
)
@bp.route('/<hashid:corpus_id>/build')
@bp.route('/<hashid:corpus_id>/build', methods=['POST'])
@login_required
def build_corpus(corpus_id):
def _build_corpus(app, corpus_id):
with app.app_context():
corpus = Corpus.query.get(corpus_id)
corpus.build()
db.session.commit()
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.user == current_user or current_user.is_administrator()):
abort(403)
if corpus.files.all():
tasks.build_corpus(corpus_id)
flash(
f'Corpus "{corpus.title}" marked for building',
category='corpus'
)
else:
flash(
f'Can\'t build corpus "{corpus.title}": No corpus file(s)',
category='error'
)
return redirect(url_for('.corpus', corpus_id=corpus_id))
# Check if the corpus has corpus files
if not corpus.files.all():
response = {'errors': {'message': 'Corpus file(s) required'}}
return response, 409
thread = Thread(
target=_build_corpus,
args=(current_app._get_current_object(), corpus_id)
)
thread.start()
return {}, 202
@bp.route('/<hashid:corpus_id>/delete')
@bp.route('/<hashid:corpus_id>/files/create', methods=['GET', 'POST'])
@login_required
def delete_corpus(corpus_id):
def create_corpus_file(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.user == current_user or current_user.is_administrator()):
abort(403)
flash(f'Corpus "{corpus.title}" marked for deletion', 'corpus')
tasks.delete_corpus(corpus_id)
return redirect(url_for('main.dashboard'))
@bp.route('/<hashid:corpus_id>/export')
@login_required
def export_corpus(corpus_id):
abort(503)
corpus = Corpus.query.get_or_404(corpus_id)
if not (corpus.user == current_user or current_user.is_administrator()):
abort(403)
return send_from_directory(
as_attachment=True,
directory=os.path.join(corpus.user.path, 'corpora'),
filename=corpus.archive_file,
mimetype='zip'
form = CreateCorpusFileForm(prefix='create-corpus-file-form')
if form.is_submitted():
if not form.validate():
response = {'errors': form.errors}
return response, 400
try:
corpus_file = CorpusFile.create(
form.vrt.data,
address=form.address.data,
author=form.author.data,
booktitle=form.booktitle.data,
chapter=form.chapter.data,
editor=form.editor.data,
institution=form.institution.data,
journal=form.journal.data,
pages=form.pages.data,
publisher=form.publisher.data,
publishing_year=form.publishing_year.data,
school=form.school.data,
title=form.title.data,
mimetype='application/vrt+xml',
corpus=corpus
)
except OSError:
abort(500)
corpus.status = CorpusStatus.UNPREPARED
db.session.commit()
message = Markup(
'Corpus file'
f'"<a href="{corpus_file.url}">{corpus_file.filename}</a>" added'
)
flash(message, category='corpus')
return {}, 201, {'Location': corpus.url}
return render_template(
'corpora/create_corpus_file.html.j2',
corpus=corpus,
form=form,
title='Add corpus file'
)
@bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>', methods=['GET', 'POST']) # noqa
@bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>',
methods=['GET', 'POST'])
@login_required
def corpus_file(corpus_id, corpus_file_id):
corpus_file = CorpusFile.query.filter(
CorpusFile.corpus_id == corpus_id,
CorpusFile.id == corpus_file_id
).first_or_404()
if not (
corpus_file.corpus.user == current_user
or current_user.is_administrator()
):
corpus_file = CorpusFile.query.get_or_404(corpus_file_id)
if corpus_file.corpus.id != corpus_id:
abort(404)
if not (corpus_file.corpus.user == current_user or current_user.is_administrator()):
abort(403)
form = EditCorpusFileForm(prefix='edit-corpus-file-form')
if form.validate_on_submit():
corpus_file.address = form.address.data
corpus_file.author = form.author.data
corpus_file.booktitle = form.booktitle.data
corpus_file.chapter = form.chapter.data
corpus_file.editor = form.editor.data
corpus_file.institution = form.institution.data
corpus_file.journal = form.journal.data
corpus_file.pages = form.pages.data
corpus_file.publisher = form.publisher.data
corpus_file.publishing_year = form.publishing_year.data
corpus_file.school = form.school.data
corpus_file.title = form.title.data
corpus_file.corpus.status = CorpusStatus.UNPREPARED
has_changes = False
if corpus_file.address != form.address.data:
corpus_file.address = form.address.data
has_changes = True
if corpus_file.author != form.author.data:
corpus_file.author = form.author.data
has_changes = True
if corpus_file.booktitle != form.booktitle.data:
corpus_file.booktitle = form.booktitle.data
has_changes = True
if corpus_file.chapter != form.chapter.data:
corpus_file.chapter = form.chapter.data
has_changes = True
if corpus_file.editor != form.editor.data:
corpus_file.editor = form.editor.data
has_changes = True
if corpus_file.institution != form.institution.data:
corpus_file.institution = form.institution.data
has_changes = True
if corpus_file.journal != form.journal.data:
corpus_file.journal = form.journal.data
has_changes = True
if corpus_file.pages != form.pages.data:
corpus_file.pages = form.pages.data
has_changes = True
if corpus_file.publisher != form.publisher.data:
corpus_file.publisher = form.publisher.data
has_changes = True
if corpus_file.publishing_year != form.publishing_year.data:
corpus_file.publishing_year = form.publishing_year.data
has_changes = True
if corpus_file.school != form.school.data:
corpus_file.school = form.school.data
has_changes = True
if corpus_file.title != form.title.data:
corpus_file.title = form.title.data
has_changes = True
if has_changes:
corpus_file.corpus.status = CorpusStatus.UNPREPARED
db.session.commit()
flash(f'Corpus file "{corpus_file.filename}" edited', category='corpus') # noqa
return redirect(url_for('.corpus', corpus_id=corpus_id))
# If no form is submitted or valid, fill out fields with current values
form.address.data = corpus_file.address
form.author.data = corpus_file.author
form.booktitle.data = corpus_file.booktitle
form.chapter.data = corpus_file.chapter
form.editor.data = corpus_file.editor
form.institution.data = corpus_file.institution
form.journal.data = corpus_file.journal
form.pages.data = corpus_file.pages
form.publisher.data = corpus_file.publisher
form.publishing_year.data = corpus_file.publishing_year
form.school.data = corpus_file.school
form.title.data = corpus_file.title
message = Markup(f'Corpus file "<a href="{corpus_file.url}">{corpus_file.filename}</a>" updated')
flash(message, category='corpus')
return redirect(corpus_file.corpus.url)
form.prefill(corpus_file)
return render_template(
'corpora/corpus_file.html.j2',
corpus=corpus_file.corpus,
@ -260,91 +221,52 @@ def corpus_file(corpus_id, corpus_file_id):
)
@bp.route('/<hashid:corpus_id>/files/add', methods=['GET', 'POST'])
@login_required
def add_corpus_file(corpus_id):
    '''
    Render the "add corpus file" page or store a submitted corpus file.

    Responds with 404 for an unknown corpus and 403 if the current user is
    neither the owner nor an administrator. A submitted form yields 400
    with the form errors when invalid, 500 when the upload cannot be saved
    and 201 on success; the latter two carry a 'redirect_url' in the body.
    '''
    corpus = Corpus.query.get_or_404(corpus_id)
    is_owner = corpus.user == current_user
    if not is_owner and not current_user.is_administrator():
        abort(403)
    form = AddCorpusFileForm(prefix='add-corpus-file-form')
    if not form.is_submitted():
        return render_template(
            'corpora/add_corpus_file.html.j2',
            corpus=corpus,
            form=form,
            title='Add corpus file'
        )
    if not form.validate():
        return make_response(form.errors, 400)
    upload = form.vrt.data
    corpus_file = CorpusFile(
        address=form.address.data,
        author=form.author.data,
        booktitle=form.booktitle.data,
        chapter=form.chapter.data,
        corpus=corpus,
        editor=form.editor.data,
        filename=upload.filename,
        institution=form.institution.data,
        journal=form.journal.data,
        mimetype='application/vrt+xml',
        pages=form.pages.data,
        publisher=form.publisher.data,
        publishing_year=form.publishing_year.data,
        school=form.school.data,
        title=form.title.data
    )
    # Flush and refresh before saving, since the target is corpus_file.path.
    db.session.add(corpus_file)
    db.session.flush(objects=[corpus_file])
    db.session.refresh(corpus_file)
    try:
        upload.save(corpus_file.path)
    except OSError as e:
        current_app.logger.error(e)
        db.session.rollback()
        flash('Internal Server Error', category='error')
        retry_url = url_for('.add_corpus_file', corpus_id=corpus.id)
        return make_response({'redirect_url': retry_url}, 500)
    # The corpus status is reset to UNPREPARED once a file has been added.
    corpus.status = CorpusStatus.UNPREPARED
    db.session.commit()
    flash(f'Corpus file "{corpus_file.filename}" added', category='corpus')
    corpus_url = url_for('.corpus', corpus_id=corpus.id)
    return make_response({'redirect_url': corpus_url}, 201)
@bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>/delete')
@bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>', methods=['DELETE'])
@login_required
def delete_corpus_file(corpus_id, corpus_file_id):
corpus_file = CorpusFile.query.filter(
CorpusFile.corpus_id == corpus_id,
CorpusFile.id == corpus_file_id
).first_or_404()
if not (
corpus_file.corpus.user == current_user
or current_user.is_administrator()
):
def _delete_corpus_file(app, corpus_file_id):
with app.app_context():
corpus_file = CorpusFile.query.get(corpus_file_id)
corpus_file.delete()
db.session.commit()
corpus_file = CorpusFile.query.get_or_404(corpus_file_id)
if corpus_file.corpus.id != corpus_id:
abort(404)
if not (corpus_file.corpus.user == current_user or current_user.is_administrator()):
abort(403)
flash(
f'Corpus file "{corpus_file.filename}" marked for deletion',
category='corpus'
thread = Thread(
target=_delete_corpus_file,
args=(current_app._get_current_object(), corpus_file_id)
)
tasks.delete_corpus_file(corpus_file_id)
return redirect(url_for('.corpus', corpus_id=corpus_id))
thread.start()
return {}, 202
@bp.route('/<hashid:corpus_id>/files/<hashid:corpus_file_id>/download')
@login_required
def download_corpus_file(corpus_id, corpus_file_id):
corpus_file = CorpusFile.query.filter(
CorpusFile.corpus_id == corpus_id,
CorpusFile.id == corpus_file_id
).first_or_404()
if not (
corpus_file.corpus.user == current_user
or current_user.is_administrator()
):
corpus_file = CorpusFile.query.get_or_404(corpus_file_id)
if corpus_file.corpus.id != corpus_id:
abort(404)
if not (corpus_file.corpus.user == current_user or current_user.is_administrator()):
abort(403)
return send_from_directory(
os.path.dirname(corpus_file.path),
os.path.basename(corpus_file.path),
as_attachment=True,
attachment_filename=corpus_file.filename,
directory=os.path.dirname(corpus_file.path),
filename=os.path.basename(corpus_file.path)
mimetype=corpus_file.mimetype
)
@bp.route('/import', methods=['GET', 'POST'])
@login_required
def import_corpus():
    ''' Placeholder view: every request is aborted with status 503 '''
    abort(503)
@bp.route('/<hashid:corpus_id>/export')
@login_required
def export_corpus(corpus_id):
    ''' Placeholder view: every request is aborted with status 503 '''
    abort(503)

View File

@ -1,34 +0,0 @@
from app import db
from app.decorators import background
from app.models import Corpus, CorpusFile
@background
def build_corpus(corpus_id, *args, **kwargs):
    '''
    Build the corpus with the given id and commit the result.

    The application object is read from kwargs['app'] (presumably supplied
    by the background decorator - confirm in app.decorators). Raises
    Exception if no corpus with that id exists.
    '''
    with kwargs['app'].app_context():
        target = Corpus.query.get(corpus_id)
        if target is not None:
            target.build()
            db.session.commit()
            return
        raise Exception(f'Corpus {corpus_id} not found')
@background
def delete_corpus(corpus_id, *args, **kwargs):
    '''
    Delete the corpus with the given id and commit the change.

    The application object is read from kwargs['app'] (presumably supplied
    by the background decorator - confirm in app.decorators). Raises
    Exception if no corpus with that id exists.
    '''
    app = kwargs['app']
    with app.app_context():
        target = Corpus.query.get(corpus_id)
        if target is not None:
            target.delete()
            db.session.commit()
            return
        raise Exception(f'Corpus {corpus_id} not found')
@background
def delete_corpus_file(corpus_file_id, *args, **kwargs):
    '''
    Delete the corpus file with the given id and commit the change.

    The application object is read from kwargs['app'] (presumably supplied
    by the background decorator - confirm in app.decorators). Raises
    Exception if no corpus file with that id exists.
    '''
    app = kwargs['app']
    with app.app_context():
        target = CorpusFile.query.get(corpus_file_id)
        if target is not None:
            target.delete()
            db.session.commit()
            return
        raise Exception(f'Corpus file {corpus_file_id} not found')