From 5864aa653e4f6f5ea107d4b582b661a4d67f6b0c Mon Sep 17 00:00:00 2001
From: Patrick Jentsch
Date: Tue, 29 Nov 2022 15:27:50 +0100
Subject: [PATCH 01/64] standardize db col names and some preliminary work for
public corpora
---
app/models.py | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/app/models.py b/app/models.py
index c607dc5f..96d012b2 100644
--- a/app/models.py
+++ b/app/models.py
@@ -532,7 +532,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
publisher_url = db.Column(db.String(512))
publishing_url = db.Column(db.String(512))
publishing_year = db.Column(db.Integer)
- shared = db.Column(db.Boolean, default=False)
+ is_public = db.Column(db.Boolean, default=False)
# Backrefs: user: User
@property
@@ -576,7 +576,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
model.publisher_url = m['publisher_url']
model.publishing_url = m['publishing_url']
model.publishing_year = m['publishing_year']
- model.shared = True
+ model.is_public = True
model.title = m['title']
model.version = m['version']
continue
@@ -587,7 +587,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
publisher_url=m['publisher_url'],
publishing_url=m['publishing_url'],
publishing_year=m['publishing_year'],
- shared=True,
+ is_public=True,
title=m['title'],
user=nopaque_user,
version=m['version']
@@ -629,7 +629,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
'publisher_url': self.publisher_url,
'publishing_url': self.publishing_url,
'publishing_year': self.publishing_year,
- 'shared': self.shared,
+ 'is_public': self.is_public,
'title': self.title,
'version': self.version,
**self.file_mixin_to_json_serializeable()
@@ -656,7 +656,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
publishing_url = db.Column(db.String(512))
publishing_year = db.Column(db.Integer)
pipeline_name = db.Column(db.String(64))
- shared = db.Column(db.Boolean, default=False)
+ is_public = db.Column(db.Boolean, default=False)
# Backrefs: user: User
@property
@@ -700,7 +700,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
model.publisher_url = m['publisher_url']
model.publishing_url = m['publishing_url']
model.publishing_year = m['publishing_year']
- model.shared = True
+ model.is_public = True
model.title = m['title']
model.version = m['version']
model.pipeline_name = m['pipeline_name']
@@ -712,7 +712,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
publisher_url=m['publisher_url'],
publishing_url=m['publishing_url'],
publishing_year=m['publishing_year'],
- shared=True,
+ is_public=True,
title=m['title'],
user=nopaque_user,
version=m['version'],
@@ -756,7 +756,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
'publishing_url': self.publishing_url,
'publishing_year': self.publishing_year,
'pipeline_name': self.pipeline_name,
- 'shared': self.shared,
+ 'is_public': self.is_public,
'title': self.title,
'version': self.version,
**self.file_mixin_to_json_serializeable()
@@ -1024,6 +1024,7 @@ class CorpusFile(FileMixin, HashidMixin, db.Model):
corpus_id = db.Column(db.Integer, db.ForeignKey('corpora.id'))
# Fields
author = db.Column(db.String(255))
+ description = db.Column(db.String(255))
publishing_year = db.Column(db.Integer)
title = db.Column(db.String(255))
address = db.Column(db.String(255))
From 5491cb184ae8bab63945767c824662f36894b8d1 Mon Sep 17 00:00:00 2001
From: Patrick Jentsch
Date: Tue, 29 Nov 2022 15:28:10 +0100
Subject: [PATCH 02/64] add all files for last commit
---
app/contributions/forms.py | 2 -
app/contributions/routes.py | 8 +--
app/corpora/forms.py | 38 ++++++++++--
app/corpora/routes.py | 61 +++++++++++++------
app/templates/_scripts.html.j2 | 4 +-
app/templates/corpora/create_corpus.html.j2 | 19 ++----
.../corpora/create_corpus_file.html.j2 | 1 +
migrations/versions/f2656133df2f_.py | 41 +++++++++++++
8 files changed, 130 insertions(+), 44 deletions(-)
create mode 100644 migrations/versions/f2656133df2f_.py
diff --git a/app/contributions/forms.py b/app/contributions/forms.py
index 0ba8f5d5..eb25babb 100644
--- a/app/contributions/forms.py
+++ b/app/contributions/forms.py
@@ -55,7 +55,6 @@ class CreateTesseractOCRPipelineModelForm(ContributionBaseForm):
)
def validate_tesseract_model_file(self, field):
- current_app.logger.warning(field.data.filename)
if not field.data.filename.lower().endswith('.traineddata'):
raise ValidationError('traineddata files only!')
@@ -80,7 +79,6 @@ class CreateSpaCyNLPPipelineModelForm(ContributionBaseForm):
)
def validate_spacy_model_file(self, field):
- current_app.logger.warning(field.data.filename)
if not field.data.filename.lower().endswith('.tar.gz'):
raise ValidationError('.tar.gz files only!')
diff --git a/app/contributions/routes.py b/app/contributions/routes.py
index 40b21203..3bc37eb8 100644
--- a/app/contributions/routes.py
+++ b/app/contributions/routes.py
@@ -104,7 +104,7 @@ def create_tesseract_ocr_pipeline_model():
publisher_url=form.publisher_url.data,
publishing_url=form.publishing_url.data,
publishing_year=form.publishing_year.data,
- shared=False,
+ is_public=False,
title=form.title.data,
version=form.version.data,
user=current_user
@@ -131,7 +131,7 @@ def toggle_tesseract_ocr_pipeline_model_public_status(tesseract_ocr_pipeline_mod
tesseract_ocr_pipeline_model = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
if not (tesseract_ocr_pipeline_model.user == current_user or current_user.is_administrator()):
abort(403)
- tesseract_ocr_pipeline_model.shared = not tesseract_ocr_pipeline_model.shared
+ tesseract_ocr_pipeline_model.is_public = not tesseract_ocr_pipeline_model.is_public
db.session.commit()
return {}, 201
@@ -201,7 +201,7 @@ def create_spacy_nlp_pipeline_model():
publisher_url=form.publisher_url.data,
publishing_url=form.publishing_url.data,
publishing_year=form.publishing_year.data,
- shared=False,
+ is_public=False,
title=form.title.data,
version=form.version.data,
user=current_user
@@ -228,6 +228,6 @@ def toggle_spacy_nlp_pipeline_model_public_status(spacy_nlp_pipeline_model_id):
spacy_nlp_pipeline_model = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
if not (spacy_nlp_pipeline_model.user == current_user or current_user.is_administrator()):
abort(403)
- spacy_nlp_pipeline_model.shared = not spacy_nlp_pipeline_model.shared
+ spacy_nlp_pipeline_model.is_public = not spacy_nlp_pipeline_model.is_public
db.session.commit()
return {}, 201
diff --git a/app/corpora/forms.py b/app/corpora/forms.py
index db46b0ad..12ec1d9c 100644
--- a/app/corpora/forms.py
+++ b/app/corpora/forms.py
@@ -1,11 +1,17 @@
from flask_wtf import FlaskForm
from flask_wtf.file import FileField, FileRequired
-from wtforms import StringField, SubmitField, ValidationError, IntegerField
+from wtforms import (
+ StringField,
+ SubmitField,
+ TextAreaField,
+ ValidationError,
+ IntegerField
+)
from wtforms.validators import InputRequired, Length
-class CreateCorpusForm(FlaskForm):
- description = StringField(
+class CorpusBaseForm(FlaskForm):
+ description = TextAreaField(
'Description',
validators=[InputRequired(), Length(max=255)]
)
@@ -13,6 +19,20 @@ class CreateCorpusForm(FlaskForm):
submit = SubmitField()
+class CreateCorpusForm(CorpusBaseForm):
+    """Form for creating a corpus; all fields come from CorpusBaseForm."""
+
+    def __init__(self, *args, **kwargs):
+        # Use a fixed default prefix unless the caller passes its own.
+        kwargs.setdefault('prefix', 'create-corpus-form')
+        super().__init__(*args, **kwargs)
+
+
+class UpdateCorpusForm(CorpusBaseForm):
+    """Form for updating a corpus; all fields come from CorpusBaseForm."""
+
+    def __init__(self, *args, **kwargs):
+        # Use a fixed default prefix unless the caller passes its own.
+        kwargs.setdefault('prefix', 'update-corpus-form')
+        super().__init__(*args, **kwargs)
+
+
class CorpusFileBaseForm(FlaskForm):
author = StringField(
'Author',
@@ -41,13 +61,21 @@ class CorpusFileBaseForm(FlaskForm):
class CreateCorpusFileForm(CorpusFileBaseForm):
vrt = FileField('File', validators=[FileRequired()])
+    def __init__(self, *args, **kwargs):
+        """Initialise the form, defaulting ``prefix`` when none is given."""
+        kwargs.setdefault('prefix', 'create-corpus-file-form')
+        super().__init__(*args, **kwargs)
+
def validate_vrt(self, field):
if not field.data.filename.lower().endswith('.vrt'):
raise ValidationError('VRT files only!')
-class EditCorpusFileForm(CorpusFileBaseForm):
- pass
+class UpdateCorpusFileForm(CorpusFileBaseForm):
+    """Form for updating a corpus file; fields come from CorpusFileBaseForm."""
+
+    def __init__(self, *args, **kwargs):
+        # Use a fixed default prefix unless the caller passes its own.
+        kwargs.setdefault('prefix', 'update-corpus-file-form')
+        super().__init__(*args, **kwargs)
class ImportCorpusForm(FlaskForm):
diff --git a/app/corpora/routes.py b/app/corpora/routes.py
index 2e9047c3..a2d14af7 100644
--- a/app/corpora/routes.py
+++ b/app/corpora/routes.py
@@ -13,7 +13,26 @@ import os
from app import db
from app.models import Corpus, CorpusFile, CorpusStatus
from . import bp
-from .forms import CreateCorpusFileForm, CreateCorpusForm, EditCorpusFileForm
+from .forms import CreateCorpusFileForm, CreateCorpusForm, UpdateCorpusFileForm
+
+
+def user_can_read_corpus(user, corpus):
+    """Tell whether ``user`` may view ``corpus``.
+
+    Truthy when the user owns the corpus, is an administrator, or the
+    corpus is flagged ``is_public``. The checks short-circuit in that order.
+    """
+    is_owner = corpus.user == user
+    return is_owner or user.is_administrator() or corpus.is_public
+
+
+def user_can_update_corpus(user, corpus):
+    """Tell whether ``user`` may modify ``corpus``.
+
+    Truthy only for the corpus owner and for administrators; unlike read
+    access, the corpus' public flag is not consulted.
+    """
+    is_owner = corpus.user == user
+    return is_owner or user.is_administrator()
+
+
+def user_can_delete_corpus(user, corpus):
+    """Tell whether ``user`` may delete ``corpus``.
+
+    Deleting requires exactly the rights checked by
+    ``user_can_update_corpus``, so the decision is delegated to it.
+    """
+    may_update = user_can_update_corpus(user, corpus)
+    return may_update
+
+
+@bp.route('')
+@login_required
+def corpora():
+    """List the corpora owned by the current user plus all public corpora."""
+    # Each comparison needs its own parentheses: `|` binds tighter than `==`
+    # in Python, so the unparenthesised form is parsed as the chained
+    # comparison `user_id == (id | is_public) == True` instead of an SQL OR
+    # of the two conditions.
+    corpora = Corpus.query.filter(
+        (Corpus.user_id == current_user.id)
+        | (Corpus.is_public == True)  # noqa: E712 - SQL expression, not a Python bool
+    ).all()
+    # NOTE(review): the other views in this module render '*.html.j2'
+    # templates - confirm that 'corpora/corpora.html' is the intended name.
+    return render_template('corpora/corpora.html', corpora=corpora)
@bp.route('/create', methods=['GET', 'POST'])
@@ -46,7 +65,7 @@ def create_corpus():
@login_required
def corpus(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
- if not (corpus.user == current_user or current_user.is_administrator()):
+ if not user_can_read_corpus(current_user, corpus):
abort(403)
return render_template(
'corpora/corpus.html.j2',
@@ -55,6 +74,19 @@ def corpus(corpus_id):
)
+# @bp.route('//update')
+# @login_required
+# def update_corpus(corpus_id):
+# corpus = Corpus.query.get_or_404(corpus_id)
+# if not user_can_update_corpus(current_user, corpus):
+# abort(403)
+# return render_template(
+# 'corpora/update_corpus.html.j2',
+# corpus=corpus,
+# title='Corpus'
+# )
+
+
@bp.route('/', methods=['DELETE'])
@login_required
def delete_corpus(corpus_id):
@@ -65,7 +97,7 @@ def delete_corpus(corpus_id):
db.session.commit()
corpus = Corpus.query.get_or_404(corpus_id)
- if not (corpus.user == current_user or current_user.is_administrator()):
+ if not user_can_delete_corpus(current_user, corpus):
abort(403)
thread = Thread(
target=_delete_corpus,
@@ -79,6 +111,8 @@ def delete_corpus(corpus_id):
@login_required
def analyse_corpus(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
+ if not user_can_read_corpus(current_user, corpus):
+ abort(403)
return render_template(
'corpora/analyse_corpus.html.j2',
corpus=corpus,
@@ -96,7 +130,7 @@ def build_corpus(corpus_id):
db.session.commit()
corpus = Corpus.query.get_or_404(corpus_id)
- if not (corpus.user == current_user or current_user.is_administrator()):
+ if not user_can_update_corpus(current_user, corpus):
abort(403)
# Check if the corpus has corpus files
if not corpus.files.all():
@@ -114,7 +148,7 @@ def build_corpus(corpus_id):
@login_required
def create_corpus_file(corpus_id):
corpus = Corpus.query.get_or_404(corpus_id)
- if not (corpus.user == current_user or current_user.is_administrator()):
+ if not user_can_update_corpus(current_user, corpus):
abort(403)
form = CreateCorpusFileForm(prefix='create-corpus-file-form')
if form.is_submitted():
@@ -157,16 +191,13 @@ def create_corpus_file(corpus_id):
)
-@bp.route('//files/',
- methods=['GET', 'POST'])
+@bp.route('//files/', methods=['GET', 'POST'])
@login_required
def corpus_file(corpus_id, corpus_file_id):
- corpus_file = CorpusFile.query.get_or_404(corpus_file_id)
- if corpus_file.corpus.id != corpus_id:
- abort(404)
+ corpus_file = CorpusFile.query.filter_by(corpus_id = corpus_id, id=corpus_file_id).first_or_404()
if not (corpus_file.corpus.user == current_user or current_user.is_administrator()):
abort(403)
- form = EditCorpusFileForm(
+ form = UpdateCorpusFileForm(
data=corpus_file.to_json_serializeable(),
prefix='edit-corpus-file-form'
)
@@ -196,9 +227,7 @@ def delete_corpus_file(corpus_id, corpus_file_id):
corpus_file.delete()
db.session.commit()
- corpus_file = CorpusFile.query.get_or_404(corpus_file_id)
- if corpus_file.corpus.id != corpus_id:
- abort(404)
+ corpus_file = CorpusFile.query.filter_by(corpus_id = corpus_id, id=corpus_file_id).first_or_404()
if not (corpus_file.corpus.user == current_user or current_user.is_administrator()):
abort(403)
thread = Thread(
@@ -212,9 +241,7 @@ def delete_corpus_file(corpus_id, corpus_file_id):
@bp.route('//files//download')
@login_required
def download_corpus_file(corpus_id, corpus_file_id):
- corpus_file = CorpusFile.query.get_or_404(corpus_file_id)
- if corpus_file.corpus.id != corpus_id:
- abort(404)
+ corpus_file = CorpusFile.query.filter_by(corpus_id = corpus_id, id=corpus_file_id).first_or_404()
if not (corpus_file.corpus.user == current_user or current_user.is_administrator()):
abort(403)
return send_from_directory(
diff --git a/app/templates/_scripts.html.j2 b/app/templates/_scripts.html.j2
index 323c576c..7cc8a8f8 100644
--- a/app/templates/_scripts.html.j2
+++ b/app/templates/_scripts.html.j2
@@ -46,13 +46,13 @@
}
// Set the data-length attribute on inputs with the maxlength attribute
- for (let inputElement of document.querySelectorAll('input[maxlength]')) {
+ for (let inputElement of document.querySelectorAll('input[maxlength], textarea[maxlength]')) {
inputElement.dataset.length = inputElement.getAttribute('maxlength');
}
// Initialize components
M.AutoInit();
- M.CharacterCounter.init(document.querySelectorAll('input[data-length][type="text"], input[data-length][type="email"], input[data-length][type="search"], input[data-length][type="password"], input[data-length][type="tel"], input[data-length][type="url"]'));
+ M.CharacterCounter.init(document.querySelectorAll('input[data-length][type="text"], input[data-length][type="email"], input[data-length][type="search"], input[data-length][type="password"], input[data-length][type="tel"], input[data-length][type="url"], textarea[data-length]'));
M.Dropdown.init(
document.querySelectorAll('#nav-more-dropdown-trigger'),
{alignment: 'right', constrainWidth: false, coverTrigger: false}
diff --git a/app/templates/corpora/create_corpus.html.j2 b/app/templates/corpora/create_corpus.html.j2
index 50cbc6e7..ccb49877 100644
--- a/app/templates/corpora/create_corpus.html.j2
+++ b/app/templates/corpora/create_corpus.html.j2
@@ -9,28 +9,19 @@
{{ title }}
+
Here you can create a new corpus, just choose a title and a description which will help to identify it later. After the corpus has been created, annotated texts in verticalized text format can be added to it on the corpus overview page.
-
-
Fill out the following form to add a corpus to your corpora.