From 05340ea7ff0a282660529df9d50f8641b711a40f Mon Sep 17 00:00:00 2001 From: Inga Kirschnick Date: Mon, 7 Nov 2022 09:15:38 +0100 Subject: [PATCH] Contribution Package Spacy NLP --- app/SpaCyNLPPipelineModel.defaults.yml | 56 ++++++++ app/contributions/forms.py | 43 ++++-- app/contributions/routes.py | 112 +++++++++++++++- app/models.py | 17 ++- .../js/RessourceLists/SpacyNLPModelList.js | 76 +++++++++++ .../RessourceLists/TesseractOCRModelList.js | 5 +- app/templates/_scripts.html.j2 | 1 + .../contributions/_breadcrumbs.html.j2 | 13 ++ .../contributions/contribute.html.j2 | 32 ----- .../contribute_spacy_nlp_models.html.j2 | 124 ++++++++++++++++++ .../contribution_overview.html.j2 | 58 +++++++- .../spacy_nlp_pipeline_model.html.j2 | 56 ++++++++ migrations/versions/721829b5dd25_.py | 28 ++++ 13 files changed, 564 insertions(+), 57 deletions(-) create mode 100644 app/static/js/RessourceLists/SpacyNLPModelList.js delete mode 100644 app/templates/contributions/contribute.html.j2 create mode 100644 app/templates/contributions/contribute_spacy_nlp_models.html.j2 create mode 100644 app/templates/contributions/spacy_nlp_pipeline_model.html.j2 create mode 100644 migrations/versions/721829b5dd25_.py diff --git a/app/SpaCyNLPPipelineModel.defaults.yml b/app/SpaCyNLPPipelineModel.defaults.yml index 576f85e4..ed4ea3bd 100644 --- a/app/SpaCyNLPPipelineModel.defaults.yml +++ b/app/SpaCyNLPPipelineModel.defaults.yml @@ -5,6 +5,62 @@ publisher_url: 'https://github.com/explosion' publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/de_core_news_md-3.4.0' publishing_year: 2022 + pipeline_name: 'de_core_news_md' version: '3.4.0' compatible_service_versions: - '0.1.0' +- title: 'en_core_web_md-3.4.1' + description: 'English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.' 
+ url: 'https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.4.1/en_core_web_md-3.4.1.tar.gz' + publisher: 'Explosion' + publisher_url: 'https://github.com/explosion' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/en_core_web_md-3.4.1' + publishing_year: 2022 + pipeline_name: 'en_core_web_md' + version: '3.4.1' + compatible_service_versions: + - '0.1.0' +- title: 'uk_core_news_md-3.4.0' + description: 'Ukrainian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' + url: 'https://github.com/explosion/spacy-models/releases/download/uk_core_news_md-3.4.0/uk_core_news_md-3.4.0.tar.gz' + publisher: 'Explosion' + publisher_url: 'https://github.com/explosion' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/uk_core_news_md-3.4.0' + publishing_year: 2022 + pipeline_name: 'uk_core_news_md' + version: '3.4.0' + compatible_service_versions: + - '0.1.0' +- title: 'zh_core_web_md-3.4.0' + description: 'Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.' + url: 'https://github.com/explosion/spacy-models/releases/download/zh_core_web_md-3.4.0/zh_core_web_md-3.4.0.tar.gz' + publisher: 'Explosion' + publisher_url: 'https://github.com/explosion' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/zh_core_web_md-3.4.0' + publishing_year: 2022 + pipeline_name: 'zh_core_web_md' + version: '3.4.0' + compatible_service_versions: + - '0.1.0' +- title: 'ru_core_news_md-3.4.0' + description: 'Russian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' 
+ url: 'https://github.com/explosion/spacy-models/releases/download/ru_core_news_md-3.4.0/ru_core_news_md-3.4.0.tar.gz' + publisher: 'Explosion' + publisher_url: 'https://github.com/explosion' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/ru_core_news_md-3.4.0' + publishing_year: 2022 + pipeline_name: 'ru_core_news_md' + version: '3.4.0' + compatible_service_versions: + - '0.1.0' +- title: 'la_core_cltk_sm-0.1.0' + description: 'Latin pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' + url: 'https://github.com/diyclassics/latin-spacy-models/raw/main/la_core_cltk_sm/la_core_cltk_sm-0.1.0.tar.gz' + publisher: 'DIY Classics' + publisher_url: 'https://github.com/diyclassics/' + publishing_url: 'https://github.com/diyclassics/latin-spacy-models/tree/main/la_core_cltk_sm' + publishing_year: 2022 + pipeline_name: 'la_core_cltk_sm' + version: '0.1.0' + compatible_service_versions: + - '0.1.0' diff --git a/app/contributions/forms.py b/app/contributions/forms.py index 8577ee97..dcdfaea8 100644 --- a/app/contributions/forms.py +++ b/app/contributions/forms.py @@ -46,6 +46,18 @@ class CreateContributionBaseForm(FlaskForm): ) submit = SubmitField() +class EditForm(CreateContributionBaseForm): + def prefill(self, model_file): + ''' Pre-fill the form with data of an exististing corpus file ''' + self.title.data = model_file.title + self.description.data = model_file.description + self.publisher.data = model_file.publisher + self.publishing_year.data = model_file.publishing_year + self.publisher_url.data = model_file.publisher_url + self.publishing_url.data = model_file.publishing_url + self.version.data = model_file.version + self.shared.data = model_file.shared + class TesseractOCRModelContributionForm(CreateContributionBaseForm): tesseract_model_file = FileField( 'File', @@ -67,16 +79,23 @@ class TesseractOCRModelContributionForm(CreateContributionBaseForm): ] 
self.compatible_service_versions.default = ''
 
-class TesseractOCRModelEditForm(CreateContributionBaseForm):
-    def prefill(self, model_file):
-        ''' Pre-fill the form with data of an exististing corpus file '''
-        self.title.data = model_file.title
-        self.description.data = model_file.description
-        self.publisher.data = model_file.publisher
-        self.publishing_year.data = model_file.publishing_year
-        self.publisher_url.data = model_file.publisher_url
-        self.publishing_url.data = model_file.publishing_url
-        self.version.data = model_file.version
-        self.shared.data = model_file.shared
-
+class SpacyNLPModelContributionForm(CreateContributionBaseForm):  # upload form for a contributed spaCy NLP model
+    spacy_model_file = FileField(
+        'File',
+        validators=[FileRequired()]
+    )
+    compatible_service_versions = SelectMultipleField(
+        'Compatible service versions'
+    )
+    def validate_spacy_model_file(self, field):  # WTForms only runs inline validators named validate_<field name>
+        if not (field.data.filename or '').lower().endswith('.tar.gz'):  # check the suffix; a mimetype never equals '.tar.gz'
+            raise ValidationError('.tar.gz files only!')
+    def __init__(self, *args, **kwargs):  # fills the version choices from the 'spacy-nlp-pipeline' service manifest
+        service_manifest = SERVICES['spacy-nlp-pipeline']
+        super().__init__(*args, **kwargs)
+        self.compatible_service_versions.choices = [('', 'Choose your option')]
+        self.compatible_service_versions.choices += [
+            (x, x) for x in service_manifest['versions']
+        ]
+        self.compatible_service_versions.default = ''
 
diff --git a/app/contributions/routes.py b/app/contributions/routes.py
index 385e1eec..b63a43ec 100644
--- a/app/contributions/routes.py
+++ b/app/contributions/routes.py
@@ -1,11 +1,11 @@
-from flask import abort, current_app, flash, Markup, redirect, render_template, url_for
+from flask import abort, current_app, flash, Markup, render_template, url_for
 from flask_login import login_required, current_user
 from threading import Thread
 from app import db
 from app.decorators import admin_required, permission_required
-from app.models import TesseractOCRPipelineModel, Permission
+from app.models import Permission, SpaCyNLPPipelineModel, TesseractOCRPipelineModel
 from . 
import bp
-from .forms import TesseractOCRModelContributionForm, TesseractOCRModelEditForm
+from .forms import TesseractOCRModelContributionForm, EditForm, SpacyNLPModelContributionForm
 
 
 @bp.before_request
@@ -22,20 +22,24 @@ def contributions():
     tesseract_ocr_user_models = [
         x for x in current_user.tesseract_ocr_pipeline_models
     ]
+    spacy_nlp_user_models = [
+        x for x in current_user.spacy_nlp_pipeline_models
+    ]
     return render_template(
         'contributions/contribution_overview.html.j2',
-        tesseractOCRUserModels=tesseract_ocr_user_models,
+        tesseract_ocr_user_models=tesseract_ocr_user_models,
+        spacy_nlp_user_models=spacy_nlp_user_models,
         userId = current_user.hashid,
         title='Contribution Overview'
     )
 
-@bp.route('/', methods=['GET', 'POST'])
+@bp.route('/edit-tesseract-model/', methods=['GET', 'POST'])
 @login_required
 def tesseract_ocr_pipeline_model(tesseract_ocr_pipeline_model_id):
     tesseract_ocr_pipeline_model = TesseractOCRPipelineModel.query.get_or_404(
         tesseract_ocr_pipeline_model_id
     )
-    form = TesseractOCRModelEditForm(prefix='tesseract-ocr-model-edit-form')
+    form = EditForm(prefix='tesseract-ocr-model-edit-form')
     if form.validate_on_submit():
         if tesseract_ocr_pipeline_model.title != form.title.data:
             tesseract_ocr_pipeline_model.title = form.title.data
@@ -65,7 +69,7 @@ def tesseract_ocr_pipeline_model(tesseract_ocr_pipeline_model_id):
         title='Edit your Tesseract OCR model'
     )
 
-@bp.route('/', methods=['DELETE'])
+@bp.route('/edit-tesseract-model/', methods=['DELETE'])
 @login_required
 def delete_tesseract_model(tesseract_ocr_pipeline_model_id):
     def _delete_tesseract_model(app, tesseract_ocr_pipeline_model_id):
@@ -123,3 +127,109 @@ def add_tesseract_ocr_pipeline_model():
         tesseract_ocr_pipeline_models=tesseract_ocr_pipeline_models,
         title='Tesseract OCR Model Contribution'
     )
+
+@bp.route('/edit-spacy-model//', methods=['GET', 'POST'])
+@login_required
+def spacy_nlp_pipeline_model(spacy_nlp_pipeline_model_id):
+    ''' Show and process the edit form of an existing spaCy NLP model '''
+    spacy_nlp_pipeline_model = SpaCyNLPPipelineModel.query.get_or_404(
+        spacy_nlp_pipeline_model_id
+    )
+    # Same rule as delete_spacy_model: only the owner or an administrator
+    # may change the model.
+    if not (
+        spacy_nlp_pipeline_model.user == current_user
+        or current_user.is_administrator()
+    ):
+        abort(403)
+    form = EditForm(prefix='spacy-nlp-model-edit-form')
+    if form.validate_on_submit():
+        if spacy_nlp_pipeline_model.title != form.title.data:
+            spacy_nlp_pipeline_model.title = form.title.data
+        if spacy_nlp_pipeline_model.description != form.description.data:
+            spacy_nlp_pipeline_model.description = form.description.data
+        if spacy_nlp_pipeline_model.publisher != form.publisher.data:
+            spacy_nlp_pipeline_model.publisher = form.publisher.data
+        if spacy_nlp_pipeline_model.publishing_year != form.publishing_year.data:
+            spacy_nlp_pipeline_model.publishing_year = form.publishing_year.data
+        if spacy_nlp_pipeline_model.publisher_url != form.publisher_url.data:
+            spacy_nlp_pipeline_model.publisher_url = form.publisher_url.data
+        if spacy_nlp_pipeline_model.publishing_url != form.publishing_url.data:
+            spacy_nlp_pipeline_model.publishing_url = form.publishing_url.data
+        if spacy_nlp_pipeline_model.version != form.version.data:
+            spacy_nlp_pipeline_model.version = form.version.data
+        if spacy_nlp_pipeline_model.shared != form.shared.data:
+            spacy_nlp_pipeline_model.shared = form.shared.data
+        db.session.commit()
+        # Markup.format() escapes the user-supplied title before it is flashed.
+        message = Markup('Model "{}" updated').format(spacy_nlp_pipeline_model.title)
+        flash(message, category='corpus')
+        return {}, 201, {'Location': url_for('contributions.contributions')}
+    form.prefill(spacy_nlp_pipeline_model)
+    return render_template(
+        'contributions/spacy_nlp_pipeline_model.html.j2',
+        spacy_nlp_pipeline_model=spacy_nlp_pipeline_model,
+        form=form,
+        title='Edit your spaCy NLP model'
+    )
+
+@bp.route('/edit-spacy-model/', methods=['DELETE'])
+@login_required
+def delete_spacy_model(spacy_nlp_pipeline_model_id):
+    ''' Delete a spaCy NLP model in a background thread and answer with 202 '''
+    def _delete_spacy_model(app, spacy_nlp_pipeline_model_id):
+        with app.app_context():
+            model = SpaCyNLPPipelineModel.query.get(spacy_nlp_pipeline_model_id)
+            # query.get() returns None if the row vanished in the meantime.
+            if model is None:
+                return
+            model.delete()
+            db.session.commit()
+
+    model = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
+    if not (model.user == current_user or current_user.is_administrator()):
+        abort(403)
+    thread = Thread(
+        target=_delete_spacy_model,
+        args=(current_app._get_current_object(), spacy_nlp_pipeline_model_id)
+    )
+    thread.start()
+    return {}, 202
+
+@bp.route('/add-spacy-nlp-pipeline-model', methods=['GET', 'POST'])
+@login_required
+def add_spacy_nlp_pipeline_model():
+    ''' Show and process the contribution form for a new spaCy NLP model '''
+    form = SpacyNLPModelContributionForm(prefix='contribute-spacy-nlp-pipeline-model-form')
+    if form.is_submitted():
+        if not form.validate():
+            response = {'errors': form.errors}
+            return response, 400
+        try:
+            spacy_nlp_model = SpaCyNLPPipelineModel.create(
+                form.spacy_model_file.data,
+                compatible_service_versions=form.compatible_service_versions.data,
+                description=form.description.data,
+                publisher=form.publisher.data,
+                publisher_url=form.publisher_url.data,
+                publishing_url=form.publishing_url.data,
+                publishing_year=form.publishing_year.data,
+                shared=form.shared.data,
+                title=form.title.data,
+                version=form.version.data,
+                user=current_user
+            )
+        except OSError:
+            abort(500)
+        db.session.commit()
+        # Markup.format() escapes the user-supplied title before it is flashed.
+        message = Markup('Model "{}" created').format(spacy_nlp_model.title)
+        flash(message)
+        return {}, 201, {'Location': url_for('contributions.contributions')}
+    spacy_nlp_pipeline_models = SpaCyNLPPipelineModel.query.all()
+    return render_template(
+        'contributions/contribute_spacy_nlp_models.html.j2',
+        form=form,
+        spacy_nlp_pipeline_models=spacy_nlp_pipeline_models,
+        title='spaCy NLP Model Contribution'
+    )
diff --git a/app/models.py b/app/models.py
index e1acf6de..90d16f48 100644
--- a/app/models.py
+++ b/app/models.py
@@ -520,6 +520,10 @@ class User(HashidMixin, UserMixin, db.Model):
                 x.hashid: x.to_json(relationships=True)
                 for x in self.tesseract_ocr_pipeline_models
             }
+            _json['spacy_nlp_pipeline_models'] = {
+                x.hashid: x.to_json(relationships=True)
+                for x in self.spacy_nlp_pipeline_models
+            }
         return 
_json class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model): @@ -643,6 +647,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model): publisher_url = db.Column(db.String(512)) publishing_url = db.Column(db.String(512)) publishing_year = db.Column(db.Integer) + pipeline_name = db.Column(db.String(64)) shared = db.Column(db.Boolean, default=False) # Backrefs: user: User @@ -675,6 +680,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model): model.shared = True model.title = m['title'] model.version = m['version'] + model.pipeline_name = m['pipeline_name'] continue model = SpaCyNLPPipelineModel( compatible_service_versions=m['compatible_service_versions'], @@ -686,7 +692,8 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model): shared=True, title=m['title'], user=nopaque_user, - version=m['version'] + version=m['version'], + pipeline_name=m['pipeline_name'] ) db.session.add(model) db.session.flush(objects=[model]) @@ -708,6 +715,13 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model): f.write(chunk) pbar.close() db.session.commit() + + def delete(self): + try: + os.remove(self.path) + except OSError as e: + current_app.logger.error(e) + db.session.delete(self) def to_json(self, backrefs=False, relationships=False): _json = { @@ -718,6 +732,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model): 'publisher_url': self.publisher_url, 'publishing_url': self.publishing_url, 'publishing_year': self.publishing_year, + 'pipeline_name': self.pipeline_name, 'shared': self.shared, 'title': self.title, **self.file_mixin_to_json() diff --git a/app/static/js/RessourceLists/SpacyNLPModelList.js b/app/static/js/RessourceLists/SpacyNLPModelList.js new file mode 100644 index 00000000..0e20191b --- /dev/null +++ b/app/static/js/RessourceLists/SpacyNLPModelList.js @@ -0,0 +1,76 @@ +class SpacyNLPModelList { + constructor () { + + this.elements = { + spacyNLPModelList: document.querySelector('#spacy-nlp-model-list'), + 
deleteButtons: document.querySelectorAll('.delete-spacy-model-button'), + editButtons: document.querySelectorAll('.edit-spacy-model-button'), + + } + } + + init () { + let userId = this.elements.spacyNLPModelList.dataset.userId; + + for (let deleteButton of this.elements.deleteButtons) { + deleteButton.addEventListener('click', () => {this.deleteModel(deleteButton, userId);}); + } + + for (let editButton of this.elements.editButtons) { + editButton.addEventListener('click', () => {this.editModel(editButton);}); + } + } + + deleteModel(deleteButton, userId) { + return new Promise((resolve, reject) => { + let modelId = deleteButton.dataset.modelId; + let model = app.data.users[userId].spacy_nlp_pipeline_models[modelId]; + let modalElement = Utils.elementFromString( + ` + + ` + ); + document.querySelector('#modals').appendChild(modalElement); + let modal = M.Modal.init( + modalElement, + { + dismissible: false, + onCloseEnd: () => { + modal.destroy(); + modalElement.remove(); + } + } + ); + let confirmElement = modalElement.querySelector('.action-button[data-action="confirm"]'); + confirmElement.addEventListener('click', (event) => { + let modelTitle = model.title; + fetch(`/contributions/edit-spacy-model/${modelId}`, {method: 'DELETE'}) + .then( + (response) => { + app.flash(`Model "${modelTitle}" marked for deletion`, 'corpus'); + resolve(response); + }, + (response) => { + if (response.status === 403) {app.flash('Forbidden', 'error');} + if (response.status === 404) {app.flash('Not Found', 'error');} + reject(response); + } + ); + }); + modal.open(); + }); + } + + editModel(editButton) { + window.location.href = `/contributions/edit-spacy-model/${editButton.dataset.modelId}`; + } +} diff --git a/app/static/js/RessourceLists/TesseractOCRModelList.js b/app/static/js/RessourceLists/TesseractOCRModelList.js index 9080447e..782f5d7e 100644 --- a/app/static/js/RessourceLists/TesseractOCRModelList.js +++ b/app/static/js/RessourceLists/TesseractOCRModelList.js @@ -25,7 
+25,6 @@ class TesseractOCRModelList { return new Promise((resolve, reject) => { let modelId = deleteButton.dataset.modelId; let model = app.data.users[userId].tesseract_ocr_pipeline_models[modelId]; - let modalElement = Utils.elementFromString( ` + {# spaCy NLP Models #} +
+

My spaCy NLP Pipeline Models

+

Here you can see and edit the models that you have created. You can also create new models.

+ +
+
+
+
+
+ + + + + + + + + + + {% if spacy_nlp_user_models|length > 0 %} + {% for m in spacy_nlp_user_models %} + + + {% if m.description == '' %} + + {% else %} + + {% endif %} + + + + {% endfor %} + {% else %} + + + + {% endif %} + +
TitleDescriptionBiblio
{{ m.title }}Description is not available.{{ m.description }}{{ m.publisher }} ({{ m.publishing_year }}), {{ m.title }} {{ m.version}}, {{ m.publishing_url }} + delete + edit +
No models available.
+
+
+ +
+
+
+
@@ -71,5 +123,7 @@ {% endblock scripts %} diff --git a/app/templates/contributions/spacy_nlp_pipeline_model.html.j2 b/app/templates/contributions/spacy_nlp_pipeline_model.html.j2 new file mode 100644 index 00000000..04d7506f --- /dev/null +++ b/app/templates/contributions/spacy_nlp_pipeline_model.html.j2 @@ -0,0 +1,56 @@ +{% extends "base.html.j2" %} +{% import "materialize/wtf.html.j2" as wtf %} +{# {% from "contributions/_breadcrumbs.html.j2" import breadcrumbs with context %} #} + +{% block main_attribs %} class="service-scheme" data-service="spacy-nlp-pipeline"{% endblock main_attribs %} + +{% block page_content %} +
+
+
+

{{ title }}

+
+ +
+
+
+
+ {{ form.hidden_tag() }} +
+
+ {{ wtf.render_field(form.title, material_icon='title') }} +
+
+ {{ wtf.render_field(form.description, material_icon='description') }} +
+
+ {{ wtf.render_field(form.publisher, material_icon='account_balance') }} +
+
+ {{ wtf.render_field(form.publishing_year, material_icon='calendar_month') }} +
+
+ {{ wtf.render_field(form.publisher_url, material_icon='link') }} +
+
+ {{ wtf.render_field(form.publishing_url, material_icon='link') }} +
+
+ {{ wtf.render_field(form.version, material_icon='apps') }} +
+
+

+
+ {{ wtf.render_field(form.shared) }} +
+
+
+
+ {{ wtf.render_field(form.submit, material_icon='send') }} +
+
+
+
+
+
+{% endblock page_content %} diff --git a/migrations/versions/721829b5dd25_.py b/migrations/versions/721829b5dd25_.py new file mode 100644 index 00000000..124ca07d --- /dev/null +++ b/migrations/versions/721829b5dd25_.py @@ -0,0 +1,28 @@ +"""empty message + +Revision ID: 721829b5dd25 +Revises: 31dd42e5ea6f +Create Date: 2022-11-04 13:58:13.008301 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '721829b5dd25' +down_revision = '31dd42e5ea6f' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('spacy_nlp_pipeline_models', sa.Column('pipeline_name', sa.String(length=64), nullable=True)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('spacy_nlp_pipeline_models', 'pipeline_name') + # ### end Alembic commands ###