From 46ba14b923e8e062b2dfd89b7b08486d07596a51 Mon Sep 17 00:00:00 2001 From: Inga Kirschnick Date: Thu, 3 Nov 2022 15:38:35 +0100 Subject: [PATCH 01/29] Contribution Package Tesseract OCR --- app/contributions/forms.py | 40 ++++-- app/contributions/routes.py | 95 ++++++++++++-- app/models.py | 14 +- app/static/js/Forms/CreateContributionForm.js | 18 +++ app/static/js/Forms/Form.js | 1 + .../RessourceLists/TesseractOCRModelList.js | 77 +++++++++++ app/templates/_scripts.html.j2 | 2 + .../contributions/_breadcrumbs.html.j2 | 18 +++ .../contribute_tesseract_ocr_models.html.j2 | 124 ++++++++++++++++++ .../contribution_overview.html.j2 | 75 +++++++++++ .../tesseract_ocr_pipeline_model.html.j2 | 56 ++++++++ 11 files changed, 495 insertions(+), 25 deletions(-) create mode 100644 app/static/js/Forms/CreateContributionForm.js create mode 100644 app/static/js/RessourceLists/TesseractOCRModelList.js create mode 100644 app/templates/contributions/_breadcrumbs.html.j2 create mode 100644 app/templates/contributions/contribute_tesseract_ocr_models.html.j2 create mode 100644 app/templates/contributions/contribution_overview.html.j2 create mode 100644 app/templates/contributions/tesseract_ocr_pipeline_model.html.j2 diff --git a/app/contributions/forms.py b/app/contributions/forms.py index 44279a1d..8577ee97 100644 --- a/app/contributions/forms.py +++ b/app/contributions/forms.py @@ -1,3 +1,4 @@ +from xml.dom import ValidationErr from flask_wtf import FlaskForm from flask_wtf.file import FileField, FileRequired from wtforms import ( @@ -5,13 +6,13 @@ from wtforms import ( StringField, SubmitField, SelectMultipleField, - IntegerField + IntegerField, + ValidationError ) from wtforms.validators import InputRequired, Length from app.services import SERVICES - -class TesseractOCRModelContributionForm(FlaskForm): +class CreateContributionBaseForm(FlaskForm): title = StringField( 'Title', validators=[InputRequired(), Length(max=64)] @@ -24,9 +25,6 @@ class TesseractOCRModelContributionForm(FlaskForm): 'Version', validators=[InputRequired(), Length(max=16)] ) - compatible_service_versions = SelectMultipleField( - 'Compatible service versions' - ) publisher = StringField( 'Publisher', validators=[InputRequired(), Length(max=128)] @@ -43,10 +41,22 @@ class TesseractOCRModelContributionForm(FlaskForm): 'Publishing year', validators=[InputRequired()] ) - shared = BooleanField('Shared', validators=[InputRequired()]) - model_file = FileField('File',validators=[FileRequired()]) + shared = BooleanField( + 'Shared' + ) submit = SubmitField() +class TesseractOCRModelContributionForm(CreateContributionBaseForm): + tesseract_model_file = FileField( + 'File', + validators=[FileRequired()] + ) + compatible_service_versions = SelectMultipleField( + 'Compatible service versions' + ) + def validate_traineddata(self, field): + if field.data.mimetype != '.traineddata': + raise ValidationError('traineddata files only!') def __init__(self, *args, **kwargs): service_manifest = SERVICES['tesseract-ocr-pipeline'] @@ -56,3 +66,17 @@ class TesseractOCRModelContributionForm(FlaskForm): (x, x) for x in service_manifest['versions'].keys() ] self.compatible_service_versions.default = '' + +class TesseractOCRModelEditForm(CreateContributionBaseForm): + def prefill(self, model_file): + ''' Pre-fill the form with data of an exististing corpus file ''' + self.title.data = model_file.title + self.description.data = model_file.description + self.publisher.data = model_file.publisher + self.publishing_year.data = model_file.publishing_year + self.publisher_url.data = model_file.publisher_url + self.publishing_url.data = model_file.publishing_url + self.version.data = model_file.version + self.shared.data = model_file.shared + + diff --git a/app/contributions/routes.py b/app/contributions/routes.py index 287eda18..385e1eec 100644 --- a/app/contributions/routes.py +++ b/app/contributions/routes.py @@ -1,10 +1,11 @@ -from flask import abort, flash, Markup, render_template, url_for -from flask_login import login_required +from flask import abort, current_app, flash, Markup, redirect, render_template, url_for +from flask_login import login_required, current_user +from threading import Thread from app import db -from app.decorators import permission_required +from app.decorators import admin_required, permission_required from app.models import TesseractOCRPipelineModel, Permission from . import bp -from .forms import TesseractOCRModelContributionForm +from .forms import TesseractOCRModelContributionForm, TesseractOCRModelEditForm @bp.before_request @@ -14,13 +15,77 @@ def before_request(): pass -@bp.route('') +@bp.route('/') +@login_required +@admin_required def contributions(): - pass + tesseract_ocr_user_models = [ + x for x in current_user.tesseract_ocr_pipeline_models + ] + return render_template( + 'contributions/contribution_overview.html.j2', + tesseractOCRUserModels=tesseract_ocr_user_models, + userId = current_user.hashid, + title='Contribution Overview' + ) +@bp.route('/', methods=['GET', 'POST']) +@login_required +def tesseract_ocr_pipeline_model(tesseract_ocr_pipeline_model_id): + tesseract_ocr_pipeline_model = TesseractOCRPipelineModel.query.get_or_404( + tesseract_ocr_pipeline_model_id + ) + form = TesseractOCRModelEditForm(prefix='tesseract-ocr-model-edit-form') + if form.validate_on_submit(): + if tesseract_ocr_pipeline_model.title != form.title.data: + tesseract_ocr_pipeline_model.title = form.title.data + if tesseract_ocr_pipeline_model.description != form.description.data: + tesseract_ocr_pipeline_model.description = form.description.data + if tesseract_ocr_pipeline_model.publisher != form.publisher.data: + tesseract_ocr_pipeline_model.publisher = form.publisher.data + if tesseract_ocr_pipeline_model.publishing_year != form.publishing_year.data: + tesseract_ocr_pipeline_model.publishing_year = form.publishing_year.data + if tesseract_ocr_pipeline_model.publisher_url != form.publisher_url.data: + tesseract_ocr_pipeline_model.publisher_url = form.publisher_url.data + if tesseract_ocr_pipeline_model.publishing_url != form.publishing_url.data: + tesseract_ocr_pipeline_model.publishing_url = form.publishing_url.data + if tesseract_ocr_pipeline_model.version != form.version.data: + tesseract_ocr_pipeline_model.version = form.version.data + if tesseract_ocr_pipeline_model.shared != form.shared.data: + tesseract_ocr_pipeline_model.shared = form.shared.data + db.session.commit() + message = Markup(f'Model "{tesseract_ocr_pipeline_model.title}" updated') + flash(message, category='corpus') + return {}, 201, {'Location': url_for('contributions.contributions')} + form.prefill(tesseract_ocr_pipeline_model) + return render_template( + 'contributions/tesseract_ocr_pipeline_model.html.j2', + tesseract_ocr_pipeline_model=tesseract_ocr_pipeline_model, + form=form, + title='Edit your Tesseract OCR model' + ) -@bp.route('/tesseract-ocr-pipeline-models', methods=['GET', 'POST']) -def tesseract_ocr_pipeline_models(): +@bp.route('/', methods=['DELETE']) +@login_required +def delete_tesseract_model(tesseract_ocr_pipeline_model_id): + def _delete_tesseract_model(app, tesseract_ocr_pipeline_model_id): + with app.app_context(): + model = TesseractOCRPipelineModel.query.get(tesseract_ocr_pipeline_model_id) + model.delete() + db.session.commit() + + model = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id) + if not (model.user == current_user or current_user.is_administrator()): + abort(403) + thread = Thread( + target=_delete_tesseract_model, + args=(current_app._get_current_object(), tesseract_ocr_pipeline_model_id) + ) + thread.start() + return {}, 202 + +@bp.route('/add-tesseract-ocr-pipeline-model', methods=['GET', 'POST']) +def add_tesseract_ocr_pipeline_model(): form = TesseractOCRModelContributionForm( prefix='contribute-tesseract-ocr-pipeline-model-form' ) @@ -30,7 +95,7 @@ def tesseract_ocr_pipeline_models(): return response, 400 try: tesseract_ocr_model = TesseractOCRPipelineModel.create( - form.file.data, + form.tesseract_model_file.data, compatible_service_versions=form.compatible_service_versions.data, description=form.description.data, publisher=form.publisher.data, @@ -39,7 +104,8 @@ def tesseract_ocr_pipeline_models(): publishing_year=form.publishing_year.data, shared=form.shared.data, title=form.title.data, - version=form.version.data + version=form.version.data, + user=current_user ) except OSError: abort(500) @@ -47,8 +113,13 @@ def tesseract_ocr_pipeline_models(): message = Markup(f'Model "{tesseract_ocr_model.title}" created') flash(message) return {}, 201, {'Location': url_for('contributions.contributions')} + tesseract_ocr_pipeline_models = [ + x for x in TesseractOCRPipelineModel.query.all() + ] + return render_template( - 'contributions/contribute.html.j2', + 'contributions/contribute_tesseract_ocr_models.html.j2', form=form, - title='Contribution' + tesseract_ocr_pipeline_models=tesseract_ocr_pipeline_models, + title='Tesseract OCR Model Contribution' ) diff --git a/app/models.py b/app/models.py index cc5d60ce..e1acf6de 100644 --- a/app/models.py +++ b/app/models.py @@ -603,6 +603,13 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model): pbar.close() db.session.commit() + def delete(self): + try: + os.remove(self.path) + except OSError as e: + current_app.logger.error(e) + db.session.delete(self) + def to_json(self, backrefs=False, relationships=False): _json = { 'id': self.hashid, @@ -1023,11 +1030,8 @@ class CorpusFile(FileMixin, HashidMixin, db.Model): def delete(self): try: os.remove(self.path) - except OSError: - current_app.logger.error( - f'Removing {self.path} led to an OSError!' - ) - pass + except OSError as e: + current_app.logger.error(e) db.session.delete(self) self.corpus.status = CorpusStatus.UNPREPARED diff --git a/app/static/js/Forms/CreateContributionForm.js b/app/static/js/Forms/CreateContributionForm.js new file mode 100644 index 00000000..e7651ab0 --- /dev/null +++ b/app/static/js/Forms/CreateContributionForm.js @@ -0,0 +1,18 @@ +class CreateContributionForm extends Form { + static autoInit() { + let createContributionFormElements = document.querySelectorAll('.create-contribution-form'); + for (let createContributionFormElement of createContributionFormElements) { + new CreateContributionForm(createContributionFormElement); + } + } + + constructor(formElement) { + super(formElement); + + this.addEventListener('requestLoad', (event) => { + if (event.target.status === 201) { + window.location.href = event.target.getResponseHeader('Location'); + } + }); + } +} diff --git a/app/static/js/Forms/Form.js b/app/static/js/Forms/Form.js index 9a21e986..d93f3e2c 100644 --- a/app/static/js/Forms/Form.js +++ b/app/static/js/Forms/Form.js @@ -1,5 +1,6 @@ class Form { static autoInit() { + CreateContributionForm.autoInit(); CreateCorpusFileForm.autoInit(); CreateJobForm.autoInit(); } diff --git a/app/static/js/RessourceLists/TesseractOCRModelList.js b/app/static/js/RessourceLists/TesseractOCRModelList.js new file mode 100644 index 00000000..9080447e --- /dev/null +++ b/app/static/js/RessourceLists/TesseractOCRModelList.js @@ -0,0 +1,77 @@ +class TesseractOCRModelList { + constructor () { + + this.elements = { + tesseractOCRModelList: document.querySelector('#tesseract-ocr-model-list'), + deleteButtons: document.querySelectorAll('.delete-button'), + editButtons: document.querySelectorAll('.edit-button'), + + } + } + + init () { + let userId = this.elements.tesseractOCRModelList.dataset.userId; + + for (let deleteButton of this.elements.deleteButtons) { + deleteButton.addEventListener('click', () => {this.deleteModel(deleteButton, userId);}); + } + + for (let editButton of this.elements.editButtons) { + editButton.addEventListener('click', () => {this.editModel(editButton);}); + } + } + + deleteModel(deleteButton, userId) { + return new Promise((resolve, reject) => { + let modelId = deleteButton.dataset.modelId; + let model = app.data.users[userId].tesseract_ocr_pipeline_models[modelId]; + + let modalElement = Utils.elementFromString( + ` + + ` + ); + document.querySelector('#modals').appendChild(modalElement); + let modal = M.Modal.init( + modalElement, + { + dismissible: false, + onCloseEnd: () => { + modal.destroy(); + modalElement.remove(); + } + } + ); + let confirmElement = modalElement.querySelector('.action-button[data-action="confirm"]'); + confirmElement.addEventListener('click', (event) => { + let modelTitle = model.title; + fetch(`/contributions/${modelId}`, {method: 'DELETE'}) + .then( + (response) => { + app.flash(`Model "${modelTitle}" marked for deletion`, 'corpus'); + resolve(response); + }, + (response) => { + if (response.status === 403) {app.flash('Forbidden', 'error');} + if (response.status === 404) {app.flash('Not Found', 'error');} + reject(response); + } + ); + }); + modal.open(); + }); + } + + editModel(editButton) { + window.location.href = `/contributions/${editButton.dataset.modelId}`; + } +} diff --git a/app/templates/_scripts.html.j2 b/app/templates/_scripts.html.j2 index ccc32a05..3b93ef66 100644 --- a/app/templates/_scripts.html.j2 +++ b/app/templates/_scripts.html.j2 @@ -9,6 +9,7 @@ 'js/Forms/Form.js', 'js/Forms/CreateCorpusFileForm.js', 'js/Forms/CreateJobForm.js', + 'js/Forms/CreateContributionForm.js', 'js/CorpusAnalysis/CQiClient.js', 'js/CorpusAnalysis/CorpusAnalysisApp.js', 'js/CorpusAnalysis/CorpusAnalysisConcordance.js', @@ -24,6 +25,7 @@ 'js/RessourceLists/JobInputList.js', 'js/RessourceLists/JobResultList.js', 'js/RessourceLists/QueryResultList.js', + 'js/RessourceLists/TesseractOCRModelList.js', 'js/RessourceLists/UserList.js' %} diff --git a/app/templates/contributions/_breadcrumbs.html.j2 b/app/templates/contributions/_breadcrumbs.html.j2 new file mode 100644 index 00000000..9d49da68 --- /dev/null +++ b/app/templates/contributions/_breadcrumbs.html.j2 @@ -0,0 +1,18 @@ +{% set breadcrumbs %} +
  • navigate_next
  • +{% if request.path == url_for('.contributions') %} +
  • Contributions Overview
  • +{% elif request.path == url_for('.tesseract_ocr_pipeline_model', tesseract_ocr_pipeline_model_id=tesseract_ocr_pipeline_model.id) %} +
  • Contributions Overview
  • +
  • navigate_next
  • +
  • + + Edit {{ tesseract_ocr_pipeline_model.title }} + +
  • +{% elif request.path == url_for('.add_tesseract_ocr_pipeline_model, tesseract_ocr_pipeline_model=nn') %} +
  • Contributions Overview
  • +
  • navigate_next
  • +
  • {{ title }}
  • +{% endif %} +{% endset %} diff --git a/app/templates/contributions/contribute_tesseract_ocr_models.html.j2 b/app/templates/contributions/contribute_tesseract_ocr_models.html.j2 new file mode 100644 index 00000000..d7c8bd41 --- /dev/null +++ b/app/templates/contributions/contribute_tesseract_ocr_models.html.j2 @@ -0,0 +1,124 @@ +{% extends "base.html.j2" %} +{% import "materialize/wtf.html.j2" as wtf %} +{# {% from "contributions/_breadcrumbs.html.j2" import breadcrumbs with context %} #} + +{% block main_attribs %} class="service-scheme" data-service="tesseract-ocr-pipeline"{% endblock main_attribs %} + +{% block page_content %} +
    +
    +
    +

    {{ title }}

    +
    + +
    +
    +

     

    +

     

    + + + +
    +
    + +
    +
    +
    +
    +
    +
    + layersTesseract OCR Models +

    You can add more Tesseract OCR models using the form below. They will automatically appear in the list of usable models.

    +

    Information about the already existing models.

    +

    Edit already uploaded models

    +
    +
    +
    +
    +
    +
    + +
    +

    Add a model

    +
    +
    +
    + {{ form.hidden_tag() }} +
    +
    + {{ wtf.render_field(form.tesseract_model_file, accept='.traineddata', placeholder='Choose a .traineddata file') }} +
    +
    + {{ wtf.render_field(form.title, material_icon='title') }} +
    +
    + {{ wtf.render_field(form.description, material_icon='description') }} +
    +
    + {{ wtf.render_field(form.publisher, material_icon='account_balance') }} +
    +
    + {{ wtf.render_field(form.publishing_year, material_icon='calendar_month') }} +
    +
    + {{ wtf.render_field(form.publisher_url, material_icon='link') }} +
    +
    + {{ wtf.render_field(form.publishing_url, material_icon='link') }} +
    +
    + {{ wtf.render_field(form.version, material_icon='apps') }} +
    +
    + {{ wtf.render_field(form.compatible_service_versions) }} +
    +
    +

    +
    + {{ wtf.render_field(form.shared) }} +
    +
    +
    +
    + {{ wtf.render_field(form.submit, material_icon='send') }} +
    +
    +
    +
    +
    +
    +{% endblock page_content %} + +{% block modals %} +{{ super() }} + +{% endblock modals %} diff --git a/app/templates/contributions/contribution_overview.html.j2 b/app/templates/contributions/contribution_overview.html.j2 new file mode 100644 index 00000000..6a1ebb1e --- /dev/null +++ b/app/templates/contributions/contribution_overview.html.j2 @@ -0,0 +1,75 @@ +{% extends "base.html.j2" %} +{% import "materialize/wtf.html.j2" as wtf %} +{% from "contributions/_breadcrumbs.html.j2" import breadcrumbs with context %} + +{% block page_content %} +
    +
    +
    +

    {{ title }}

    + + {# Tesseract OCR Models #} +
    +

    My Tesseract OCR Pipeline Models

    +

    Here you can see and edit the models that you have created. You can also create new models.

    + +
    +
    +
    +
    +
    + + + + + + + + + + + {% if tesseractOCRUserModels|length > 0 %} + {% for m in tesseractOCRUserModels %} + + + {% if m.description == '' %} + + {% else %} + + {% endif %} + + + + {% endfor %} + {% else %} + + + + {% endif %} + +
    TitleDescriptionBiblio
    {{ m.title }}Description is not available.{{ m.description }}{{ m.publisher }} ({{ m.publishing_year }}), {{ m.title }} {{ m.version}}, {{ m.publishing_url }} + delete + edit +
    No models available.
    +
    +
    + +
    +
    +
    +
    + +
    +
    +
    +{% endblock page_content %} + +{% block scripts %} +{{ super() }} + +{% endblock scripts %} diff --git a/app/templates/contributions/tesseract_ocr_pipeline_model.html.j2 b/app/templates/contributions/tesseract_ocr_pipeline_model.html.j2 new file mode 100644 index 00000000..4db82349 --- /dev/null +++ b/app/templates/contributions/tesseract_ocr_pipeline_model.html.j2 @@ -0,0 +1,56 @@ +{% extends "base.html.j2" %} +{% import "materialize/wtf.html.j2" as wtf %} +{% from "contributions/_breadcrumbs.html.j2" import breadcrumbs with context %} + +{% block main_attribs %} class="service-scheme" data-service="tesseract-ocr-pipeline"{% endblock main_attribs %} + +{% block page_content %} +
    +
    +
    +

    {{ title }}

    +
    + +
    +
    +
    +
    + {{ form.hidden_tag() }} +
    +
    + {{ wtf.render_field(form.title, material_icon='title') }} +
    +
    + {{ wtf.render_field(form.description, material_icon='description') }} +
    +
    + {{ wtf.render_field(form.publisher, material_icon='account_balance') }} +
    +
    + {{ wtf.render_field(form.publishing_year, material_icon='calendar_month') }} +
    +
    + {{ wtf.render_field(form.publisher_url, material_icon='link') }} +
    +
    + {{ wtf.render_field(form.publishing_url, material_icon='link') }} +
    +
    + {{ wtf.render_field(form.version, material_icon='apps') }} +
    +
    +

    +
    + {{ wtf.render_field(form.shared) }} +
    +
    +
    +
    + {{ wtf.render_field(form.submit, material_icon='send') }} +
    +
    +
    +
    +
    +
    +{% endblock page_content %} From 05340ea7ff0a282660529df9d50f8641b711a40f Mon Sep 17 00:00:00 2001 From: Inga Kirschnick Date: Mon, 7 Nov 2022 09:15:38 +0100 Subject: [PATCH 02/29] Contribution Package Spacy NLP --- app/SpaCyNLPPipelineModel.defaults.yml | 56 ++++++++ app/contributions/forms.py | 43 ++++-- app/contributions/routes.py | 112 +++++++++++++++- app/models.py | 17 ++- .../js/RessourceLists/SpacyNLPModelList.js | 76 +++++++++++ .../RessourceLists/TesseractOCRModelList.js | 5 +- app/templates/_scripts.html.j2 | 1 + .../contributions/_breadcrumbs.html.j2 | 13 ++ .../contributions/contribute.html.j2 | 32 ----- .../contribute_spacy_nlp_models.html.j2 | 124 ++++++++++++++++++ .../contribution_overview.html.j2 | 58 +++++++- .../spacy_nlp_pipeline_model.html.j2 | 56 ++++++++ migrations/versions/721829b5dd25_.py | 28 ++++ 13 files changed, 564 insertions(+), 57 deletions(-) create mode 100644 app/static/js/RessourceLists/SpacyNLPModelList.js delete mode 100644 app/templates/contributions/contribute.html.j2 create mode 100644 app/templates/contributions/contribute_spacy_nlp_models.html.j2 create mode 100644 app/templates/contributions/spacy_nlp_pipeline_model.html.j2 create mode 100644 migrations/versions/721829b5dd25_.py diff --git a/app/SpaCyNLPPipelineModel.defaults.yml b/app/SpaCyNLPPipelineModel.defaults.yml index 576f85e4..ed4ea3bd 100644 --- a/app/SpaCyNLPPipelineModel.defaults.yml +++ b/app/SpaCyNLPPipelineModel.defaults.yml @@ -5,6 +5,62 @@ publisher_url: 'https://github.com/explosion' publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/de_core_news_md-3.4.0' publishing_year: 2022 + pipeline_name: 'de_core_news_md' version: '3.4.0' compatible_service_versions: - '0.1.0' +- title: 'en_core_web_md-3.4.1' + description: 'English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.' + url: 'https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.4.1/en_core_web_md-3.4.1.tar.gz' + publisher: 'Explosion' + publisher_url: 'https://github.com/explosion' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/en_core_web_md-3.4.1' + publishing_year: 2022 + pipeline_name: 'en_core_web_md' + version: '3.4.1' + compatible_service_versions: + - '0.1.0' +- title: 'uk_core_news_md-3.4.0' + description: 'Ukrainian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' + url: 'https://github.com/explosion/spacy-models/releases/download/uk_core_news_md-3.4.0/uk_core_news_md-3.4.0.tar.gz' + publisher: 'Explosion' + publisher_url: 'https://github.com/explosion' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/uk_core_news_md-3.4.0' + publishing_year: 2022 + pipeline_name: 'uk_core_news_md' + version: '3.4.0' + compatible_service_versions: + - '0.1.0' +- title: 'zh_core_web_md-3.4.0' + description: 'Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.' + url: 'https://github.com/explosion/spacy-models/releases/download/zh_core_web_md-3.4.0/zh_core_web_md-3.4.0.tar.gz' + publisher: 'Explosion' + publisher_url: 'https://github.com/explosion' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/zh_core_web_md-3.4.0' + publishing_year: 2022 + pipeline_name: 'zh_core_web_md' + version: '3.4.0' + compatible_service_versions: + - '0.1.0' +- title: 'ru_core_news_md-3.4.0' + description: 'Russian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' + url: 'https://github.com/explosion/spacy-models/releases/download/ru_core_news_md-3.4.0/ru_core_news_md-3.4.0.tar.gz' + publisher: 'Explosion' + publisher_url: 'https://github.com/explosion' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/ru_core_news_md-3.4.0' + publishing_year: 2022 + pipeline_name: 'ru_core_news_md' + version: '3.4.0' + compatible_service_versions: + - '0.1.0' +- title: 'la_core_cltk_sm-0.1.0' + description: 'Latin pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' + url: 'https://github.com/diyclassics/latin-spacy-models/raw/main/la_core_cltk_sm/la_core_cltk_sm-0.1.0.tar.gz' + publisher: 'DIY Classics' + publisher_url: 'https://github.com/diyclassics/' + publishing_url: 'https://github.com/diyclassics/latin-spacy-models/tree/main/la_core_cltk_sm' + publishing_year: 2022 + pipeline_name: 'la_core_cltk_sm' + version: '0.1.0' + compatible_service_versions: + - '0.1.0' diff --git a/app/contributions/forms.py b/app/contributions/forms.py index 8577ee97..dcdfaea8 100644 --- a/app/contributions/forms.py +++ b/app/contributions/forms.py @@ -46,6 +46,18 @@ class CreateContributionBaseForm(FlaskForm): ) submit = SubmitField() +class EditForm(CreateContributionBaseForm): + def prefill(self, model_file): + ''' Pre-fill the form with data of an exististing corpus file ''' + self.title.data = model_file.title + self.description.data = model_file.description + self.publisher.data = model_file.publisher + self.publishing_year.data = model_file.publishing_year + self.publisher_url.data = model_file.publisher_url + self.publishing_url.data = model_file.publishing_url + self.version.data = model_file.version + self.shared.data = model_file.shared + class TesseractOCRModelContributionForm(CreateContributionBaseForm): tesseract_model_file = FileField( 'File', @@ -67,16 +79,23 @@ class TesseractOCRModelContributionForm(CreateContributionBaseForm): ] self.compatible_service_versions.default = '' -class TesseractOCRModelEditForm(CreateContributionBaseForm): - def prefill(self, model_file): - ''' Pre-fill the form with data of an exististing corpus file ''' - self.title.data = model_file.title - self.description.data = model_file.description - self.publisher.data = model_file.publisher - self.publishing_year.data = model_file.publishing_year - self.publisher_url.data = model_file.publisher_url - self.publishing_url.data = model_file.publishing_url - self.version.data = model_file.version - self.shared.data = model_file.shared - +class SpacyNLPModelContributionForm(CreateContributionBaseForm): + spacy_model_file = FileField( + 'File', + validators=[FileRequired()] + ) + compatible_service_versions = SelectMultipleField( + 'Compatible service versions' + ) + def validate_spacy(self, field): + if field.data.mimetype != '.tar.gz': + raise ValidationError('.tar.gz files only!') + def __init__(self, *args, **kwargs): + service_manifest = SERVICES['spacy-nlp-pipeline'] + super().__init__(*args, **kwargs) + self.compatible_service_versions.choices = [('', 'Choose your option')] + self.compatible_service_versions.choices += [ + (x, x) for x in service_manifest['versions'].keys() + ] + self.compatible_service_versions.default = '' diff --git a/app/contributions/routes.py b/app/contributions/routes.py index 385e1eec..b63a43ec 100644 --- a/app/contributions/routes.py +++ b/app/contributions/routes.py @@ -1,11 +1,11 @@ -from flask import abort, current_app, flash, Markup, redirect, render_template, url_for +from flask import abort, current_app, flash, Markup, render_template, url_for from flask_login import login_required, current_user from threading import Thread from app import db from app.decorators import admin_required, permission_required -from app.models import TesseractOCRPipelineModel, Permission +from app.models import Permission, SpaCyNLPPipelineModel, TesseractOCRPipelineModel from . import bp -from .forms import TesseractOCRModelContributionForm, TesseractOCRModelEditForm +from .forms import TesseractOCRModelContributionForm, EditForm, SpacyNLPModelContributionForm @bp.before_request @@ -22,20 +22,26 @@ def contributions(): tesseract_ocr_user_models = [ x for x in current_user.tesseract_ocr_pipeline_models ] + spacy_nlp_user_models = [ + x for x in current_user.spacy_nlp_pipeline_models + ] + spacy_models = SpaCyNLPPipelineModel.query.all() + print(spacy_models) return render_template( 'contributions/contribution_overview.html.j2', - tesseractOCRUserModels=tesseract_ocr_user_models, + tesseract_ocr_user_models=tesseract_ocr_user_models, + spacy_nlp_user_models=spacy_nlp_user_models, userId = current_user.hashid, title='Contribution Overview' ) -@bp.route('/', methods=['GET', 'POST']) +@bp.route('/edit-tesseract-model/', methods=['GET', 'POST']) @login_required def tesseract_ocr_pipeline_model(tesseract_ocr_pipeline_model_id): tesseract_ocr_pipeline_model = TesseractOCRPipelineModel.query.get_or_404( tesseract_ocr_pipeline_model_id ) - form = TesseractOCRModelEditForm(prefix='tesseract-ocr-model-edit-form') + form = EditForm(prefix='tesseract-ocr-model-edit-form') if form.validate_on_submit(): if tesseract_ocr_pipeline_model.title != form.title.data: tesseract_ocr_pipeline_model.title = form.title.data @@ -65,7 +71,7 @@ def tesseract_ocr_pipeline_model(tesseract_ocr_pipeline_model_id): title='Edit your Tesseract OCR model' ) -@bp.route('/', methods=['DELETE']) +@bp.route('/edit-tesseract-model/', methods=['DELETE']) @login_required def delete_tesseract_model(tesseract_ocr_pipeline_model_id): def _delete_tesseract_model(app, tesseract_ocr_pipeline_model_id): @@ -123,3 +129,95 @@ def add_tesseract_ocr_pipeline_model(): tesseract_ocr_pipeline_models=tesseract_ocr_pipeline_models, title='Tesseract OCR Model Contribution' ) + +@bp.route('/edit-spacy-model//', methods=['GET', 'POST']) +@login_required +def spacy_nlp_pipeline_model(spacy_nlp_pipeline_model_id): + spacy_nlp_pipeline_model = SpaCyNLPPipelineModel.query.get_or_404( + spacy_nlp_pipeline_model_id + ) + form = EditForm(prefix='spacy-nlp-model-edit-form') + if form.validate_on_submit(): + if spacy_nlp_pipeline_model.title != form.title.data: + spacy_nlp_pipeline_model.title = form.title.data + if spacy_nlp_pipeline_model.description != form.description.data: + spacy_nlp_pipeline_model.description = form.description.data + if spacy_nlp_pipeline_model.publisher != form.publisher.data: + spacy_nlp_pipeline_model.publisher = form.publisher.data + if spacy_nlp_pipeline_model.publishing_year != form.publishing_year.data: + spacy_nlp_pipeline_model.publishing_year = form.publishing_year.data + if spacy_nlp_pipeline_model.publisher_url != form.publisher_url.data: + spacy_nlp_pipeline_model.publisher_url = form.publisher_url.data + if spacy_nlp_pipeline_model.publishing_url != form.publishing_url.data: + spacy_nlp_pipeline_model.publishing_url = form.publishing_url.data + if spacy_nlp_pipeline_model.version != form.version.data: + spacy_nlp_pipeline_model.version = form.version.data + if spacy_nlp_pipeline_model.shared != form.shared.data: + spacy_nlp_pipeline_model.shared = form.shared.data + db.session.commit() + message = Markup(f'Model "{spacy_nlp_pipeline_model.title}" updated') + flash(message, category='corpus') + return {}, 201, {'Location': url_for('contributions.contributions')} + form.prefill(spacy_nlp_pipeline_model) + return render_template( + 'contributions/spacy_nlp_pipeline_model.html.j2', + spacy_nlp_pipeline_model=spacy_nlp_pipeline_model, + form=form, + title='Edit your spaCy NLP model' + ) + +@bp.route('/edit-spacy-model/', methods=['DELETE']) +@login_required +def delete_spacy_model(spacy_nlp_pipeline_model_id): + def _delete_spacy_model(app, spacy_nlp_pipeline_model_id): + with app.app_context(): + model = SpaCyNLPPipelineModel.query.get(spacy_nlp_pipeline_model_id) + model.delete() + db.session.commit() + + model = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id) + if not (model.user == current_user or current_user.is_administrator()): + abort(403) + thread = Thread( + target=_delete_spacy_model, + args=(current_app._get_current_object(), spacy_nlp_pipeline_model_id) + ) + thread.start() + return {}, 202 + +@bp.route('/add-spacy-nlp-pipeline-model', methods=['GET', 'POST']) +def add_spacy_nlp_pipeline_model(): + form = SpacyNLPModelContributionForm(prefix='contribute-spacy-nlp-pipeline-model-form') + if form.is_submitted(): + if not form.validate(): + response = {'errors': form.errors} + return response, 400 + try: + spacy_nlp_model = SpaCyNLPPipelineModel.create( + form.spacy_model_file.data, + compatible_service_versions=form.compatible_service_versions.data, + description=form.description.data, + publisher=form.publisher.data, + publisher_url=form.publisher_url.data, + publishing_url=form.publishing_url.data, + publishing_year=form.publishing_year.data, + shared=form.shared.data, + title=form.title.data, + version=form.version.data, + user=current_user + ) + except OSError: + abort(500) + db.session.commit() + message = Markup(f'Model "{spacy_nlp_model.title}" created') + flash(message) + return {}, 201, {'Location': url_for('contributions.contributions')} + spacy_nlp_pipeline_models = [ + x for x in SpaCyNLPPipelineModel.query.all() + ] + return render_template( + 'contributions/contribute_spacy_nlp_models.html.j2', + form=form, + spacy_nlp_pipeline_models=spacy_nlp_pipeline_models, + title='spaCy NLP Model Contribution' + ) diff --git a/app/models.py b/app/models.py index e1acf6de..90d16f48 100644 --- a/app/models.py +++ b/app/models.py @@ -520,6 +520,10 @@ class User(HashidMixin, UserMixin, db.Model): x.hashid: x.to_json(relationships=True) for x in self.tesseract_ocr_pipeline_models } + _json['spacy_nlp_pipeline_models'] = { + x.hashid: x.to_json(relationships=True) + for x in self.spacy_nlp_pipeline_models + } return _json class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model): @@ -643,6 +647,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model): publisher_url = db.Column(db.String(512)) publishing_url = db.Column(db.String(512)) publishing_year = db.Column(db.Integer) + pipeline_name = db.Column(db.String(64)) shared = db.Column(db.Boolean, default=False) # Backrefs: user: User @@ -675,6 +680,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model): model.shared = True model.title = m['title'] model.version = m['version'] + model.pipeline_name = m['pipeline_name'] continue model = SpaCyNLPPipelineModel( compatible_service_versions=m['compatible_service_versions'], @@ -686,7 +692,8 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model): shared=True, title=m['title'], user=nopaque_user, - version=m['version'] + version=m['version'], + pipeline_name=m['pipeline_name'] ) db.session.add(model) db.session.flush(objects=[model]) @@ -708,6 +715,13 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model): f.write(chunk) pbar.close() db.session.commit() + + def delete(self): + try: + os.remove(self.path) + except OSError as e: + current_app.logger.error(e) + db.session.delete(self) def to_json(self, backrefs=False, relationships=False): _json = { @@ -718,6 +732,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model): 'publisher_url': self.publisher_url, 'publishing_url': self.publishing_url, 'publishing_year': self.publishing_year, + 'pipeline_name': self.pipeline_name, 'shared': self.shared, 'title': self.title, **self.file_mixin_to_json() diff --git a/app/static/js/RessourceLists/SpacyNLPModelList.js b/app/static/js/RessourceLists/SpacyNLPModelList.js new file mode 100644 index 00000000..0e20191b --- /dev/null +++ b/app/static/js/RessourceLists/SpacyNLPModelList.js @@ -0,0 +1,76 @@ +class SpacyNLPModelList { + constructor () { + + this.elements = { + spacyNLPModelList: document.querySelector('#spacy-nlp-model-list'), + deleteButtons: document.querySelectorAll('.delete-spacy-model-button'), + editButtons: document.querySelectorAll('.edit-spacy-model-button'), + + } + } + + init () { + let userId = this.elements.spacyNLPModelList.dataset.userId; + + for (let deleteButton of this.elements.deleteButtons) { + deleteButton.addEventListener('click', () => {this.deleteModel(deleteButton, userId);}); + } + + for (let editButton of this.elements.editButtons) { + editButton.addEventListener('click', () => {this.editModel(editButton);}); + } + } + + deleteModel(deleteButton, userId) { + return new Promise((resolve, reject) => { + let modelId = deleteButton.dataset.modelId; + let model = app.data.users[userId].spacy_nlp_pipeline_models[modelId]; + let modalElement = Utils.elementFromString( + ` + + ` + ); + document.querySelector('#modals').appendChild(modalElement); + let modal = M.Modal.init( + modalElement, + { + dismissible: false, + onCloseEnd: () => { + modal.destroy(); + modalElement.remove(); + } + } + ); + let confirmElement = modalElement.querySelector('.action-button[data-action="confirm"]'); + confirmElement.addEventListener('click', (event) => { + let modelTitle = model.title; + fetch(`/contributions/edit-spacy-model/${modelId}`, {method: 'DELETE'}) + .then( + (response) => { + app.flash(`Model "${modelTitle}" marked for deletion`, 'corpus'); + resolve(response); + }, + (response) => { + if (response.status === 403) {app.flash('Forbidden', 'error');} + if (response.status === 404) {app.flash('Not Found', 'error');} + reject(response); + } + ); + }); + modal.open(); + }); + } + + editModel(editButton) { + window.location.href = `/contributions/edit-spacy-model/${editButton.dataset.modelId}`; + } +} diff --git a/app/static/js/RessourceLists/TesseractOCRModelList.js b/app/static/js/RessourceLists/TesseractOCRModelList.js index 9080447e..782f5d7e 100644 --- a/app/static/js/RessourceLists/TesseractOCRModelList.js +++ b/app/static/js/RessourceLists/TesseractOCRModelList.js @@ -25,7 +25,6 @@ class TesseractOCRModelList { return new Promise((resolve, reject) => { let modelId = deleteButton.dataset.modelId; let model = app.data.users[userId].tesseract_ocr_pipeline_models[modelId]; - let modalElement = Utils.elementFromString( ` + {# spaCy NLP Models #} +
    +

    My spaCy NLP Pipeline Models

    +

    Here you can see and edit the models that you have created. You can also create new models.

    + +
    +
    +
    +
    +
    + + + + + + + + + + + {% if spacy_nlp_user_models|length > 0 %} + {% for m in spacy_nlp_user_models %} + + + {% if m.description == '' %} + + {% else %} + + {% endif %} + + + + {% endfor %} + {% else %} + + + + {% endif %} + +
    TitleDescriptionBiblio
    {{ m.title }}Description is not available.{{ m.description }}{{ m.publisher }} ({{ m.publishing_year }}), {{ m.title }} {{ m.version}}, {{ m.publishing_url }} + delete + edit +
    No models available.
    +
    +
    + +
    +
    +
    +
    @@ -71,5 +123,7 @@ {% endblock scripts %} diff --git a/app/templates/contributions/spacy_nlp_pipeline_model.html.j2 b/app/templates/contributions/spacy_nlp_pipeline_model.html.j2 new file mode 100644 index 00000000..04d7506f --- /dev/null +++ b/app/templates/contributions/spacy_nlp_pipeline_model.html.j2 @@ -0,0 +1,56 @@ +{% extends "base.html.j2" %} +{% import "materialize/wtf.html.j2" as wtf %} +{# {% from "contributions/_breadcrumbs.html.j2" import breadcrumbs with context %} #} + +{% block main_attribs %} class="service-scheme" data-service="spacy-nlp-pipeline"{% endblock main_attribs %} + +{% block page_content %} +
    +
    +
    +

    {{ title }}

    +
    + +
    +
    +
    +
    + {{ form.hidden_tag() }} +
    +
    + {{ wtf.render_field(form.title, material_icon='title') }} +
    +
    + {{ wtf.render_field(form.description, material_icon='description') }} +
    +
    + {{ wtf.render_field(form.publisher, material_icon='account_balance') }} +
    +
    + {{ wtf.render_field(form.publishing_year, material_icon='calendar_month') }} +
    +
    + {{ wtf.render_field(form.publisher_url, material_icon='link') }} +
    +
    + {{ wtf.render_field(form.publishing_url, material_icon='link') }} +
    +
    + {{ wtf.render_field(form.version, material_icon='apps') }} +
    +
    +

    +
    + {{ wtf.render_field(form.shared) }} +
    +
    +
    +
    + {{ wtf.render_field(form.submit, material_icon='send') }} +
    +
    +
    +
    +
    +
    +{% endblock page_content %} diff --git a/migrations/versions/721829b5dd25_.py b/migrations/versions/721829b5dd25_.py new file mode 100644 index 00000000..124ca07d --- /dev/null +++ b/migrations/versions/721829b5dd25_.py @@ -0,0 +1,28 @@ +"""empty message + +Revision ID: 721829b5dd25 +Revises: 31dd42e5ea6f +Create Date: 2022-11-04 13:58:13.008301 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '721829b5dd25' +down_revision = '31dd42e5ea6f' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('spacy_nlp_pipeline_models', sa.Column('pipeline_name', sa.String(length=64), nullable=True)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('spacy_nlp_pipeline_models', 'pipeline_name') + # ### end Alembic commands ### From 033872718e12ee29904160d6114b8dafda1ab6a1 Mon Sep 17 00:00:00 2001 From: Inga Kirschnick Date: Tue, 8 Nov 2022 14:11:57 +0100 Subject: [PATCH 03/29] Small changes in the contribution package --- app/SpaCyNLPPipelineModel.defaults.yml | 191 ++++++++++++++---- app/contributions/forms.py | 12 +- app/services/forms.py | 14 +- app/services/routes.py | 5 +- .../contributions/_breadcrumbs.html.j2 | 26 +-- .../contribute_spacy_nlp_models.html.j2 | 2 +- .../contribute_tesseract_ocr_models.html.j2 | 2 +- .../spacy_nlp_pipeline_model.html.j2 | 2 +- .../services/spacy_nlp_pipeline.html.j2 | 43 +++- .../services/tesseract_ocr_pipeline.html.j2 | 3 +- 10 files changed, 230 insertions(+), 70 deletions(-) diff --git a/app/SpaCyNLPPipelineModel.defaults.yml b/app/SpaCyNLPPipelineModel.defaults.yml index ed4ea3bd..00031ed0 100644 --- a/app/SpaCyNLPPipelineModel.defaults.yml +++ b/app/SpaCyNLPPipelineModel.defaults.yml @@ -1,66 +1,177 @@ -- title: 'de_core_news_md-3.4.0' - description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.' - url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.4.0/de_core_news_md-3.4.0.tar.gz' +- title: 'Catalan' + description: 'Catalan pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' + url: 'https://github.com/explosion/spacy-models/releases/download/ca_core_news_md-3.2.0/ca_core_news_md-3.2.0.tar.gz' publisher: 'Explosion' publisher_url: 'https://github.com/explosion' - publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/de_core_news_md-3.4.0' - publishing_year: 2022 + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/ca_core_news_md-3.2.0' + publishing_year: 2021 + pipeline_name: 'ca_core_news_md' + version: '3.2.0' + compatible_service_versions: + - '0.1.0' +- title: 'German' + description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' + url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.2.0/de_core_news_md-3.2.0.tar.gz' + publisher: 'Explosion' + publisher_url: 'https://github.com/explosion' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/de_core_news_md-3.2.0' + publishing_year: 2021 pipeline_name: 'de_core_news_md' - version: '3.4.0' + version: '3.2.0' compatible_service_versions: - - '0.1.0' -- title: 'en_core_web_md-3.4.1' + - '0.1.0' +- title: 'Greek' + description: 'Greek pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' + url: 'https://github.com/explosion/spacy-models/releases/download/el_core_news_md-3.2.0/el_core_news_md-3.2.0.tar.gz' + publisher: 'Explosion' + publisher_url: 'https://github.com/explosion' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/el_core_news_md-3.2.0' + publishing_year: 2021 + pipeline_name: 'el_core_news_md' + version: '3.2.0' + compatible_service_versions: + - '0.1.0' +- title: 'English' description: 'English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.' - url: 'https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.4.1/en_core_web_md-3.4.1.tar.gz' + url: 'https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.2.0/en_core_web_md-3.2.0.tar.gz' publisher: 'Explosion' publisher_url: 'https://github.com/explosion' - publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/en_core_web_md-3.4.1' - publishing_year: 2022 + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/en_core_web_md-3.2.0' + publishing_year: 2021 pipeline_name: 'en_core_web_md' - version: '3.4.1' + version: '3.2.0' compatible_service_versions: - '0.1.0' -- title: 'uk_core_news_md-3.4.0' - description: 'Ukrainian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' - url: 'https://github.com/explosion/spacy-models/releases/download/uk_core_news_md-3.4.0/uk_core_news_md-3.4.0.tar.gz' +- title: 'Spanish' + description: 'Spanish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' + url: 'https://github.com/explosion/spacy-models/releases/download/es_core_news_md-3.2.0/es_core_news_md-3.2.0.tar.gz' publisher: 'Explosion' publisher_url: 'https://github.com/explosion' - publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/uk_core_news_md-3.4.0' - publishing_year: 2022 - pipeline_name: 'uk_core_news_md' - version: '3.4.0' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/es_core_news_md-3.2.0' + publishing_year: 2021 + pipeline_name: 'es_core_news_md' + version: '3.2.0' compatible_service_versions: - '0.1.0' -- title: 'zh_core_web_md-3.4.0' - description: 'Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.' - url: 'https://github.com/explosion/spacy-models/releases/download/zh_core_web_md-3.4.0/zh_core_web_md-3.4.0.tar.gz' +- title: 'French' + description: 'French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' + url: 'https://github.com/explosion/spacy-models/releases/download/fr_core_news_md-3.2.0/fr_core_news_md-3.2.0.tar.gz' publisher: 'Explosion' publisher_url: 'https://github.com/explosion' - publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/zh_core_web_md-3.4.0' - publishing_year: 2022 - pipeline_name: 'zh_core_web_md' - version: '3.4.0' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/fr_core_news_md-3.2.0' + publishing_year: 2021 + pipeline_name: 'fr_core_news_md' + version: '3.2.0' compatible_service_versions: - '0.1.0' -- title: 'ru_core_news_md-3.4.0' +- title: 'Italian' + description: 'Italian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' + url: 'https://github.com/explosion/spacy-models/releases/download/it_core_news_md-3.2.0/it_core_news_md-3.2.0.tar.gz' + publisher: 'Explosion' + publisher_url: 'https://github.com/explosion' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/it_core_news_md-3.2.0' + publishing_year: 2021 + pipeline_name: 'it_core_news_md' + version: '3.2.0' + compatible_service_versions: + - '0.1.0' +- title: 'Polish' + description: 'Polish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' + url: 'https://github.com/explosion/spacy-models/releases/download/pl_core_news_md-3.2.0/pl_core_news_md-3.2.0.tar.gz' + publisher: 'Explosion' + publisher_url: 'https://github.com/explosion' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/pl_core_news_md-3.2.0' + publishing_year: 2021 + pipeline_name: 'pl_core_news_md' + version: '3.2.0' + compatible_service_versions: + - '0.1.0' +- title: 'Russian' description: 'Russian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' - url: 'https://github.com/explosion/spacy-models/releases/download/ru_core_news_md-3.4.0/ru_core_news_md-3.4.0.tar.gz' + url: 'https://github.com/explosion/spacy-models/releases/download/ru_core_news_md-3.2.0/ru_core_news_md-3.2.0.tar.gz' publisher: 'Explosion' publisher_url: 'https://github.com/explosion' - publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/ru_core_news_md-3.4.0' - publishing_year: 2022 + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/ru_core_news_md-3.2.0' + publishing_year: 2021 pipeline_name: 'ru_core_news_md' - version: '3.4.0' + version: '3.2.0' compatible_service_versions: - '0.1.0' -- title: 'la_core_cltk_sm-0.1.0' - description: 'Latin pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' - url: 'https://github.com/diyclassics/latin-spacy-models/raw/main/la_core_cltk_sm/la_core_cltk_sm-0.1.0.tar.gz' - publisher: 'DIY Classics' - publisher_url: 'https://github.com/diyclassics/' - publishing_url: 'https://github.com/diyclassics/latin-spacy-models/tree/main/la_core_cltk_sm' - publishing_year: 2022 - pipeline_name: 'la_core_cltk_sm' - version: '0.1.0' +- title: 'Chinese' + description: 'Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.' + url: 'https://github.com/explosion/spacy-models/releases/download/zh_core_web_md-3.2.0/zh_core_web_md-3.2.0.tar.gz' + publisher: 'Explosion' + publisher_url: 'https://github.com/explosion' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/zh_core_web_md-3.2.0' + publishing_year: 2021 + pipeline_name: 'zh_core_web_md' + version: '3.2.0' compatible_service_versions: - '0.1.0' + +# - title: 'de_core_news_md-3.4.0' +# description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.' +# url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.4.0/de_core_news_md-3.4.0.tar.gz' +# publisher: 'Explosion' +# publisher_url: 'https://github.com/explosion' +# publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/de_core_news_md-3.4.0' +# publishing_year: 2022 +# pipeline_name: 'de_core_news_md' +# version: '3.4.0' +# compatible_service_versions: +# - '0.1.0' +# - title: 'en_core_web_md-3.4.1' +# description: 'English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.' +# url: 'https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.4.1/en_core_web_md-3.4.1.tar.gz' +# publisher: 'Explosion' +# publisher_url: 'https://github.com/explosion' +# publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/en_core_web_md-3.4.1' +# publishing_year: 2022 +# pipeline_name: 'en_core_web_md' +# version: '3.4.1' +# compatible_service_versions: +# - '0.1.0' +# - title: 'uk_core_news_md-3.4.0' +# description: 'Ukrainian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' +# url: 'https://github.com/explosion/spacy-models/releases/download/uk_core_news_md-3.4.0/uk_core_news_md-3.4.0.tar.gz' +# publisher: 'Explosion' +# publisher_url: 'https://github.com/explosion' +# publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/uk_core_news_md-3.4.0' +# publishing_year: 2022 +# pipeline_name: 'uk_core_news_md' +# version: '3.4.0' +# compatible_service_versions: +# - '0.1.0' +# - title: 'zh_core_web_md-3.4.0' +# description: 'Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.' +# url: 'https://github.com/explosion/spacy-models/releases/download/zh_core_web_md-3.4.0/zh_core_web_md-3.4.0.tar.gz' +# publisher: 'Explosion' +# publisher_url: 'https://github.com/explosion' +# publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/zh_core_web_md-3.4.0' +# publishing_year: 2022 +# pipeline_name: 'zh_core_web_md' +# version: '3.4.0' +# compatible_service_versions: +# - '0.1.0' +# - title: 'ru_core_news_md-3.4.0' +# description: 'Russian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' +# url: 'https://github.com/explosion/spacy-models/releases/download/ru_core_news_md-3.4.0/ru_core_news_md-3.4.0.tar.gz' +# publisher: 'Explosion' +# publisher_url: 'https://github.com/explosion' +# publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/ru_core_news_md-3.4.0' +# publishing_year: 2022 +# pipeline_name: 'ru_core_news_md' +# version: '3.4.0' +# compatible_service_versions: +# - '0.1.0' +# - title: 'la_core_cltk_sm-0.1.0' +# description: 'Latin pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' +# url: 'https://github.com/diyclassics/latin-spacy-models/raw/main/la_core_cltk_sm/la_core_cltk_sm-0.1.0.tar.gz' +# publisher: 'DIY Classics' +# publisher_url: 'https://github.com/diyclassics/' +# publishing_url: 'https://github.com/diyclassics/latin-spacy-models/tree/main/la_core_cltk_sm' +# publishing_year: 2022 +# pipeline_name: 'la_core_cltk_sm' +# version: '0.1.0' +# compatible_service_versions: +# - '0.1.0' diff --git a/app/contributions/forms.py b/app/contributions/forms.py index dcdfaea8..c0611e17 100644 --- a/app/contributions/forms.py +++ b/app/contributions/forms.py @@ -1,4 +1,4 @@ -from xml.dom import ValidationErr +from flask import current_app from flask_wtf import FlaskForm from flask_wtf.file import FileField, FileRequired from wtforms import ( @@ -66,8 +66,9 @@ class TesseractOCRModelContributionForm(CreateContributionBaseForm): compatible_service_versions = SelectMultipleField( 'Compatible service versions' ) - def validate_traineddata(self, field): - if field.data.mimetype != '.traineddata': + def validate_tesseract_model_file(self, field): + current_app.logger.warning(field.data.filename) + if not field.data.filename.lower().endswith('.traineddata'): raise ValidationError('traineddata files only!') def __init__(self, *args, **kwargs): @@ -87,8 +88,9 @@ class SpacyNLPModelContributionForm(CreateContributionBaseForm): compatible_service_versions = SelectMultipleField( 'Compatible service versions' ) - def validate_spacy(self, field): - if field.data.mimetype != '.tar.gz': + def validate_spacy_model_file(self, field): + current_app.logger.warning(field.data.filename) + if not field.data.filename.lower().endswith('.tar.gz'): raise ValidationError('.tar.gz files only!') def __init__(self, *args, **kwargs): diff --git a/app/services/forms.py b/app/services/forms.py index 5c0af906..134e9456 100644 --- a/app/services/forms.py +++ b/app/services/forms.py @@ -10,7 +10,7 @@ from wtforms import ( ValidationError ) from wtforms.validators import InputRequired, Length -from app.models import TesseractOCRPipelineModel +from app.models import TesseractOCRPipelineModel, SpaCyNLPPipelineModel from . import SERVICES @@ -73,11 +73,11 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm): if 'disabled' in self.binarization.render_kw: del self.binarization.render_kw['disabled'] models = [ - x for x in TesseractOCRPipelineModel.query.filter().all() + x for x in TesseractOCRPipelineModel.query.order_by(TesseractOCRPipelineModel.title).all() if version in x.compatible_service_versions and (x.shared == True or x.user == current_user) ] self.model.choices = [('', 'Choose your option')] - self.model.choices += [(x.hashid, x.title) for x in models] + self.model.choices += [(x.hashid, f'{x.title} [{x.version}]') for x in models] self.model.default = '' self.version.choices = [(x, x) for x in service_manifest['versions']] self.version.data = version @@ -127,7 +127,7 @@ class CreateSpacyNLPPipelineJobForm(CreateJobBaseForm): encoding_detection = BooleanField('Encoding detection', render_kw={'disabled': True}) txt = FileField('File', validators=[FileRequired()]) model = SelectField('Model', validators=[InputRequired()]) - + def validate_encoding_detection(self, field): service_info = SERVICES['spacy-nlp-pipeline']['versions'][self.version.data] if field.data: @@ -153,8 +153,12 @@ class CreateSpacyNLPPipelineJobForm(CreateJobBaseForm): if 'encoding_detection' in service_info['methods']: if 'disabled' in self.encoding_detection.render_kw: del self.encoding_detection.render_kw['disabled'] + models = [ + x for x in SpaCyNLPPipelineModel.query.order_by(SpaCyNLPPipelineModel.title).all() + if version in x.compatible_service_versions and (x.shared == True or x.user == current_user) + ] self.model.choices = [('', 'Choose your option')] - self.model.choices += [(x, y) for x, y in service_info['models'].items()] # noqa + self.model.choices += [(x.hashid, f'{x.title} [{x.version}]') for x in models] self.model.default = '' self.version.choices = [(x, x) for x in service_manifest['versions']] self.version.data = version diff --git a/app/services/routes.py b/app/services/routes.py index b34d0619..4bfca9bb 100644 --- a/app/services/routes.py +++ b/app/services/routes.py @@ -6,7 +6,8 @@ from app.models import ( Job, JobInput, JobStatus, - TesseractOCRPipelineModel + TesseractOCRPipelineModel, + SpaCyNLPPipelineModel ) from . import bp, SERVICES from .forms import ( @@ -172,6 +173,7 @@ def spacy_nlp_pipeline(): if version not in service_manifest['versions']: abort(404) form = CreateSpacyNLPPipelineJobForm(prefix='create-job-form', version=version) + spacy_nlp_pipeline_models = SpaCyNLPPipelineModel.query.all() if form.is_submitted(): if not form.validate(): response = {'errors': form.errors} @@ -202,6 +204,7 @@ def spacy_nlp_pipeline(): return render_template( 'services/spacy_nlp_pipeline.html.j2', form=form, + spacy_nlp_pipeline_models=spacy_nlp_pipeline_models, title=service_manifest['name'] ) diff --git a/app/templates/contributions/_breadcrumbs.html.j2 b/app/templates/contributions/_breadcrumbs.html.j2 index 4ccfad3b..327d0578 100644 --- a/app/templates/contributions/_breadcrumbs.html.j2 +++ b/app/templates/contributions/_breadcrumbs.html.j2 @@ -2,30 +2,30 @@
  • navigate_next
  • {% if request.path == url_for('.contributions') %}
  • Contributions Overview
  • -{% elif request.path == url_for('.tesseract_ocr_pipeline_model', tesseract_ocr_pipeline_model_id=tesseract_ocr_pipeline_model.id) %} + +{% elif request.path == url_for('.add_tesseract_ocr_pipeline_model') %} +
  • Contributions Overview
  • +
  • navigate_next
  • +
  • {{ title }}
  • +{% elif request.path == url_for('.add_spacy_nlp_pipeline_model') %} +
  • Contributions Overview
  • +
  • navigate_next
  • +
  • {{ title }}
  • +{% elif tesseract_ocr_pipeline_model and request.path == url_for('.tesseract_ocr_pipeline_model', tesseract_ocr_pipeline_model_id=tesseract_ocr_pipeline_model.id) %}
  • Contributions Overview
  • navigate_next
  • - + Edit {{ tesseract_ocr_pipeline_model.title }}
  • -{% elif request.path == url_for('.add_tesseract_ocr_pipeline_model, tesseract_ocr_pipeline_model=nn') %} -
  • Contributions Overview
  • -
  • navigate_next
  • -
  • {{ title }}
  • - -{% elif request.path == url_for('.spacy_nlp_pipeline_model', spacy_nlp_pipeline_model_id=spacy_nlp_pipeline_model.id) %} +{% elif spacy_nlp_pipeline_model and request.path == url_for('.spacy_nlp_pipeline_model', spacy_nlp_pipeline_model_id=spacy_nlp_pipeline_model.id) %}
  • Contributions Overview
  • navigate_next
  • - + Edit {{ spacy_nlp_pipeline_model.title }}
  • -{% elif request.path == url_for('.add_spacy_nlp_pipeline_model, spacy_nlp_pipeline_model=nn') %} -
  • Contributions Overview
  • -
  • navigate_next
  • -
  • {{ title }}
  • {% endif %} {% endset %} diff --git a/app/templates/contributions/contribute_spacy_nlp_models.html.j2 b/app/templates/contributions/contribute_spacy_nlp_models.html.j2 index 97475c40..b5ab9b0d 100644 --- a/app/templates/contributions/contribute_spacy_nlp_models.html.j2 +++ b/app/templates/contributions/contribute_spacy_nlp_models.html.j2 @@ -1,6 +1,6 @@ {% extends "base.html.j2" %} {% import "materialize/wtf.html.j2" as wtf %} -{# {% from "contributions/_breadcrumbs.html.j2" import breadcrumbs with context %} #} +{% from "contributions/_breadcrumbs.html.j2" import breadcrumbs with context %} {% block main_attribs %} class="service-scheme" data-service="tesseract-ocr-pipeline"{% endblock main_attribs %} diff --git a/app/templates/contributions/contribute_tesseract_ocr_models.html.j2 b/app/templates/contributions/contribute_tesseract_ocr_models.html.j2 index d7c8bd41..1e50585a 100644 --- a/app/templates/contributions/contribute_tesseract_ocr_models.html.j2 +++ b/app/templates/contributions/contribute_tesseract_ocr_models.html.j2 @@ -1,6 +1,6 @@ {% extends "base.html.j2" %} {% import "materialize/wtf.html.j2" as wtf %} -{# {% from "contributions/_breadcrumbs.html.j2" import breadcrumbs with context %} #} +{% from "contributions/_breadcrumbs.html.j2" import breadcrumbs with context %} {% block main_attribs %} class="service-scheme" data-service="tesseract-ocr-pipeline"{% endblock main_attribs %} diff --git a/app/templates/contributions/spacy_nlp_pipeline_model.html.j2 b/app/templates/contributions/spacy_nlp_pipeline_model.html.j2 index 04d7506f..82fd6862 100644 --- a/app/templates/contributions/spacy_nlp_pipeline_model.html.j2 +++ b/app/templates/contributions/spacy_nlp_pipeline_model.html.j2 @@ -1,6 +1,6 @@ {% extends "base.html.j2" %} {% import "materialize/wtf.html.j2" as wtf %} -{# {% from "contributions/_breadcrumbs.html.j2" import breadcrumbs with context %} #} +{% from "contributions/_breadcrumbs.html.j2" import breadcrumbs with context %} {% block main_attribs %} class="service-scheme" data-service="spacy-nlp-pipeline"{% endblock main_attribs %} diff --git a/app/templates/services/spacy_nlp_pipeline.html.j2 b/app/templates/services/spacy_nlp_pipeline.html.j2 index 37f70210..5ad1e97a 100644 --- a/app/templates/services/spacy_nlp_pipeline.html.j2 +++ b/app/templates/services/spacy_nlp_pipeline.html.j2 @@ -70,8 +70,16 @@
    {{ wtf.render_field(form.txt, accept='text/plain', placeholder='Choose a plain text file') }}
    -
    - {{ wtf.render_field(form.model, material_icon='language') }} +
    +
    + language + {{ form.model() }} + {{ form.model.label }} + + help_outline + new_label + +
    {{ wtf.render_field(form.version, material_icon='apps') }} @@ -122,4 +130,35 @@ Cancel
    + + {% endblock modals %} diff --git a/app/templates/services/tesseract_ocr_pipeline.html.j2 b/app/templates/services/tesseract_ocr_pipeline.html.j2 index 982265bc..b66e968b 100644 --- a/app/templates/services/tesseract_ocr_pipeline.html.j2 +++ b/app/templates/services/tesseract_ocr_pipeline.html.j2 @@ -58,7 +58,8 @@ {{ form.model() }} {{ form.model.label }} - More details about models + help_outline + new_label {% for error in form.model.errors %} {{ error }} From ad17ec2cc82c9b9e3435ef4a6b5ed7373e4cece6 Mon Sep 17 00:00:00 2001 From: Inga Kirschnick Date: Wed, 9 Nov 2022 13:51:42 +0100 Subject: [PATCH 04/29] Preliminary work for nlp model runtime integration --- app/contributions/routes.py | 2 -- app/daemon/job_utils.py | 13 ++++++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/app/contributions/routes.py b/app/contributions/routes.py index b63a43ec..c20774d7 100644 --- a/app/contributions/routes.py +++ b/app/contributions/routes.py @@ -25,8 +25,6 @@ def contributions(): spacy_nlp_user_models = [ x for x in current_user.spacy_nlp_pipeline_models ] - spacy_models = SpaCyNLPPipelineModel.query.all() - print(spacy_models) return render_template( 'contributions/contribution_overview.html.j2', tesseract_ocr_user_models=tesseract_ocr_user_models, diff --git a/app/daemon/job_utils.py b/app/daemon/job_utils.py index 32def73d..518100f7 100644 --- a/app/daemon/job_utils.py +++ b/app/daemon/job_utils.py @@ -3,7 +3,8 @@ from app.models import ( Job, JobResult, JobStatus, - TesseractOCRPipelineModel + TesseractOCRPipelineModel, + SpaCyNLPPipelineModel ) from datetime import datetime from flask import current_app @@ -103,6 +104,16 @@ def _create_job_service(job): models_mount_target = f'/usr/local/share/tessdata/{model.filename}' models_mount = f'{models_mount_source}:{models_mount_target}:ro' mounts.append(models_mount) + elif job.service == 'spacy-nlp-pipeline': + model_id = hashids.decode(job.service_args['model']) + model = SpaCyNLPPipelineModel.query.get(model_id) + if model is None: + job.status = JobStatus.FAILED + return + models_mount_source = model.path + models_mount_target = f'/usr/local/share/spacy/models/{model.filename}' + models_mount = f'{models_mount_source}:{models_mount_target}:ro' + mounts.append(models_mount) ''' ### Output mount ### ''' output_mount_source = os.path.join(job.path, 'results') output_mount_target = '/output' From f4f0628b60b5d88dd5cd5d42aa355c99ec073294 Mon Sep 17 00:00:00 2001 From: Inga Kirschnick Date: Thu, 10 Nov 2022 12:14:03 +0100 Subject: [PATCH 05/29] Ocropus nlbin threshold extension --- app/services/forms.py | 4 ++++ app/services/routes.py | 3 ++- app/templates/services/tesseract_ocr_pipeline.html.j2 | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/app/services/forms.py b/app/services/forms.py index 5c0af906..9d7edf80 100644 --- a/app/services/forms.py +++ b/app/services/forms.py @@ -3,6 +3,7 @@ from flask_wtf import FlaskForm from flask_wtf.file import FileField, FileRequired from wtforms import ( BooleanField, + IntegerRangeField, MultipleFileField, SelectField, StringField, @@ -49,6 +50,9 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm): binarization = BooleanField('Binarization') pdf = FileField('File', validators=[FileRequired()]) model = SelectField('Model', validators=[InputRequired()]) + ocropus_nlbin_threshold = IntegerRangeField( + render_kw={'tooltips':'false', 'min': 0, 'max': 1, 'step': 0.1, 'start': [0.5]} + ) def validate_binarization(self, field): service_info = SERVICES['tesseract-ocr-pipeline']['versions'][self.version.data] diff --git a/app/services/routes.py b/app/services/routes.py index b34d0619..7fe7a0eb 100644 --- a/app/services/routes.py +++ b/app/services/routes.py @@ -78,7 +78,8 @@ def tesseract_ocr_pipeline(): service=service_name, service_args={ 'binarization': form.binarization.data, - 'model': hashids.decode(form.model.data) + 'model': hashids.decode(form.model.data), + 'ocropus_nlbin_threshold': form.ocropus_nlbin_threshold.data }, service_version=form.version.data, user=current_user diff --git a/app/templates/services/tesseract_ocr_pipeline.html.j2 b/app/templates/services/tesseract_ocr_pipeline.html.j2 index 982265bc..31bf155c 100644 --- a/app/templates/services/tesseract_ocr_pipeline.html.j2 +++ b/app/templates/services/tesseract_ocr_pipeline.html.j2 @@ -83,6 +83,11 @@ +

     

    +
    +

    Intensity

    +

    {{ form.ocropus_nlbin_threshold() }}

    +

     

     

    From a8af1f3d23ca5b2c2b18f5fc041d170156c6981e Mon Sep 17 00:00:00 2001 From: Inga Kirschnick Date: Thu, 10 Nov 2022 13:33:13 +0100 Subject: [PATCH 06/29] vtest image testing --- app/SpaCyNLPPipelineModel.defaults.yml | 1 + app/services/forms.py | 1 + app/services/services.yml | 27 +++++++++++++++----------- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/app/SpaCyNLPPipelineModel.defaults.yml b/app/SpaCyNLPPipelineModel.defaults.yml index 00031ed0..055c6181 100644 --- a/app/SpaCyNLPPipelineModel.defaults.yml +++ b/app/SpaCyNLPPipelineModel.defaults.yml @@ -42,6 +42,7 @@ version: '3.2.0' compatible_service_versions: - '0.1.0' + - 'test' - title: 'Spanish' description: 'Spanish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' url: 'https://github.com/explosion/spacy-models/releases/download/es_core_news_md-3.2.0/es_core_news_md-3.2.0.tar.gz' diff --git a/app/services/forms.py b/app/services/forms.py index 134e9456..58bab164 100644 --- a/app/services/forms.py +++ b/app/services/forms.py @@ -146,6 +146,7 @@ class CreateSpacyNLPPipelineJobForm(CreateJobBaseForm): version = kwargs.pop('version', service_manifest['latest_version']) super().__init__(*args, **kwargs) service_info = service_manifest['versions'][version] + print(service_info) if self.encoding_detection.render_kw is None: self.encoding_detection.render_kw = {} self.encoding_detection.render_kw['disabled'] = True diff --git a/app/services/services.yml b/app/services/services.yml index e8db1b33..1692407e 100644 --- a/app/services/services.yml +++ b/app/services/services.yml @@ -45,16 +45,21 @@ spacy-nlp-pipeline: 0.1.0: methods: - 'encoding_detection' - models: - ca: 'Catalan' - de: 'German' - el: 'Greek' - en: 'English' - es: 'Spanish' - fr: 'French' - it: 'Italian' - pl: 'Polish' - ru: 'Russian' - zh: 'Chinese' + # models: + # ca: 'Catalan' + # de: 'German' + # el: 'Greek' + # en: 'English' + # es: 'Spanish' + # fr: 'French' + # it: 'Italian' + # pl: 'Polish' + # ru: 'Russian' + # zh: 'Chinese' publishing_year: 2022 url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.0' + test: + methods: + - 'encoding_detection' + publishing_year: 2022 + url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/ From 79d76f158f754ed5ec1b4937b910d9ed88995624 Mon Sep 17 00:00:00 2001 From: Inga Kirschnick Date: Thu, 10 Nov 2022 16:19:58 +0100 Subject: [PATCH 07/29] update binarization threshold --- app/daemon/job_utils.py | 2 ++ app/services/forms.py | 6 +++--- app/services/routes.py | 2 +- app/templates/services/tesseract_ocr_pipeline.html.j2 | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/app/daemon/job_utils.py b/app/daemon/job_utils.py index 32def73d..99a6ee75 100644 --- a/app/daemon/job_utils.py +++ b/app/daemon/job_utils.py @@ -59,6 +59,8 @@ def _create_job_service(job): command += f' -m {job.service_args["model"]}' if 'binarization' in job.service_args and job.service_args['binarization']: command += ' --binarize' + value = job.service_args['ocropus_nlbin_threshold'] + command += f' --ocropus-nlbin-threshold {value}' elif job.service == 'transkribus-htr-pipeline': transkribus_htr_pipeline_model_id = job.service_args['model'] command += f' -m {transkribus_htr_pipeline_model_id}' diff --git a/app/services/forms.py b/app/services/forms.py index 9d7edf80..97218ddb 100644 --- a/app/services/forms.py +++ b/app/services/forms.py @@ -3,7 +3,7 @@ from flask_wtf import FlaskForm from flask_wtf.file import FileField, FileRequired from wtforms import ( BooleanField, - IntegerRangeField, + DecimalRangeField, MultipleFileField, SelectField, StringField, @@ -50,8 +50,8 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm): binarization = BooleanField('Binarization') pdf = FileField('File', validators=[FileRequired()]) model = SelectField('Model', validators=[InputRequired()]) - ocropus_nlbin_threshold = IntegerRangeField( - render_kw={'tooltips':'false', 'min': 0, 'max': 1, 'step': 0.1, 'start': [0.5]} + ocropus_nlbin_threshold = DecimalRangeField( + render_kw={'min': 0, 'max': 1, 'step': 0.1, 'start': [0.5]} ) def validate_binarization(self, field): diff --git a/app/services/routes.py b/app/services/routes.py index 7fe7a0eb..4e0266d8 100644 --- a/app/services/routes.py +++ b/app/services/routes.py @@ -79,7 +79,7 @@ def tesseract_ocr_pipeline(): service_args={ 'binarization': form.binarization.data, 'model': hashids.decode(form.model.data), - 'ocropus_nlbin_threshold': form.ocropus_nlbin_threshold.data + 'ocropus_nlbin_threshold': float(form.ocropus_nlbin_threshold.data) }, service_version=form.version.data, user=current_user diff --git a/app/templates/services/tesseract_ocr_pipeline.html.j2 b/app/templates/services/tesseract_ocr_pipeline.html.j2 index 31bf155c..8f4f2cc4 100644 --- a/app/templates/services/tesseract_ocr_pipeline.html.j2 +++ b/app/templates/services/tesseract_ocr_pipeline.html.j2 @@ -85,7 +85,7 @@

     

    -

    Intensity

    +

    Intensity (between 0 and 1)

    {{ form.ocropus_nlbin_threshold() }}

     

    From ca4abc326966f4323c105f9738b16eef4ba426a9 Mon Sep 17 00:00:00 2001 From: Inga Kirschnick Date: Fri, 11 Nov 2022 13:38:41 +0100 Subject: [PATCH 08/29] Small fixes --- app/daemon/job_utils.py | 9 ++++++++- app/models.py | 2 +- app/services/services.yml | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/app/daemon/job_utils.py b/app/daemon/job_utils.py index 518100f7..ab17e760 100644 --- a/app/daemon/job_utils.py +++ b/app/daemon/job_utils.py @@ -53,7 +53,12 @@ def _create_job_service(job): command += f' --mem-mb {mem_mb}' command += f' --n-cores {n_cores}' if job.service == 'spacy-nlp-pipeline': - command += f' -m {job.service_args["model"]}' + model_id = hashids.decode(job.service_args['model']) + model = SpaCyNLPPipelineModel.query.get(model_id) + if model is None: + job.status = JobStatus.FAILED + return + command += f' -m {model.pipeline_name}' if 'encoding_detection' in job.service_args and job.service_args['encoding_detection']: command += ' --check-encoding' elif job.service == 'tesseract-ocr-pipeline': @@ -139,6 +144,8 @@ def _create_job_service(job): ) ''' ## Restart policy ## ''' restart_policy = docker.types.RestartPolicy() + print(command) + print(mounts) try: docker_client.services.create( image, diff --git a/app/models.py b/app/models.py index 90d16f48..93a23461 100644 --- a/app/models.py +++ b/app/models.py @@ -698,7 +698,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model): db.session.add(model) db.session.flush(objects=[model]) db.session.refresh(model) - model.filename = f'{model.id}.traineddata' + model.filename = m['url'].split('/')[-1] r = requests.get(m['url'], stream=True) pbar = tqdm( desc=f'{model.title} ({model.filename})', diff --git a/app/services/services.yml b/app/services/services.yml index 1692407e..c9d61e08 100644 --- a/app/services/services.yml +++ b/app/services/services.yml @@ -62,4 +62,4 @@ spacy-nlp-pipeline: methods: - 'encoding_detection' publishing_year: 2022 - url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/ + url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/' From 346f8f1dc50335ecf3296d9491880996f43e9aae Mon Sep 17 00:00:00 2001 From: Patrick Jentsch Date: Fri, 11 Nov 2022 14:59:14 +0100 Subject: [PATCH 09/29] Update Dockerfile logic --- .dockerignore | 41 ++++------- .gitignore | 186 ++++++++++++++++++++++++++++++++++++++++++-------- Dockerfile | 94 +++++++++++++------------ boot.sh | 4 +- 4 files changed, 219 insertions(+), 106 deletions(-) diff --git a/.dockerignore b/.dockerignore index 07d50b4b..9960fd26 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,29 +1,12 @@ -**/__pycache__ -**/.venv -**/.classpath -**/.dockerignore -**/.env -**/.git -**/.gitignore -**/.project -**/.settings -**/.toolstarget -**/.vs -**/.vscode -**/*.*proj.user -**/*.dbmdl -**/*.jfm -**/bin -**/charts -**/docker-compose* -**/compose* -**/Dockerfile* -**/node_modules -**/npm-debug.log -**/obj -**/secrets.dev.yaml -**/values.dev.yaml -README.md - - -data +# Exclude everything +* + +# Include what we need +!app +!migrations +!tests +!.flaskenv +!boot.sh +!config.py +!nopaque.py +!requirements.txt diff --git a/.gitignore b/.gitignore index 14a22fe1..59ada396 100644 --- a/.gitignore +++ b/.gitignore @@ -1,42 +1,168 @@ +# nopaque specifics +app/static/gen/ +data/ +docker-compose.override.yml +logs/ +!logs/dummy +*.env + +# Byte-compiled / optimized / DLL files +__pycache__/ *.py[cod] +*$py.class # C extensions *.so -# Flask-Assets files -.webassets-cache -app/static/gen +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST -# Docker related files -docker-compose.override.yml -data/** - -# Environment files -*.env +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec # Installer logs pip-log.txt +pip-delete-this-directory.txt -# Logs in log folder -logs/* -!logs/dummy +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ -# Packages -*.egg -*.egg-info -dist -build -eggs -parts -bin -var -sdist -develop-eggs -.installed.cfg -lib -lib64 -__pycache__ +# Translations +*.mo +*.pot -# Virtual environment -venv -.idea +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ diff --git a/Dockerfile b/Dockerfile index 0a2309f5..8ccdf7f6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,44 +1,50 @@ -FROM python:3.8.10-slim-buster - - -LABEL authors="Patrick Jentsch " - - -ARG DOCKER_GID -ARG UID -ARG GID - - -ENV FLASK_APP nopaque.py -ENV LANG=C.UTF-8 -ENV PYTHONDONTWRITEBYTECODE=1 -ENV PYTHONUNBUFFERED=1 - - -RUN apt-get update \ - && apt-get install --no-install-recommends --yes \ - build-essential \ - libpq-dev \ - && rm -r /var/lib/apt/lists/* - - -RUN groupadd --gid ${DOCKER_GID} --system docker \ - && groupadd --gid ${GID} --system nopaque \ - && useradd --create-home --gid ${GID} --groups ${DOCKER_GID} --no-log-init --system --uid ${UID} nopaque -USER nopaque -WORKDIR /home/nopaque - -COPY --chown=nopaque:nopaque requirements.txt ./ -RUN python -m venv venv \ - && venv/bin/pip install --requirement requirements.txt - - -COPY --chown=nopaque:nopaque app app -COPY --chown=nopaque:nopaque migrations migrations -COPY --chown=nopaque:nopaque tests tests -COPY --chown=nopaque:nopaque boot.sh config.py nopaque.py ./ - - -# run-time configuration -EXPOSE 5000 -ENTRYPOINT ["./boot.sh"] +FROM python:3.9.15-slim-bullseye + + +LABEL authors="Patrick Jentsch " + + +ARG DOCKER_GID +ARG UID +ARG GID + + +ENV LANG="C.UTF-8" +ENV PYTHONDONTWRITEBYTECODE="1" +ENV PYTHONUNBUFFERED="1" + + +RUN apt-get update \ + && apt-get install --no-install-recommends --yes \ + build-essential \ + libpq-dev \ + && rm --recursive /var/lib/apt/lists/* + + +RUN groupadd --gid "${DOCKER_GID}" docker \ + && groupadd --gid "${GID}" nopaque \ + && useradd --create-home --gid nopaque --groups "${DOCKER_GID}" --no-log-init --uid "${UID}" nopaque +USER nopaque +WORKDIR /home/nopaque + + +ENV PYTHON3_VENV_PATH="/home/nopaque/venv" +RUN python3 -m venv "${PYTHON3_VENV_PATH}" +ENV PATH="${PYTHON3_VENV_PATH}/bin:${PATH}" + + +COPY --chown=nopaque:nopaque requirements.txt . +RUN python3 -m pip install --requirement requirements.txt \ + && rm requirements.txt + + +COPY --chown=nopaque:nopaque app app +COPY --chown=nopaque:nopaque migrations migrations +COPY --chown=nopaque:nopaque tests tests +COPY --chown=nopaque:nopaque .flaskenv boot.sh config.py nopaque.py ./ + + +EXPOSE 5000 + + +ENTRYPOINT ["./boot.sh"] diff --git a/boot.sh b/boot.sh index 1d63652f..f96d7155 100755 --- a/boot.sh +++ b/boot.sh @@ -1,7 +1,5 @@ #!/bin/bash -source venv/bin/activate - display_help() { local script_name=$(basename "${0}") echo "" @@ -26,7 +24,7 @@ if [[ "${#}" -eq 0 ]]; then sleep 5 done fi - python nopaque.py + python3 nopaque.py elif [[ "${1}" == "flask" ]]; then flask "${@:2}" elif [[ "${1}" == "--help" || "${1}" == "-h" ]]; then From 6164c3e7d96245e3441e51e2a8bb815d944d2187 Mon Sep 17 00:00:00 2001 From: Patrick Jentsch Date: Fri, 11 Nov 2022 15:01:32 +0100 Subject: [PATCH 10/29] Add migration script message and formatting --- migrations/versions/721829b5dd25_.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/migrations/versions/721829b5dd25_.py b/migrations/versions/721829b5dd25_.py index 124ca07d..d5ba1139 100644 --- a/migrations/versions/721829b5dd25_.py +++ b/migrations/versions/721829b5dd25_.py @@ -1,4 +1,4 @@ -"""empty message +"""Add pipeline_name column to spacy_nlp_pipeline_models table Revision ID: 721829b5dd25 Revises: 31dd42e5ea6f @@ -17,12 +17,11 @@ depends_on = None def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.add_column('spacy_nlp_pipeline_models', sa.Column('pipeline_name', sa.String(length=64), nullable=True)) - # ### end Alembic commands ### + op.add_column( + 'spacy_nlp_pipeline_models', + sa.Column('pipeline_name', sa.String(length=64), nullable=True) + ) def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### op.drop_column('spacy_nlp_pipeline_models', 'pipeline_name') - # ### end Alembic commands ### From 176a67757aa1dcdab8cebc713cdeb131a94aea12 Mon Sep 17 00:00:00 2001 From: Inga Kirschnick Date: Mon, 14 Nov 2022 12:25:26 +0100 Subject: [PATCH 11/29] small fixes --- app/daemon/job_utils.py | 3 +-- app/services/forms.py | 25 +++++++++++++------------ app/services/services.yml | 1 + 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/app/daemon/job_utils.py b/app/daemon/job_utils.py index ce43a563..cfb362db 100644 --- a/app/daemon/job_utils.py +++ b/app/daemon/job_utils.py @@ -65,6 +65,7 @@ def _create_job_service(job): command += f' -m {job.service_args["model"]}' if 'binarization' in job.service_args and job.service_args['binarization']: command += ' --binarize' + if 'ocropus_nlbin_threshold' in job.service_args and job.service_args['ocropus_nlbin_threshold']: value = job.service_args['ocropus_nlbin_threshold'] command += f' --ocropus-nlbin-threshold {value}' elif job.service == 'transkribus-htr-pipeline': @@ -146,8 +147,6 @@ def _create_job_service(job): ) ''' ## Restart policy ## ''' restart_policy = docker.types.RestartPolicy() - print(command) - print(mounts) try: docker_client.services.create( image, diff --git a/app/services/forms.py b/app/services/forms.py index 60ac9a50..96caecc4 100644 --- a/app/services/forms.py +++ b/app/services/forms.py @@ -1,17 +1,12 @@ from flask_login import current_user from flask_wtf import FlaskForm from flask_wtf.file import FileField, FileRequired -from wtforms import ( - BooleanField, - DecimalRangeField, - MultipleFileField, - SelectField, - StringField, - SubmitField, - ValidationError -) +from wtforms import (BooleanField, DecimalRangeField, MultipleFileField, + SelectField, StringField, SubmitField, ValidationError) from wtforms.validators import InputRequired, Length -from app.models import TesseractOCRPipelineModel, SpaCyNLPPipelineModel + +from app.models import SpaCyNLPPipelineModel, TesseractOCRPipelineModel + from . import SERVICES @@ -51,7 +46,7 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm): pdf = FileField('File', validators=[FileRequired()]) model = SelectField('Model', validators=[InputRequired()]) ocropus_nlbin_threshold = DecimalRangeField( - render_kw={'min': 0, 'max': 1, 'step': 0.1, 'start': [0.5]} + render_kw={'min': 0, 'max': 1, 'step': 0.1, 'start': [0.5], 'disabled': True} ) def validate_binarization(self, field): @@ -59,7 +54,7 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm): if field.data: if not('methods' in service_info and 'binarization' in service_info['methods']): raise ValidationError('Binarization is not available') - + def validate_pdf(self, field): if field.data.mimetype != 'application/pdf': raise ValidationError('PDF files only!') @@ -72,10 +67,16 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm): if self.binarization.render_kw is None: self.binarization.render_kw = {} self.binarization.render_kw['disabled'] = True + if self.ocropus_nlbin_threshold.render_kw is None: + self.ocropus_nlbin_threshold.render_kw = {} + self.ocropus_nlbin_threshold.render_kw['disabled'] = True if 'methods' in service_info: if 'binarization' in service_info['methods']: if 'disabled' in self.binarization.render_kw: del self.binarization.render_kw['disabled'] + if 'ocropus_nlbin_threshold' in service_info['methods']: + if 'disabled' in self.ocropus_nlbin_threshold.render_kw: + del self.ocropus_nlbin_threshold.render_kw['disabled'] models = [ x for x in TesseractOCRPipelineModel.query.order_by(TesseractOCRPipelineModel.title).all() if version in x.compatible_service_versions and (x.shared == True or x.user == current_user) diff --git a/app/services/services.yml b/app/services/services.yml index c9d61e08..8a8377d5 100644 --- a/app/services/services.yml +++ b/app/services/services.yml @@ -20,6 +20,7 @@ tesseract-ocr-pipeline: 0.1.1: methods: - 'binarization' + - 'ocropus_nlbin_threshold' publishing_year: 2022 url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1' transkribus-htr-pipeline: From df5ae19e687fb974e83d085064bef249ba5dd952 Mon Sep 17 00:00:00 2001 From: Inga Kirschnick Date: Mon, 14 Nov 2022 15:02:41 +0100 Subject: [PATCH 12/29] Addition pipeline_name in Contribution Package --- app/contributions/forms.py | 16 ++++++++++++++++ app/contributions/routes.py | 10 +++++++--- .../contribute_spacy_nlp_models.html.j2 | 7 +++++-- .../spacy_nlp_pipeline_model.html.j2 | 3 +++ 4 files changed, 31 insertions(+), 5 deletions(-) diff --git a/app/contributions/forms.py b/app/contributions/forms.py index c0611e17..8eb44842 100644 --- a/app/contributions/forms.py +++ b/app/contributions/forms.py @@ -58,6 +58,18 @@ class EditForm(CreateContributionBaseForm): self.version.data = model_file.version self.shared.data = model_file.shared +class EditTesseractOCRModelForm(EditForm): + pass + +class EditSpaCyNLPPipelineModelForm(EditForm): + pipeline_name = StringField( + 'Pipeline name', + validators=[InputRequired(), Length(max=64)] + ) + def prefill(self, model_file): + super().prefill(model_file) + self.pipeline_name.data = model_file.pipeline_name + class TesseractOCRModelContributionForm(CreateContributionBaseForm): tesseract_model_file = FileField( 'File', @@ -88,6 +100,10 @@ class SpacyNLPModelContributionForm(CreateContributionBaseForm): compatible_service_versions = SelectMultipleField( 'Compatible service versions' ) + pipeline_name = StringField( + 'Pipeline name', + validators=[InputRequired(), Length(max=64)] + ) def validate_spacy_model_file(self, field): current_app.logger.warning(field.data.filename) if not field.data.filename.lower().endswith('.tar.gz'): diff --git a/app/contributions/routes.py b/app/contributions/routes.py index c20774d7..33355b2d 100644 --- a/app/contributions/routes.py +++ b/app/contributions/routes.py @@ -5,7 +5,7 @@ from app import db from app.decorators import admin_required, permission_required from app.models import Permission, SpaCyNLPPipelineModel, TesseractOCRPipelineModel from . import bp -from .forms import TesseractOCRModelContributionForm, EditForm, SpacyNLPModelContributionForm +from .forms import TesseractOCRModelContributionForm, EditSpaCyNLPPipelineModelForm, EditTesseractOCRModelForm, SpacyNLPModelContributionForm @bp.before_request @@ -39,7 +39,7 @@ def tesseract_ocr_pipeline_model(tesseract_ocr_pipeline_model_id): tesseract_ocr_pipeline_model = TesseractOCRPipelineModel.query.get_or_404( tesseract_ocr_pipeline_model_id ) - form = EditForm(prefix='tesseract-ocr-model-edit-form') + form = EditTesseractOCRModelForm(prefix='tesseract-ocr-model-edit-form') if form.validate_on_submit(): if tesseract_ocr_pipeline_model.title != form.title.data: tesseract_ocr_pipeline_model.title = form.title.data @@ -134,12 +134,14 @@ def spacy_nlp_pipeline_model(spacy_nlp_pipeline_model_id): spacy_nlp_pipeline_model = SpaCyNLPPipelineModel.query.get_or_404( spacy_nlp_pipeline_model_id ) - form = EditForm(prefix='spacy-nlp-model-edit-form') + form = EditSpaCyNLPPipelineModelForm(prefix='spacy-nlp-model-edit-form') if form.validate_on_submit(): if spacy_nlp_pipeline_model.title != form.title.data: spacy_nlp_pipeline_model.title = form.title.data if spacy_nlp_pipeline_model.description != form.description.data: spacy_nlp_pipeline_model.description = form.description.data + if spacy_nlp_pipeline_model.pipeline_name != form.pipeline_name.data: + spacy_nlp_pipeline_model.pipeline_name = form.pipeline_name.data if spacy_nlp_pipeline_model.publisher != form.publisher.data: spacy_nlp_pipeline_model.publisher = form.publisher.data if spacy_nlp_pipeline_model.publishing_year != form.publishing_year.data: @@ -156,6 +158,7 @@ def spacy_nlp_pipeline_model(spacy_nlp_pipeline_model_id): message = Markup(f'Model "{spacy_nlp_pipeline_model.title}" updated') flash(message, category='corpus') return {}, 201, {'Location': url_for('contributions.contributions')} + print(spacy_nlp_pipeline_model.to_json()) form.prefill(spacy_nlp_pipeline_model) return render_template( 'contributions/spacy_nlp_pipeline_model.html.j2', @@ -195,6 +198,7 @@ def add_spacy_nlp_pipeline_model(): form.spacy_model_file.data, compatible_service_versions=form.compatible_service_versions.data, description=form.description.data, + pipeline_name=form.pipeline_name.data, publisher=form.publisher.data, publisher_url=form.publisher_url.data, publishing_url=form.publishing_url.data, diff --git a/app/templates/contributions/contribute_spacy_nlp_models.html.j2 b/app/templates/contributions/contribute_spacy_nlp_models.html.j2 index b5ab9b0d..d1e3cd11 100644 --- a/app/templates/contributions/contribute_spacy_nlp_models.html.j2 +++ b/app/templates/contributions/contribute_spacy_nlp_models.html.j2 @@ -2,7 +2,7 @@ {% import "materialize/wtf.html.j2" as wtf %} {% from "contributions/_breadcrumbs.html.j2" import breadcrumbs with context %} -{% block main_attribs %} class="service-scheme" data-service="tesseract-ocr-pipeline"{% endblock main_attribs %} +{% block main_attribs %} class="service-scheme" data-service="spacy-nlp-pipeline"{% endblock main_attribs %} {% block page_content %}
    @@ -28,7 +28,7 @@
    layersspaCy NLP Models -

    You can add more Tesseract OCR models using the form below. They will automatically appear in the list of usable models.

    +

    You can add more spaCy NLP models using the form below. They will automatically appear in the list of usable models.

    Edit already uploaded models

    Information about the already existing models.

    @@ -54,6 +54,9 @@
    {{ wtf.render_field(form.description, material_icon='description') }}
    +
    + {{ wtf.render_field(form.pipeline_name, material_icon='emoji_objects') }} +
    {{ wtf.render_field(form.publisher, material_icon='account_balance') }}
    diff --git a/app/templates/contributions/spacy_nlp_pipeline_model.html.j2 b/app/templates/contributions/spacy_nlp_pipeline_model.html.j2 index 82fd6862..37248448 100644 --- a/app/templates/contributions/spacy_nlp_pipeline_model.html.j2 +++ b/app/templates/contributions/spacy_nlp_pipeline_model.html.j2 @@ -23,6 +23,9 @@
    {{ wtf.render_field(form.description, material_icon='description') }}
    +
    + {{ wtf.render_field(form.pipeline_name, material_icon='emoji_objects') }} +
    {{ wtf.render_field(form.publisher, material_icon='account_balance') }}
    From cd72614c0fd0226a2285d5a50918068d50667026 Mon Sep 17 00:00:00 2001 From: Patrick Jentsch Date: Tue, 15 Nov 2022 15:11:16 +0100 Subject: [PATCH 13/29] Contributions update revised --- app/contributions/forms.py | 61 ++++--- app/contributions/routes.py | 162 +++++++++--------- app/models.py | 2 + app/services/services.yml | 4 +- app/static/js/RessourceLists/RessourceList.js | 2 + .../js/RessourceLists/SpacyNLPModelList.js | 76 -------- .../SpacyNLPPipelineModelList.js | 99 +++++++++++ .../RessourceLists/TesseractOCRModelList.js | 76 -------- .../TesseractOCRPipelineModelList.js | 99 +++++++++++ app/static/js/Utils.js | 100 ++++++++++- app/templates/_scripts.html.j2 | 4 +- .../contributions/_breadcrumbs.html.j2 | 8 +- .../contribution_overview.html.j2 | 129 -------------- .../contributions/contributions.html.j2 | 51 ++++++ ...> create_spacy_nlp_pipeline_model.html.j2} | 41 +---- ...eate_tesseract_ocr_pipeline_model.html.j2} | 7 +- .../spacy_nlp_pipeline_model.html.j2 | 2 +- .../tesseract_ocr_pipeline_model.html.j2 | 2 +- 18 files changed, 481 insertions(+), 444 deletions(-) delete mode 100644 app/static/js/RessourceLists/SpacyNLPModelList.js create mode 100644 app/static/js/RessourceLists/SpacyNLPPipelineModelList.js delete mode 100644 app/static/js/RessourceLists/TesseractOCRModelList.js create mode 100644 app/static/js/RessourceLists/TesseractOCRPipelineModelList.js delete mode 100644 app/templates/contributions/contribution_overview.html.j2 create mode 100644 app/templates/contributions/contributions.html.j2 rename app/templates/contributions/{contribute_spacy_nlp_models.html.j2 => create_spacy_nlp_pipeline_model.html.j2} (70%) rename app/templates/contributions/{contribute_tesseract_ocr_models.html.j2 => create_tesseract_ocr_pipeline_model.html.j2} (91%) diff --git a/app/contributions/forms.py b/app/contributions/forms.py index 8eb44842..04030fd0 100644 --- a/app/contributions/forms.py +++ b/app/contributions/forms.py @@ -12,6 +12,7 @@ from wtforms import ( from wtforms.validators import InputRequired, Length from app.services import SERVICES + class CreateContributionBaseForm(FlaskForm): title = StringField( 'Title', @@ -46,31 +47,8 @@ class CreateContributionBaseForm(FlaskForm): ) submit = SubmitField() -class EditForm(CreateContributionBaseForm): - def prefill(self, model_file): - ''' Pre-fill the form with data of an exististing corpus file ''' - self.title.data = model_file.title - self.description.data = model_file.description - self.publisher.data = model_file.publisher - self.publishing_year.data = model_file.publishing_year - self.publisher_url.data = model_file.publisher_url - self.publishing_url.data = model_file.publishing_url - self.version.data = model_file.version - self.shared.data = model_file.shared -class EditTesseractOCRModelForm(EditForm): - pass - -class EditSpaCyNLPPipelineModelForm(EditForm): - pipeline_name = StringField( - 'Pipeline name', - validators=[InputRequired(), Length(max=64)] - ) - def prefill(self, model_file): - super().prefill(model_file) - self.pipeline_name.data = model_file.pipeline_name - -class TesseractOCRModelContributionForm(CreateContributionBaseForm): +class CreateTesseractOCRPipelineModelForm(CreateContributionBaseForm): tesseract_model_file = FileField( 'File', validators=[FileRequired()] @@ -78,6 +56,7 @@ class TesseractOCRModelContributionForm(CreateContributionBaseForm): compatible_service_versions = SelectMultipleField( 'Compatible service versions' ) + def validate_tesseract_model_file(self, field): current_app.logger.warning(field.data.filename) if not field.data.filename.lower().endswith('.traineddata'): @@ -92,7 +71,8 @@ class TesseractOCRModelContributionForm(CreateContributionBaseForm): ] self.compatible_service_versions.default = '' -class SpacyNLPModelContributionForm(CreateContributionBaseForm): + +class CreateSpaCyNLPPipelineModelForm(CreateContributionBaseForm): spacy_model_file = FileField( 'File', validators=[FileRequired()] @@ -104,16 +84,45 @@ class SpacyNLPModelContributionForm(CreateContributionBaseForm): 'Pipeline name', validators=[InputRequired(), Length(max=64)] ) + def validate_spacy_model_file(self, field): current_app.logger.warning(field.data.filename) if not field.data.filename.lower().endswith('.tar.gz'): raise ValidationError('.tar.gz files only!') def __init__(self, *args, **kwargs): - service_manifest = SERVICES['spacy-nlp-pipeline'] super().__init__(*args, **kwargs) + service_manifest = SERVICES['spacy-nlp-pipeline'] self.compatible_service_versions.choices = [('', 'Choose your option')] self.compatible_service_versions.choices += [ (x, x) for x in service_manifest['versions'].keys() ] self.compatible_service_versions.default = '' + + +class EditContributionBaseForm(CreateContributionBaseForm): + def prefill(self, model_file): + ''' Pre-fill the form with data of an exististing corpus file ''' + self.title.data = model_file.title + self.description.data = model_file.description + self.publisher.data = model_file.publisher + self.publishing_year.data = model_file.publishing_year + self.publisher_url.data = model_file.publisher_url + self.publishing_url.data = model_file.publishing_url + self.version.data = model_file.version + self.shared.data = model_file.shared + + +class EditTesseractOCRPipelineModelForm(EditContributionBaseForm): + pass + + +class EditSpaCyNLPPipelineModelForm(EditContributionBaseForm): + pipeline_name = StringField( + 'Pipeline name', + validators=[InputRequired(), Length(max=64)] + ) + + def prefill(self, model_file): + super().prefill(model_file) + self.pipeline_name.data = model_file.pipeline_name diff --git a/app/contributions/routes.py b/app/contributions/routes.py index 33355b2d..121fc101 100644 --- a/app/contributions/routes.py +++ b/app/contributions/routes.py @@ -2,10 +2,19 @@ from flask import abort, current_app, flash, Markup, render_template, url_for from flask_login import login_required, current_user from threading import Thread from app import db -from app.decorators import admin_required, permission_required -from app.models import Permission, SpaCyNLPPipelineModel, TesseractOCRPipelineModel +from app.decorators import permission_required +from app.models import ( + Permission, + SpaCyNLPPipelineModel, + TesseractOCRPipelineModel +) from . import bp -from .forms import TesseractOCRModelContributionForm, EditSpaCyNLPPipelineModelForm, EditTesseractOCRModelForm, SpacyNLPModelContributionForm +from .forms import ( + CreateSpaCyNLPPipelineModelForm, + CreateTesseractOCRPipelineModelForm, + EditSpaCyNLPPipelineModelForm, + EditTesseractOCRPipelineModelForm +) @bp.before_request @@ -16,30 +25,17 @@ def before_request(): @bp.route('/') -@login_required -@admin_required def contributions(): - tesseract_ocr_user_models = [ - x for x in current_user.tesseract_ocr_pipeline_models - ] - spacy_nlp_user_models = [ - x for x in current_user.spacy_nlp_pipeline_models - ] return render_template( - 'contributions/contribution_overview.html.j2', - tesseract_ocr_user_models=tesseract_ocr_user_models, - spacy_nlp_user_models=spacy_nlp_user_models, - userId = current_user.hashid, - title='Contribution Overview' + 'contributions/contributions.html.j2', + title='Contributions' ) -@bp.route('/edit-tesseract-model/', methods=['GET', 'POST']) -@login_required + +@bp.route('/tesseract_ocr_pipeline_models/', methods=['GET', 'POST']) def tesseract_ocr_pipeline_model(tesseract_ocr_pipeline_model_id): - tesseract_ocr_pipeline_model = TesseractOCRPipelineModel.query.get_or_404( - tesseract_ocr_pipeline_model_id - ) - form = EditTesseractOCRModelForm(prefix='tesseract-ocr-model-edit-form') + tesseract_ocr_pipeline_model = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id) + form = EditTesseractOCRPipelineModelForm(prefix='edit-tesseract-ocr-pipeline-model-form') if form.validate_on_submit(): if tesseract_ocr_pipeline_model.title != form.title.data: tesseract_ocr_pipeline_model.title = form.title.data @@ -58,47 +54,50 @@ def tesseract_ocr_pipeline_model(tesseract_ocr_pipeline_model_id): if tesseract_ocr_pipeline_model.shared != form.shared.data: tesseract_ocr_pipeline_model.shared = form.shared.data db.session.commit() - message = Markup(f'Model "{tesseract_ocr_pipeline_model.title}" updated') - flash(message, category='corpus') - return {}, 201, {'Location': url_for('contributions.contributions')} + tesseract_ocr_pipeline_model_url = url_for( + '.tesseract_ocr_pipeline_model', + tesseract_ocr_pipeline_model_id=tesseract_ocr_pipeline_model.id + ) + message = Markup(f'Tesseract OCR Pipeline model "{tesseract_ocr_pipeline_model.title}" updated') + flash(message) + return {}, 201, {'Location': tesseract_ocr_pipeline_model_url} form.prefill(tesseract_ocr_pipeline_model) return render_template( 'contributions/tesseract_ocr_pipeline_model.html.j2', - tesseract_ocr_pipeline_model=tesseract_ocr_pipeline_model, form=form, - title='Edit your Tesseract OCR model' + tesseract_ocr_pipeline_model=tesseract_ocr_pipeline_model, + title='Edit Tesseract OCR Pipeline Model' ) -@bp.route('/edit-tesseract-model/', methods=['DELETE']) -@login_required + +@bp.route('/tesseract_ocr_pipeline_models/', methods=['DELETE']) def delete_tesseract_model(tesseract_ocr_pipeline_model_id): - def _delete_tesseract_model(app, tesseract_ocr_pipeline_model_id): + def _delete_tesseract_ocr_pipeline_model(app, tesseract_ocr_pipeline_model_id): with app.app_context(): - model = TesseractOCRPipelineModel.query.get(tesseract_ocr_pipeline_model_id) - model.delete() + tesseract_ocr_pipeline_model = TesseractOCRPipelineModel.query.get(tesseract_ocr_pipeline_model_id) + tesseract_ocr_pipeline_model.delete() db.session.commit() - - model = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id) - if not (model.user == current_user or current_user.is_administrator()): + + tesseract_ocr_pipeline_model = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id) + if not (tesseract_ocr_pipeline_model.user == current_user or current_user.is_administrator()): abort(403) thread = Thread( - target=_delete_tesseract_model, + target=_delete_tesseract_ocr_pipeline_model, args=(current_app._get_current_object(), tesseract_ocr_pipeline_model_id) ) thread.start() return {}, 202 -@bp.route('/add-tesseract-ocr-pipeline-model', methods=['GET', 'POST']) -def add_tesseract_ocr_pipeline_model(): - form = TesseractOCRModelContributionForm( - prefix='contribute-tesseract-ocr-pipeline-model-form' - ) + +@bp.route('/tesseract_ocr_pipeline_models/create', methods=['GET', 'POST']) +def create_tesseract_ocr_pipeline_model(): + form = CreateTesseractOCRPipelineModelForm(prefix='create-tesseract-ocr-pipeline-model-form') if form.is_submitted(): if not form.validate(): response = {'errors': form.errors} return response, 400 try: - tesseract_ocr_model = TesseractOCRPipelineModel.create( + tesseract_ocr_pipeline_model = TesseractOCRPipelineModel.create( form.tesseract_model_file.data, compatible_service_versions=form.compatible_service_versions.data, description=form.description.data, @@ -114,27 +113,24 @@ def add_tesseract_ocr_pipeline_model(): except OSError: abort(500) db.session.commit() - message = Markup(f'Model "{tesseract_ocr_model.title}" created') + tesseract_ocr_pipeline_model_url = url_for( + '.tesseract_ocr_pipeline_model', + tesseract_ocr_pipeline_model_id=tesseract_ocr_pipeline_model.id + ) + message = Markup(f'Tesseract OCR Pipeline model "{tesseract_ocr_pipeline_model.title}" created') flash(message) - return {}, 201, {'Location': url_for('contributions.contributions')} - tesseract_ocr_pipeline_models = [ - x for x in TesseractOCRPipelineModel.query.all() - ] - + return {}, 201, {'Location': tesseract_ocr_pipeline_model_url} return render_template( - 'contributions/contribute_tesseract_ocr_models.html.j2', + 'contributions/create_tesseract_ocr_pipeline_model.html.j2', form=form, - tesseract_ocr_pipeline_models=tesseract_ocr_pipeline_models, - title='Tesseract OCR Model Contribution' + title='Create Tesseract OCR Pipeline Model' ) -@bp.route('/edit-spacy-model//', methods=['GET', 'POST']) -@login_required + +@bp.route('/spacy-nlp-pipeline-models/', methods=['GET', 'POST']) def spacy_nlp_pipeline_model(spacy_nlp_pipeline_model_id): - spacy_nlp_pipeline_model = SpaCyNLPPipelineModel.query.get_or_404( - spacy_nlp_pipeline_model_id - ) - form = EditSpaCyNLPPipelineModelForm(prefix='spacy-nlp-model-edit-form') + spacy_nlp_pipeline_model = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id) + form = EditSpaCyNLPPipelineModelForm(prefix='edit-spacy-nlp-pipeline-model-form') if form.validate_on_submit(): if spacy_nlp_pipeline_model.title != form.title.data: spacy_nlp_pipeline_model.title = form.title.data @@ -154,30 +150,33 @@ def spacy_nlp_pipeline_model(spacy_nlp_pipeline_model_id): spacy_nlp_pipeline_model.version = form.version.data if spacy_nlp_pipeline_model.shared != form.shared.data: spacy_nlp_pipeline_model.shared = form.shared.data + current_app.logger.warning(db.session.dirty) db.session.commit() - message = Markup(f'Model "{spacy_nlp_pipeline_model.title}" updated') - flash(message, category='corpus') - return {}, 201, {'Location': url_for('contributions.contributions')} - print(spacy_nlp_pipeline_model.to_json()) + spacy_nlp_pipeline_model_url = url_for( + '.spacy_nlp_pipeline_model', + spacy_nlp_pipeline_model_id=spacy_nlp_pipeline_model.id + ) + message = Markup(f'SpaCy NLP Pipeline model "{spacy_nlp_pipeline_model.title}" updated') + flash(message) + return {}, 201, {'Location': url_for('.contributions')} form.prefill(spacy_nlp_pipeline_model) return render_template( 'contributions/spacy_nlp_pipeline_model.html.j2', - spacy_nlp_pipeline_model=spacy_nlp_pipeline_model, form=form, - title='Edit your spaCy NLP model' + spacy_nlp_pipeline_model=spacy_nlp_pipeline_model, + title=f'{spacy_nlp_pipeline_model.title} [{spacy_nlp_pipeline_model.version}]' ) -@bp.route('/edit-spacy-model/', methods=['DELETE']) -@login_required +@bp.route('/spacy-nlp-pipeline-models/', methods=['DELETE']) def delete_spacy_model(spacy_nlp_pipeline_model_id): def _delete_spacy_model(app, spacy_nlp_pipeline_model_id): with app.app_context(): - model = SpaCyNLPPipelineModel.query.get(spacy_nlp_pipeline_model_id) - model.delete() + spacy_nlp_pipeline_model = SpaCyNLPPipelineModel.query.get(spacy_nlp_pipeline_model_id) + spacy_nlp_pipeline_model.delete() db.session.commit() - model = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id) - if not (model.user == current_user or current_user.is_administrator()): + spacy_nlp_pipeline_model = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id) + if not (spacy_nlp_pipeline_model.user == current_user or current_user.is_administrator()): abort(403) thread = Thread( target=_delete_spacy_model, @@ -186,15 +185,16 @@ def delete_spacy_model(spacy_nlp_pipeline_model_id): thread.start() return {}, 202 -@bp.route('/add-spacy-nlp-pipeline-model', methods=['GET', 'POST']) -def add_spacy_nlp_pipeline_model(): - form = SpacyNLPModelContributionForm(prefix='contribute-spacy-nlp-pipeline-model-form') + +@bp.route('/spacy-nlp-pipeline-models/create', methods=['GET', 'POST']) +def create_spacy_nlp_pipeline_model(): + form = CreateSpaCyNLPPipelineModelForm(prefix='create-spacy-nlp-pipeline-model-form') if form.is_submitted(): if not form.validate(): response = {'errors': form.errors} return response, 400 try: - spacy_nlp_model = SpaCyNLPPipelineModel.create( + spacy_nlp_pipeline_model = SpaCyNLPPipelineModel.create( form.spacy_model_file.data, compatible_service_versions=form.compatible_service_versions.data, description=form.description.data, @@ -211,15 +211,15 @@ def add_spacy_nlp_pipeline_model(): except OSError: abort(500) db.session.commit() - message = Markup(f'Model "{spacy_nlp_model.title}" created') + spacy_nlp_pipeline_model_url = url_for( + '.spacy_nlp_pipeline_model', + spacy_nlp_pipeline_model_id=spacy_nlp_pipeline_model.id + ) + message = Markup(f'SpaCy NLP Pipeline model "{spacy_nlp_pipeline_model.title}" created') flash(message) - return {}, 201, {'Location': url_for('contributions.contributions')} - spacy_nlp_pipeline_models = [ - x for x in SpaCyNLPPipelineModel.query.all() - ] + return {}, 201, {'Location': spacy_nlp_pipeline_model_url} return render_template( - 'contributions/contribute_spacy_nlp_models.html.j2', + 'contributions/create_spacy_nlp_pipeline_model.html.j2', form=form, - spacy_nlp_pipeline_models=spacy_nlp_pipeline_models, - title='spaCy NLP Model Contribution' + title='Create SpaCy NLP Pipeline Model' ) diff --git a/app/models.py b/app/models.py index 93a23461..3bf7f8a7 100644 --- a/app/models.py +++ b/app/models.py @@ -625,6 +625,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model): 'publishing_year': self.publishing_year, 'shared': self.shared, 'title': self.title, + 'version': self.version, **self.file_mixin_to_json() } if backrefs: @@ -735,6 +736,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model): 'pipeline_name': self.pipeline_name, 'shared': self.shared, 'title': self.title, + 'version': self.version, **self.file_mixin_to_json() } if backrefs: diff --git a/app/services/services.yml b/app/services/services.yml index c9d61e08..9d99d688 100644 --- a/app/services/services.yml +++ b/app/services/services.yml @@ -1,6 +1,6 @@ # TODO: This could also be done via GitLab/GitHub APIs file-setup-pipeline: - name: 'File setup pipeline' + name: 'File Setup Pipeline' publisher: 'Bielefeld University - CRC 1288 - INF' latest_version: '0.1.0' versions: @@ -38,7 +38,7 @@ transkribus-htr-pipeline: publishing_year: 2022 url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/transkribus-htr-pipeline/-/releases/v0.1.1' spacy-nlp-pipeline: - name: 'spaCy NLP Pipeline' + name: 'SpaCy NLP Pipeline' publisher: 'Bielefeld University - CRC 1288 - INF' latest_version: '0.1.0' versions: diff --git a/app/static/js/RessourceLists/RessourceList.js b/app/static/js/RessourceLists/RessourceList.js index 824db3d1..a2242054 100644 --- a/app/static/js/RessourceLists/RessourceList.js +++ b/app/static/js/RessourceLists/RessourceList.js @@ -10,6 +10,8 @@ class RessourceList { JobList.autoInit(); JobInputList.autoInit(); JobResultList.autoInit(); + SpacyNLPPipelineModelList.autoInit(); + TesseractOCRPipelineModelList.autoInit(); QueryResultList.autoInit(); UserList.autoInit(); } diff --git a/app/static/js/RessourceLists/SpacyNLPModelList.js b/app/static/js/RessourceLists/SpacyNLPModelList.js deleted file mode 100644 index 0e20191b..00000000 --- a/app/static/js/RessourceLists/SpacyNLPModelList.js +++ /dev/null @@ -1,76 +0,0 @@ -class SpacyNLPModelList { - constructor () { - - this.elements = { - spacyNLPModelList: document.querySelector('#spacy-nlp-model-list'), - deleteButtons: document.querySelectorAll('.delete-spacy-model-button'), - editButtons: document.querySelectorAll('.edit-spacy-model-button'), - - } - } - - init () { - let userId = this.elements.spacyNLPModelList.dataset.userId; - - for (let deleteButton of this.elements.deleteButtons) { - deleteButton.addEventListener('click', () => {this.deleteModel(deleteButton, userId);}); - } - - for (let editButton of this.elements.editButtons) { - editButton.addEventListener('click', () => {this.editModel(editButton);}); - } - } - - deleteModel(deleteButton, userId) { - return new Promise((resolve, reject) => { - let modelId = deleteButton.dataset.modelId; - let model = app.data.users[userId].spacy_nlp_pipeline_models[modelId]; - let modalElement = Utils.elementFromString( - ` - - ` - ); - document.querySelector('#modals').appendChild(modalElement); - let modal = M.Modal.init( - modalElement, - { - dismissible: false, - onCloseEnd: () => { - modal.destroy(); - modalElement.remove(); - } - } - ); - let confirmElement = modalElement.querySelector('.action-button[data-action="confirm"]'); - confirmElement.addEventListener('click', (event) => { - let modelTitle = model.title; - fetch(`/contributions/edit-spacy-model/${modelId}`, {method: 'DELETE'}) - .then( - (response) => { - app.flash(`Model "${modelTitle}" marked for deletion`, 'corpus'); - resolve(response); - }, - (response) => { - if (response.status === 403) {app.flash('Forbidden', 'error');} - if (response.status === 404) {app.flash('Not Found', 'error');} - reject(response); - } - ); - }); - modal.open(); - }); - } - - editModel(editButton) { - window.location.href = `/contributions/edit-spacy-model/${editButton.dataset.modelId}`; - } -} diff --git a/app/static/js/RessourceLists/SpacyNLPPipelineModelList.js b/app/static/js/RessourceLists/SpacyNLPPipelineModelList.js new file mode 100644 index 00000000..fcc68a0d --- /dev/null +++ b/app/static/js/RessourceLists/SpacyNLPPipelineModelList.js @@ -0,0 +1,99 @@ +class SpacyNLPPipelineModelList extends RessourceList { + static autoInit() { + for (let spaCyNLPPipelineModelListElement of document.querySelectorAll('.spacy-nlp-pipeline-model-list:not(.no-autoinit)')) { + new SpacyNLPPipelineModelList(spaCyNLPPipelineModelListElement); + } + } + + static options = { + initialHtmlGenerator: (id) => { + return ` +
    + search + + +
    + + + + + + + + + + +
    TitleDescriptionBiblio
    +
      + `.trim(); + }, + item: ` + + + + (), , + + delete + send + + + `.trim(), + ressourceMapper: (spaCyNLPPipelineModel) => { + return { + 'id': spaCyNLPPipelineModel.id, + 'creation-date': spaCyNLPPipelineModel.creation_date, + 'description': spaCyNLPPipelineModel.description, + 'publisher': spaCyNLPPipelineModel.publisher, + 'publisher-url': spaCyNLPPipelineModel.publisher_url, + 'publishing-url': spaCyNLPPipelineModel.publishing_url, + 'publishing-url-2': spaCyNLPPipelineModel.publishing_url, + 'publishing-year': spaCyNLPPipelineModel.publishing_year, + 'title': spaCyNLPPipelineModel.title, + 'title-2': spaCyNLPPipelineModel.title, + 'version': spaCyNLPPipelineModel.version + }; + }, + sortArgs: ['creation-date', {order: 'desc'}], + valueNames: [ + {data: ['id']}, + {data: ['creation-date']}, + {name: 'publisher-url', attr: 'href'}, + {name: 'publishing-url', attr: 'href'}, + 'description', + 'publisher', + 'publishing-url-2', + 'publishing-year', + 'title', + 'title-2', + 'version' + ] + }; + + constructor(listElement, options = {}) { + super(listElement, {...SpacyNLPPipelineModelList.options, ...options}); + } + + init (user) { + this._init(user.spacy_nlp_pipeline_models); + } + + onClick(event) { + let actionButtonElement = event.target.closest('.action-button'); + let action = actionButtonElement === null ? 'view' : actionButtonElement.dataset.action; + let spaCyNLPPipelineModelElement = event.target.closest('tr'); + let spaCyNLPPipelineModelId = spaCyNLPPipelineModelElement.dataset.id; + switch (action) { + case 'delete-request': { + Utils.deleteSpaCyNLPPipelineModelRequest(this.userId, spaCyNLPPipelineModelId); + break; + } + case 'view': { + window.location.href = `/contributions/spacy-nlp-pipeline-models/${spaCyNLPPipelineModelId}`; + break; + } + default: { + break; + } + } + } +} diff --git a/app/static/js/RessourceLists/TesseractOCRModelList.js b/app/static/js/RessourceLists/TesseractOCRModelList.js deleted file mode 100644 index 782f5d7e..00000000 --- a/app/static/js/RessourceLists/TesseractOCRModelList.js +++ /dev/null @@ -1,76 +0,0 @@ -class TesseractOCRModelList { - constructor () { - - this.elements = { - tesseractOCRModelList: document.querySelector('#tesseract-ocr-model-list'), - deleteButtons: document.querySelectorAll('.delete-button'), - editButtons: document.querySelectorAll('.edit-button'), - - } - } - - init () { - let userId = this.elements.tesseractOCRModelList.dataset.userId; - - for (let deleteButton of this.elements.deleteButtons) { - deleteButton.addEventListener('click', () => {this.deleteModel(deleteButton, userId);}); - } - - for (let editButton of this.elements.editButtons) { - editButton.addEventListener('click', () => {this.editModel(editButton);}); - } - } - - deleteModel(deleteButton, userId) { - return new Promise((resolve, reject) => { - let modelId = deleteButton.dataset.modelId; - let model = app.data.users[userId].tesseract_ocr_pipeline_models[modelId]; - let modalElement = Utils.elementFromString( - ` - - ` - ); - document.querySelector('#modals').appendChild(modalElement); - let modal = M.Modal.init( - modalElement, - { - dismissible: false, - onCloseEnd: () => { - modal.destroy(); - modalElement.remove(); - } - } - ); - let confirmElement = modalElement.querySelector('.action-button[data-action="confirm"]'); - confirmElement.addEventListener('click', (event) => { - let modelTitle = model.title; - fetch(`/contributions/edit-tesseract-model/${modelId}`, {method: 'DELETE'}) - .then( - (response) => { - app.flash(`Model "${modelTitle}" marked for deletion`, 'corpus'); - resolve(response); - }, - (response) => { - if (response.status === 403) {app.flash('Forbidden', 'error');} - if (response.status === 404) {app.flash('Not Found', 'error');} - reject(response); - } - ); - }); - modal.open(); - }); - } - - editModel(editButton) { - window.location.href = `/contributions/edit-tesseract-model/${editButton.dataset.modelId}`; - } -} diff --git a/app/static/js/RessourceLists/TesseractOCRPipelineModelList.js b/app/static/js/RessourceLists/TesseractOCRPipelineModelList.js new file mode 100644 index 00000000..36dea105 --- /dev/null +++ b/app/static/js/RessourceLists/TesseractOCRPipelineModelList.js @@ -0,0 +1,99 @@ +class TesseractOCRPipelineModelList extends RessourceList { + static autoInit() { + for (let tesseractOCRPipelineModelListElement of document.querySelectorAll('.tesseract-ocr-pipeline-model-list:not(.no-autoinit)')) { + new TesseractOCRPipelineModelList(tesseractOCRPipelineModelListElement); + } + } + + static options = { + initialHtmlGenerator: (id) => { + return ` +
      + search + + +
      + + + + + + + + + + +
      TitleDescriptionBiblio
      +
        + `.trim(); + }, + item: ` + + + + (), , + + delete + send + + + `.trim(), + ressourceMapper: (tesseractOCRPipelineModel) => { + return { + 'id': tesseractOCRPipelineModel.id, + 'creation-date': tesseractOCRPipelineModel.creation_date, + 'description': tesseractOCRPipelineModel.description, + 'publisher': tesseractOCRPipelineModel.publisher, + 'publisher-url': tesseractOCRPipelineModel.publisher_url, + 'publishing-url': tesseractOCRPipelineModel.publishing_url, + 'publishing-url-2': tesseractOCRPipelineModel.publishing_url, + 'publishing-year': tesseractOCRPipelineModel.publishing_year, + 'title': tesseractOCRPipelineModel.title, + 'title-2': tesseractOCRPipelineModel.title, + 'version': tesseractOCRPipelineModel.version + }; + }, + sortArgs: ['creation-date', {order: 'desc'}], + valueNames: [ + {data: ['id']}, + {data: ['creation-date']}, + {name: 'publisher-url', attr: 'href'}, + {name: 'publishing-url', attr: 'href'}, + 'description', + 'publisher', + 'publishing-url-2', + 'publishing-year', + 'title', + 'title-2', + 'version' + ] + }; + + constructor(listElement, options = {}) { + super(listElement, {...TesseractOCRPipelineModelList.options, ...options}); + } + + init (user) { + this._init(user.tesseract_ocr_pipeline_models); + } + + onClick(event) { + let actionButtonElement = event.target.closest('.action-button'); + let action = actionButtonElement === null ? 'view' : actionButtonElement.dataset.action; + let tesseractOCRPipelineModelElement = event.target.closest('tr'); + let tesseractOCRPipelineModelId = tesseractOCRPipelineModelElement.dataset.id; + switch (action) { + case 'delete-request': { + Utils.deleteTesseractOCRPipelineModelRequest(this.userId, tesseractOCRPipelineModelId); + break; + } + case 'view': { + window.location.href = `/contributions/tesseract-ocr-pipeline-models/${tesseractOCRPipelineModelId}`; + break; + } + default: { + break; + } + } + } +} diff --git a/app/static/js/Utils.js b/app/static/js/Utils.js index 2e7cbd4c..e6076e43 100644 --- a/app/static/js/Utils.js +++ b/app/static/js/Utils.js @@ -84,8 +84,8 @@ class Utils { `