diff --git a/app/SpaCyNLPPipelineModel.defaults.yml b/app/SpaCyNLPPipelineModel.defaults.yml index 576f85e4..055c6181 100644 --- a/app/SpaCyNLPPipelineModel.defaults.yml +++ b/app/SpaCyNLPPipelineModel.defaults.yml @@ -1,10 +1,178 @@ -- title: 'de_core_news_md-3.4.0' - description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.' - url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.4.0/de_core_news_md-3.4.0.tar.gz' +- title: 'Catalan' + description: 'Catalan pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' + url: 'https://github.com/explosion/spacy-models/releases/download/ca_core_news_md-3.2.0/ca_core_news_md-3.2.0.tar.gz' publisher: 'Explosion' publisher_url: 'https://github.com/explosion' - publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/de_core_news_md-3.4.0' - publishing_year: 2022 - version: '3.4.0' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/ca_core_news_md-3.2.0' + publishing_year: 2021 + pipeline_name: 'ca_core_news_md' + version: '3.2.0' + compatible_service_versions: + - '0.1.0' +- title: 'German' + description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' + url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.2.0/de_core_news_md-3.2.0.tar.gz' + publisher: 'Explosion' + publisher_url: 'https://github.com/explosion' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/de_core_news_md-3.2.0' + publishing_year: 2021 + pipeline_name: 'de_core_news_md' + version: '3.2.0' + compatible_service_versions: + - '0.1.0' +- title: 'Greek' + description: 'Greek pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' 
+ url: 'https://github.com/explosion/spacy-models/releases/download/el_core_news_md-3.2.0/el_core_news_md-3.2.0.tar.gz' + publisher: 'Explosion' + publisher_url: 'https://github.com/explosion' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/el_core_news_md-3.2.0' + publishing_year: 2021 + pipeline_name: 'el_core_news_md' + version: '3.2.0' + compatible_service_versions: + - '0.1.0' +- title: 'English' + description: 'English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.' + url: 'https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.2.0/en_core_web_md-3.2.0.tar.gz' + publisher: 'Explosion' + publisher_url: 'https://github.com/explosion' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/en_core_web_md-3.2.0' + publishing_year: 2021 + pipeline_name: 'en_core_web_md' + version: '3.2.0' compatible_service_versions: - '0.1.0' + - 'test' +- title: 'Spanish' + description: 'Spanish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' + url: 'https://github.com/explosion/spacy-models/releases/download/es_core_news_md-3.2.0/es_core_news_md-3.2.0.tar.gz' + publisher: 'Explosion' + publisher_url: 'https://github.com/explosion' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/es_core_news_md-3.2.0' + publishing_year: 2021 + pipeline_name: 'es_core_news_md' + version: '3.2.0' + compatible_service_versions: + - '0.1.0' +- title: 'French' + description: 'French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' 
+ url: 'https://github.com/explosion/spacy-models/releases/download/fr_core_news_md-3.2.0/fr_core_news_md-3.2.0.tar.gz' + publisher: 'Explosion' + publisher_url: 'https://github.com/explosion' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/fr_core_news_md-3.2.0' + publishing_year: 2021 + pipeline_name: 'fr_core_news_md' + version: '3.2.0' + compatible_service_versions: + - '0.1.0' +- title: 'Italian' + description: 'Italian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' + url: 'https://github.com/explosion/spacy-models/releases/download/it_core_news_md-3.2.0/it_core_news_md-3.2.0.tar.gz' + publisher: 'Explosion' + publisher_url: 'https://github.com/explosion' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/it_core_news_md-3.2.0' + publishing_year: 2021 + pipeline_name: 'it_core_news_md' + version: '3.2.0' + compatible_service_versions: + - '0.1.0' +- title: 'Polish' + description: 'Polish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' + url: 'https://github.com/explosion/spacy-models/releases/download/pl_core_news_md-3.2.0/pl_core_news_md-3.2.0.tar.gz' + publisher: 'Explosion' + publisher_url: 'https://github.com/explosion' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/pl_core_news_md-3.2.0' + publishing_year: 2021 + pipeline_name: 'pl_core_news_md' + version: '3.2.0' + compatible_service_versions: + - '0.1.0' +- title: 'Russian' + description: 'Russian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' 
+ url: 'https://github.com/explosion/spacy-models/releases/download/ru_core_news_md-3.2.0/ru_core_news_md-3.2.0.tar.gz' + publisher: 'Explosion' + publisher_url: 'https://github.com/explosion' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/ru_core_news_md-3.2.0' + publishing_year: 2021 + pipeline_name: 'ru_core_news_md' + version: '3.2.0' + compatible_service_versions: + - '0.1.0' +- title: 'Chinese' + description: 'Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.' + url: 'https://github.com/explosion/spacy-models/releases/download/zh_core_web_md-3.2.0/zh_core_web_md-3.2.0.tar.gz' + publisher: 'Explosion' + publisher_url: 'https://github.com/explosion' + publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/zh_core_web_md-3.2.0' + publishing_year: 2021 + pipeline_name: 'zh_core_web_md' + version: '3.2.0' + compatible_service_versions: + - '0.1.0' + +# - title: 'de_core_news_md-3.4.0' +# description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.' +# url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.4.0/de_core_news_md-3.4.0.tar.gz' +# publisher: 'Explosion' +# publisher_url: 'https://github.com/explosion' +# publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/de_core_news_md-3.4.0' +# publishing_year: 2022 +# pipeline_name: 'de_core_news_md' +# version: '3.4.0' +# compatible_service_versions: +# - '0.1.0' +# - title: 'en_core_web_md-3.4.1' +# description: 'English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.' 
+# url: 'https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.4.1/en_core_web_md-3.4.1.tar.gz' +# publisher: 'Explosion' +# publisher_url: 'https://github.com/explosion' +# publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/en_core_web_md-3.4.1' +# publishing_year: 2022 +# pipeline_name: 'en_core_web_md' +# version: '3.4.1' +# compatible_service_versions: +# - '0.1.0' +# - title: 'uk_core_news_md-3.4.0' +# description: 'Ukrainian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' +# url: 'https://github.com/explosion/spacy-models/releases/download/uk_core_news_md-3.4.0/uk_core_news_md-3.4.0.tar.gz' +# publisher: 'Explosion' +# publisher_url: 'https://github.com/explosion' +# publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/uk_core_news_md-3.4.0' +# publishing_year: 2022 +# pipeline_name: 'uk_core_news_md' +# version: '3.4.0' +# compatible_service_versions: +# - '0.1.0' +# - title: 'zh_core_web_md-3.4.0' +# description: 'Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.' +# url: 'https://github.com/explosion/spacy-models/releases/download/zh_core_web_md-3.4.0/zh_core_web_md-3.4.0.tar.gz' +# publisher: 'Explosion' +# publisher_url: 'https://github.com/explosion' +# publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/zh_core_web_md-3.4.0' +# publishing_year: 2022 +# pipeline_name: 'zh_core_web_md' +# version: '3.4.0' +# compatible_service_versions: +# - '0.1.0' +# - title: 'ru_core_news_md-3.4.0' +# description: 'Russian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' 
+# url: 'https://github.com/explosion/spacy-models/releases/download/ru_core_news_md-3.4.0/ru_core_news_md-3.4.0.tar.gz' +# publisher: 'Explosion' +# publisher_url: 'https://github.com/explosion' +# publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/ru_core_news_md-3.4.0' +# publishing_year: 2022 +# pipeline_name: 'ru_core_news_md' +# version: '3.4.0' +# compatible_service_versions: +# - '0.1.0' +# - title: 'la_core_cltk_sm-0.1.0' +# description: 'Latin pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' +# url: 'https://github.com/diyclassics/latin-spacy-models/raw/main/la_core_cltk_sm/la_core_cltk_sm-0.1.0.tar.gz' +# publisher: 'DIY Classics' +# publisher_url: 'https://github.com/diyclassics/' +# publishing_url: 'https://github.com/diyclassics/latin-spacy-models/tree/main/la_core_cltk_sm' +# publishing_year: 2022 +# pipeline_name: 'la_core_cltk_sm' +# version: '0.1.0' +# compatible_service_versions: +# - '0.1.0' diff --git a/app/contributions/forms.py b/app/contributions/forms.py index 44279a1d..c0611e17 100644 --- a/app/contributions/forms.py +++ b/app/contributions/forms.py @@ -1,3 +1,4 @@ +from flask import current_app from flask_wtf import FlaskForm from flask_wtf.file import FileField, FileRequired from wtforms import ( @@ -5,13 +6,13 @@ from wtforms import ( StringField, SubmitField, SelectMultipleField, - IntegerField + IntegerField, + ValidationError ) from wtforms.validators import InputRequired, Length from app.services import SERVICES - -class TesseractOCRModelContributionForm(FlaskForm): +class CreateContributionBaseForm(FlaskForm): title = StringField( 'Title', validators=[InputRequired(), Length(max=64)] @@ -24,9 +25,6 @@ class TesseractOCRModelContributionForm(FlaskForm): 'Version', validators=[InputRequired(), Length(max=16)] ) - compatible_service_versions = SelectMultipleField( - 'Compatible service versions' - ) publisher = StringField( 'Publisher', 
validators=[InputRequired(), Length(max=128)] @@ -43,10 +41,35 @@ class TesseractOCRModelContributionForm(FlaskForm): 'Publishing year', validators=[InputRequired()] ) - shared = BooleanField('Shared', validators=[InputRequired()]) - model_file = FileField('File',validators=[FileRequired()]) + shared = BooleanField( + 'Shared' + ) submit = SubmitField() +class EditForm(CreateContributionBaseForm): + def prefill(self, model_file): + ''' Pre-fill the form with data of an exististing corpus file ''' + self.title.data = model_file.title + self.description.data = model_file.description + self.publisher.data = model_file.publisher + self.publishing_year.data = model_file.publishing_year + self.publisher_url.data = model_file.publisher_url + self.publishing_url.data = model_file.publishing_url + self.version.data = model_file.version + self.shared.data = model_file.shared + +class TesseractOCRModelContributionForm(CreateContributionBaseForm): + tesseract_model_file = FileField( + 'File', + validators=[FileRequired()] + ) + compatible_service_versions = SelectMultipleField( + 'Compatible service versions' + ) + def validate_tesseract_model_file(self, field): + current_app.logger.warning(field.data.filename) + if not field.data.filename.lower().endswith('.traineddata'): + raise ValidationError('traineddata files only!') def __init__(self, *args, **kwargs): service_manifest = SERVICES['tesseract-ocr-pipeline'] @@ -56,3 +79,25 @@ class TesseractOCRModelContributionForm(FlaskForm): (x, x) for x in service_manifest['versions'].keys() ] self.compatible_service_versions.default = '' + +class SpacyNLPModelContributionForm(CreateContributionBaseForm): + spacy_model_file = FileField( + 'File', + validators=[FileRequired()] + ) + compatible_service_versions = SelectMultipleField( + 'Compatible service versions' + ) + def validate_spacy_model_file(self, field): + current_app.logger.warning(field.data.filename) + if not field.data.filename.lower().endswith('.tar.gz'): + raise 
ValidationError('.tar.gz files only!') + + def __init__(self, *args, **kwargs): + service_manifest = SERVICES['spacy-nlp-pipeline'] + super().__init__(*args, **kwargs) + self.compatible_service_versions.choices = [('', 'Choose your option')] + self.compatible_service_versions.choices += [ + (x, x) for x in service_manifest['versions'].keys() + ] + self.compatible_service_versions.default = '' diff --git a/app/contributions/routes.py b/app/contributions/routes.py index 287eda18..c20774d7 100644 --- a/app/contributions/routes.py +++ b/app/contributions/routes.py @@ -1,10 +1,11 @@ -from flask import abort, flash, Markup, render_template, url_for -from flask_login import login_required +from flask import abort, current_app, flash, Markup, render_template, url_for +from flask_login import login_required, current_user +from threading import Thread from app import db -from app.decorators import permission_required -from app.models import TesseractOCRPipelineModel, Permission +from app.decorators import admin_required, permission_required +from app.models import Permission, SpaCyNLPPipelineModel, TesseractOCRPipelineModel from . 
import bp -from .forms import TesseractOCRModelContributionForm +from .forms import TesseractOCRModelContributionForm, EditForm, SpacyNLPModelContributionForm @bp.before_request @@ -14,13 +15,81 @@ def before_request(): pass -@bp.route('') +@bp.route('/') +@login_required +@admin_required def contributions(): - pass + tesseract_ocr_user_models = [ + x for x in current_user.tesseract_ocr_pipeline_models + ] + spacy_nlp_user_models = [ + x for x in current_user.spacy_nlp_pipeline_models + ] + return render_template( + 'contributions/contribution_overview.html.j2', + tesseract_ocr_user_models=tesseract_ocr_user_models, + spacy_nlp_user_models=spacy_nlp_user_models, + userId = current_user.hashid, + title='Contribution Overview' + ) +@bp.route('/edit-tesseract-model/', methods=['GET', 'POST']) +@login_required +def tesseract_ocr_pipeline_model(tesseract_ocr_pipeline_model_id): + tesseract_ocr_pipeline_model = TesseractOCRPipelineModel.query.get_or_404( + tesseract_ocr_pipeline_model_id + ) + form = EditForm(prefix='tesseract-ocr-model-edit-form') + if form.validate_on_submit(): + if tesseract_ocr_pipeline_model.title != form.title.data: + tesseract_ocr_pipeline_model.title = form.title.data + if tesseract_ocr_pipeline_model.description != form.description.data: + tesseract_ocr_pipeline_model.description = form.description.data + if tesseract_ocr_pipeline_model.publisher != form.publisher.data: + tesseract_ocr_pipeline_model.publisher = form.publisher.data + if tesseract_ocr_pipeline_model.publishing_year != form.publishing_year.data: + tesseract_ocr_pipeline_model.publishing_year = form.publishing_year.data + if tesseract_ocr_pipeline_model.publisher_url != form.publisher_url.data: + tesseract_ocr_pipeline_model.publisher_url = form.publisher_url.data + if tesseract_ocr_pipeline_model.publishing_url != form.publishing_url.data: + tesseract_ocr_pipeline_model.publishing_url = form.publishing_url.data + if tesseract_ocr_pipeline_model.version != form.version.data: + 
tesseract_ocr_pipeline_model.version = form.version.data + if tesseract_ocr_pipeline_model.shared != form.shared.data: + tesseract_ocr_pipeline_model.shared = form.shared.data + db.session.commit() + message = Markup(f'Model "{tesseract_ocr_pipeline_model.title}" updated') + flash(message, category='corpus') + return {}, 201, {'Location': url_for('contributions.contributions')} + form.prefill(tesseract_ocr_pipeline_model) + return render_template( + 'contributions/tesseract_ocr_pipeline_model.html.j2', + tesseract_ocr_pipeline_model=tesseract_ocr_pipeline_model, + form=form, + title='Edit your Tesseract OCR model' + ) -@bp.route('/tesseract-ocr-pipeline-models', methods=['GET', 'POST']) -def tesseract_ocr_pipeline_models(): +@bp.route('/edit-tesseract-model/', methods=['DELETE']) +@login_required +def delete_tesseract_model(tesseract_ocr_pipeline_model_id): + def _delete_tesseract_model(app, tesseract_ocr_pipeline_model_id): + with app.app_context(): + model = TesseractOCRPipelineModel.query.get(tesseract_ocr_pipeline_model_id) + model.delete() + db.session.commit() + + model = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id) + if not (model.user == current_user or current_user.is_administrator()): + abort(403) + thread = Thread( + target=_delete_tesseract_model, + args=(current_app._get_current_object(), tesseract_ocr_pipeline_model_id) + ) + thread.start() + return {}, 202 + +@bp.route('/add-tesseract-ocr-pipeline-model', methods=['GET', 'POST']) +def add_tesseract_ocr_pipeline_model(): form = TesseractOCRModelContributionForm( prefix='contribute-tesseract-ocr-pipeline-model-form' ) @@ -30,7 +99,7 @@ def tesseract_ocr_pipeline_models(): return response, 400 try: tesseract_ocr_model = TesseractOCRPipelineModel.create( - form.file.data, + form.tesseract_model_file.data, compatible_service_versions=form.compatible_service_versions.data, description=form.description.data, publisher=form.publisher.data, @@ -39,7 +108,8 @@ def 
tesseract_ocr_pipeline_models(): publishing_year=form.publishing_year.data, shared=form.shared.data, title=form.title.data, - version=form.version.data + version=form.version.data, + user=current_user ) except OSError: abort(500) @@ -47,8 +117,105 @@ def tesseract_ocr_pipeline_models(): message = Markup(f'Model "{tesseract_ocr_model.title}" created') flash(message) return {}, 201, {'Location': url_for('contributions.contributions')} + tesseract_ocr_pipeline_models = [ + x for x in TesseractOCRPipelineModel.query.all() + ] + return render_template( - 'contributions/contribute.html.j2', + 'contributions/contribute_tesseract_ocr_models.html.j2', form=form, - title='Contribution' + tesseract_ocr_pipeline_models=tesseract_ocr_pipeline_models, + title='Tesseract OCR Model Contribution' + ) + +@bp.route('/edit-spacy-model//', methods=['GET', 'POST']) +@login_required +def spacy_nlp_pipeline_model(spacy_nlp_pipeline_model_id): + spacy_nlp_pipeline_model = SpaCyNLPPipelineModel.query.get_or_404( + spacy_nlp_pipeline_model_id + ) + form = EditForm(prefix='spacy-nlp-model-edit-form') + if form.validate_on_submit(): + if spacy_nlp_pipeline_model.title != form.title.data: + spacy_nlp_pipeline_model.title = form.title.data + if spacy_nlp_pipeline_model.description != form.description.data: + spacy_nlp_pipeline_model.description = form.description.data + if spacy_nlp_pipeline_model.publisher != form.publisher.data: + spacy_nlp_pipeline_model.publisher = form.publisher.data + if spacy_nlp_pipeline_model.publishing_year != form.publishing_year.data: + spacy_nlp_pipeline_model.publishing_year = form.publishing_year.data + if spacy_nlp_pipeline_model.publisher_url != form.publisher_url.data: + spacy_nlp_pipeline_model.publisher_url = form.publisher_url.data + if spacy_nlp_pipeline_model.publishing_url != form.publishing_url.data: + spacy_nlp_pipeline_model.publishing_url = form.publishing_url.data + if spacy_nlp_pipeline_model.version != form.version.data: + 
spacy_nlp_pipeline_model.version = form.version.data + if spacy_nlp_pipeline_model.shared != form.shared.data: + spacy_nlp_pipeline_model.shared = form.shared.data + db.session.commit() + message = Markup(f'Model "{spacy_nlp_pipeline_model.title}" updated') + flash(message, category='corpus') + return {}, 201, {'Location': url_for('contributions.contributions')} + form.prefill(spacy_nlp_pipeline_model) + return render_template( + 'contributions/spacy_nlp_pipeline_model.html.j2', + spacy_nlp_pipeline_model=spacy_nlp_pipeline_model, + form=form, + title='Edit your spaCy NLP model' + ) + +@bp.route('/edit-spacy-model/', methods=['DELETE']) +@login_required +def delete_spacy_model(spacy_nlp_pipeline_model_id): + def _delete_spacy_model(app, spacy_nlp_pipeline_model_id): + with app.app_context(): + model = SpaCyNLPPipelineModel.query.get(spacy_nlp_pipeline_model_id) + model.delete() + db.session.commit() + + model = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id) + if not (model.user == current_user or current_user.is_administrator()): + abort(403) + thread = Thread( + target=_delete_spacy_model, + args=(current_app._get_current_object(), spacy_nlp_pipeline_model_id) + ) + thread.start() + return {}, 202 + +@bp.route('/add-spacy-nlp-pipeline-model', methods=['GET', 'POST']) +def add_spacy_nlp_pipeline_model(): + form = SpacyNLPModelContributionForm(prefix='contribute-spacy-nlp-pipeline-model-form') + if form.is_submitted(): + if not form.validate(): + response = {'errors': form.errors} + return response, 400 + try: + spacy_nlp_model = SpaCyNLPPipelineModel.create( + form.spacy_model_file.data, + compatible_service_versions=form.compatible_service_versions.data, + description=form.description.data, + publisher=form.publisher.data, + publisher_url=form.publisher_url.data, + publishing_url=form.publishing_url.data, + publishing_year=form.publishing_year.data, + shared=form.shared.data, + title=form.title.data, + version=form.version.data, + 
user=current_user + ) + except OSError: + abort(500) + db.session.commit() + message = Markup(f'Model "{spacy_nlp_model.title}" created') + flash(message) + return {}, 201, {'Location': url_for('contributions.contributions')} + spacy_nlp_pipeline_models = [ + x for x in SpaCyNLPPipelineModel.query.all() + ] + return render_template( + 'contributions/contribute_spacy_nlp_models.html.j2', + form=form, + spacy_nlp_pipeline_models=spacy_nlp_pipeline_models, + title='spaCy NLP Model Contribution' ) diff --git a/app/daemon/job_utils.py b/app/daemon/job_utils.py index 32def73d..ab17e760 100644 --- a/app/daemon/job_utils.py +++ b/app/daemon/job_utils.py @@ -3,7 +3,8 @@ from app.models import ( Job, JobResult, JobStatus, - TesseractOCRPipelineModel + TesseractOCRPipelineModel, + SpaCyNLPPipelineModel ) from datetime import datetime from flask import current_app @@ -52,7 +53,12 @@ def _create_job_service(job): command += f' --mem-mb {mem_mb}' command += f' --n-cores {n_cores}' if job.service == 'spacy-nlp-pipeline': - command += f' -m {job.service_args["model"]}' + model_id = hashids.decode(job.service_args['model']) + model = SpaCyNLPPipelineModel.query.get(model_id) + if model is None: + job.status = JobStatus.FAILED + return + command += f' -m {model.pipeline_name}' if 'encoding_detection' in job.service_args and job.service_args['encoding_detection']: command += ' --check-encoding' elif job.service == 'tesseract-ocr-pipeline': @@ -103,6 +109,16 @@ def _create_job_service(job): models_mount_target = f'/usr/local/share/tessdata/{model.filename}' models_mount = f'{models_mount_source}:{models_mount_target}:ro' mounts.append(models_mount) + elif job.service == 'spacy-nlp-pipeline': + model_id = hashids.decode(job.service_args['model']) + model = SpaCyNLPPipelineModel.query.get(model_id) + if model is None: + job.status = JobStatus.FAILED + return + models_mount_source = model.path + models_mount_target = f'/usr/local/share/spacy/models/{model.filename}' + models_mount = 
f'{models_mount_source}:{models_mount_target}:ro' + mounts.append(models_mount) ''' ### Output mount ### ''' output_mount_source = os.path.join(job.path, 'results') output_mount_target = '/output' @@ -128,6 +144,8 @@ def _create_job_service(job): ) ''' ## Restart policy ## ''' restart_policy = docker.types.RestartPolicy() + print(command) + print(mounts) try: docker_client.services.create( image, diff --git a/app/models.py b/app/models.py index cc5d60ce..93a23461 100644 --- a/app/models.py +++ b/app/models.py @@ -520,6 +520,10 @@ class User(HashidMixin, UserMixin, db.Model): x.hashid: x.to_json(relationships=True) for x in self.tesseract_ocr_pipeline_models } + _json['spacy_nlp_pipeline_models'] = { + x.hashid: x.to_json(relationships=True) + for x in self.spacy_nlp_pipeline_models + } return _json class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model): @@ -603,6 +607,13 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model): pbar.close() db.session.commit() + def delete(self): + try: + os.remove(self.path) + except OSError as e: + current_app.logger.error(e) + db.session.delete(self) + def to_json(self, backrefs=False, relationships=False): _json = { 'id': self.hashid, @@ -636,6 +647,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model): publisher_url = db.Column(db.String(512)) publishing_url = db.Column(db.String(512)) publishing_year = db.Column(db.Integer) + pipeline_name = db.Column(db.String(64)) shared = db.Column(db.Boolean, default=False) # Backrefs: user: User @@ -668,6 +680,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model): model.shared = True model.title = m['title'] model.version = m['version'] + model.pipeline_name = m['pipeline_name'] continue model = SpaCyNLPPipelineModel( compatible_service_versions=m['compatible_service_versions'], @@ -679,12 +692,13 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model): shared=True, title=m['title'], user=nopaque_user, - version=m['version'] + 
version=m['version'], + pipeline_name=m['pipeline_name'] ) db.session.add(model) db.session.flush(objects=[model]) db.session.refresh(model) - model.filename = f'{model.id}.traineddata' + model.filename = m['url'].split('/')[-1] r = requests.get(m['url'], stream=True) pbar = tqdm( desc=f'{model.title} ({model.filename})', @@ -701,6 +715,13 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model): f.write(chunk) pbar.close() db.session.commit() + + def delete(self): + try: + os.remove(self.path) + except OSError as e: + current_app.logger.error(e) + db.session.delete(self) def to_json(self, backrefs=False, relationships=False): _json = { @@ -711,6 +732,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model): 'publisher_url': self.publisher_url, 'publishing_url': self.publishing_url, 'publishing_year': self.publishing_year, + 'pipeline_name': self.pipeline_name, 'shared': self.shared, 'title': self.title, **self.file_mixin_to_json() @@ -1023,11 +1045,8 @@ class CorpusFile(FileMixin, HashidMixin, db.Model): def delete(self): try: os.remove(self.path) - except OSError: - current_app.logger.error( - f'Removing {self.path} led to an OSError!' - ) - pass + except OSError as e: + current_app.logger.error(e) db.session.delete(self) self.corpus.status = CorpusStatus.UNPREPARED diff --git a/app/services/forms.py b/app/services/forms.py index 5c0af906..58bab164 100644 --- a/app/services/forms.py +++ b/app/services/forms.py @@ -10,7 +10,7 @@ from wtforms import ( ValidationError ) from wtforms.validators import InputRequired, Length -from app.models import TesseractOCRPipelineModel +from app.models import TesseractOCRPipelineModel, SpaCyNLPPipelineModel from . 
import SERVICES @@ -73,11 +73,11 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm): if 'disabled' in self.binarization.render_kw: del self.binarization.render_kw['disabled'] models = [ - x for x in TesseractOCRPipelineModel.query.filter().all() + x for x in TesseractOCRPipelineModel.query.order_by(TesseractOCRPipelineModel.title).all() if version in x.compatible_service_versions and (x.shared == True or x.user == current_user) ] self.model.choices = [('', 'Choose your option')] - self.model.choices += [(x.hashid, x.title) for x in models] + self.model.choices += [(x.hashid, f'{x.title} [{x.version}]') for x in models] self.model.default = '' self.version.choices = [(x, x) for x in service_manifest['versions']] self.version.data = version @@ -127,7 +127,7 @@ class CreateSpacyNLPPipelineJobForm(CreateJobBaseForm): encoding_detection = BooleanField('Encoding detection', render_kw={'disabled': True}) txt = FileField('File', validators=[FileRequired()]) model = SelectField('Model', validators=[InputRequired()]) - + def validate_encoding_detection(self, field): service_info = SERVICES['spacy-nlp-pipeline']['versions'][self.version.data] if field.data: @@ -146,6 +146,7 @@ class CreateSpacyNLPPipelineJobForm(CreateJobBaseForm): version = kwargs.pop('version', service_manifest['latest_version']) super().__init__(*args, **kwargs) service_info = service_manifest['versions'][version] + print(service_info) if self.encoding_detection.render_kw is None: self.encoding_detection.render_kw = {} self.encoding_detection.render_kw['disabled'] = True @@ -153,8 +154,12 @@ class CreateSpacyNLPPipelineJobForm(CreateJobBaseForm): if 'encoding_detection' in service_info['methods']: if 'disabled' in self.encoding_detection.render_kw: del self.encoding_detection.render_kw['disabled'] + models = [ + x for x in SpaCyNLPPipelineModel.query.order_by(SpaCyNLPPipelineModel.title).all() + if version in x.compatible_service_versions and (x.shared == True or x.user == current_user) + ] 
self.model.choices = [('', 'Choose your option')] - self.model.choices += [(x, y) for x, y in service_info['models'].items()] # noqa + self.model.choices += [(x.hashid, f'{x.title} [{x.version}]') for x in models] self.model.default = '' self.version.choices = [(x, x) for x in service_manifest['versions']] self.version.data = version diff --git a/app/services/routes.py b/app/services/routes.py index b34d0619..4bfca9bb 100644 --- a/app/services/routes.py +++ b/app/services/routes.py @@ -6,7 +6,8 @@ from app.models import ( Job, JobInput, JobStatus, - TesseractOCRPipelineModel + TesseractOCRPipelineModel, + SpaCyNLPPipelineModel ) from . import bp, SERVICES from .forms import ( @@ -172,6 +173,7 @@ def spacy_nlp_pipeline(): if version not in service_manifest['versions']: abort(404) form = CreateSpacyNLPPipelineJobForm(prefix='create-job-form', version=version) + spacy_nlp_pipeline_models = SpaCyNLPPipelineModel.query.all() if form.is_submitted(): if not form.validate(): response = {'errors': form.errors} @@ -202,6 +204,7 @@ def spacy_nlp_pipeline(): return render_template( 'services/spacy_nlp_pipeline.html.j2', form=form, + spacy_nlp_pipeline_models=spacy_nlp_pipeline_models, title=service_manifest['name'] ) diff --git a/app/services/services.yml b/app/services/services.yml index e8db1b33..c9d61e08 100644 --- a/app/services/services.yml +++ b/app/services/services.yml @@ -45,16 +45,21 @@ spacy-nlp-pipeline: 0.1.0: methods: - 'encoding_detection' - models: - ca: 'Catalan' - de: 'German' - el: 'Greek' - en: 'English' - es: 'Spanish' - fr: 'French' - it: 'Italian' - pl: 'Polish' - ru: 'Russian' - zh: 'Chinese' + # models: + # ca: 'Catalan' + # de: 'German' + # el: 'Greek' + # en: 'English' + # es: 'Spanish' + # fr: 'French' + # it: 'Italian' + # pl: 'Polish' + # ru: 'Russian' + # zh: 'Chinese' publishing_year: 2022 url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.0' + test: + methods: + - 'encoding_detection' + publishing_year: 2022 
+ url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/' diff --git a/app/static/js/Forms/CreateContributionForm.js b/app/static/js/Forms/CreateContributionForm.js new file mode 100644 index 00000000..e7651ab0 --- /dev/null +++ b/app/static/js/Forms/CreateContributionForm.js @@ -0,0 +1,18 @@ +class CreateContributionForm extends Form { + static autoInit() { + let createContributionFormElements = document.querySelectorAll('.create-contribution-form'); + for (let createContributionFormElement of createContributionFormElements) { + new CreateContributionForm(createContributionFormElement); + } + } + + constructor(formElement) { + super(formElement); + + this.addEventListener('requestLoad', (event) => { + if (event.target.status === 201) { + window.location.href = event.target.getResponseHeader('Location'); + } + }); + } +} diff --git a/app/static/js/Forms/Form.js b/app/static/js/Forms/Form.js index 9a21e986..d93f3e2c 100644 --- a/app/static/js/Forms/Form.js +++ b/app/static/js/Forms/Form.js @@ -1,5 +1,6 @@ class Form { static autoInit() { + CreateContributionForm.autoInit(); CreateCorpusFileForm.autoInit(); CreateJobForm.autoInit(); } diff --git a/app/static/js/RessourceLists/SpacyNLPModelList.js b/app/static/js/RessourceLists/SpacyNLPModelList.js new file mode 100644 index 00000000..0e20191b --- /dev/null +++ b/app/static/js/RessourceLists/SpacyNLPModelList.js @@ -0,0 +1,76 @@ +class SpacyNLPModelList { + constructor () { + + this.elements = { + spacyNLPModelList: document.querySelector('#spacy-nlp-model-list'), + deleteButtons: document.querySelectorAll('.delete-spacy-model-button'), + editButtons: document.querySelectorAll('.edit-spacy-model-button'), + + } + } + + init () { + let userId = this.elements.spacyNLPModelList.dataset.userId; + + for (let deleteButton of this.elements.deleteButtons) { + deleteButton.addEventListener('click', () => {this.deleteModel(deleteButton, userId);}); + } + + for (let editButton of this.elements.editButtons) { + 
editButton.addEventListener('click', () => {this.editModel(editButton);}); + } + } + + deleteModel(deleteButton, userId) { + return new Promise((resolve, reject) => { + let modelId = deleteButton.dataset.modelId; + let model = app.data.users[userId].spacy_nlp_pipeline_models[modelId]; + let modalElement = Utils.elementFromString( + ` + + ` + ); + document.querySelector('#modals').appendChild(modalElement); + let modal = M.Modal.init( + modalElement, + { + dismissible: false, + onCloseEnd: () => { + modal.destroy(); + modalElement.remove(); + } + } + ); + let confirmElement = modalElement.querySelector('.action-button[data-action="confirm"]'); + confirmElement.addEventListener('click', (event) => { + let modelTitle = model.title; + fetch(`/contributions/edit-spacy-model/${modelId}`, {method: 'DELETE'}) + .then( + (response) => { + app.flash(`Model "${modelTitle}" marked for deletion`, 'corpus'); + resolve(response); + }, + (response) => { + if (response.status === 403) {app.flash('Forbidden', 'error');} + if (response.status === 404) {app.flash('Not Found', 'error');} + reject(response); + } + ); + }); + modal.open(); + }); + } + + editModel(editButton) { + window.location.href = `/contributions/edit-spacy-model/${editButton.dataset.modelId}`; + } +} diff --git a/app/static/js/RessourceLists/TesseractOCRModelList.js b/app/static/js/RessourceLists/TesseractOCRModelList.js new file mode 100644 index 00000000..782f5d7e --- /dev/null +++ b/app/static/js/RessourceLists/TesseractOCRModelList.js @@ -0,0 +1,76 @@ +class TesseractOCRModelList { + constructor () { + + this.elements = { + tesseractOCRModelList: document.querySelector('#tesseract-ocr-model-list'), + deleteButtons: document.querySelectorAll('.delete-button'), + editButtons: document.querySelectorAll('.edit-button'), + + } + } + + init () { + let userId = this.elements.tesseractOCRModelList.dataset.userId; + + for (let deleteButton of this.elements.deleteButtons) { + deleteButton.addEventListener('click', () => 
{this.deleteModel(deleteButton, userId);}); + } + + for (let editButton of this.elements.editButtons) { + editButton.addEventListener('click', () => {this.editModel(editButton);}); + } + } + + deleteModel(deleteButton, userId) { + return new Promise((resolve, reject) => { + let modelId = deleteButton.dataset.modelId; + let model = app.data.users[userId].tesseract_ocr_pipeline_models[modelId]; + let modalElement = Utils.elementFromString( + ` + + ` + ); + document.querySelector('#modals').appendChild(modalElement); + let modal = M.Modal.init( + modalElement, + { + dismissible: false, + onCloseEnd: () => { + modal.destroy(); + modalElement.remove(); + } + } + ); + let confirmElement = modalElement.querySelector('.action-button[data-action="confirm"]'); + confirmElement.addEventListener('click', (event) => { + let modelTitle = model.title; + fetch(`/contributions/edit-tesseract-model/${modelId}`, {method: 'DELETE'}) + .then( + (response) => { + app.flash(`Model "${modelTitle}" marked for deletion`, 'corpus'); + resolve(response); + }, + (response) => { + if (response.status === 403) {app.flash('Forbidden', 'error');} + if (response.status === 404) {app.flash('Not Found', 'error');} + reject(response); + } + ); + }); + modal.open(); + }); + } + + editModel(editButton) { + window.location.href = `/contributions/edit-tesseract-model/${editButton.dataset.modelId}`; + } +} diff --git a/app/templates/_scripts.html.j2 b/app/templates/_scripts.html.j2 index ccc32a05..798d2848 100644 --- a/app/templates/_scripts.html.j2 +++ b/app/templates/_scripts.html.j2 @@ -9,6 +9,7 @@ 'js/Forms/Form.js', 'js/Forms/CreateCorpusFileForm.js', 'js/Forms/CreateJobForm.js', + 'js/Forms/CreateContributionForm.js', 'js/CorpusAnalysis/CQiClient.js', 'js/CorpusAnalysis/CorpusAnalysisApp.js', 'js/CorpusAnalysis/CorpusAnalysisConcordance.js', @@ -24,6 +25,8 @@ 'js/RessourceLists/JobInputList.js', 'js/RessourceLists/JobResultList.js', 'js/RessourceLists/QueryResultList.js', + 
'js/RessourceLists/SpacyNLPModelList.js', + 'js/RessourceLists/TesseractOCRModelList.js', 'js/RessourceLists/UserList.js' %} diff --git a/app/templates/contributions/_breadcrumbs.html.j2 b/app/templates/contributions/_breadcrumbs.html.j2 new file mode 100644 index 00000000..327d0578 --- /dev/null +++ b/app/templates/contributions/_breadcrumbs.html.j2 @@ -0,0 +1,31 @@ +{% set breadcrumbs %} +
  • navigate_next
  • +{% if request.path == url_for('.contributions') %} +
  • Contributions Overview
  • + +{% elif request.path == url_for('.add_tesseract_ocr_pipeline_model') %} +
  • Contributions Overview
  • +
  • navigate_next
  • +
  • {{ title }}
  • +{% elif request.path == url_for('.add_spacy_nlp_pipeline_model') %} +
  • Contributions Overview
  • +
  • navigate_next
  • +
  • {{ title }}
  • +{% elif tesseract_ocr_pipeline_model and request.path == url_for('.tesseract_ocr_pipeline_model', tesseract_ocr_pipeline_model_id=tesseract_ocr_pipeline_model.id) %} +
  • Contributions Overview
  • +
  • navigate_next
  • +
  • + + Edit {{ tesseract_ocr_pipeline_model.title }} + +
  • +{% elif spacy_nlp_pipeline_model and request.path == url_for('.spacy_nlp_pipeline_model', spacy_nlp_pipeline_model_id=spacy_nlp_pipeline_model.id) %} +
  • Contributions Overview
  • +
  • navigate_next
  • +
  • + + Edit {{ spacy_nlp_pipeline_model.title }} + +
  • +{% endif %} +{% endset %} diff --git a/app/templates/contributions/contribute.html.j2 b/app/templates/contributions/contribute.html.j2 deleted file mode 100644 index 6789e1f8..00000000 --- a/app/templates/contributions/contribute.html.j2 +++ /dev/null @@ -1,32 +0,0 @@ -{% extends "base.html.j2" %} -{% import "materialize/wtf.html.j2" as wtf %} - - -{% block page_content %} -
    -
    -
    -

    {{ title }}

    -

    - In order to add a new model, please fill in the form below. -

    - -
    -
    - {{ form.hidden_tag() }} - {{ wtf.render_field(form.title) }} - {{ wtf.render_field(form.description) }} - {{ wtf.render_field(form.publisher) }} - {{ wtf.render_field(form.publisher_url) }} - {{ wtf.render_field(form.publishing_url) }} - {{ wtf.render_field(form.publishing_year) }} - {{ wtf.render_field(form.shared) }} - {{ wtf.render_field(form.version) }} - {{ wtf.render_field(form.compatible_service_versions) }} - {{ wtf.render_field(form.submit, class_='width-100', material_icon='send') }} - -
    -
    -
    -
    -{% endblock page_content %} \ No newline at end of file diff --git a/app/templates/contributions/contribute_spacy_nlp_models.html.j2 b/app/templates/contributions/contribute_spacy_nlp_models.html.j2 new file mode 100644 index 00000000..b5ab9b0d --- /dev/null +++ b/app/templates/contributions/contribute_spacy_nlp_models.html.j2 @@ -0,0 +1,124 @@ +{% extends "base.html.j2" %} +{% import "materialize/wtf.html.j2" as wtf %} +{% from "contributions/_breadcrumbs.html.j2" import breadcrumbs with context %} + +{% block main_attribs %} class="service-scheme" data-service="spacy-nlp-pipeline"{% endblock main_attribs %} + +{% block page_content %} +
    +
    +
    +

    {{ title }}

    +
    + +
    +
    +

     

    +

     

    + + + +
    +
    + +
    +
    +
    +
    +
    +
    + layersspaCy NLP Models +

    You can add more spaCy NLP models using the form below. They will automatically appear in the list of usable models.

    +

    Edit already uploaded models

    +

    Information about the already existing models.

    +
    +
    +
    +
    +
    +
    + +
    +

    Add a model

    +
    +
    +
    + {{ form.hidden_tag() }} +
    +
    + {{ wtf.render_field(form.spacy_model_file, accept='.tar.gz', placeholder='Choose a .tar.gz file') }} +
    +
    + {{ wtf.render_field(form.title, material_icon='title') }} +
    +
    + {{ wtf.render_field(form.description, material_icon='description') }} +
    +
    + {{ wtf.render_field(form.publisher, material_icon='account_balance') }} +
    +
    + {{ wtf.render_field(form.publishing_year, material_icon='calendar_month') }} +
    +
    + {{ wtf.render_field(form.publisher_url, material_icon='link') }} +
    +
    + {{ wtf.render_field(form.publishing_url, material_icon='link') }} +
    +
    + {{ wtf.render_field(form.version, material_icon='apps') }} +
    +
    + {{ wtf.render_field(form.compatible_service_versions) }} +
    +
    +

    +
    + {{ wtf.render_field(form.shared) }} +
    +
    +
    +
    + {{ wtf.render_field(form.submit, material_icon='send') }} +
    +
    +
    +
    +
    +
    +{% endblock page_content %} + +{% block modals %} +{{ super() }} + +{% endblock modals %} diff --git a/app/templates/contributions/contribute_tesseract_ocr_models.html.j2 b/app/templates/contributions/contribute_tesseract_ocr_models.html.j2 new file mode 100644 index 00000000..1e50585a --- /dev/null +++ b/app/templates/contributions/contribute_tesseract_ocr_models.html.j2 @@ -0,0 +1,124 @@ +{% extends "base.html.j2" %} +{% import "materialize/wtf.html.j2" as wtf %} +{% from "contributions/_breadcrumbs.html.j2" import breadcrumbs with context %} + +{% block main_attribs %} class="service-scheme" data-service="tesseract-ocr-pipeline"{% endblock main_attribs %} + +{% block page_content %} +
    +
    +
    +

    {{ title }}

    +
    + +
    +
    +

     

    +

     

    + + + +
    +
    + +
    +
    +
    +
    +
    +
    + layersTesseract OCR Models +

    You can add more Tesseract OCR models using the form below. They will automatically appear in the list of usable models.

    +

    Information about the already existing models.

    +

    Edit already uploaded models

    +
    +
    +
    +
    +
    +
    + +
    +

    Add a model

    +
    +
    +
    + {{ form.hidden_tag() }} +
    +
    + {{ wtf.render_field(form.tesseract_model_file, accept='.traineddata', placeholder='Choose a .traineddata file') }} +
    +
    + {{ wtf.render_field(form.title, material_icon='title') }} +
    +
    + {{ wtf.render_field(form.description, material_icon='description') }} +
    +
    + {{ wtf.render_field(form.publisher, material_icon='account_balance') }} +
    +
    + {{ wtf.render_field(form.publishing_year, material_icon='calendar_month') }} +
    +
    + {{ wtf.render_field(form.publisher_url, material_icon='link') }} +
    +
    + {{ wtf.render_field(form.publishing_url, material_icon='link') }} +
    +
    + {{ wtf.render_field(form.version, material_icon='apps') }} +
    +
    + {{ wtf.render_field(form.compatible_service_versions) }} +
    +
    +

    +
    + {{ wtf.render_field(form.shared) }} +
    +
    +
    +
    + {{ wtf.render_field(form.submit, material_icon='send') }} +
    +
    +
    +
    +
    +
    +{% endblock page_content %} + +{% block modals %} +{{ super() }} + +{% endblock modals %} diff --git a/app/templates/contributions/contribution_overview.html.j2 b/app/templates/contributions/contribution_overview.html.j2 new file mode 100644 index 00000000..c6facf9d --- /dev/null +++ b/app/templates/contributions/contribution_overview.html.j2 @@ -0,0 +1,129 @@ +{% extends "base.html.j2" %} +{% import "materialize/wtf.html.j2" as wtf %} +{% from "contributions/_breadcrumbs.html.j2" import breadcrumbs with context %} + +{% block page_content %} +
    +
    +
    +

    {{ title }}

    + + {# Tesseract OCR Models #} +
    +

    My Tesseract OCR Pipeline Models

    +

    Here you can see and edit the models that you have created. You can also create new models.

    + +
    +
    +
    +
    +
    + + + + + + + + + + + {% if tesseract_ocr_user_models|length > 0 %} + {% for m in tesseract_ocr_user_models %} + + + {% if m.description == '' %} + + {% else %} + + {% endif %} + + + + {% endfor %} + {% else %} + + + + {% endif %} + +
    TitleDescriptionBiblio
    {{ m.title }}Description is not available.{{ m.description }}{{ m.publisher }} ({{ m.publishing_year }}), {{ m.title }} {{ m.version}}, {{ m.publishing_url }} + delete + edit +
    No models available.
    +
    +
    + +
    +
    +
    +
    + + {# spaCy NLP Models #} +
    +

    My spaCy NLP Pipeline Models

    +

    Here you can see and edit the models that you have created. You can also create new models.

    + +
    +
    +
    +
    +
    + + + + + + + + + + + {% if spacy_nlp_user_models|length > 0 %} + {% for m in spacy_nlp_user_models %} + + + {% if m.description == '' %} + + {% else %} + + {% endif %} + + + + {% endfor %} + {% else %} + + + + {% endif %} + +
    TitleDescriptionBiblio
    {{ m.title }}Description is not available.{{ m.description }}{{ m.publisher }} ({{ m.publishing_year }}), {{ m.title }} {{ m.version}}, {{ m.publishing_url }} + delete + edit +
    No models available.
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +{% endblock page_content %} + +{% block scripts %} +{{ super() }} + +{% endblock scripts %} diff --git a/app/templates/contributions/spacy_nlp_pipeline_model.html.j2 b/app/templates/contributions/spacy_nlp_pipeline_model.html.j2 new file mode 100644 index 00000000..82fd6862 --- /dev/null +++ b/app/templates/contributions/spacy_nlp_pipeline_model.html.j2 @@ -0,0 +1,56 @@ +{% extends "base.html.j2" %} +{% import "materialize/wtf.html.j2" as wtf %} +{% from "contributions/_breadcrumbs.html.j2" import breadcrumbs with context %} + +{% block main_attribs %} class="service-scheme" data-service="spacy-nlp-pipeline"{% endblock main_attribs %} + +{% block page_content %} +
    +
    +
    +

    {{ title }}

    +
    + +
    +
    +
    +
    + {{ form.hidden_tag() }} +
    +
    + {{ wtf.render_field(form.title, material_icon='title') }} +
    +
    + {{ wtf.render_field(form.description, material_icon='description') }} +
    +
    + {{ wtf.render_field(form.publisher, material_icon='account_balance') }} +
    +
    + {{ wtf.render_field(form.publishing_year, material_icon='calendar_month') }} +
    +
    + {{ wtf.render_field(form.publisher_url, material_icon='link') }} +
    +
    + {{ wtf.render_field(form.publishing_url, material_icon='link') }} +
    +
    + {{ wtf.render_field(form.version, material_icon='apps') }} +
    +
    +

    +
    + {{ wtf.render_field(form.shared) }} +
    +
    +
    +
    + {{ wtf.render_field(form.submit, material_icon='send') }} +
    +
    +
    +
    +
    +
    +{% endblock page_content %} diff --git a/app/templates/contributions/tesseract_ocr_pipeline_model.html.j2 b/app/templates/contributions/tesseract_ocr_pipeline_model.html.j2 new file mode 100644 index 00000000..4db82349 --- /dev/null +++ b/app/templates/contributions/tesseract_ocr_pipeline_model.html.j2 @@ -0,0 +1,56 @@ +{% extends "base.html.j2" %} +{% import "materialize/wtf.html.j2" as wtf %} +{% from "contributions/_breadcrumbs.html.j2" import breadcrumbs with context %} + +{% block main_attribs %} class="service-scheme" data-service="tesseract-ocr-pipeline"{% endblock main_attribs %} + +{% block page_content %} +
    +
    +
    +

    {{ title }}

    +
    + +
    +
    +
    +
    + {{ form.hidden_tag() }} +
    +
    + {{ wtf.render_field(form.title, material_icon='title') }} +
    +
    + {{ wtf.render_field(form.description, material_icon='description') }} +
    +
    + {{ wtf.render_field(form.publisher, material_icon='account_balance') }} +
    +
    + {{ wtf.render_field(form.publishing_year, material_icon='calendar_month') }} +
    +
    + {{ wtf.render_field(form.publisher_url, material_icon='link') }} +
    +
    + {{ wtf.render_field(form.publishing_url, material_icon='link') }} +
    +
    + {{ wtf.render_field(form.version, material_icon='apps') }} +
    +
    +

    +
    + {{ wtf.render_field(form.shared) }} +
    +
    +
    +
    + {{ wtf.render_field(form.submit, material_icon='send') }} +
    +
    +
    +
    +
    +
    +{% endblock page_content %} diff --git a/app/templates/services/spacy_nlp_pipeline.html.j2 b/app/templates/services/spacy_nlp_pipeline.html.j2 index 37f70210..5ad1e97a 100644 --- a/app/templates/services/spacy_nlp_pipeline.html.j2 +++ b/app/templates/services/spacy_nlp_pipeline.html.j2 @@ -70,8 +70,16 @@
    {{ wtf.render_field(form.txt, accept='text/plain', placeholder='Choose a plain text file') }}
    -
    - {{ wtf.render_field(form.model, material_icon='language') }} +
    +
    + language + {{ form.model() }} + {{ form.model.label }} + + help_outline + new_label + +
    {{ wtf.render_field(form.version, material_icon='apps') }} @@ -122,4 +130,35 @@ Cancel
    + + {% endblock modals %} diff --git a/app/templates/services/tesseract_ocr_pipeline.html.j2 b/app/templates/services/tesseract_ocr_pipeline.html.j2 index 982265bc..b66e968b 100644 --- a/app/templates/services/tesseract_ocr_pipeline.html.j2 +++ b/app/templates/services/tesseract_ocr_pipeline.html.j2 @@ -58,7 +58,8 @@ {{ form.model() }} {{ form.model.label }} - More details about models + help_outline + new_label {% for error in form.model.errors %} {{ error }} diff --git a/migrations/versions/721829b5dd25_.py b/migrations/versions/721829b5dd25_.py new file mode 100644 index 00000000..124ca07d --- /dev/null +++ b/migrations/versions/721829b5dd25_.py @@ -0,0 +1,28 @@ +"""empty message + +Revision ID: 721829b5dd25 +Revises: 31dd42e5ea6f +Create Date: 2022-11-04 13:58:13.008301 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '721829b5dd25' +down_revision = '31dd42e5ea6f' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('spacy_nlp_pipeline_models', sa.Column('pipeline_name', sa.String(length=64), nullable=True)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('spacy_nlp_pipeline_models', 'pipeline_name') + # ### end Alembic commands ###