Merge branch 'development' into binarization-threshold

This commit is contained in:
Inga Kirschnick 2022-11-11 15:39:40 +01:00
commit 9502891536
23 changed files with 1248 additions and 88 deletions

View File

@ -1,10 +1,178 @@
- title: 'de_core_news_md-3.4.0' - title: 'Catalan'
description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.' description: 'Catalan pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.4.0/de_core_news_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/ca_core_news_md-3.2.0/ca_core_news_md-3.2.0.tar.gz'
publisher: 'Explosion' publisher: 'Explosion'
publisher_url: 'https://github.com/explosion' publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/de_core_news_md-3.4.0' publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/ca_core_news_md-3.2.0'
publishing_year: 2022 publishing_year: 2021
version: '3.4.0' pipeline_name: 'ca_core_news_md'
version: '3.2.0'
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- title: 'German'
description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.2.0/de_core_news_md-3.2.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/de_core_news_md-3.2.0'
publishing_year: 2021
pipeline_name: 'de_core_news_md'
version: '3.2.0'
compatible_service_versions:
- '0.1.0'
- title: 'Greek'
description: 'Greek pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/el_core_news_md-3.2.0/el_core_news_md-3.2.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/el_core_news_md-3.2.0'
publishing_year: 2021
pipeline_name: 'el_core_news_md'
version: '3.2.0'
compatible_service_versions:
- '0.1.0'
- title: 'English'
description: 'English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.2.0/en_core_web_md-3.2.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/en_core_web_md-3.2.0'
publishing_year: 2021
pipeline_name: 'en_core_web_md'
version: '3.2.0'
compatible_service_versions:
- '0.1.0'
# NOTE(review): 'test' is listed only for this entry; every other model in
# this file declares just '0.1.0'. Looks like a debugging leftover - confirm
# that a service version named 'test' really exists before shipping.
- 'test'
- title: 'Spanish'
description: 'Spanish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/es_core_news_md-3.2.0/es_core_news_md-3.2.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/es_core_news_md-3.2.0'
publishing_year: 2021
pipeline_name: 'es_core_news_md'
version: '3.2.0'
compatible_service_versions:
- '0.1.0'
- title: 'French'
description: 'French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/fr_core_news_md-3.2.0/fr_core_news_md-3.2.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/fr_core_news_md-3.2.0'
publishing_year: 2021
pipeline_name: 'fr_core_news_md'
version: '3.2.0'
compatible_service_versions:
- '0.1.0'
- title: 'Italian'
description: 'Italian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/it_core_news_md-3.2.0/it_core_news_md-3.2.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/it_core_news_md-3.2.0'
publishing_year: 2021
pipeline_name: 'it_core_news_md'
version: '3.2.0'
compatible_service_versions:
- '0.1.0'
- title: 'Polish'
description: 'Polish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/pl_core_news_md-3.2.0/pl_core_news_md-3.2.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/pl_core_news_md-3.2.0'
publishing_year: 2021
pipeline_name: 'pl_core_news_md'
version: '3.2.0'
compatible_service_versions:
- '0.1.0'
- title: 'Russian'
description: 'Russian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/ru_core_news_md-3.2.0/ru_core_news_md-3.2.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/ru_core_news_md-3.2.0'
publishing_year: 2021
pipeline_name: 'ru_core_news_md'
version: '3.2.0'
compatible_service_versions:
- '0.1.0'
- title: 'Chinese'
description: 'Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.'
url: 'https://github.com/explosion/spacy-models/releases/download/zh_core_web_md-3.2.0/zh_core_web_md-3.2.0.tar.gz'
publisher: 'Explosion'
publisher_url: 'https://github.com/explosion'
publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/zh_core_web_md-3.2.0'
publishing_year: 2021
pipeline_name: 'zh_core_web_md'
version: '3.2.0'
compatible_service_versions:
- '0.1.0'
# - title: 'de_core_news_md-3.4.0'
# description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.'
# url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.4.0/de_core_news_md-3.4.0.tar.gz'
# publisher: 'Explosion'
# publisher_url: 'https://github.com/explosion'
# publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/de_core_news_md-3.4.0'
# publishing_year: 2022
# pipeline_name: 'de_core_news_md'
# version: '3.4.0'
# compatible_service_versions:
# - '0.1.0'
# - title: 'en_core_web_md-3.4.1'
# description: 'English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.'
# url: 'https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.4.1/en_core_web_md-3.4.1.tar.gz'
# publisher: 'Explosion'
# publisher_url: 'https://github.com/explosion'
# publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/en_core_web_md-3.4.1'
# publishing_year: 2022
# pipeline_name: 'en_core_web_md'
# version: '3.4.1'
# compatible_service_versions:
# - '0.1.0'
# - title: 'uk_core_news_md-3.4.0'
# description: 'Ukrainian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
# url: 'https://github.com/explosion/spacy-models/releases/download/uk_core_news_md-3.4.0/uk_core_news_md-3.4.0.tar.gz'
# publisher: 'Explosion'
# publisher_url: 'https://github.com/explosion'
# publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/uk_core_news_md-3.4.0'
# publishing_year: 2022
# pipeline_name: 'uk_core_news_md'
# version: '3.4.0'
# compatible_service_versions:
# - '0.1.0'
# - title: 'zh_core_web_md-3.4.0'
# description: 'Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.'
# url: 'https://github.com/explosion/spacy-models/releases/download/zh_core_web_md-3.4.0/zh_core_web_md-3.4.0.tar.gz'
# publisher: 'Explosion'
# publisher_url: 'https://github.com/explosion'
# publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/zh_core_web_md-3.4.0'
# publishing_year: 2022
# pipeline_name: 'zh_core_web_md'
# version: '3.4.0'
# compatible_service_versions:
# - '0.1.0'
# - title: 'ru_core_news_md-3.4.0'
# description: 'Russian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
# url: 'https://github.com/explosion/spacy-models/releases/download/ru_core_news_md-3.4.0/ru_core_news_md-3.4.0.tar.gz'
# publisher: 'Explosion'
# publisher_url: 'https://github.com/explosion'
# publishing_url: 'https://github.com/explosion/spacy-models/releases/tag/ru_core_news_md-3.4.0'
# publishing_year: 2022
# pipeline_name: 'ru_core_news_md'
# version: '3.4.0'
# compatible_service_versions:
# - '0.1.0'
# - title: 'la_core_cltk_sm-0.1.0'
# description: 'Latin pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
# url: 'https://github.com/diyclassics/latin-spacy-models/raw/main/la_core_cltk_sm/la_core_cltk_sm-0.1.0.tar.gz'
# publisher: 'DIY Classics'
# publisher_url: 'https://github.com/diyclassics/'
# publishing_url: 'https://github.com/diyclassics/latin-spacy-models/tree/main/la_core_cltk_sm'
# publishing_year: 2022
# pipeline_name: 'la_core_cltk_sm'
# version: '0.1.0'
# compatible_service_versions:
# - '0.1.0'

View File

@ -1,3 +1,4 @@
from flask import current_app
from flask_wtf import FlaskForm from flask_wtf import FlaskForm
from flask_wtf.file import FileField, FileRequired from flask_wtf.file import FileField, FileRequired
from wtforms import ( from wtforms import (
@ -5,13 +6,13 @@ from wtforms import (
StringField, StringField,
SubmitField, SubmitField,
SelectMultipleField, SelectMultipleField,
IntegerField IntegerField,
ValidationError
) )
from wtforms.validators import InputRequired, Length from wtforms.validators import InputRequired, Length
from app.services import SERVICES from app.services import SERVICES
class CreateContributionBaseForm(FlaskForm):
class TesseractOCRModelContributionForm(FlaskForm):
title = StringField( title = StringField(
'Title', 'Title',
validators=[InputRequired(), Length(max=64)] validators=[InputRequired(), Length(max=64)]
@ -24,9 +25,6 @@ class TesseractOCRModelContributionForm(FlaskForm):
'Version', 'Version',
validators=[InputRequired(), Length(max=16)] validators=[InputRequired(), Length(max=16)]
) )
compatible_service_versions = SelectMultipleField(
'Compatible service versions'
)
publisher = StringField( publisher = StringField(
'Publisher', 'Publisher',
validators=[InputRequired(), Length(max=128)] validators=[InputRequired(), Length(max=128)]
@ -43,10 +41,35 @@ class TesseractOCRModelContributionForm(FlaskForm):
'Publishing year', 'Publishing year',
validators=[InputRequired()] validators=[InputRequired()]
) )
shared = BooleanField('Shared', validators=[InputRequired()]) shared = BooleanField(
model_file = FileField('File',validators=[FileRequired()]) 'Shared'
)
submit = SubmitField() submit = SubmitField()
class EditForm(CreateContributionBaseForm):
    ''' Form for editing the metadata of an already contributed model '''

    def prefill(self, model_file):
        ''' Pre-fill the form with the metadata of an existing model

        Copies every editable attribute of ``model_file`` into the form
        field of the same name.
        '''
        # Form field name == model attribute name for all of these.
        for field_name in (
            'title',
            'description',
            'publisher',
            'publishing_year',
            'publisher_url',
            'publishing_url',
            'version',
            'shared'
        ):
            getattr(self, field_name).data = getattr(model_file, field_name)
class TesseractOCRModelContributionForm(CreateContributionBaseForm):
tesseract_model_file = FileField(
'File',
validators=[FileRequired()]
)
compatible_service_versions = SelectMultipleField(
'Compatible service versions'
)
def validate_tesseract_model_file(self, field):
current_app.logger.warning(field.data.filename)
if not field.data.filename.lower().endswith('.traineddata'):
raise ValidationError('traineddata files only!')
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
service_manifest = SERVICES['tesseract-ocr-pipeline'] service_manifest = SERVICES['tesseract-ocr-pipeline']
@ -56,3 +79,25 @@ class TesseractOCRModelContributionForm(FlaskForm):
(x, x) for x in service_manifest['versions'].keys() (x, x) for x in service_manifest['versions'].keys()
] ]
self.compatible_service_versions.default = '' self.compatible_service_versions.default = ''
class SpacyNLPModelContributionForm(CreateContributionBaseForm):
    ''' Upload form for contributing a spaCy NLP pipeline model '''
    spacy_model_file = FileField(
        'File',
        validators=[FileRequired()]
    )
    compatible_service_versions = SelectMultipleField(
        'Compatible service versions'
    )

    def validate_spacy_model_file(self, field):
        ''' Reject uploads whose filename does not end with ``.tar.gz`` '''
        current_app.logger.warning(field.data.filename)
        has_expected_suffix = field.data.filename.lower().endswith('.tar.gz')
        if has_expected_suffix:
            return
        raise ValidationError('.tar.gz files only!')

    def __init__(self, *args, **kwargs):
        ''' Build the form and offer the service's versions as choices '''
        # Looked up before the base constructor runs, as in the original.
        service_manifest = SERVICES['spacy-nlp-pipeline']
        super().__init__(*args, **kwargs)
        # Placeholder entry first, followed by one entry per service version.
        version_choices = [('', 'Choose your option')]
        version_choices.extend(
            (x, x) for x in service_manifest['versions'].keys()
        )
        self.compatible_service_versions.choices = version_choices
        self.compatible_service_versions.default = ''

View File

@ -1,10 +1,11 @@
from flask import abort, flash, Markup, render_template, url_for from flask import abort, current_app, flash, Markup, render_template, url_for
from flask_login import login_required from flask_login import login_required, current_user
from threading import Thread
from app import db from app import db
from app.decorators import permission_required from app.decorators import admin_required, permission_required
from app.models import TesseractOCRPipelineModel, Permission from app.models import Permission, SpaCyNLPPipelineModel, TesseractOCRPipelineModel
from . import bp from . import bp
from .forms import TesseractOCRModelContributionForm from .forms import TesseractOCRModelContributionForm, EditForm, SpacyNLPModelContributionForm
@bp.before_request @bp.before_request
@ -14,13 +15,81 @@ def before_request():
pass pass
@bp.route('') @bp.route('/')
@login_required
@admin_required
def contributions(): def contributions():
pass tesseract_ocr_user_models = [
x for x in current_user.tesseract_ocr_pipeline_models
]
spacy_nlp_user_models = [
x for x in current_user.spacy_nlp_pipeline_models
]
return render_template(
'contributions/contribution_overview.html.j2',
tesseract_ocr_user_models=tesseract_ocr_user_models,
spacy_nlp_user_models=spacy_nlp_user_models,
userId = current_user.hashid,
title='Contribution Overview'
)
@bp.route('/edit-tesseract-model/<hashid:tesseract_ocr_pipeline_model_id>', methods=['GET', 'POST'])
@login_required
def tesseract_ocr_pipeline_model(tesseract_ocr_pipeline_model_id):
    ''' Show and process the edit form of a single Tesseract OCR model

    Responds with 404 if the model does not exist and with 403 if the
    current user is neither the model's owner nor an administrator. A valid
    submission updates the model and answers with ``201`` plus a
    ``Location`` header pointing at the contribution overview.
    '''
    model = TesseractOCRPipelineModel.query.get_or_404(
        tesseract_ocr_pipeline_model_id
    )
    # Same rule the DELETE route applies: without it any logged-in user
    # could edit any other user's model.
    if not (model.user == current_user or current_user.is_administrator()):
        abort(403)
    form = EditForm(prefix='tesseract-ocr-model-edit-form')
    if form.validate_on_submit():
        # Form field name == model attribute name for all of these.
        for attr in (
            'title',
            'description',
            'publisher',
            'publishing_year',
            'publisher_url',
            'publishing_url',
            'version',
            'shared'
        ):
            submitted_value = getattr(form, attr).data
            if getattr(model, attr) != submitted_value:
                setattr(model, attr, submitted_value)
        db.session.commit()
        # Markup.format() escapes its arguments, so a title containing HTML
        # is shown as text instead of being injected into the page.
        message = Markup(
            'Model "<a href="contribute/{}">{}</a>" updated'
        ).format(model.hashid, model.title)
        flash(message, category='corpus')
        return {}, 201, {'Location': url_for('contributions.contributions')}
    # NOTE(review): this also runs after a failed POST and replaces the
    # submitted values with the stored ones - confirm this is intended.
    form.prefill(model)
    return render_template(
        'contributions/tesseract_ocr_pipeline_model.html.j2',
        tesseract_ocr_pipeline_model=model,
        form=form,
        title='Edit your Tesseract OCR model'
    )
@bp.route('/tesseract-ocr-pipeline-models', methods=['GET', 'POST']) @bp.route('/edit-tesseract-model/<hashid:tesseract_ocr_pipeline_model_id>', methods=['DELETE'])
def tesseract_ocr_pipeline_models(): @login_required
def delete_tesseract_model(tesseract_ocr_pipeline_model_id):
    ''' Delete a Tesseract OCR model in a background thread

    Responds with 404 if the model does not exist, 403 if the current user
    is neither its owner nor an administrator, and 202 once the deletion
    has been handed to the worker thread.
    '''
    def _delete_tesseract_model(app, tesseract_ocr_pipeline_model_id):
        # The thread has no request context, so it needs its own app context.
        with app.app_context():
            model = TesseractOCRPipelineModel.query.get(tesseract_ocr_pipeline_model_id)
            if model is None:
                # Already gone, e.g. removed by a concurrent DELETE request.
                return
            model.delete()
            db.session.commit()

    model = TesseractOCRPipelineModel.query.get_or_404(tesseract_ocr_pipeline_model_id)
    if not (model.user == current_user or current_user.is_administrator()):
        abort(403)
    thread = Thread(
        target=_delete_tesseract_model,
        # Pass the real app object; the current_app proxy is bound to this
        # request's context and cannot be used from another thread.
        args=(current_app._get_current_object(), tesseract_ocr_pipeline_model_id)
    )
    thread.start()
    return {}, 202
@bp.route('/add-tesseract-ocr-pipeline-model', methods=['GET', 'POST'])
def add_tesseract_ocr_pipeline_model():
form = TesseractOCRModelContributionForm( form = TesseractOCRModelContributionForm(
prefix='contribute-tesseract-ocr-pipeline-model-form' prefix='contribute-tesseract-ocr-pipeline-model-form'
) )
@ -30,7 +99,7 @@ def tesseract_ocr_pipeline_models():
return response, 400 return response, 400
try: try:
tesseract_ocr_model = TesseractOCRPipelineModel.create( tesseract_ocr_model = TesseractOCRPipelineModel.create(
form.file.data, form.tesseract_model_file.data,
compatible_service_versions=form.compatible_service_versions.data, compatible_service_versions=form.compatible_service_versions.data,
description=form.description.data, description=form.description.data,
publisher=form.publisher.data, publisher=form.publisher.data,
@ -39,7 +108,8 @@ def tesseract_ocr_pipeline_models():
publishing_year=form.publishing_year.data, publishing_year=form.publishing_year.data,
shared=form.shared.data, shared=form.shared.data,
title=form.title.data, title=form.title.data,
version=form.version.data version=form.version.data,
user=current_user
) )
except OSError: except OSError:
abort(500) abort(500)
@ -47,8 +117,105 @@ def tesseract_ocr_pipeline_models():
message = Markup(f'Model "{tesseract_ocr_model.title}" created') message = Markup(f'Model "{tesseract_ocr_model.title}" created')
flash(message) flash(message)
return {}, 201, {'Location': url_for('contributions.contributions')} return {}, 201, {'Location': url_for('contributions.contributions')}
tesseract_ocr_pipeline_models = [
x for x in TesseractOCRPipelineModel.query.all()
]
return render_template( return render_template(
'contributions/contribute.html.j2', 'contributions/contribute_tesseract_ocr_models.html.j2',
form=form, form=form,
title='Contribution' tesseract_ocr_pipeline_models=tesseract_ocr_pipeline_models,
title='Tesseract OCR Model Contribution'
)
@bp.route('/edit-spacy-model/<hashid:spacy_nlp_pipeline_model_id>', methods=['GET', 'POST'])
@login_required
def spacy_nlp_pipeline_model(spacy_nlp_pipeline_model_id):
    ''' Show and process the edit form of a single spaCy NLP model

    Responds with 404 if the model does not exist and with 403 if the
    current user is neither the model's owner nor an administrator. A valid
    submission updates the model and answers with ``201`` plus a
    ``Location`` header pointing at the contribution overview.

    The route uses a single slash before the id so that it addresses the
    same URL as the DELETE route for this resource.
    '''
    model = SpaCyNLPPipelineModel.query.get_or_404(
        spacy_nlp_pipeline_model_id
    )
    # Same rule the DELETE route applies: without it any logged-in user
    # could edit any other user's model.
    if not (model.user == current_user or current_user.is_administrator()):
        abort(403)
    form = EditForm(prefix='spacy-nlp-model-edit-form')
    if form.validate_on_submit():
        # Form field name == model attribute name for all of these.
        for attr in (
            'title',
            'description',
            'publisher',
            'publishing_year',
            'publisher_url',
            'publishing_url',
            'version',
            'shared'
        ):
            submitted_value = getattr(form, attr).data
            if getattr(model, attr) != submitted_value:
                setattr(model, attr, submitted_value)
        db.session.commit()
        # Markup.format() escapes its arguments, so a title containing HTML
        # is shown as text instead of being injected into the page.
        message = Markup(
            'Model "<a href="contribute/{}">{}</a>" updated'
        ).format(model.hashid, model.title)
        flash(message, category='corpus')
        return {}, 201, {'Location': url_for('contributions.contributions')}
    # NOTE(review): this also runs after a failed POST and replaces the
    # submitted values with the stored ones - confirm this is intended.
    form.prefill(model)
    return render_template(
        'contributions/spacy_nlp_pipeline_model.html.j2',
        spacy_nlp_pipeline_model=model,
        form=form,
        title='Edit your spaCy NLP model'
    )
@bp.route('/edit-spacy-model/<hashid:spacy_nlp_pipeline_model_id>', methods=['DELETE'])
@login_required
def delete_spacy_model(spacy_nlp_pipeline_model_id):
    ''' Delete a spaCy NLP model in a background thread

    Responds with 404 if the model does not exist, 403 if the current user
    is neither its owner nor an administrator, and 202 once the deletion
    has been handed to the worker thread.
    '''
    def _delete_spacy_model(app, spacy_nlp_pipeline_model_id):
        # The thread has no request context, so it needs its own app context.
        with app.app_context():
            model = SpaCyNLPPipelineModel.query.get(spacy_nlp_pipeline_model_id)
            if model is None:
                # Already gone, e.g. removed by a concurrent DELETE request.
                return
            model.delete()
            db.session.commit()

    model = SpaCyNLPPipelineModel.query.get_or_404(spacy_nlp_pipeline_model_id)
    if not (model.user == current_user or current_user.is_administrator()):
        abort(403)
    thread = Thread(
        target=_delete_spacy_model,
        # Pass the real app object; the current_app proxy is bound to this
        # request's context and cannot be used from another thread.
        args=(current_app._get_current_object(), spacy_nlp_pipeline_model_id)
    )
    thread.start()
    return {}, 202
@bp.route('/add-spacy-nlp-pipeline-model', methods=['GET', 'POST'])
def add_spacy_nlp_pipeline_model():
    ''' Show and process the upload form for a new spaCy NLP model

    A submitted but invalid form is answered with ``400`` and the form
    errors as JSON. A valid submission creates the model for the current
    user and answers with ``201`` plus a ``Location`` header pointing at
    the contribution overview. Otherwise the contribution page is rendered
    together with all existing spaCy NLP models.
    '''
    form = SpacyNLPModelContributionForm(prefix='contribute-spacy-nlp-pipeline-model-form')
    if form.is_submitted():
        if not form.validate():
            response = {'errors': form.errors}
            return response, 400
        try:
            spacy_nlp_model = SpaCyNLPPipelineModel.create(
                form.spacy_model_file.data,
                compatible_service_versions=form.compatible_service_versions.data,
                description=form.description.data,
                publisher=form.publisher.data,
                publisher_url=form.publisher_url.data,
                publishing_url=form.publishing_url.data,
                publishing_year=form.publishing_year.data,
                shared=form.shared.data,
                title=form.title.data,
                version=form.version.data,
                user=current_user
            )
        except OSError:
            abort(500)
        db.session.commit()
        # Markup.format() escapes its arguments, so a title containing HTML
        # is shown as text instead of being injected into the page.
        message = Markup('Model "{}" created').format(spacy_nlp_model.title)
        flash(message)
        return {}, 201, {'Location': url_for('contributions.contributions')}
    # Query.all() already returns a list; no extra copy is needed.
    spacy_nlp_pipeline_models = SpaCyNLPPipelineModel.query.all()
    return render_template(
        'contributions/contribute_spacy_nlp_models.html.j2',
        form=form,
        spacy_nlp_pipeline_models=spacy_nlp_pipeline_models,
        title='spaCy NLP Model Contribution'
    )

View File

@ -3,7 +3,8 @@ from app.models import (
Job, Job,
JobResult, JobResult,
JobStatus, JobStatus,
TesseractOCRPipelineModel TesseractOCRPipelineModel,
SpaCyNLPPipelineModel
) )
from datetime import datetime from datetime import datetime
from flask import current_app from flask import current_app
@ -52,7 +53,12 @@ def _create_job_service(job):
command += f' --mem-mb {mem_mb}' command += f' --mem-mb {mem_mb}'
command += f' --n-cores {n_cores}' command += f' --n-cores {n_cores}'
if job.service == 'spacy-nlp-pipeline': if job.service == 'spacy-nlp-pipeline':
command += f' -m {job.service_args["model"]}' model_id = hashids.decode(job.service_args['model'])
model = SpaCyNLPPipelineModel.query.get(model_id)
if model is None:
job.status = JobStatus.FAILED
return
command += f' -m {model.pipeline_name}'
if 'encoding_detection' in job.service_args and job.service_args['encoding_detection']: if 'encoding_detection' in job.service_args and job.service_args['encoding_detection']:
command += ' --check-encoding' command += ' --check-encoding'
elif job.service == 'tesseract-ocr-pipeline': elif job.service == 'tesseract-ocr-pipeline':
@ -105,6 +111,16 @@ def _create_job_service(job):
models_mount_target = f'/usr/local/share/tessdata/{model.filename}' models_mount_target = f'/usr/local/share/tessdata/{model.filename}'
models_mount = f'{models_mount_source}:{models_mount_target}:ro' models_mount = f'{models_mount_source}:{models_mount_target}:ro'
mounts.append(models_mount) mounts.append(models_mount)
elif job.service == 'spacy-nlp-pipeline':
model_id = hashids.decode(job.service_args['model'])
model = SpaCyNLPPipelineModel.query.get(model_id)
if model is None:
job.status = JobStatus.FAILED
return
models_mount_source = model.path
models_mount_target = f'/usr/local/share/spacy/models/{model.filename}'
models_mount = f'{models_mount_source}:{models_mount_target}:ro'
mounts.append(models_mount)
''' ### Output mount ### ''' ''' ### Output mount ### '''
output_mount_source = os.path.join(job.path, 'results') output_mount_source = os.path.join(job.path, 'results')
output_mount_target = '/output' output_mount_target = '/output'
@ -130,6 +146,8 @@ def _create_job_service(job):
) )
''' ## Restart policy ## ''' ''' ## Restart policy ## '''
restart_policy = docker.types.RestartPolicy() restart_policy = docker.types.RestartPolicy()
print(command)
print(mounts)
try: try:
docker_client.services.create( docker_client.services.create(
image, image,

View File

@ -520,6 +520,10 @@ class User(HashidMixin, UserMixin, db.Model):
x.hashid: x.to_json(relationships=True) x.hashid: x.to_json(relationships=True)
for x in self.tesseract_ocr_pipeline_models for x in self.tesseract_ocr_pipeline_models
} }
_json['spacy_nlp_pipeline_models'] = {
x.hashid: x.to_json(relationships=True)
for x in self.spacy_nlp_pipeline_models
}
return _json return _json
class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model): class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
@ -603,6 +607,13 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
pbar.close() pbar.close()
db.session.commit() db.session.commit()
def delete(self):
    ''' Remove the model file from disk and mark the row for deletion

    The caller is responsible for committing the database session.
    '''
    try:
        os.remove(self.path)
    except OSError as os_error:
        # Best effort: a file that cannot be removed is logged, but must
        # not prevent the database row from being deleted.
        current_app.logger.error(os_error)
    db.session.delete(self)
def to_json(self, backrefs=False, relationships=False): def to_json(self, backrefs=False, relationships=False):
_json = { _json = {
'id': self.hashid, 'id': self.hashid,
@ -636,6 +647,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
publisher_url = db.Column(db.String(512)) publisher_url = db.Column(db.String(512))
publishing_url = db.Column(db.String(512)) publishing_url = db.Column(db.String(512))
publishing_year = db.Column(db.Integer) publishing_year = db.Column(db.Integer)
pipeline_name = db.Column(db.String(64))
shared = db.Column(db.Boolean, default=False) shared = db.Column(db.Boolean, default=False)
# Backrefs: user: User # Backrefs: user: User
@ -668,6 +680,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
model.shared = True model.shared = True
model.title = m['title'] model.title = m['title']
model.version = m['version'] model.version = m['version']
model.pipeline_name = m['pipeline_name']
continue continue
model = SpaCyNLPPipelineModel( model = SpaCyNLPPipelineModel(
compatible_service_versions=m['compatible_service_versions'], compatible_service_versions=m['compatible_service_versions'],
@ -679,12 +692,13 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
shared=True, shared=True,
title=m['title'], title=m['title'],
user=nopaque_user, user=nopaque_user,
version=m['version'] version=m['version'],
pipeline_name=m['pipeline_name']
) )
db.session.add(model) db.session.add(model)
db.session.flush(objects=[model]) db.session.flush(objects=[model])
db.session.refresh(model) db.session.refresh(model)
model.filename = f'{model.id}.traineddata' model.filename = m['url'].split('/')[-1]
r = requests.get(m['url'], stream=True) r = requests.get(m['url'], stream=True)
pbar = tqdm( pbar = tqdm(
desc=f'{model.title} ({model.filename})', desc=f'{model.title} ({model.filename})',
@ -702,6 +716,13 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
pbar.close() pbar.close()
db.session.commit() db.session.commit()
def delete(self):
    ''' Remove the model file from disk and mark the row for deletion

    The caller is responsible for committing the database session.
    '''
    try:
        os.remove(self.path)
    except OSError as os_error:
        # Best effort: a file that cannot be removed is logged, but must
        # not prevent the database row from being deleted.
        current_app.logger.error(os_error)
    db.session.delete(self)
def to_json(self, backrefs=False, relationships=False): def to_json(self, backrefs=False, relationships=False):
_json = { _json = {
'id': self.hashid, 'id': self.hashid,
@ -711,6 +732,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
'publisher_url': self.publisher_url, 'publisher_url': self.publisher_url,
'publishing_url': self.publishing_url, 'publishing_url': self.publishing_url,
'publishing_year': self.publishing_year, 'publishing_year': self.publishing_year,
'pipeline_name': self.pipeline_name,
'shared': self.shared, 'shared': self.shared,
'title': self.title, 'title': self.title,
**self.file_mixin_to_json() **self.file_mixin_to_json()
@ -1023,11 +1045,8 @@ class CorpusFile(FileMixin, HashidMixin, db.Model):
def delete(self): def delete(self):
try: try:
os.remove(self.path) os.remove(self.path)
except OSError: except OSError as e:
current_app.logger.error( current_app.logger.error(e)
f'Removing {self.path} led to an OSError!'
)
pass
db.session.delete(self) db.session.delete(self)
self.corpus.status = CorpusStatus.UNPREPARED self.corpus.status = CorpusStatus.UNPREPARED

View File

@ -11,7 +11,7 @@ from wtforms import (
ValidationError ValidationError
) )
from wtforms.validators import InputRequired, Length from wtforms.validators import InputRequired, Length
from app.models import TesseractOCRPipelineModel from app.models import TesseractOCRPipelineModel, SpaCyNLPPipelineModel
from . import SERVICES from . import SERVICES
@ -77,11 +77,11 @@ class CreateTesseractOCRPipelineJobForm(CreateJobBaseForm):
if 'disabled' in self.binarization.render_kw: if 'disabled' in self.binarization.render_kw:
del self.binarization.render_kw['disabled'] del self.binarization.render_kw['disabled']
models = [ models = [
x for x in TesseractOCRPipelineModel.query.filter().all() x for x in TesseractOCRPipelineModel.query.order_by(TesseractOCRPipelineModel.title).all()
if version in x.compatible_service_versions and (x.shared == True or x.user == current_user) if version in x.compatible_service_versions and (x.shared == True or x.user == current_user)
] ]
self.model.choices = [('', 'Choose your option')] self.model.choices = [('', 'Choose your option')]
self.model.choices += [(x.hashid, x.title) for x in models] self.model.choices += [(x.hashid, f'{x.title} [{x.version}]') for x in models]
self.model.default = '' self.model.default = ''
self.version.choices = [(x, x) for x in service_manifest['versions']] self.version.choices = [(x, x) for x in service_manifest['versions']]
self.version.data = version self.version.data = version
@ -150,6 +150,7 @@ class CreateSpacyNLPPipelineJobForm(CreateJobBaseForm):
version = kwargs.pop('version', service_manifest['latest_version']) version = kwargs.pop('version', service_manifest['latest_version'])
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
service_info = service_manifest['versions'][version] service_info = service_manifest['versions'][version]
print(service_info)
if self.encoding_detection.render_kw is None: if self.encoding_detection.render_kw is None:
self.encoding_detection.render_kw = {} self.encoding_detection.render_kw = {}
self.encoding_detection.render_kw['disabled'] = True self.encoding_detection.render_kw['disabled'] = True
@ -157,8 +158,12 @@ class CreateSpacyNLPPipelineJobForm(CreateJobBaseForm):
if 'encoding_detection' in service_info['methods']: if 'encoding_detection' in service_info['methods']:
if 'disabled' in self.encoding_detection.render_kw: if 'disabled' in self.encoding_detection.render_kw:
del self.encoding_detection.render_kw['disabled'] del self.encoding_detection.render_kw['disabled']
models = [
x for x in SpaCyNLPPipelineModel.query.order_by(SpaCyNLPPipelineModel.title).all()
if version in x.compatible_service_versions and (x.shared == True or x.user == current_user)
]
self.model.choices = [('', 'Choose your option')] self.model.choices = [('', 'Choose your option')]
self.model.choices += [(x, y) for x, y in service_info['models'].items()] # noqa self.model.choices += [(x.hashid, f'{x.title} [{x.version}]') for x in models]
self.model.default = '' self.model.default = ''
self.version.choices = [(x, x) for x in service_manifest['versions']] self.version.choices = [(x, x) for x in service_manifest['versions']]
self.version.data = version self.version.data = version

View File

@ -6,7 +6,8 @@ from app.models import (
Job, Job,
JobInput, JobInput,
JobStatus, JobStatus,
TesseractOCRPipelineModel TesseractOCRPipelineModel,
SpaCyNLPPipelineModel
) )
from . import bp, SERVICES from . import bp, SERVICES
from .forms import ( from .forms import (
@ -173,6 +174,7 @@ def spacy_nlp_pipeline():
if version not in service_manifest['versions']: if version not in service_manifest['versions']:
abort(404) abort(404)
form = CreateSpacyNLPPipelineJobForm(prefix='create-job-form', version=version) form = CreateSpacyNLPPipelineJobForm(prefix='create-job-form', version=version)
spacy_nlp_pipeline_models = SpaCyNLPPipelineModel.query.all()
if form.is_submitted(): if form.is_submitted():
if not form.validate(): if not form.validate():
response = {'errors': form.errors} response = {'errors': form.errors}
@ -203,6 +205,7 @@ def spacy_nlp_pipeline():
return render_template( return render_template(
'services/spacy_nlp_pipeline.html.j2', 'services/spacy_nlp_pipeline.html.j2',
form=form, form=form,
spacy_nlp_pipeline_models=spacy_nlp_pipeline_models,
title=service_manifest['name'] title=service_manifest['name']
) )

View File

@ -45,16 +45,21 @@ spacy-nlp-pipeline:
0.1.0: 0.1.0:
methods: methods:
- 'encoding_detection' - 'encoding_detection'
models: # models:
ca: 'Catalan' # ca: 'Catalan'
de: 'German' # de: 'German'
el: 'Greek' # el: 'Greek'
en: 'English' # en: 'English'
es: 'Spanish' # es: 'Spanish'
fr: 'French' # fr: 'French'
it: 'Italian' # it: 'Italian'
pl: 'Polish' # pl: 'Polish'
ru: 'Russian' # ru: 'Russian'
zh: 'Chinese' # zh: 'Chinese'
publishing_year: 2022 publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.0' url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.0'
test:
methods:
- 'encoding_detection'
publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/'

View File

@ -0,0 +1,18 @@
/**
 * Form handler for elements carrying the `create-contribution-form` class.
 *
 * Once the underlying request has loaded with HTTP status 201, the browser
 * is sent to the URL given in the response's `Location` header.
 */
class CreateContributionForm extends Form {
  /** Attach a handler instance to every matching form element on the page. */
  static autoInit() {
    document.querySelectorAll('.create-contribution-form').forEach((formElement) => {
      new CreateContributionForm(formElement);
    });
  }

  /**
   * @param {HTMLFormElement} formElement - The form element to manage.
   */
  constructor(formElement) {
    super(formElement);
    this.addEventListener('requestLoad', (event) => {
      const request = event.target;
      // Only a "201 Created" answer triggers the redirect.
      if (request.status !== 201) {return;}
      window.location.href = request.getResponseHeader('Location');
    });
  }
}

View File

@ -1,5 +1,6 @@
class Form { class Form {
static autoInit() { static autoInit() {
CreateContributionForm.autoInit();
CreateCorpusFileForm.autoInit(); CreateCorpusFileForm.autoInit();
CreateJobForm.autoInit(); CreateJobForm.autoInit();
} }

View File

@ -0,0 +1,76 @@
/**
 * Wires up the delete and edit buttons of the spaCy NLP model list
 * (`#spacy-nlp-model-list`).
 */
class SpacyNLPModelList {
  constructor () {
    // DOM handles are resolved once, at construction time.
    this.elements = {
      spacyNLPModelList: document.querySelector('#spacy-nlp-model-list'),
      deleteButtons: document.querySelectorAll('.delete-spacy-model-button'),
      editButtons: document.querySelectorAll('.edit-spacy-model-button'),
    }
  }

  /** Register click handlers on every delete and edit button. */
  init () {
    let userId = this.elements.spacyNLPModelList.dataset.userId;
    for (let deleteButton of this.elements.deleteButtons) {
      deleteButton.addEventListener('click', () => {this.deleteModel(deleteButton, userId);});
    }
    for (let editButton of this.elements.editButtons) {
      editButton.addEventListener('click', () => {this.editModel(editButton);});
    }
  }

  /**
   * Ask the user for confirmation and, if confirmed, send a DELETE request
   * for the model referenced by the button's `data-model-id`.
   *
   * @param {HTMLElement} deleteButton - Button carrying `data-model-id`.
   * @param {string} userId - Key into `app.data.users`.
   * @returns {Promise<Response>} Resolves with the response when the server
   *   answers with a success status; rejects with the response on an HTTP
   *   error status, or with the error on a network failure. The promise
   *   stays pending if the user cancels the dialog.
   */
  deleteModel(deleteButton, userId) {
    return new Promise((resolve, reject) => {
      let modelId = deleteButton.dataset.modelId;
      let model = app.data.users[userId].spacy_nlp_pipeline_models[modelId];
      let modalElement = Utils.elementFromString(
        `
          <div class="modal">
            <div class="modal-content">
              <h4>Confirm model deletion</h4>
              <p>Do you really want to delete <b class="model-title"></b>? All files will be permanently deleted!</p>
            </div>
            <div class="modal-footer">
              <a class="action-button btn modal-close waves-effect waves-light" data-action="cancel">Cancel</a>
              <a class="action-button btn modal-close red waves-effect waves-light" data-action="confirm">Delete</a>
            </div>
          </div>
        `
      );
      // The title is user supplied; insert it as text so that it can never
      // be interpreted as markup.
      modalElement.querySelector('.model-title').textContent = model.title;
      document.querySelector('#modals').appendChild(modalElement);
      let modal = M.Modal.init(
        modalElement,
        {
          dismissible: false,
          onCloseEnd: () => {
            // The modal is single-use: tear it down once it has closed.
            modal.destroy();
            modalElement.remove();
          }
        }
      );
      let confirmElement = modalElement.querySelector('.action-button[data-action="confirm"]');
      confirmElement.addEventListener('click', (event) => {
        let modelTitle = model.title;
        fetch(`/contributions/edit-spacy-model/${modelId}`, {method: 'DELETE'})
          .then(
            (response) => {
              // fetch() fulfils for HTTP error statuses as well, so the
              // status has to be inspected here, not in the rejection handler.
              if (!response.ok) {
                if (response.status === 403) {app.flash('Forbidden', 'error');}
                if (response.status === 404) {app.flash('Not Found', 'error');}
                reject(response);
                return;
              }
              app.flash(`Model "${modelTitle}" marked for deletion`, 'corpus');
              resolve(response);
            },
            (error) => {
              // Only reached when the request itself failed (e.g. network error).
              reject(error);
            }
          );
      });
      modal.open();
    });
  }

  /** Navigate to the edit page of the model referenced by the button. */
  editModel(editButton) {
    window.location.href = `/contributions/edit-spacy-model/${editButton.dataset.modelId}`;
  }
}

View File

@ -0,0 +1,76 @@
/**
 * Wires up the delete and edit buttons of the Tesseract OCR model list
 * (`#tesseract-ocr-model-list`).
 */
class TesseractOCRModelList {
  constructor () {
    // DOM handles are resolved once, at construction time.
    this.elements = {
      tesseractOCRModelList: document.querySelector('#tesseract-ocr-model-list'),
      deleteButtons: document.querySelectorAll('.delete-button'),
      editButtons: document.querySelectorAll('.edit-button'),
    }
  }

  /** Register click handlers on every delete and edit button. */
  init () {
    let userId = this.elements.tesseractOCRModelList.dataset.userId;
    for (let deleteButton of this.elements.deleteButtons) {
      deleteButton.addEventListener('click', () => {this.deleteModel(deleteButton, userId);});
    }
    for (let editButton of this.elements.editButtons) {
      editButton.addEventListener('click', () => {this.editModel(editButton);});
    }
  }

  /**
   * Ask the user for confirmation and, if confirmed, send a DELETE request
   * for the model referenced by the button's `data-model-id`.
   *
   * @param {HTMLElement} deleteButton - Button carrying `data-model-id`.
   * @param {string} userId - Key into `app.data.users`.
   * @returns {Promise<Response>} Resolves with the response when the server
   *   answers with a success status; rejects with the response on an HTTP
   *   error status, or with the error on a network failure. The promise
   *   stays pending if the user cancels the dialog.
   */
  deleteModel(deleteButton, userId) {
    return new Promise((resolve, reject) => {
      let modelId = deleteButton.dataset.modelId;
      let model = app.data.users[userId].tesseract_ocr_pipeline_models[modelId];
      let modalElement = Utils.elementFromString(
        `
          <div class="modal">
            <div class="modal-content">
              <h4>Confirm model deletion</h4>
              <p>Do you really want to delete <b class="model-title"></b>? All files will be permanently deleted!</p>
            </div>
            <div class="modal-footer">
              <a class="action-button btn modal-close waves-effect waves-light" data-action="cancel">Cancel</a>
              <a class="action-button btn modal-close red waves-effect waves-light" data-action="confirm">Delete</a>
            </div>
          </div>
        `
      );
      // The title is user supplied; insert it as text so that it can never
      // be interpreted as markup.
      modalElement.querySelector('.model-title').textContent = model.title;
      document.querySelector('#modals').appendChild(modalElement);
      let modal = M.Modal.init(
        modalElement,
        {
          dismissible: false,
          onCloseEnd: () => {
            // The modal is single-use: tear it down once it has closed.
            modal.destroy();
            modalElement.remove();
          }
        }
      );
      let confirmElement = modalElement.querySelector('.action-button[data-action="confirm"]');
      confirmElement.addEventListener('click', (event) => {
        let modelTitle = model.title;
        fetch(`/contributions/edit-tesseract-model/${modelId}`, {method: 'DELETE'})
          .then(
            (response) => {
              // fetch() fulfils for HTTP error statuses as well, so the
              // status has to be inspected here, not in the rejection handler.
              if (!response.ok) {
                if (response.status === 403) {app.flash('Forbidden', 'error');}
                if (response.status === 404) {app.flash('Not Found', 'error');}
                reject(response);
                return;
              }
              app.flash(`Model "${modelTitle}" marked for deletion`, 'corpus');
              resolve(response);
            },
            (error) => {
              // Only reached when the request itself failed (e.g. network error).
              reject(error);
            }
          );
      });
      modal.open();
    });
  }

  /** Navigate to the edit page of the model referenced by the button. */
  editModel(editButton) {
    window.location.href = `/contributions/edit-tesseract-model/${editButton.dataset.modelId}`;
  }
}

View File

@ -9,6 +9,7 @@
'js/Forms/Form.js', 'js/Forms/Form.js',
'js/Forms/CreateCorpusFileForm.js', 'js/Forms/CreateCorpusFileForm.js',
'js/Forms/CreateJobForm.js', 'js/Forms/CreateJobForm.js',
'js/Forms/CreateContributionForm.js',
'js/CorpusAnalysis/CQiClient.js', 'js/CorpusAnalysis/CQiClient.js',
'js/CorpusAnalysis/CorpusAnalysisApp.js', 'js/CorpusAnalysis/CorpusAnalysisApp.js',
'js/CorpusAnalysis/CorpusAnalysisConcordance.js', 'js/CorpusAnalysis/CorpusAnalysisConcordance.js',
@ -24,6 +25,8 @@
'js/RessourceLists/JobInputList.js', 'js/RessourceLists/JobInputList.js',
'js/RessourceLists/JobResultList.js', 'js/RessourceLists/JobResultList.js',
'js/RessourceLists/QueryResultList.js', 'js/RessourceLists/QueryResultList.js',
'js/RessourceLists/SpacyNLPModelList.js',
'js/RessourceLists/TesseractOCRModelList.js',
'js/RessourceLists/UserList.js' 'js/RessourceLists/UserList.js'
%} %}
<script src="{{ ASSET_URL }}"></script> <script src="{{ ASSET_URL }}"></script>

View File

@ -0,0 +1,31 @@
{% set breadcrumbs %}
<li class="tab disabled"><i class="material-icons">navigate_next</i></li>
{% if request.path == url_for('.contributions') %}
<li class="tab"><a class="active" href="{{ url_for('.contributions') }}" target="_self">Contributions Overview</a></li>
{% elif request.path == url_for('.add_tesseract_ocr_pipeline_model') %}
<li class="tab"><a href="{{ url_for('.contributions') }}" target="_self">Contributions Overview</a></li>
<li class="tab disabled"><i class="material-icons">navigate_next</i></li>
<li class="tab"><a class="active" href="{{ url_for('.add_tesseract_ocr_pipeline_model') }}" target="_self">{{ title }}</a></li>
{% elif request.path == url_for('.add_spacy_nlp_pipeline_model') %}
<li class="tab"><a href="{{ url_for('.contributions') }}" target="_self">Contributions Overview</a></li>
<li class="tab disabled"><i class="material-icons">navigate_next</i></li>
<li class="tab"><a class="active" href="{{ url_for('.add_spacy_nlp_pipeline_model') }}" target="_self">{{ title }}</a></li>
{% elif tesseract_ocr_pipeline_model and request.path == url_for('.tesseract_ocr_pipeline_model', tesseract_ocr_pipeline_model_id=tesseract_ocr_pipeline_model.id) %}
<li class="tab"><a href="{{ url_for('.contributions') }}" target="_self">Contributions Overview</a></li>
<li class="tab disabled"><i class="material-icons">navigate_next</i></li>
<li class="tab">
<a class="active" href="{{ url_for('.tesseract_ocr_pipeline_model', tesseract_ocr_pipeline_model_id=tesseract_ocr_pipeline_model.id) }}" target="_self">
Edit {{ tesseract_ocr_pipeline_model.title }}
</a>
</li>
{% elif spacy_nlp_pipeline_model and request.path == url_for('.spacy_nlp_pipeline_model', spacy_nlp_pipeline_model_id=spacy_nlp_pipeline_model.id) %}
<li class="tab"><a href="{{ url_for('.contributions') }}" target="_self">Contributions Overview</a></li>
<li class="tab disabled"><i class="material-icons">navigate_next</i></li>
<li class="tab">
<a class="active" href="{{ url_for('.spacy_nlp_pipeline_model', spacy_nlp_pipeline_model_id=spacy_nlp_pipeline_model.id) }}" target="_self">
Edit {{ spacy_nlp_pipeline_model.title }}
</a>
</li>
{% endif %}
{% endset %}

View File

@ -1,32 +0,0 @@
{% extends "base.html.j2" %}
{% import "materialize/wtf.html.j2" as wtf %}
{% block page_content %}
<div class="container">
<div class="row">
<div class="col s12 m8 offset-m2">
<h1 id="title">{{ title }}</h1>
<p>
In order to add a new model, please fill in the form below.
</p>
<form method="POST">
<div class="card-panel">
{{ form.hidden_tag() }}
{{ wtf.render_field(form.title) }}
{{ wtf.render_field(form.description) }}
{{ wtf.render_field(form.publisher) }}
{{ wtf.render_field(form.publisher_url) }}
{{ wtf.render_field(form.publishing_url) }}
{{ wtf.render_field(form.publishing_year) }}
{{ wtf.render_field(form.shared) }}
{{ wtf.render_field(form.version) }}
{{ wtf.render_field(form.compatible_service_versions) }}
{{ wtf.render_field(form.submit, class_='width-100', material_icon='send') }}
</div>
</form>
</div>
</div>
{% endblock page_content %}

View File

@ -0,0 +1,124 @@
{% extends "base.html.j2" %}
{% import "materialize/wtf.html.j2" as wtf %}
{% from "contributions/_breadcrumbs.html.j2" import breadcrumbs with context %}
{# This page adds spaCy NLP models, so it uses the spaCy service scheme like the icon and card below. #}
{% block main_attribs %} class="service-scheme" data-service="spacy-nlp-pipeline"{% endblock main_attribs %}
{% block page_content %}
<div class="container">
<div class="row">
<div class="col s12">
<h1 id="title">{{ title }}</h1>
</div>
<div class="col s12 m3 push-m9">
<div class="center-align">
<p class="hide-on-small-only">&nbsp;</p>
<p class="hide-on-small-only">&nbsp;</p>
<a class="btn-floating btn-large btn-scale-x2 waves-effect waves-light">
<i class="nopaque-icons service-color darken service-icon" data-service="spacy-nlp-pipeline"></i>
</a>
</div>
</div>
<div class="col s12 m9 pull-m3">
<div class="card service-color-border border-darken" data-service="spacy-nlp-pipeline" style="border-top: 10px solid;">
<div class="card-content">
<div class="row">
<div class="col s12">
<div class="card-panel z-depth-0">
<span class="card-title"><i class="left material-icons">layers</i>spaCy NLP Models</span>
<p>You can add more spaCy NLP models using the form below. They will automatically appear in the list of usable models.</p>
<p><a href="">Edit already uploaded models</a></p>
<p><a class="modal-trigger" href="#models-modal">Information about the already existing models.</a></p>
</div>
</div>
</div>
</div>
</div>
</div>
<div class="col s12">
<h2>Add a model</h2>
<div class="card">
<form class="create-contribution-form" enctype="multipart/form-data" method="POST">
<div class="card-content">
{{ form.hidden_tag() }}
<div class="row">
<div class="col s12 l5">
{{ wtf.render_field(form.spacy_model_file, accept='.tar.gz', placeholder='Choose a .tar.gz file') }}
</div>
<div class="col s12 l7">
{{ wtf.render_field(form.title, material_icon='title') }}
</div>
<div class="col s12">
{{ wtf.render_field(form.description, material_icon='description') }}
</div>
<div class="col s12 l6">
{{ wtf.render_field(form.publisher, material_icon='account_balance') }}
</div>
<div class="col s12 l6">
{{ wtf.render_field(form.publishing_year, material_icon='calendar_month') }}
</div>
<div class="col s12">
{{ wtf.render_field(form.publisher_url, material_icon='link') }}
</div>
<div class="col s12">
{{ wtf.render_field(form.publishing_url, material_icon='link') }}
</div>
<div class="col s12 l10">
{{ wtf.render_field(form.version, material_icon='apps') }}
</div>
<div class="col s12 l6">
{{ wtf.render_field(form.compatible_service_versions) }}
</div>
<div class="col s12 l6 right-align" style="padding-right:20px;">
<p></p>
<br>
{{ wtf.render_field(form.shared) }}
</div>
</div>
</div>
<div class="card-action right-align">
{{ wtf.render_field(form.submit, material_icon='send') }}
</div>
</form>
</div>
</div>
</div>
</div>
{% endblock page_content %}
{% block modals %}
{{ super() }}
<div id="models-modal" class="modal">
<div class="modal-content">
<h4>spaCy NLP Pipeline models</h4>
<table>
<thead>
<tr>
<th>Title</th>
<th>Description</th>
<th>Biblio</th>
</tr>
</thead>
<tbody>
{% for m in spacy_nlp_pipeline_models %}
<tr id="spacy-nlp-pipeline-model-{{ m.hashid }}">
<td>{{ m.title }}</td>
{% if m.description == '' %}
<td>Description is not available.</td>
{% else %}
<td>{{ m.description }}</td>
{% endif %}
<td><a href="{{ m.publisher_url }}">{{ m.publisher }}</a> ({{ m.publishing_year }}), {{ m.title }} {{ m.version}}, <a href="{{ m.publishing_url }}">{{ m.publishing_url }}</a></td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
<div class="modal-footer">
<a href="#!" class="modal-close waves-effect waves-light btn">Close</a>
</div>
</div>
{% endblock modals %}

View File

@ -0,0 +1,124 @@
{% extends "base.html.j2" %}
{% import "materialize/wtf.html.j2" as wtf %}
{% from "contributions/_breadcrumbs.html.j2" import breadcrumbs with context %}
{% block main_attribs %} class="service-scheme" data-service="tesseract-ocr-pipeline"{% endblock main_attribs %}
{% block page_content %}
<div class="container">
<div class="row">
<div class="col s12">
<h1 id="title">{{ title }}</h1>
</div>
<div class="col s12 m3 push-m9">
<div class="center-align">
<p class="hide-on-small-only">&nbsp;</p>
<p class="hide-on-small-only">&nbsp;</p>
<a class="btn-floating btn-large btn-scale-x2 waves-effect waves-light">
<i class="nopaque-icons service-color darken service-icon" data-service="tesseract-ocr-pipeline"></i>
</a>
</div>
</div>
<div class="col s12 m9 pull-m3">
<div class="card service-color-border border-darken" data-service="tesseract-ocr-pipeline" style="border-top: 10px solid;">
<div class="card-content">
<div class="row">
<div class="col s12">
<div class="card-panel z-depth-0">
<span class="card-title"><i class="left material-icons">layers</i>Tesseract OCR Models</span>
<p>You can add more Tesseract OCR models using the form below. They will automatically appear in the list of usable models.</p>
<p><a class="modal-trigger" href="#models-modal">Information about the already existing models.</a></p>
<p><a href="">Edit already uploaded models</a></p>
</div>
</div>
</div>
</div>
</div>
</div>
<div class="col s12">
<h2>Add a model</h2>
<div class="card">
<form class="create-contribution-form" enctype="multipart/form-data" method="POST">
<div class="card-content">
{{ form.hidden_tag() }}
<div class="row">
<div class="col s12 l5">
{{ wtf.render_field(form.tesseract_model_file, accept='.traineddata', placeholder='Choose a .traineddata file') }}
</div>
<div class="col s12 l7">
{{ wtf.render_field(form.title, material_icon='title') }}
</div>
<div class="col s12">
{{ wtf.render_field(form.description, material_icon='description') }}
</div>
<div class="col s12 l6">
{{ wtf.render_field(form.publisher, material_icon='account_balance') }}
</div>
<div class="col s12 l6">
{{ wtf.render_field(form.publishing_year, material_icon='calendar_month') }}
</div>
<div class="col s12">
{{ wtf.render_field(form.publisher_url, material_icon='link') }}
</div>
<div class="col s12">
{{ wtf.render_field(form.publishing_url, material_icon='link') }}
</div>
<div class="col s12 l10">
{{ wtf.render_field(form.version, material_icon='apps') }}
</div>
<div class="col s12 l6">
{{ wtf.render_field(form.compatible_service_versions) }}
</div>
<div class="col s12 l6 right-align" style="padding-right:20px;">
<p></p>
<br>
{{ wtf.render_field(form.shared) }}
</div>
</div>
</div>
<div class="card-action right-align">
{{ wtf.render_field(form.submit, material_icon='send') }}
</div>
</form>
</div>
</div>
</div>
</div>
{% endblock page_content %}
{% block modals %}
{{ super() }}
<div id="models-modal" class="modal">
<div class="modal-content">
<h4>Tesseract OCR Pipeline models</h4>
<table>
<thead>
<tr>
<th>Title</th>
<th>Description</th>
<th>Biblio</th>
</tr>
</thead>
<tbody>
{% for m in tesseract_ocr_pipeline_models %}
<tr id="tesseract-ocr-pipeline-model-{{ m.hashid }}">
<td>{{ m.title }}</td>
{% if m.description == '' %}
<td>Description is not available.</td>
{% else %}
<td>{{ m.description }}</td>
{% endif %}
<td><a href="{{ m.publisher_url }}">{{ m.publisher }}</a> ({{ m.publishing_year }}), {{ m.title }} {{ m.version}}, <a href="{{ m.publishing_url }}">{{ m.publishing_url }}</a></td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
<div class="modal-footer">
<a href="#!" class="modal-close waves-effect waves-light btn">Close</a>
</div>
</div>
{% endblock modals %}

View File

@ -0,0 +1,129 @@
{% extends "base.html.j2" %}
{% import "materialize/wtf.html.j2" as wtf %}
{% from "contributions/_breadcrumbs.html.j2" import breadcrumbs with context %}
{% block page_content %}
<div class="container">
<div class="row">
<div class="col s12">
<h1 id="title">{{ title }}</h1>
{# Tesseract OCR Models #}
<div>
<h3>My Tesseract OCR Pipeline Models</h3>
<p>Here you can see and edit the models that you have created. You can also create new models.</p>
<div class="row">
<div class="col s12">
<div class="card">
<div class="card-content">
<div id="tesseract-ocr-model-list" data-user-id="{{ userId }}">
<table>
<thead>
<tr>
<th>Title</th>
<th>Description</th>
<th>Biblio</th>
<th></th>
</tr>
</thead>
<tbody>
{% if tesseract_ocr_user_models|length > 0 %}
{% for m in tesseract_ocr_user_models %}
<tr id="tesseract-ocr-pipeline-model-{{ m.hashid }}">
<td>{{ m.title }}</td>
{% if m.description == '' %}
<td>Description is not available.</td>
{% else %}
<td>{{ m.description }}</td>
{% endif %}
<td><a href="{{ m.publisher_url }}">{{ m.publisher }}</a> ({{ m.publishing_year }}), {{ m.title }} {{ m.version}}, <a href="{{ m.publishing_url }}">{{ m.publishing_url }}</a></td>
<td class="right-align">
<a class="delete-button btn-floating red waves-effect waves-light" data-model-id="{{ m.hashid }}"><i class="material-icons">delete</i></a>
<a class="edit-button btn-floating service-color darken waves-effect waves-light" data-model-id="{{ m.hashid }}"><i class="material-icons">edit</i></a>
</td>
</tr>
{% endfor %}
{% else %}
<tr>
<td colspan="4">No models available.</td>
</tr>
{% endif %}
</tbody>
</table>
</div>
</div>
<div class="card-action right-align">
<a href="{{ url_for('contributions.add_tesseract_ocr_pipeline_model') }}" class="btn waves-effect waves-light"><i class="material-icons left">add</i>Add model file</a>
</div>
</div>
</div>
</div>
</div>
{# spaCy NLP Models #}
<div>
<h3>My spaCy NLP Pipeline Models</h3>
<p>Here you can see and edit the models that you have created. You can also create new models.</p>
<div class="row">
<div class="col s12">
<div class="card">
<div class="card-content">
<div id="spacy-nlp-model-list" data-user-id="{{ userId }}" data-user-models="{{ spacy_nlp_user_models }}">
<table>
<thead>
<tr>
<th>Title</th>
<th>Description</th>
<th>Biblio</th>
<th></th>
</tr>
</thead>
<tbody>
{% if spacy_nlp_user_models|length > 0 %}
{% for m in spacy_nlp_user_models %}
<tr id="spacy_nlp-pipeline-model-{{ m.hashid }}">
<td>{{ m.title }}</td>
{% if m.description == '' %}
<td>Description is not available.</td>
{% else %}
<td>{{ m.description }}</td>
{% endif %}
<td><a href="{{ m.publisher_url }}">{{ m.publisher }}</a> ({{ m.publishing_year }}), {{ m.title }} {{ m.version}}, <a href="{{ m.publishing_url }}">{{ m.publishing_url }}</a></td>
<td class="right-align">
<a class="delete-spacy-model-button btn-floating red waves-effect waves-light" data-model-id="{{ m.hashid }}"><i class="material-icons">delete</i></a>
<a class="edit-spacy-model-button btn-floating service-color darken waves-effect waves-light" data-model-id="{{ m.hashid }}"><i class="material-icons">edit</i></a>
</td>
</tr>
{% endfor %}
{% else %}
<tr>
<td colspan="4">No models available.</td>
</tr>
{% endif %}
</tbody>
</table>
</div>
</div>
<div class="card-action right-align">
<a href="{{ url_for('contributions.add_spacy_nlp_pipeline_model') }}" class="btn waves-effect waves-light"><i class="material-icons left">add</i>Add model file</a>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
{% endblock page_content %}
{% block scripts %}
{{ super() }}
<script>
const tesseractOCRModelList = new TesseractOCRModelList();
tesseractOCRModelList.init();
const spacyNLPModelList = new SpacyNLPModelList();
spacyNLPModelList.init();
</script>
{% endblock scripts %}

View File

@ -0,0 +1,56 @@
{% extends "base.html.j2" %}
{% import "materialize/wtf.html.j2" as wtf %}
{% from "contributions/_breadcrumbs.html.j2" import breadcrumbs with context %}
{% block main_attribs %} class="service-scheme" data-service="spacy-nlp-pipeline"{% endblock main_attribs %}
{% block page_content %}
<div class="container">
<div class="row">
<div class="col s12">
<h1 id="title">{{ title }}</h1>
</div>
<div class="col s12">
<div class="card">
<form class="create-contribution-form" enctype="multipart/form-data" method="POST">
<div class="card-content">
{{ form.hidden_tag() }}
<div class="row">
<div class="col s12 l7">
{{ wtf.render_field(form.title, material_icon='title') }}
</div>
<div class="col s12">
{{ wtf.render_field(form.description, material_icon='description') }}
</div>
<div class="col s12 l6">
{{ wtf.render_field(form.publisher, material_icon='account_balance') }}
</div>
<div class="col s12 l6">
{{ wtf.render_field(form.publishing_year, material_icon='calendar_month') }}
</div>
<div class="col s12">
{{ wtf.render_field(form.publisher_url, material_icon='link') }}
</div>
<div class="col s12">
{{ wtf.render_field(form.publishing_url, material_icon='link') }}
</div>
<div class="col s12 l10">
{{ wtf.render_field(form.version, material_icon='apps') }}
</div>
<div class="col s12 l6 right-align" style="padding-right:20px;">
<p></p>
<br>
{{ wtf.render_field(form.shared) }}
</div>
</div>
</div>
<div class="card-action right-align">
{{ wtf.render_field(form.submit, material_icon='send') }}
</div>
</form>
</div>
</div>
</div>
</div>
{% endblock page_content %}

View File

@ -0,0 +1,56 @@
{% extends "base.html.j2" %}
{% import "materialize/wtf.html.j2" as wtf %}
{% from "contributions/_breadcrumbs.html.j2" import breadcrumbs with context %}
{% block main_attribs %} class="service-scheme" data-service="tesseract-ocr-pipeline"{% endblock main_attribs %}
{% block page_content %}
<div class="container">
<div class="row">
<div class="col s12">
<h1 id="title">{{ title }}</h1>
</div>
<div class="col s12">
<div class="card">
<form class="create-contribution-form" enctype="multipart/form-data" method="POST">
<div class="card-content">
{{ form.hidden_tag() }}
<div class="row">
<div class="col s12 l7">
{{ wtf.render_field(form.title, material_icon='title') }}
</div>
<div class="col s12">
{{ wtf.render_field(form.description, material_icon='description') }}
</div>
<div class="col s12 l6">
{{ wtf.render_field(form.publisher, material_icon='account_balance') }}
</div>
<div class="col s12 l6">
{{ wtf.render_field(form.publishing_year, material_icon='calendar_month') }}
</div>
<div class="col s12">
{{ wtf.render_field(form.publisher_url, material_icon='link') }}
</div>
<div class="col s12">
{{ wtf.render_field(form.publishing_url, material_icon='link') }}
</div>
<div class="col s12 l10">
{{ wtf.render_field(form.version, material_icon='apps') }}
</div>
<div class="col s12 l6 right-align" style="padding-right:20px;">
<p></p>
<br>
{{ wtf.render_field(form.shared) }}
</div>
</div>
</div>
<div class="card-action right-align">
{{ wtf.render_field(form.submit, material_icon='send') }}
</div>
</form>
</div>
</div>
</div>
</div>
{% endblock page_content %}

View File

@ -70,8 +70,16 @@
<div class="col s12 l5"> <div class="col s12 l5">
{{ wtf.render_field(form.txt, accept='text/plain', placeholder='Choose a plain text file') }} {{ wtf.render_field(form.txt, accept='text/plain', placeholder='Choose a plain text file') }}
</div> </div>
<div class="col s12 l4"> <div class="col s12 l4">
{{ wtf.render_field(form.model, material_icon='language') }} <div class="input-field">
<i class="material-icons prefix">language</i>
{{ form.model() }}
{{ form.model.label }}
<span class="helper-text">
<a class="modal-trigger tooltipped" href="#models-modal" data-position="bottom" data-tooltip="See more information about models"><i class="material-icons" style="color:#0064A3;">help_outline</i></a>
<a class="tooltipped" href="{{ url_for('contributions.add_spacy_nlp_pipeline_model') }}" data-position="bottom" data-tooltip="Add your own spaCy NLP models"><i class="material-icons" style="color:#0064A3">new_label</i></a>
</span>
</div>
</div> </div>
<div class="col s12 l3"> <div class="col s12 l3">
{{ wtf.render_field(form.version, material_icon='apps') }} {{ wtf.render_field(form.version, material_icon='apps') }}
@ -122,4 +130,35 @@
<a href="#!" class="modal-close waves-effect waves-light btn red abort-request">Cancel</a> <a href="#!" class="modal-close waves-effect waves-light btn red abort-request">Cancel</a>
</div> </div>
</div> </div>
<div id="models-modal" class="modal">
<div class="modal-content">
<h4>spaCy NLP Pipeline models</h4>
<table>
<thead>
<tr>
<th>Title</th>
<th>Description</th>
<th>Biblio</th>
</tr>
</thead>
<tbody>
{% for m in spacy_nlp_pipeline_models %}
<tr id="spacy-nlp-pipeline-model-{{ m.hashid }}">
<td>{{ m.title }}</td>
{% if m.description == '' %}
<td>Description is not available.</td>
{% else %}
<td>{{ m.description }}</td>
{% endif %}
<td><a href="{{ m.publisher_url }}">{{ m.publisher }}</a> ({{ m.publishing_year }}), {{ m.title }} {{ m.version}}, <a href="{{ m.publishing_url }}">{{ m.publishing_url }}</a></td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
<div class="modal-footer">
<a href="#!" class="modal-close waves-effect waves-light btn">Close</a>
</div>
</div>
{% endblock modals %} {% endblock modals %}

View File

@ -58,7 +58,8 @@
{{ form.model() }} {{ form.model() }}
{{ form.model.label }} {{ form.model.label }}
<span class="helper-text"> <span class="helper-text">
<a class="modal-trigger" href="#models-modal">More details about models</a> <a class="modal-trigger tooltipped" href="#models-modal" data-position="bottom" data-tooltip="See more information about models"><i class="material-icons" style="color:#00A58B;">help_outline</i></a>
<a class="tooltipped" href="{{ url_for('contributions.add_tesseract_ocr_pipeline_model') }}" data-position="bottom" data-tooltip="Add your own Tesseract OCR models"><i class="material-icons" style="color:#00A58B">new_label</i></a>
</span> </span>
{% for error in form.model.errors %} {% for error in form.model.errors %}
<span class="helper-text error-color-text">{{ error }}</span> <span class="helper-text error-color-text">{{ error }}</span>

View File

@ -0,0 +1,28 @@
"""empty message
Revision ID: 721829b5dd25
Revises: 31dd42e5ea6f
Create Date: 2022-11-04 13:58:13.008301
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '721829b5dd25'
down_revision = '31dd42e5ea6f'
branch_labels = None
depends_on = None
def upgrade():
    """Add the nullable ``pipeline_name`` column to ``spacy_nlp_pipeline_models``."""
    # ### commands auto generated by Alembic - please adjust! ###
    pipeline_name_column = sa.Column(
        'pipeline_name',
        sa.String(length=64),
        nullable=True
    )
    op.add_column('spacy_nlp_pipeline_models', pipeline_name_column)
    # ### end Alembic commands ###
def downgrade():
    """Drop the ``pipeline_name`` column from ``spacy_nlp_pipeline_models``."""
    # ### commands auto generated by Alembic - please adjust! ###
    table_name = 'spacy_nlp_pipeline_models'
    column_name = 'pipeline_name'
    op.drop_column(table_name, column_name)
    # ### end Alembic commands ###