mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-06-12 00:50:40 +00:00
Rename all services, use scss, cleanup, add sandpaper conversion script
This commit is contained in:
@ -1,5 +1,7 @@
|
||||
from app.models import TesseractOCRModel
|
||||
from app.models import Job, TesseractOCRModel
|
||||
from flask_login import current_user
|
||||
from flask_wtf import FlaskForm
|
||||
from flask_wtf.file import FileField, FileAllowed, FileRequired
|
||||
from wtforms import (
|
||||
BooleanField,
|
||||
MultipleFileField,
|
||||
@ -8,110 +10,143 @@ from wtforms import (
|
||||
SubmitField,
|
||||
ValidationError
|
||||
)
|
||||
from wtforms.validators import DataRequired, Length
|
||||
from wtforms.validators import DataRequired, InputRequired, Length
|
||||
from . import SERVICES
|
||||
|
||||
|
||||
class AddJobForm(FlaskForm):
|
||||
description = StringField('Description', validators=[DataRequired(), Length(1, 255)]) # noqa
|
||||
description = StringField('Description', validators=[InputRequired()]) # noqa
|
||||
submit = SubmitField()
|
||||
title = StringField('Title', validators=[DataRequired(), Length(1, 32)])
|
||||
title = StringField('Title', validators=[InputRequired()])
|
||||
version = SelectField('Version', validators=[DataRequired()])
|
||||
|
||||
def validate_description(self, field):
|
||||
max_length = Job.description.property.columns[0].type.length
|
||||
if len(field.data) > max_length:
|
||||
raise ValidationError(
|
||||
f'Description must be less than {max_length} characters'
|
||||
)
|
||||
|
||||
class AddSpacyNLPJobForm(AddJobForm):
|
||||
encoding_detection = BooleanField('Encoding detection')
|
||||
files = MultipleFileField('Files', validators=[DataRequired()])
|
||||
model = SelectField(
|
||||
'Model',
|
||||
choices=[('', 'Choose your option')],
|
||||
default='',
|
||||
validators=[DataRequired()]
|
||||
)
|
||||
def validate_title(self, field):
|
||||
max_length = Job.title.property.columns[0].type.length
|
||||
if len(field.data) > max_length:
|
||||
raise ValidationError(
|
||||
f'Title must be less than {max_length} characters'
|
||||
)
|
||||
|
||||
def validate_encoding_detection(self, field):
|
||||
service_info = SERVICES['spacy-nlp']['versions'][self.version.data]
|
||||
if field.data and 'encoding_detection' not in service_info['methods']:
|
||||
raise ValidationError('Encoding detection is not available')
|
||||
|
||||
def validate_files(form, field):
|
||||
valid_extensions = ['.txt']
|
||||
for file in field.data:
|
||||
if not file.filename.lower().endswith(tuple(valid_extensions)):
|
||||
raise ValidationError(
|
||||
'File does not have an approved extension: '
|
||||
'/'.join(valid_extensions)
|
||||
)
|
||||
class AddFileSetupPipelineJobForm(AddJobForm):
|
||||
images = MultipleFileField('File(s)', validators=[DataRequired()])
|
||||
|
||||
def validate_images(form, field):
|
||||
valid_mimetypes = ['image/jpeg', 'image/png', 'image/tiff']
|
||||
for image in field.data:
|
||||
if image.mimetype not in valid_mimetypes:
|
||||
raise ValidationError('JPEG, PNG and TIFF files only!')
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
version = kwargs.pop('version', SERVICES['spacy-nlp']['latest_version']) # noqa
|
||||
service_manifest = SERVICES['file-setup-pipeline']
|
||||
version = kwargs.pop('version', service_manifest['latest_version'])
|
||||
super().__init__(*args, **kwargs)
|
||||
service_info = SERVICES['spacy-nlp']['versions'][version]
|
||||
if 'encoding_detection' not in service_info['methods']:
|
||||
self.encoding_detection.render_kw = {'disabled': True}
|
||||
self.model.choices += [(x, y) for x, y in service_info['models'].items()] # noqa
|
||||
self.version.choices = [(x, x) for x in SERVICES['spacy-nlp']['versions']] # noqa
|
||||
self.version.default = version
|
||||
self.version.choices = [(x, x) for x in service_manifest['versions']]
|
||||
self.version.data = version
|
||||
self.version.default = service_manifest['latest_version']
|
||||
|
||||
|
||||
class AddTesseractOCRJobForm(AddJobForm):
|
||||
class AddTesseractOCRPipelineJobForm(AddJobForm):
|
||||
binarization = BooleanField('Binarization')
|
||||
files = MultipleFileField('Files', validators=[DataRequired()])
|
||||
model = SelectField(
|
||||
'Model',
|
||||
choices=[('', 'Choose your option')],
|
||||
default='',
|
||||
validators=[DataRequired()]
|
||||
)
|
||||
pdf = FileField('File', validators=[FileRequired()])
|
||||
model = SelectField('Model', validators=[DataRequired()])
|
||||
|
||||
def validate_binarization(self, field):
|
||||
service_info = SERVICES['tesseract-ocr']['versions'][self.version.data]
|
||||
service_info = SERVICES['tesseract-ocr-pipeline']['versions'][self.version.data]
|
||||
if field.data and 'binarization' not in service_info['methods']:
|
||||
raise ValidationError('Binarization is not available')
|
||||
|
||||
def validate_files(self, field):
|
||||
valid_extensions = ['.pdf']
|
||||
for file in field.data:
|
||||
if not file.filename.lower().endswith(tuple(valid_extensions)):
|
||||
raise ValidationError(
|
||||
'File does not have an approved extension: '
|
||||
'/'.join(valid_extensions)
|
||||
)
|
||||
def validate_pdf(self, field):
|
||||
if field.data.mimetype != 'application/pdf':
|
||||
raise ValidationError('PDF files only!')
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
version = kwargs.pop('version', SERVICES['tesseract-ocr']['latest_version']) # noqa
|
||||
service_manifest = SERVICES['tesseract-ocr-pipeline']
|
||||
version = kwargs.pop('version', service_manifest['latest_version'])
|
||||
super().__init__(*args, **kwargs)
|
||||
service_info = SERVICES['tesseract-ocr']['versions'][version]
|
||||
service_info = service_manifest['versions'][version]
|
||||
if 'binarization' not in service_info['methods']:
|
||||
self.binarization.render_kw = {'disabled': True}
|
||||
self.model.choices += [(x.hashid, x.title) for x in TesseractOCRModel.query.all()] # noqa
|
||||
self.version.choices = [(x, x) for x in SERVICES['tesseract-ocr']['versions']] # noqa
|
||||
compatible_models = [
|
||||
x for x in TesseractOCRModel.query.filter_by(shared=True).all()
|
||||
if version in x.compatible_service_versions
|
||||
]
|
||||
compatible_models += [
|
||||
x for x in TesseractOCRModel.query.filter_by(shared=False, user=current_user).all()
|
||||
if version in x.compatible_service_versions
|
||||
]
|
||||
self.model.choices = [('', 'Choose your option')]
|
||||
self.model.choices += [(x.hashid, x.title) for x in compatible_models]
|
||||
self.model.default = ''
|
||||
self.version.choices = [(x, x) for x in service_manifest['versions']]
|
||||
self.version.data = version
|
||||
self.version.default = SERVICES['tesseract-ocr']['latest_version']
|
||||
self.version.default = service_manifest['latest_version']
|
||||
|
||||
|
||||
class AddFileSetupJobForm(AddJobForm):
|
||||
files = MultipleFileField('Files', validators=[DataRequired()])
|
||||
class AddTranskribusHTRPipelineJobForm(AddJobForm):
|
||||
binarization = BooleanField('Binarization')
|
||||
pdf = FileField('File', validators=[FileRequired()])
|
||||
model = SelectField('Model', validators=[DataRequired()])
|
||||
|
||||
def validate_files(form, field):
|
||||
valid_extensions = ['.jpeg', '.jpg', '.png', '.tiff', '.tif']
|
||||
for file in field.data:
|
||||
if not file.filename.lower().endswith(tuple(valid_extensions)):
|
||||
raise ValidationError(
|
||||
'File does not have an approved extension: '
|
||||
'/'.join(valid_extensions)
|
||||
)
|
||||
def validate_binarization(self, field):
|
||||
service_info = SERVICES['transkribus-htr-pipeline']['versions'][self.version.data]
|
||||
if field.data and 'binarization' not in service_info['methods']:
|
||||
raise ValidationError('Binarization is not available')
|
||||
|
||||
def validate_pdf(self, field):
|
||||
if field.data.mimetype != 'application/pdf':
|
||||
raise ValidationError('PDF files only!')
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
version = kwargs.pop('version', SERVICES['file-setup']['latest_version']) # noqa
|
||||
service_manifest = SERVICES['transkribus-htr-pipeline']
|
||||
version = kwargs.pop('version', service_manifest['latest_version'])
|
||||
super().__init__(*args, **kwargs)
|
||||
self.version.choices = [(x, x) for x in SERVICES['file-setup']['versions']] # noqa
|
||||
service_info = service_manifest['versions'][version]
|
||||
if 'binarization' not in service_info['methods']:
|
||||
self.binarization.render_kw = {'disabled': True}
|
||||
self.model.choices = [('', 'Choose your option')]
|
||||
self.model.choices += [
|
||||
('37569', 'Tim Model'),
|
||||
('29539', 'UCL–University of Toronto #7')
|
||||
]
|
||||
self.model.default = ''
|
||||
self.version.choices = [(x, x) for x in service_manifest['versions']]
|
||||
self.version.data = version
|
||||
self.version.default = SERVICES['file-setup']['latest_version']
|
||||
self.version.default = service_manifest['latest_version']
|
||||
|
||||
|
||||
AddJobForms = {
|
||||
'file-setup': AddFileSetupJobForm,
|
||||
'tesseract-ocr': AddTesseractOCRJobForm,
|
||||
'spacy-nlp': AddSpacyNLPJobForm
|
||||
}
|
||||
class AddSpacyNLPPipelineJobForm(AddJobForm):
    """Job form for the 'spacy-nlp-pipeline' service.

    Adds an optional encoding detection switch, a single plain text
    upload and a model selection to the fields inherited from AddJobForm.
    The constructor accepts an extra ``version`` keyword argument that
    selects which entry of the service manifest populates the form.
    """

    encoding_detection = BooleanField('Encoding detection')
    txt = FileField('File', validators=[FileRequired()])
    model = SelectField('Model', validators=[DataRequired()])

    def validate_encoding_detection(self, field):
        """Reject encoding detection if the chosen version lacks it."""
        manifest = SERVICES['spacy-nlp-pipeline']
        version_info = manifest['versions'][self.version.data]
        # An unchecked box is always fine; a checked one needs the method
        # to be listed for the selected version.
        supported = (
            not field.data
            or 'encoding_detection' in version_info['methods']
        )
        if not supported:
            raise ValidationError('Encoding detection is not available!')

    def validate_txt(self, field):
        """Accept only uploads whose mimetype is 'text/plain'."""
        if field.data.mimetype == 'text/plain':
            return
        raise ValidationError('Plain text files only!')

    def __init__(self, *args, **kwargs):
        manifest = SERVICES['spacy-nlp-pipeline']
        # 'version' is consumed here and not passed on to the base form.
        chosen_version = kwargs.pop('version', manifest['latest_version'])
        super().__init__(*args, **kwargs)
        version_info = manifest['versions'][chosen_version]
        if 'encoding_detection' not in version_info['methods']:
            # Render the checkbox disabled when the method is not offered.
            self.encoding_detection.render_kw = {'disabled': True}
        # Empty placeholder first, then one (key, label) pair per model.
        self.model.choices = [
            ('', 'Choose your option'),
            *version_info['models'].items()
        ]
        self.model.default = ''
        self.version.choices = [
            (name, name) for name in manifest['versions']
        ]
        self.version.data = chosen_version
        self.version.default = chosen_version
|
||||
|
@ -13,47 +13,33 @@ from flask_login import current_user, login_required
|
||||
from werkzeug.utils import secure_filename
|
||||
from . import bp
|
||||
from . import SERVICES
|
||||
from .forms import AddJobForms
|
||||
from .forms import (
|
||||
AddFileSetupPipelineJobForm,
|
||||
AddTesseractOCRPipelineJobForm,
|
||||
AddTranskribusHTRPipelineJobForm,
|
||||
AddSpacyNLPPipelineJobForm
|
||||
)
|
||||
import json
|
||||
|
||||
|
||||
@bp.route('/corpus-analysis')
|
||||
@bp.route('/file-setup-pipeline', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
def corpus_analysis():
|
||||
return render_template(
|
||||
'services/corpus_analysis.html.j2',
|
||||
title='Corpus analysis'
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/<service>', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
def service(service):
|
||||
# Check if the requested service exists
|
||||
if service not in SERVICES or service not in AddJobForms:
|
||||
def file_setup_pipeline():
|
||||
service = 'file-setup-pipeline'
|
||||
service_manifest = SERVICES[service]
|
||||
version = request.args.get('version', service_manifest['latest_version'])
|
||||
if version not in service_manifest['versions']:
|
||||
abort(404)
|
||||
version = request.args.get('version', SERVICES[service]['latest_version'])
|
||||
if version not in SERVICES[service]['versions']:
|
||||
abort(404)
|
||||
form = AddJobForms[service](prefix='add-job-form', version=version)
|
||||
title = SERVICES[service]['name']
|
||||
form = AddFileSetupPipelineJobForm(prefix='add-job-form', version=version)
|
||||
if form.is_submitted():
|
||||
if not form.validate():
|
||||
return make_response(form.errors, 400)
|
||||
service_args = {}
|
||||
if service == 'spacy-nlp':
|
||||
service_args['model'] = form.model.data
|
||||
if form.encoding_detection.data:
|
||||
service_args['encoding_detection'] = True
|
||||
if service == 'tesseract-ocr':
|
||||
service_args['model'] = hashids.decode(form.model.data)
|
||||
if form.binarization.data:
|
||||
service_args['binarization'] = True
|
||||
job = Job(
|
||||
user=current_user,
|
||||
description=form.description.data,
|
||||
service=service,
|
||||
service_args=json.dumps(service_args),
|
||||
service_args=service_args,
|
||||
service_version=form.version.data,
|
||||
title=form.title.data
|
||||
)
|
||||
@ -67,18 +53,17 @@ def service(service):
|
||||
db.session.rollback()
|
||||
flash('Internal Server Error', 'error')
|
||||
return make_response({'redirect_url': url_for('.service', service=service)}, 500) # noqa
|
||||
for file in form.files.data:
|
||||
filename = secure_filename(file.filename)
|
||||
for image_file in form.images.data:
|
||||
job_input = JobInput(
|
||||
filename=filename,
|
||||
filename=secure_filename(image_file.filename),
|
||||
job=job,
|
||||
mimetype=file.mimetype
|
||||
mimetype=image_file.mimetype
|
||||
)
|
||||
db.session.add(job_input)
|
||||
db.session.flush(objects=[job_input])
|
||||
db.session.refresh(job_input)
|
||||
try:
|
||||
file.save(job_input.path)
|
||||
image_file.save(job_input.path)
|
||||
except OSError as e:
|
||||
current_app.logger.error(e)
|
||||
db.session.rollback()
|
||||
@ -91,5 +76,196 @@ def service(service):
|
||||
return render_template(
|
||||
f'services/{service.replace("-", "_")}.html.j2',
|
||||
form=form,
|
||||
title=title
|
||||
title=service_manifest['name']
|
||||
)
|
||||
|
||||
|
||||
@bp.route('/tesseract-ocr-pipeline', methods=['GET', 'POST'])
@login_required
def tesseract_ocr_pipeline():
    """Show the Tesseract OCR Pipeline job form and create submitted jobs.

    The service version is read from the ``version`` query argument and
    falls back to the manifest's ``latest_version``; a version that is not
    listed in the manifest aborts with 404.

    Returns:
        The rendered service template if the form was not submitted;
        the form errors with status 400 if validation fails;
        ``{'redirect_url': ...}`` with status 500 if creating the job
        directories or saving the uploaded PDF raises ``OSError``;
        ``{'redirect_url': ...}`` with status 201 after the job has been
        committed with status ``JobStatus.SUBMITTED``.
    """
    service = 'tesseract-ocr-pipeline'
    service_manifest = SERVICES[service]
    version = request.args.get('version', service_manifest['latest_version'])
    if version not in service_manifest['versions']:
        abort(404)
    form = AddTesseractOCRPipelineJobForm(
        prefix='add-job-form',
        version=version
    )

    def fail(error):
        # Shared failure path: log, roll back the pending rows and send the
        # client back to this view. The redirect targets this view's own
        # endpoint; the generic '.service' endpoint used here before is not
        # defined by the per-service views, so url_for could not build it.
        current_app.logger.error(error)
        db.session.rollback()
        flash('Internal Server Error', 'error')
        redirect_url = url_for('.tesseract_ocr_pipeline')
        return make_response({'redirect_url': redirect_url}, 500)

    if not form.is_submitted():
        return render_template(
            f'services/{service.replace("-", "_")}.html.j2',
            form=form,
            title=service_manifest['name']
        )
    if not form.validate():
        return make_response(form.errors, 400)
    service_args = {'model': hashids.decode(form.model.data)}
    if form.binarization.data:
        service_args['binarization'] = True
    job = Job(
        user=current_user,
        description=form.description.data,
        service=service,
        service_args=service_args,
        service_version=form.version.data,
        title=form.title.data
    )
    db.session.add(job)
    # Flush and refresh before touching the file system, as the original
    # code does, so the job row is populated when makedirs() runs.
    db.session.flush(objects=[job])
    db.session.refresh(job)
    try:
        job.makedirs()
    except OSError as e:
        return fail(e)
    pdf = form.pdf.data
    job_input = JobInput(
        filename=secure_filename(pdf.filename),
        job=job,
        mimetype=pdf.mimetype
    )
    db.session.add(job_input)
    db.session.flush(objects=[job_input])
    db.session.refresh(job_input)
    try:
        pdf.save(job_input.path)
    except OSError as e:
        return fail(e)
    job.status = JobStatus.SUBMITTED
    db.session.commit()
    flash(f'Job "{job.title}" added', 'job')
    redirect_url = url_for('jobs.job', job_id=job.id)
    return make_response({'redirect_url': redirect_url}, 201)
|
||||
|
||||
|
||||
@bp.route('/transkribus-htr-pipeline', methods=['GET', 'POST'])
@login_required
def transkribus_htr_pipeline():
    """Show the Transkribus HTR Pipeline job form and create submitted jobs.

    Aborts with 404 unless the ``NOPAQUE_TRANSKRIBUS_ENABLED`` config value
    is truthy. The service version is read from the ``version`` query
    argument and falls back to the manifest's ``latest_version``; a version
    that is not listed in the manifest also aborts with 404.

    Returns:
        The rendered service template if the form was not submitted;
        the form errors with status 400 if validation fails;
        ``{'redirect_url': ...}`` with status 500 if creating the job
        directories or saving the uploaded PDF raises ``OSError``;
        ``{'redirect_url': ...}`` with status 201 after the job has been
        committed with status ``JobStatus.SUBMITTED``.
    """
    if not current_app.config.get('NOPAQUE_TRANSKRIBUS_ENABLED'):
        abort(404)
    service = 'transkribus-htr-pipeline'
    service_manifest = SERVICES[service]
    version = request.args.get('version', service_manifest['latest_version'])
    if version not in service_manifest['versions']:
        abort(404)
    form = AddTranskribusHTRPipelineJobForm(
        prefix='add-job-form',
        version=version
    )

    def fail(error):
        # Shared failure path: log, roll back the pending rows and send the
        # client back to this view. The redirect targets this view's own
        # endpoint; the generic '.service' endpoint used here before is not
        # defined by the per-service views, so url_for could not build it.
        current_app.logger.error(error)
        db.session.rollback()
        flash('Internal Server Error', 'error')
        redirect_url = url_for('.transkribus_htr_pipeline')
        return make_response({'redirect_url': redirect_url}, 500)

    if not form.is_submitted():
        return render_template(
            f'services/{service.replace("-", "_")}.html.j2',
            form=form,
            title=service_manifest['name']
        )
    if not form.validate():
        return make_response(form.errors, 400)
    # The model value is stored as submitted (no hashid decoding here).
    service_args = {'model': form.model.data}
    if form.binarization.data:
        service_args['binarization'] = True
    job = Job(
        user=current_user,
        description=form.description.data,
        service=service,
        service_args=service_args,
        service_version=form.version.data,
        title=form.title.data
    )
    db.session.add(job)
    # Flush and refresh before touching the file system, as the original
    # code does, so the job row is populated when makedirs() runs.
    db.session.flush(objects=[job])
    db.session.refresh(job)
    try:
        job.makedirs()
    except OSError as e:
        return fail(e)
    pdf = form.pdf.data
    job_input = JobInput(
        filename=secure_filename(pdf.filename),
        job=job,
        mimetype=pdf.mimetype
    )
    db.session.add(job_input)
    db.session.flush(objects=[job_input])
    db.session.refresh(job_input)
    try:
        pdf.save(job_input.path)
    except OSError as e:
        return fail(e)
    job.status = JobStatus.SUBMITTED
    db.session.commit()
    flash(f'Job "{job.title}" added', 'job')
    redirect_url = url_for('jobs.job', job_id=job.id)
    return make_response({'redirect_url': redirect_url}, 201)
|
||||
|
||||
|
||||
@bp.route('/spacy-nlp-pipeline', methods=['GET', 'POST'])
@login_required
def spacy_nlp_pipeline():
    """Show the spaCy NLP Pipeline job form and create submitted jobs.

    The service version is read from the ``version`` query argument and
    falls back to the manifest's ``latest_version``; a version that is not
    listed in the manifest aborts with 404.

    Returns:
        The rendered service template if the form was not submitted;
        the form errors with status 400 if validation fails;
        ``{'redirect_url': ...}`` with status 500 if creating the job
        directories or saving the uploaded text file raises ``OSError``;
        ``{'redirect_url': ...}`` with status 201 after the job has been
        committed with status ``JobStatus.SUBMITTED``.
    """
    service = 'spacy-nlp-pipeline'
    service_manifest = SERVICES[service]
    # Read the default from the manifest bound above, like the sibling
    # pipeline views do.
    version = request.args.get('version', service_manifest['latest_version'])
    if version not in service_manifest['versions']:
        abort(404)
    form = AddSpacyNLPPipelineJobForm(
        prefix='add-job-form',
        version=version
    )

    def fail(error):
        # Shared failure path: log, roll back the pending rows and send the
        # client back to this view. The redirect targets this view's own
        # endpoint; the generic '.service' endpoint used here before is not
        # defined by the per-service views, so url_for could not build it.
        current_app.logger.error(error)
        db.session.rollback()
        flash('Internal Server Error', 'error')
        redirect_url = url_for('.spacy_nlp_pipeline')
        return make_response({'redirect_url': redirect_url}, 500)

    if not form.is_submitted():
        return render_template(
            f'services/{service.replace("-", "_")}.html.j2',
            form=form,
            title=service_manifest['name']
        )
    if not form.validate():
        return make_response(form.errors, 400)
    service_args = {'model': form.model.data}
    if form.encoding_detection.data:
        service_args['encoding_detection'] = True
    job = Job(
        user=current_user,
        description=form.description.data,
        service=service,
        service_args=service_args,
        service_version=form.version.data,
        title=form.title.data
    )
    db.session.add(job)
    # Flush and refresh before touching the file system, as the original
    # code does, so the job row is populated when makedirs() runs.
    db.session.flush(objects=[job])
    db.session.refresh(job)
    try:
        job.makedirs()
    except OSError as e:
        return fail(e)
    txt = form.txt.data
    job_input = JobInput(
        filename=secure_filename(txt.filename),
        job=job,
        mimetype=txt.mimetype
    )
    db.session.add(job_input)
    db.session.flush(objects=[job_input])
    db.session.refresh(job_input)
    try:
        txt.save(job_input.path)
    except OSError as e:
        return fail(e)
    job.status = JobStatus.SUBMITTED
    db.session.commit()
    flash(f'Job "{job.title}" added', 'job')
    redirect_url = url_for('jobs.job', job_id=job.id)
    return make_response({'redirect_url': redirect_url}, 201)
|
||||
|
||||
|
||||
@bp.route('/corpus-analysis')
@login_required
def corpus_analysis():
    """Render the corpus analysis template with the title 'Corpus analysis'."""
    template_name = 'services/corpus_analysis.html.j2'
    return render_template(template_name, title='Corpus analysis')
|
@ -1,38 +1,70 @@
|
||||
# TODO: This could also be done via GitLab/GitHub APIs
|
||||
#file-setup-pipeline:
|
||||
file-setup:
|
||||
file-setup-pipeline:
|
||||
name: 'File setup pipeline'
|
||||
publisher: 'Bielefeld University - CRC 1288 - INF'
|
||||
latest_version: '0.1.0'
|
||||
versions:
|
||||
0.1.0:
|
||||
publisher: 'Bielefeld University - CRC 1288 - INF'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/file-setup/-/releases/v0.1.0'
|
||||
#spacy-nlp-pipeline:
|
||||
spacy-nlp:
|
||||
name: 'spaCy NLP'
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/file-setup-pipeline/-/releases/v0.1.0'
|
||||
tesseract-ocr-pipeline:
|
||||
name: 'Tesseract OCR Pipeline'
|
||||
publisher: 'Bielefeld University - CRC 1288 - INF'
|
||||
latest_version: '0.1.4'
|
||||
versions:
|
||||
0.1.0:
|
||||
methods:
|
||||
- 'binarization'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.0'
|
||||
0.1.1:
|
||||
methods:
|
||||
- 'binarization'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1'
|
||||
0.1.2:
|
||||
methods:
|
||||
- 'binarization'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.2'
|
||||
0.1.3:
|
||||
methods:
|
||||
- 'binarization'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.3'
|
||||
0.1.4:
|
||||
methods:
|
||||
- 'binarization'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.4'
|
||||
transkribus-htr-pipeline:
|
||||
name: 'Transkribus HTR Pipeline'
|
||||
publisher: 'Bielefeld University - CRC 1288 - INF'
|
||||
latest_version: '0.1.0'
|
||||
versions:
|
||||
0.1.0:
|
||||
methods:
|
||||
- 'binarization'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/transkribus-htr-pipeline/-/releases/v0.1.0'
|
||||
spacy-nlp-pipeline:
|
||||
name: 'spaCy NLP Pipeline'
|
||||
publisher: 'Bielefeld University - CRC 1288 - INF'
|
||||
latest_version: '0.1.0'
|
||||
versions:
|
||||
0.1.0:
|
||||
methods:
|
||||
- 'encoding_detection'
|
||||
models:
|
||||
ca: 'Catalan'
|
||||
de: 'German'
|
||||
el: 'Greek'
|
||||
en: 'English'
|
||||
es: 'Spanish'
|
||||
fr: 'French'
|
||||
it: 'Italian'
|
||||
pl: 'Polish'
|
||||
ru: 'Russian'
|
||||
zh: 'Chinese'
|
||||
publisher: 'Bielefeld University - CRC 1288 - INF'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nlp/-/releases/v0.1.0'
|
||||
#tesseract-ocr-pipeline:
|
||||
tesseract-ocr:
|
||||
name: 'Tesseract OCR'
|
||||
latest_version: '0.1.0'
|
||||
versions:
|
||||
0.1.0:
|
||||
methods:
|
||||
- 'binarization'
|
||||
publisher: 'Bielefeld University - CRC 1288 - INF'
|
||||
publishing_year: 2022
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/ocr/-/releases/v0.1.0'
|
||||
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.0'
|
Reference in New Issue
Block a user