nopaque/app/templates/services/tesseract_ocr_pipeline.html.j2

188 lines
7.3 KiB
Plaintext
Raw Normal View History

{% extends "base.html.j2" %}
{% from "services/_breadcrumbs.html.j2" import breadcrumbs with context %}
{% import "materialize/wtf.html.j2" as wtf %}
2020-02-07 14:21:59 +00:00
{% block main_attribs %} class="service-scheme" data-service="tesseract-ocr-pipeline"{% endblock main_attribs %}
2020-10-26 11:47:06 +00:00
2020-08-25 13:37:45 +00:00
{% block page_content %}
2020-10-26 11:47:06 +00:00
<div class="container">
<div class="row">
<div class="col s12">
<h1 id="title">{{ title }}</h1>
</div>
2020-08-25 09:49:43 +00:00
2020-10-26 11:47:06 +00:00
<div class="col s12 m3 push-m9">
<div class="center-align">
<p class="hide-on-small-only">&nbsp;</p>
<p class="hide-on-small-only">&nbsp;</p>
2021-05-05 08:05:12 +00:00
<a class="btn-floating btn-large btn-scale-x2 waves-effect waves-light">
<i class="nopaque-icons service-color darken service-icon" data-service="tesseract-ocr-pipeline"></i>
2020-10-26 11:47:06 +00:00
</a>
</div>
</div>
2020-03-07 19:20:58 +00:00
2020-10-26 11:47:06 +00:00
<div class="col s12 m9 pull-m3">
<div class="card service-color-border border-darken" data-service="tesseract-ocr-pipeline" style="border-top: 10px solid;">
2020-10-26 11:47:06 +00:00
<div class="card-content">
<div class="row">
<div class="col s12">
<div class="card-panel z-depth-0">
<span class="card-title"><i class="left material-icons">layers</i>OCR</span>
<p>In this process, nopaque converts your image data like photos or scans into text data. This step enables you to proceed with the computational analysis of your documents.</p>
</div>
</div>
2020-02-18 14:31:10 +00:00
</div>
</div>
</div>
</div>
2020-03-02 13:19:54 +00:00
2020-10-26 11:47:06 +00:00
<div class="col s12">
2020-10-28 10:21:05 +00:00
<h2>Submit a job</h2>
2020-10-26 11:47:06 +00:00
<div class="card">
2022-09-02 11:07:30 +00:00
<form class="create-job-form" enctype="multipart/form-data" method="POST">
2020-10-26 11:47:06 +00:00
<div class="card-content">
{{ form.hidden_tag() }}
2020-10-26 11:47:06 +00:00
<div class="row">
<div class="col s12 l4">
2022-04-12 14:11:24 +00:00
{{ wtf.render_field(form.title, material_icon='title') }}
2020-10-26 11:47:06 +00:00
</div>
<div class="col s12 l8">
2022-04-12 14:11:24 +00:00
{{ wtf.render_field(form.description, material_icon='description') }}
2020-10-26 11:47:06 +00:00
</div>
<div class="col s12 l5">
{{ wtf.render_field(form.pdf, accept='application/pdf', placeholder='Choose a PDF file') }}
2020-10-26 11:47:06 +00:00
</div>
<div class="col s12 l4">
2022-04-22 13:27:52 +00:00
<div class="input-field">
<i class="material-icons prefix">language</i>
{{ form.model() }}
{{ form.model.label }}
<span class="helper-text">
{# Icon-only helper links: the Material Icons ligature text is hidden from assistive technology and each link gets an explicit accessible name that mirrors its tooltip. #}
<a class="modal-trigger tooltipped" href="#models-modal" data-position="bottom" data-tooltip="See more information about models" aria-label="See more information about models"><i class="material-icons" style="color:#00A58B;" aria-hidden="true">help_outline</i></a>
<a class="tooltipped" href="{{ url_for('contributions.add_tesseract_ocr_pipeline_model') }}" data-position="bottom" data-tooltip="Add your own Tesseract OCR models" aria-label="Add your own Tesseract OCR models"><i class="material-icons" style="color:#00A58B" aria-hidden="true">new_label</i></a>
2022-04-22 13:27:52 +00:00
</span>
{% for error in form.model.errors %}
<span class="helper-text error-color-text">{{ error }}</span>
{% endfor %}
</div>
2020-10-26 11:47:06 +00:00
</div>
<div class="col s12 l3">
{{ wtf.render_field(form.version, material_icon='apps') }}
2020-10-26 11:47:06 +00:00
</div>
<div class="col s12">
<span class="card-title">Preprocessing</span>
</div>
<div class="col s9">
<p>{{ form.binarization.label.text }}</p>
2020-10-26 11:47:06 +00:00
<p class="light">Based on a brightness threshold pixels are converted into either black or white. It is useful to reduce noise in images. (<b>longer duration</b>)</p>
</div>
<div class="col s3 right-align">
<div class="switch">
<label>
{{ form.binarization() }}
2020-10-26 11:47:06 +00:00
<span class="lever"></span>
</label>
</div>
</div>
2022-11-10 11:14:03 +00:00
<div class="col s12"><p>&nbsp;</p></div>
<div class="col s9">
2022-11-10 15:19:58 +00:00
<p>Intensity (between 0 and 1)</p>
2022-11-10 11:14:03 +00:00
<p class="range-field">{{ form.ocropus_nlbin_threshold() }}</p>
</div>
2020-10-26 11:47:06 +00:00
<div class="col s12"><p>&nbsp;</p></div>
<div class="col s12 divider"></div>
<div class="col s12"><p>&nbsp;</p></div>
<div class="col s9">
<p>Page range</p>
<p class="light"></p>
</div>
<div class="col s3 right-align">
<div class="switch">
<label>
<input disabled type="checkbox">
<span class="lever"></span>
</label>
</div>
</div>
<div class="col s12"><p>&nbsp;</p></div>
<div class="col s12 divider"></div>
<div class="col s12"><p>&nbsp;</p></div>
<div class="col s9">
<p>Page rotation</p>
<p class="light"></p>
</div>
<div class="col s3 right-align">
<div class="switch">
<label>
<input disabled type="checkbox">
<span class="lever"></span>
</label>
</div>
</div>
<div class="col s12"><p>&nbsp;</p></div>
<div class="col s12 divider"></div>
<div class="col s12"><p>&nbsp;</p></div>
<div class="col s9">
<p>Page split</p>
<p class="light"></p>
</div>
<div class="col s3 right-align">
<div class="switch">
<label>
<input disabled type="checkbox">
<span class="lever"></span>
</label>
</div>
</div>
<!--
Separate each setting with the following
<div class="col s12"><p>&nbsp;</p></div>
<div class="col s12 divider"></div>
<div class="col s12"><p>&nbsp;</p></div>
-->
2020-02-27 15:26:04 +00:00
</div>
2020-03-02 09:42:25 +00:00
</div>
2020-10-26 11:47:06 +00:00
<div class="card-action right-align">
{{ wtf.render_field(form.submit, material_icon='send') }}
2019-09-12 14:00:48 +00:00
</div>
2020-10-26 11:47:06 +00:00
</form>
2020-03-02 09:42:25 +00:00
</div>
2020-10-26 11:47:06 +00:00
</div>
2019-07-19 11:28:17 +00:00
</div>
</div>
2021-12-01 13:15:20 +00:00
{% endblock page_content %}
2021-12-01 13:15:20 +00:00
{% block modals %}
{{ super() }}
2022-04-22 13:27:52 +00:00
<div id="models-modal" class="modal">
<div class="modal-content">
<h4>Tesseract OCR Pipeline models</h4>
<table>
{# Column headers of the model overview table; scope="col" ties each header to the cells below it for assistive technology. #}
<thead>
<tr>
<th scope="col">Title</th>
<th scope="col">Description</th>
<th scope="col">Biblio</th>
</tr>
</thead>
<tbody>
2022-10-12 08:23:05 +00:00
{% for m in tesseract_ocr_pipeline_models %}
<tr id="tesseract-ocr-pipeline-model-{{ m.hashid }}">
2022-04-22 13:27:52 +00:00
<td>{{ m.title }}</td>
{# Show the placeholder whenever the description is empty or missing (e.g. None), not only when it is exactly ''. #}
{% if m.description %}
<td>{{ m.description }}</td>
{% else %}
<td>Description is not available.</td>
{% endif %}
{# Bibliographic reference: publisher (year), title and version, followed by the publishing URL. #}
<td><a href="{{ m.publisher_url }}">{{ m.publisher }}</a> ({{ m.publishing_year }}), {{ m.title }} {{ m.version }}, <a href="{{ m.publishing_url }}">{{ m.publishing_url }}</a></td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
<div class="modal-footer">
<a href="#!" class="modal-close waves-effect waves-light btn">Close</a>
</div>
</div>
2021-12-01 13:15:20 +00:00
{% endblock modals %}