Add new tesseract-ocr-pipeline version. Remove redundant spacy-nlp-pipeline version.

This commit is contained in:
Patrick Jentsch 2023-11-28 10:34:30 +01:00
parent 9be5ce6014
commit bdcc80a66f
3 changed files with 111 additions and 18 deletions

View File

@ -8,7 +8,7 @@
pipeline_name: 'ca_core_news_md' pipeline_name: 'ca_core_news_md'
version: '3.2.0' version: '3.2.0'
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- title: 'German' - title: 'German'
description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.2.0/de_core_news_md-3.2.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.2.0/de_core_news_md-3.2.0.tar.gz'
@ -19,7 +19,7 @@
pipeline_name: 'de_core_news_md' pipeline_name: 'de_core_news_md'
version: '3.2.0' version: '3.2.0'
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- title: 'Greek' - title: 'Greek'
description: 'Greek pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' description: 'Greek pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/el_core_news_md-3.2.0/el_core_news_md-3.2.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/el_core_news_md-3.2.0/el_core_news_md-3.2.0.tar.gz'
@ -120,7 +120,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'German' - title: 'German'
description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.' description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.'
url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.4.0/de_core_news_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.4.0/de_core_news_md-3.4.0.tar.gz'
@ -132,7 +131,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Greek' - title: 'Greek'
description: 'Greek pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner, attribute_ruler.' description: 'Greek pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner, attribute_ruler.'
url: 'https://github.com/explosion/spacy-models/releases/download/el_core_news_md-3.4.0/el_core_news_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/el_core_news_md-3.4.0/el_core_news_md-3.4.0.tar.gz'
@ -144,7 +142,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'English' - title: 'English'
description: 'English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.' description: 'English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.4.1/en_core_web_md-3.4.1.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.4.1/en_core_web_md-3.4.1.tar.gz'
@ -156,7 +153,6 @@
version: '3.4.1' version: '3.4.1'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Spanish' - title: 'Spanish'
description: 'Spanish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' description: 'Spanish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/es_core_news_md-3.4.0/es_core_news_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/es_core_news_md-3.4.0/es_core_news_md-3.4.0.tar.gz'
@ -168,7 +164,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'French' - title: 'French'
description: 'French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' description: 'French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/fr_core_news_md-3.4.0/fr_core_news_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/fr_core_news_md-3.4.0/fr_core_news_md-3.4.0.tar.gz'
@ -180,7 +175,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Italian' - title: 'Italian'
description: 'Italian pipeline optimized for CPU. Components: tok2vec, morphologizer, tagger, parser, lemmatizer (trainable_lemmatizer), senter, ner' description: 'Italian pipeline optimized for CPU. Components: tok2vec, morphologizer, tagger, parser, lemmatizer (trainable_lemmatizer), senter, ner'
url: 'https://github.com/explosion/spacy-models/releases/download/it_core_news_md-3.4.0/it_core_news_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/it_core_news_md-3.4.0/it_core_news_md-3.4.0.tar.gz'
@ -192,7 +186,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Polish' - title: 'Polish'
description: 'Polish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), tagger, senter, ner.' description: 'Polish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), tagger, senter, ner.'
url: 'https://github.com/explosion/spacy-models/releases/download/pl_core_news_md-3.4.0/pl_core_news_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/pl_core_news_md-3.4.0/pl_core_news_md-3.4.0.tar.gz'
@ -204,7 +197,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Russian' - title: 'Russian'
description: 'Russian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.' description: 'Russian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/ru_core_news_md-3.4.0/ru_core_news_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/ru_core_news_md-3.4.0/ru_core_news_md-3.4.0.tar.gz'
@ -216,7 +208,6 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Chinese' - title: 'Chinese'
description: 'Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.' description: 'Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.'
url: 'https://github.com/explosion/spacy-models/releases/download/zh_core_web_md-3.4.0/zh_core_web_md-3.4.0.tar.gz' url: 'https://github.com/explosion/spacy-models/releases/download/zh_core_web_md-3.4.0/zh_core_web_md-3.4.0.tar.gz'
@ -228,4 +219,3 @@
version: '3.4.0' version: '3.4.0'
compatible_service_versions: compatible_service_versions:
- '0.1.1' - '0.1.1'
- '0.1.2'

View File

@ -9,6 +9,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Amharic' # - title: 'Amharic'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/amh.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/amh.traineddata'
@ -20,6 +21,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Arabic' - title: 'Arabic'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ara.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ara.traineddata'
@ -31,6 +33,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Assamese' # - title: 'Assamese'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/asm.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/asm.traineddata'
@ -42,6 +45,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Azerbaijani' # - title: 'Azerbaijani'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze.traineddata'
@ -53,6 +57,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Azerbaijani - Cyrillic' # - title: 'Azerbaijani - Cyrillic'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze_cyrl.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze_cyrl.traineddata'
@ -64,6 +69,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Belarusian' # - title: 'Belarusian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bel.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bel.traineddata'
@ -75,6 +81,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Bengali' # - title: 'Bengali'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ben.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ben.traineddata'
@ -86,6 +93,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Tibetan' # - title: 'Tibetan'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bod.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bod.traineddata'
@ -97,6 +105,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Bosnian' # - title: 'Bosnian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bos.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bos.traineddata'
@ -108,6 +117,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Bulgarian' # - title: 'Bulgarian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bul.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bul.traineddata'
@ -119,6 +129,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Catalan; Valencian' # - title: 'Catalan; Valencian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cat.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cat.traineddata'
@ -130,6 +141,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Cebuano' # - title: 'Cebuano'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ceb.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ceb.traineddata'
@ -141,6 +153,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Czech' # - title: 'Czech'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ces.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ces.traineddata'
@ -152,6 +165,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Chinese - Simplified' # - title: 'Chinese - Simplified'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_sim.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_sim.traineddata'
@ -163,6 +177,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Chinese - Traditional' - title: 'Chinese - Traditional'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_tra.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_tra.traineddata'
@ -174,6 +189,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Cherokee' # - title: 'Cherokee'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chr.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chr.traineddata'
@ -185,6 +201,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Welsh' # - title: 'Welsh'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cym.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cym.traineddata'
@ -196,6 +213,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Danish' - title: 'Danish'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dan.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dan.traineddata'
@ -207,6 +225,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'German' - title: 'German'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/deu.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/deu.traineddata'
@ -218,6 +237,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Dzongkha' # - title: 'Dzongkha'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dzo.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dzo.traineddata'
@ -229,6 +249,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Greek, Modern (1453-)' - title: 'Greek, Modern (1453-)'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ell.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ell.traineddata'
@ -240,6 +261,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'English' - title: 'English'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eng.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eng.traineddata'
@ -251,6 +273,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'English, Middle (1100-1500)' - title: 'English, Middle (1100-1500)'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/enm.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/enm.traineddata'
@ -262,6 +285,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Esperanto' # - title: 'Esperanto'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/epo.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/epo.traineddata'
@ -273,6 +297,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Estonian' # - title: 'Estonian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/est.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/est.traineddata'
@ -284,6 +309,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Basque' # - title: 'Basque'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eus.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eus.traineddata'
@ -295,6 +321,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Persian' # - title: 'Persian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fas.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fas.traineddata'
@ -306,6 +333,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Finnish' # - title: 'Finnish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fin.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fin.traineddata'
@ -317,6 +345,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'French' - title: 'French'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fra.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fra.traineddata'
@ -328,6 +357,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'German Fraktur' - title: 'German Fraktur'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frk.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frk.traineddata'
@ -339,6 +369,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'French, Middle (ca. 1400-1600)' - title: 'French, Middle (ca. 1400-1600)'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frm.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frm.traineddata'
@ -350,6 +381,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Irish' # - title: 'Irish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/gle.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/gle.traineddata'
@ -361,6 +393,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Galician' # - title: 'Galician'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/glg.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/glg.traineddata'
@ -372,6 +405,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Greek, Ancient (-1453)' - title: 'Greek, Ancient (-1453)'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/grc.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/grc.traineddata'
@ -383,6 +417,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Gujarati' # - title: 'Gujarati'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/guj.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/guj.traineddata'
@ -394,6 +429,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Haitian; Haitian Creole' # - title: 'Haitian; Haitian Creole'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hat.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hat.traineddata'
@ -405,6 +441,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Hebrew' # - title: 'Hebrew'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/heb.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/heb.traineddata'
@ -416,6 +453,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Hindi' # - title: 'Hindi'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hin.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hin.traineddata'
@ -427,6 +465,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Croatian' # - title: 'Croatian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hrv.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hrv.traineddata'
@ -438,6 +477,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Hungarian' # - title: 'Hungarian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hun.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hun.traineddata'
@ -449,6 +489,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Inuktitut' # - title: 'Inuktitut'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/iku.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/iku.traineddata'
@ -460,6 +501,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Indonesian' # - title: 'Indonesian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ind.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ind.traineddata'
@ -471,6 +513,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Icelandic' # - title: 'Icelandic'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/isl.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/isl.traineddata'
@ -482,6 +525,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Italian' - title: 'Italian'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita.traineddata'
@ -493,6 +537,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Italian - Old' - title: 'Italian - Old'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita_old.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita_old.traineddata'
@ -504,6 +549,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Javanese' # - title: 'Javanese'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jav.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jav.traineddata'
@ -515,6 +561,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Japanese' # - title: 'Japanese'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jpn.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jpn.traineddata'
@ -526,6 +573,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Kannada' # - title: 'Kannada'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kan.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kan.traineddata'
@ -537,6 +585,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Georgian' # - title: 'Georgian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat.traineddata'
@ -548,6 +597,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Georgian - Old' # - title: 'Georgian - Old'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat_old.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat_old.traineddata'
@ -559,6 +609,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Kazakh' # - title: 'Kazakh'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kaz.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kaz.traineddata'
@ -570,6 +621,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Central Khmer' # - title: 'Central Khmer'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/khm.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/khm.traineddata'
@ -581,6 +633,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Kirghiz; Kyrgyz' # - title: 'Kirghiz; Kyrgyz'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kir.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kir.traineddata'
@ -592,6 +645,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Korean' # - title: 'Korean'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kor.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kor.traineddata'
@ -603,6 +657,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Kurdish' # - title: 'Kurdish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kur.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kur.traineddata'
@ -614,6 +669,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Lao' # - title: 'Lao'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lao.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lao.traineddata'
@ -625,6 +681,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Latin' # - title: 'Latin'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lat.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lat.traineddata'
@ -636,6 +693,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Latvian' # - title: 'Latvian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lav.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lav.traineddata'
@ -647,6 +705,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Lithuanian' # - title: 'Lithuanian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lit.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lit.traineddata'
@ -658,6 +717,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Malayalam' # - title: 'Malayalam'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mal.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mal.traineddata'
@ -669,6 +729,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Marathi' # - title: 'Marathi'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mar.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mar.traineddata'
@ -680,6 +741,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Macedonian' # - title: 'Macedonian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mkd.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mkd.traineddata'
@ -691,6 +753,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Maltese' # - title: 'Maltese'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mlt.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mlt.traineddata'
@ -702,6 +765,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Malay' # - title: 'Malay'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/msa.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/msa.traineddata'
@ -713,6 +777,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Burmese' # - title: 'Burmese'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mya.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mya.traineddata'
@ -724,6 +789,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Nepali' # - title: 'Nepali'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nep.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nep.traineddata'
@ -735,6 +801,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Dutch; Flemish' # - title: 'Dutch; Flemish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nld.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nld.traineddata'
@ -746,6 +813,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Norwegian' # - title: 'Norwegian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nor.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nor.traineddata'
@ -757,6 +825,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Oriya' # - title: 'Oriya'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ori.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ori.traineddata'
@ -768,6 +837,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Panjabi; Punjabi' # - title: 'Panjabi; Punjabi'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pan.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pan.traineddata'
@ -779,6 +849,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Polish' # - title: 'Polish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pol.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pol.traineddata'
@ -790,6 +861,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Portuguese' - title: 'Portuguese'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/por.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/por.traineddata'
@ -801,6 +873,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Pushto; Pashto' # - title: 'Pushto; Pashto'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pus.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pus.traineddata'
@ -812,6 +885,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Romanian; Moldavian; Moldovan' # - title: 'Romanian; Moldavian; Moldovan'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ron.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ron.traineddata'
@ -823,6 +897,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Russian' - title: 'Russian'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/rus.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/rus.traineddata'
@ -834,6 +909,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Sanskrit' # - title: 'Sanskrit'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/san.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/san.traineddata'
@ -845,6 +921,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Sinhala; Sinhalese' # - title: 'Sinhala; Sinhalese'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sin.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sin.traineddata'
@ -856,6 +933,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Slovak' # - title: 'Slovak'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slk.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slk.traineddata'
@ -867,6 +945,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Slovenian' # - title: 'Slovenian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slv.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slv.traineddata'
@ -878,6 +957,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
- title: 'Spanish; Castilian' - title: 'Spanish; Castilian'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa.traineddata'
@ -889,6 +969,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
- title: 'Spanish; Castilian - Old' - title: 'Spanish; Castilian - Old'
description: '' description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa_old.traineddata' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa_old.traineddata'
@ -900,6 +981,7 @@
compatible_service_versions: compatible_service_versions:
- '0.1.0' - '0.1.0'
- '0.1.1' - '0.1.1'
- '0.1.2'
# - title: 'Albanian' # - title: 'Albanian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sqi.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sqi.traineddata'
@ -911,6 +993,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Serbian' # - title: 'Serbian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp.traineddata'
@ -922,6 +1005,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Serbian - Latin' # - title: 'Serbian - Latin'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp_latn.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp_latn.traineddata'
@ -933,6 +1017,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Swahili' # - title: 'Swahili'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swa.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swa.traineddata'
@ -944,6 +1029,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Swedish' # - title: 'Swedish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swe.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swe.traineddata'
@ -955,6 +1041,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Syriac' # - title: 'Syriac'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/syr.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/syr.traineddata'
@ -966,6 +1053,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Tamil' # - title: 'Tamil'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tam.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tam.traineddata'
@ -977,6 +1065,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Telugu' # - title: 'Telugu'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tel.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tel.traineddata'
@ -988,6 +1077,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Tajik' # - title: 'Tajik'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgk.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgk.traineddata'
@ -999,6 +1089,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Tagalog' # - title: 'Tagalog'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgl.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgl.traineddata'
@ -1010,6 +1101,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Thai' # - title: 'Thai'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tha.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tha.traineddata'
@ -1021,6 +1113,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Tigrinya' # - title: 'Tigrinya'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tir.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tir.traineddata'
@ -1032,6 +1125,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Turkish' # - title: 'Turkish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tur.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tur.traineddata'
@ -1043,6 +1137,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Uighur; Uyghur' # - title: 'Uighur; Uyghur'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uig.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uig.traineddata'
@ -1054,6 +1149,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Ukrainian' # - title: 'Ukrainian'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ukr.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ukr.traineddata'
@ -1065,6 +1161,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Urdu' # - title: 'Urdu'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/urd.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/urd.traineddata'
@ -1076,6 +1173,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Uzbek' # - title: 'Uzbek'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb.traineddata'
@ -1087,6 +1185,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Uzbek - Cyrillic' # - title: 'Uzbek - Cyrillic'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb_cyrl.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb_cyrl.traineddata'
@ -1098,6 +1197,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Vietnamese' # - title: 'Vietnamese'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/vie.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/vie.traineddata'
@ -1109,6 +1209,7 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'
# - title: 'Yiddish' # - title: 'Yiddish'
# description: '' # description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/yid.traineddata' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/yid.traineddata'
@ -1120,3 +1221,4 @@
# compatible_service_versions: # compatible_service_versions:
# - '0.1.0' # - '0.1.0'
# - '0.1.1' # - '0.1.1'
# - '0.1.2'

View File

@ -10,7 +10,7 @@ file-setup-pipeline:
tesseract-ocr-pipeline: tesseract-ocr-pipeline:
name: 'Tesseract OCR Pipeline' name: 'Tesseract OCR Pipeline'
publisher: 'Bielefeld University - CRC 1288 - INF' publisher: 'Bielefeld University - CRC 1288 - INF'
latest_version: '0.1.1' latest_version: '0.1.2'
versions: versions:
0.1.0: 0.1.0:
methods: methods:
@ -23,6 +23,12 @@ tesseract-ocr-pipeline:
- 'ocropus_nlbin_threshold' - 'ocropus_nlbin_threshold'
publishing_year: 2022 publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1' url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1'
0.1.2:
methods:
- 'binarization'
- 'ocropus_nlbin_threshold'
publishing_year: 2023
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.2'
transkribus-htr-pipeline: transkribus-htr-pipeline:
name: 'Transkribus HTR Pipeline' name: 'Transkribus HTR Pipeline'
publisher: 'Bielefeld University - CRC 1288 - INF' publisher: 'Bielefeld University - CRC 1288 - INF'
@ -53,8 +59,3 @@ spacy-nlp-pipeline:
- 'encoding_detection' - 'encoding_detection'
publishing_year: 2022 publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.1' url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.1'
0.1.2:
methods:
- 'encoding_detection'
publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.2'