Add tesseract-ocr-pipeline and transkribus-htr-pipeline v0.1.1

This commit is contained in:
Patrick Jentsch 2022-10-04 13:05:20 +02:00
parent b662a3a4d1
commit 5fb4329d5f
2 changed files with 109 additions and 2 deletions

View File

@ -8,6 +8,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Amharic'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/amh.traineddata'
@ -18,6 +19,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
- title: 'Arabic'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ara.traineddata'
@ -28,6 +30,7 @@
version: '4.1.0'
compatible_service_versions:
- '0.1.0'
- '0.1.1'
# - title: 'Assamese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/asm.traineddata'
@ -38,6 +41,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Azerbaijani'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze.traineddata'
@ -48,6 +52,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Azerbaijani - Cyrillic'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze_cyrl.traineddata'
@ -58,6 +63,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Belarusian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bel.traineddata'
@ -68,6 +74,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Bengali'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ben.traineddata'
@ -78,6 +85,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Tibetan'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bod.traineddata'
@ -88,6 +96,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Bosnian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bos.traineddata'
@ -98,6 +107,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Bulgarian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bul.traineddata'
@ -108,6 +118,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Catalan; Valencian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cat.traineddata'
@ -118,6 +129,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Cebuano'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ceb.traineddata'
@ -128,6 +140,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Czech'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ces.traineddata'
@ -138,6 +151,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Chinese - Simplified'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_sim.traineddata'
@ -148,6 +162,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
- title: 'Chinese - Traditional'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_tra.traineddata'
@ -158,6 +173,7 @@
version: '4.1.0'
compatible_service_versions:
- '0.1.0'
- '0.1.1'
# - title: 'Cherokee'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chr.traineddata'
@ -168,6 +184,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Welsh'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cym.traineddata'
@ -178,6 +195,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
- title: 'Danish'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dan.traineddata'
@ -188,6 +206,7 @@
version: '4.1.0'
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- title: 'German'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/deu.traineddata'
@ -198,6 +217,7 @@
version: '4.1.0'
compatible_service_versions:
- '0.1.0'
- '0.1.1'
# - title: 'Dzongkha'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dzo.traineddata'
@ -208,6 +228,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
- title: 'Greek, Modern (1453-)'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ell.traineddata'
@ -218,6 +239,7 @@
version: '4.1.0'
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- title: 'English'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eng.traineddata'
@ -228,6 +250,7 @@
version: '4.1.0'
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- title: 'English, Middle (1100-1500)'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/enm.traineddata'
@ -238,6 +261,7 @@
version: '4.1.0'
compatible_service_versions:
- '0.1.0'
- '0.1.1'
# - title: 'Esperanto'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/epo.traineddata'
@ -248,6 +272,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Estonian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/est.traineddata'
@ -258,6 +283,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Basque'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eus.traineddata'
@ -268,6 +294,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Persian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fas.traineddata'
@ -278,6 +305,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Finnish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fin.traineddata'
@ -288,6 +316,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
- title: 'French'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fra.traineddata'
@ -298,6 +327,7 @@
version: '4.1.0'
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- title: 'German Fraktur'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frk.traineddata'
@ -308,6 +338,7 @@
version: '4.1.0'
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- title: 'French, Middle (ca. 1400-1600)'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frm.traineddata'
@ -318,6 +349,7 @@
version: '4.1.0'
compatible_service_versions:
- '0.1.0'
- '0.1.1'
# - title: 'Irish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/gle.traineddata'
@ -328,6 +360,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Galician'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/glg.traineddata'
@ -338,6 +371,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
- title: 'Greek, Ancient (-1453)'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/grc.traineddata'
@ -348,6 +382,7 @@
version: '4.1.0'
compatible_service_versions:
- '0.1.0'
- '0.1.1'
# - title: 'Gujarati'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/guj.traineddata'
@ -358,6 +393,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Haitian; Haitian Creole'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hat.traineddata'
@ -368,6 +404,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Hebrew'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/heb.traineddata'
@ -378,6 +415,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Hindi'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hin.traineddata'
@ -388,6 +426,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Croatian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hrv.traineddata'
@ -398,6 +437,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Hungarian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hun.traineddata'
@ -408,6 +448,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Inuktitut'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/iku.traineddata'
@ -418,6 +459,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Indonesian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ind.traineddata'
@ -428,6 +470,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Icelandic'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/isl.traineddata'
@ -438,6 +481,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
- title: 'Italian'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita.traineddata'
@ -448,6 +492,7 @@
version: '4.1.0'
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- title: 'Italian - Old'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita_old.traineddata'
@ -458,6 +503,7 @@
version: '4.1.0'
compatible_service_versions:
- '0.1.0'
- '0.1.1'
# - title: 'Javanese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jav.traineddata'
@ -468,6 +514,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Japanese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jpn.traineddata'
@ -478,6 +525,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Kannada'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kan.traineddata'
@ -488,6 +536,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Georgian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat.traineddata'
@ -498,6 +547,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Georgian - Old'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat_old.traineddata'
@ -508,6 +558,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Kazakh'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kaz.traineddata'
@ -518,6 +569,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Central Khmer'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/khm.traineddata'
@ -528,6 +580,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Kirghiz; Kyrgyz'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kir.traineddata'
@ -538,6 +591,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Korean'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kor.traineddata'
@ -548,6 +602,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Kurdish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kur.traineddata'
@ -558,6 +613,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Lao'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lao.traineddata'
@ -568,6 +624,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Latin'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lat.traineddata'
@ -578,6 +635,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Latvian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lav.traineddata'
@ -588,6 +646,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Lithuanian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lit.traineddata'
@ -598,6 +657,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Malayalam'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mal.traineddata'
@ -608,6 +668,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Marathi'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mar.traineddata'
@ -618,6 +679,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Macedonian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mkd.traineddata'
@ -628,6 +690,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Maltese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mlt.traineddata'
@ -638,6 +701,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Malay'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/msa.traineddata'
@ -648,6 +712,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Burmese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mya.traineddata'
@ -658,6 +723,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Nepali'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nep.traineddata'
@ -668,6 +734,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Dutch; Flemish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nld.traineddata'
@ -678,6 +745,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Norwegian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nor.traineddata'
@ -688,6 +756,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Oriya'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ori.traineddata'
@ -698,6 +767,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Panjabi; Punjabi'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pan.traineddata'
@ -708,6 +778,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Polish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pol.traineddata'
@ -718,6 +789,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
- title: 'Portuguese'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/por.traineddata'
@ -728,6 +800,7 @@
version: '4.1.0'
compatible_service_versions:
- '0.1.0'
- '0.1.1'
# - title: 'Pushto; Pashto'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pus.traineddata'
@ -738,6 +811,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Romanian; Moldavian; Moldovan'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ron.traineddata'
@ -748,6 +822,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
- title: 'Russian'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/rus.traineddata'
@ -758,6 +833,7 @@
version: '4.1.0'
compatible_service_versions:
- '0.1.0'
- '0.1.1'
# - title: 'Sanskrit'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/san.traineddata'
@ -768,6 +844,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Sinhala; Sinhalese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sin.traineddata'
@ -778,6 +855,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Slovak'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slk.traineddata'
@ -788,6 +866,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Slovenian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slv.traineddata'
@ -798,6 +877,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
- title: 'Spanish; Castilian'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa.traineddata'
@ -808,6 +888,7 @@
version: '4.1.0'
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- title: 'Spanish; Castilian - Old'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa_old.traineddata'
@ -818,6 +899,7 @@
version: '4.1.0'
compatible_service_versions:
- '0.1.0'
- '0.1.1'
# - title: 'Albanian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sqi.traineddata'
@ -828,6 +910,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Serbian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp.traineddata'
@ -838,6 +921,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Serbian - Latin'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp_latn.traineddata'
@ -848,6 +932,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Swahili'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swa.traineddata'
@ -858,6 +943,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Swedish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swe.traineddata'
@ -868,6 +954,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Syriac'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/syr.traineddata'
@ -878,6 +965,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Tamil'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tam.traineddata'
@ -888,6 +976,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Telugu'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tel.traineddata'
@ -898,6 +987,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Tajik'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgk.traineddata'
@ -908,6 +998,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Tagalog'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgl.traineddata'
@ -918,6 +1009,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Thai'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tha.traineddata'
@ -928,6 +1020,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Tigrinya'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tir.traineddata'
@ -938,6 +1031,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Turkish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tur.traineddata'
@ -948,6 +1042,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Uighur; Uyghur'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uig.traineddata'
@ -958,6 +1053,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Ukrainian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ukr.traineddata'
@ -968,6 +1064,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Urdu'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/urd.traineddata'
@ -978,6 +1075,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Uzbek'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb.traineddata'
@ -988,6 +1086,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Uzbek - Cyrillic'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb_cyrl.traineddata'
@ -998,6 +1097,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Vietnamese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/vie.traineddata'
@ -1008,6 +1108,7 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - title: 'Yiddish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/yid.traineddata'
@ -1018,3 +1119,4 @@
# version: '4.1.0'
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'

View File

@ -10,7 +10,7 @@ file-setup-pipeline:
tesseract-ocr-pipeline:
name: 'Tesseract OCR Pipeline'
publisher: 'Bielefeld University - CRC 1288 - INF'
latest_version: '0.1.0'
latest_version: '0.1.1'
versions:
0.1.0:
methods:
@ -25,13 +25,18 @@ tesseract-ocr-pipeline:
transkribus-htr-pipeline:
name: 'Transkribus HTR Pipeline'
publisher: 'Bielefeld University - CRC 1288 - INF'
latest_version: '0.1.0'
latest_version: '0.1.1'
versions:
0.1.0:
methods:
- 'binarization'
publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/transkribus-htr-pipeline/-/releases/v0.1.0'
0.1.1:
methods:
- 'binarization'
publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/transkribus-htr-pipeline/-/releases/v0.1.1'
spacy-nlp-pipeline:
name: 'spaCy NLP Pipeline'
publisher: 'Bielefeld University - CRC 1288 - INF'