From 5fb4329d5fc88f0c5dceca140984ef634cd34b7f Mon Sep 17 00:00:00 2001 From: Patrick Jentsch Date: Tue, 4 Oct 2022 13:05:20 +0200 Subject: [PATCH] Add tesseract-ocr-pipeline and transkribus-htr-pipeline v0.1.1 --- app/TesseractOCRModel.defaults.yml | 102 +++++++++++++++++++++++++++++ app/services/services.yml | 9 ++- 2 files changed, 109 insertions(+), 2 deletions(-) diff --git a/app/TesseractOCRModel.defaults.yml b/app/TesseractOCRModel.defaults.yml index bac6a0ad..834b0ea5 100644 --- a/app/TesseractOCRModel.defaults.yml +++ b/app/TesseractOCRModel.defaults.yml @@ -8,6 +8,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Amharic' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/amh.traineddata' @@ -18,6 +19,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' - title: 'Arabic' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ara.traineddata' @@ -28,6 +30,7 @@ version: '4.1.0' compatible_service_versions: - '0.1.0' + - '0.1.1' # - title: 'Assamese' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/asm.traineddata' @@ -38,6 +41,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Azerbaijani' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze.traineddata' @@ -48,6 +52,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Azerbaijani - Cyrillic' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze_cyrl.traineddata' @@ -58,6 +63,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Belarusian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bel.traineddata' @@ -68,6 +74,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Bengali' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ben.traineddata' @@ -78,6 +85,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Tibetan' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bod.traineddata' @@ -88,6 +96,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Bosnian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bos.traineddata' @@ -98,6 +107,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Bulgarian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bul.traineddata' @@ -108,6 +118,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Catalan; Valencian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cat.traineddata' @@ -118,6 +129,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Cebuano' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ceb.traineddata' @@ -128,6 +140,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Czech' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ces.traineddata' @@ -138,6 +151,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Chinese - Simplified' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_sim.traineddata' @@ -148,6 +162,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' - title: 'Chinese - Traditional' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_tra.traineddata' @@ -158,6 +173,7 @@ version: '4.1.0' compatible_service_versions: - '0.1.0' + - '0.1.1' # - title: 'Cherokee' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chr.traineddata' @@ -168,6 +184,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Welsh' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cym.traineddata' @@ -178,6 +195,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' - title: 'Danish' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dan.traineddata' @@ -188,6 +206,7 @@ version: '4.1.0' compatible_service_versions: - '0.1.0' + - '0.1.1' - title: 'German' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/deu.traineddata' @@ -198,6 +217,7 @@ version: '4.1.0' compatible_service_versions: - '0.1.0' + - '0.1.1' # - title: 'Dzongkha' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dzo.traineddata' @@ -208,6 +228,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' - title: 'Greek, Modern (1453-)' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ell.traineddata' @@ -218,6 +239,7 @@ version: '4.1.0' compatible_service_versions: - '0.1.0' + - '0.1.1' - title: 'English' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eng.traineddata' @@ -228,6 +250,7 @@ version: '4.1.0' compatible_service_versions: - '0.1.0' + - '0.1.1' - title: 'English, Middle (1100-1500)' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/enm.traineddata' @@ -238,6 +261,7 @@ version: '4.1.0' compatible_service_versions: - '0.1.0' + - '0.1.1' # - title: 'Esperanto' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/epo.traineddata' @@ -248,6 +272,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Estonian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/est.traineddata' @@ -258,6 +283,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Basque' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eus.traineddata' @@ -268,6 +294,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Persian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fas.traineddata' @@ -278,6 +305,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Finnish' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fin.traineddata' @@ -288,6 +316,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' - title: 'French' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fra.traineddata' @@ -298,6 +327,7 @@ version: '4.1.0' compatible_service_versions: - '0.1.0' + - '0.1.1' - title: 'German Fraktur' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frk.traineddata' @@ -308,6 +338,7 @@ version: '4.1.0' compatible_service_versions: - '0.1.0' + - '0.1.1' - title: 'French, Middle (ca. 1400-1600)' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frm.traineddata' @@ -318,6 +349,7 @@ version: '4.1.0' compatible_service_versions: - '0.1.0' + - '0.1.1' # - title: 'Irish' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/gle.traineddata' @@ -328,6 +360,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Galician' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/glg.traineddata' @@ -338,6 +371,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' - title: 'Greek, Ancient (-1453)' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/grc.traineddata' @@ -348,6 +382,7 @@ version: '4.1.0' compatible_service_versions: - '0.1.0' + - '0.1.1' # - title: 'Gujarati' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/guj.traineddata' @@ -358,6 +393,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Haitian; Haitian Creole' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hat.traineddata' @@ -368,6 +404,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Hebrew' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/heb.traineddata' @@ -378,6 +415,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Hindi' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hin.traineddata' @@ -388,6 +426,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Croatian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hrv.traineddata' @@ -398,6 +437,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Hungarian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hun.traineddata' @@ -408,6 +448,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Inuktitut' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/iku.traineddata' @@ -418,6 +459,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Indonesian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ind.traineddata' @@ -428,6 +470,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Icelandic' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/isl.traineddata' @@ -438,6 +481,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' - title: 'Italian' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita.traineddata' @@ -448,6 +492,7 @@ version: '4.1.0' compatible_service_versions: - '0.1.0' + - '0.1.1' - title: 'Italian - Old' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita_old.traineddata' @@ -458,6 +503,7 @@ version: '4.1.0' compatible_service_versions: - '0.1.0' + - '0.1.1' # - title: 'Javanese' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jav.traineddata' @@ -468,6 +514,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Japanese' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jpn.traineddata' @@ -478,6 +525,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Kannada' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kan.traineddata' @@ -488,6 +536,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Georgian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat.traineddata' @@ -498,6 +547,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Georgian - Old' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat_old.traineddata' @@ -508,6 +558,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Kazakh' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kaz.traineddata' @@ -518,6 +569,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Central Khmer' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/khm.traineddata' @@ -528,6 +580,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Kirghiz; Kyrgyz' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kir.traineddata' @@ -538,6 +591,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Korean' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kor.traineddata' @@ -548,6 +602,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Kurdish' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kur.traineddata' @@ -558,6 +613,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Lao' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lao.traineddata' @@ -568,6 +624,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Latin' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lat.traineddata' @@ -578,6 +635,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Latvian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lav.traineddata' @@ -588,6 +646,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Lithuanian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lit.traineddata' @@ -598,6 +657,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Malayalam' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mal.traineddata' @@ -608,6 +668,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Marathi' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mar.traineddata' @@ -618,6 +679,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Macedonian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mkd.traineddata' @@ -628,6 +690,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Maltese' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mlt.traineddata' @@ -638,6 +701,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Malay' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/msa.traineddata' @@ -648,6 +712,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Burmese' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mya.traineddata' @@ -658,6 +723,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Nepali' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nep.traineddata' @@ -668,6 +734,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Dutch; Flemish' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nld.traineddata' @@ -678,6 +745,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Norwegian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nor.traineddata' @@ -688,6 +756,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Oriya' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ori.traineddata' @@ -698,6 +767,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Panjabi; Punjabi' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pan.traineddata' @@ -708,6 +778,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Polish' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pol.traineddata' @@ -718,6 +789,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' - title: 'Portuguese' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/por.traineddata' @@ -728,6 +800,7 @@ version: '4.1.0' compatible_service_versions: - '0.1.0' + - '0.1.1' # - title: 'Pushto; Pashto' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pus.traineddata' @@ -738,6 +811,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Romanian; Moldavian; Moldovan' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ron.traineddata' @@ -748,6 +822,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' - title: 'Russian' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/rus.traineddata' @@ -758,6 +833,7 @@ version: '4.1.0' compatible_service_versions: - '0.1.0' + - '0.1.1' # - title: 'Sanskrit' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/san.traineddata' @@ -768,6 +844,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Sinhala; Sinhalese' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sin.traineddata' @@ -778,6 +855,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Slovak' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slk.traineddata' @@ -788,6 +866,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Slovenian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slv.traineddata' @@ -798,6 +877,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' - title: 'Spanish; Castilian' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa.traineddata' @@ -808,6 +888,7 @@ version: '4.1.0' compatible_service_versions: - '0.1.0' + - '0.1.1' - title: 'Spanish; Castilian - Old' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa_old.traineddata' @@ -818,6 +899,7 @@ version: '4.1.0' compatible_service_versions: - '0.1.0' + - '0.1.1' # - title: 'Albanian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sqi.traineddata' @@ -828,6 +910,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Serbian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp.traineddata' @@ -838,6 +921,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Serbian - Latin' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp_latn.traineddata' @@ -848,6 +932,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Swahili' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swa.traineddata' @@ -858,6 +943,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Swedish' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swe.traineddata' @@ -868,6 +954,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Syriac' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/syr.traineddata' @@ -878,6 +965,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Tamil' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tam.traineddata' @@ -888,6 +976,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Telugu' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tel.traineddata' @@ -898,6 +987,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Tajik' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgk.traineddata' @@ -908,6 +998,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Tagalog' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgl.traineddata' @@ -918,6 +1009,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Thai' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tha.traineddata' @@ -928,6 +1020,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Tigrinya' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tir.traineddata' @@ -938,6 +1031,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Turkish' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tur.traineddata' @@ -948,6 +1042,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Uighur; Uyghur' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uig.traineddata' @@ -958,6 +1053,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Ukrainian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ukr.traineddata' @@ -968,6 +1064,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Urdu' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/urd.traineddata' @@ -978,6 +1075,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Uzbek' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb.traineddata' @@ -988,6 +1086,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Uzbek - Cyrillic' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb_cyrl.traineddata' @@ -998,6 +1097,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Vietnamese' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/vie.traineddata' @@ -1008,6 +1108,7 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' # - title: 'Yiddish' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/yid.traineddata' @@ -1018,3 +1119,4 @@ # version: '4.1.0' # compatible_service_versions: # - '0.1.0' +# - '0.1.1' diff --git a/app/services/services.yml b/app/services/services.yml index b7a49473..e8db1b33 100644 --- a/app/services/services.yml +++ b/app/services/services.yml @@ -10,7 +10,7 @@ file-setup-pipeline: tesseract-ocr-pipeline: name: 'Tesseract OCR Pipeline' publisher: 'Bielefeld University - CRC 1288 - INF' - latest_version: '0.1.0' + latest_version: '0.1.1' versions: 0.1.0: methods: @@ -25,13 +25,18 @@ tesseract-ocr-pipeline: transkribus-htr-pipeline: name: 'Transkribus HTR Pipeline' publisher: 'Bielefeld University - CRC 1288 - INF' - latest_version: '0.1.0' + latest_version: '0.1.1' versions: 0.1.0: methods: - 'binarization' publishing_year: 2022 url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/transkribus-htr-pipeline/-/releases/v0.1.0' + 0.1.1: + methods: + - 'binarization' + publishing_year: 2022 + url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/transkribus-htr-pipeline/-/releases/v0.1.1' spacy-nlp-pipeline: name: 'spaCy NLP Pipeline' publisher: 'Bielefeld University - CRC 1288 - INF'