diff --git a/app/TesseractOCRModel.defaults.yml b/app/TesseractOCRModel.defaults.yml index 0d067d54..1d644fba 100644 --- a/app/TesseractOCRModel.defaults.yml +++ b/app/TesseractOCRModel.defaults.yml @@ -10,6 +10,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Amharic' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/amh.traineddata' @@ -22,6 +23,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' - title: 'Arabic' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ara.traineddata' @@ -34,6 +36,7 @@ - '0.1.2' - '0.1.3' - '0.1.4' + - '0.1.5' # - title: 'Assamese' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/asm.traineddata' @@ -46,6 +49,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Azerbaijani' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze.traineddata' @@ -58,6 +62,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Azerbaijani - Cyrillic' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze_cyrl.traineddata' @@ -70,6 +75,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Belarusian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bel.traineddata' @@ -82,6 +88,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Bengali' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ben.traineddata' @@ -94,6 +101,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Tibetan' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bod.traineddata' @@ -106,6 +114,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Bosnian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bos.traineddata' @@ -118,6 +127,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Bulgarian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bul.traineddata' @@ -130,6 +140,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Catalan; Valencian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cat.traineddata' @@ -142,6 +153,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Cebuano' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ceb.traineddata' @@ -154,6 +166,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Czech' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ces.traineddata' @@ -166,6 +179,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Chinese - Simplified' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_sim.traineddata' @@ -178,6 +192,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' - title: 'Chinese - Traditional' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_tra.traineddata' @@ -190,6 +205,7 @@ - '0.1.2' - '0.1.3' - '0.1.4' + - '0.1.5' # - title: 'Cherokee' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chr.traineddata' @@ -202,6 +218,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Welsh' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cym.traineddata' @@ -214,6 +231,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' - title: 'Danish' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dan.traineddata' @@ -226,6 +244,7 @@ - '0.1.2' - '0.1.3' - '0.1.4' + - '0.1.5' - title: 'German' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/deu.traineddata' @@ -238,6 +257,7 @@ - '0.1.2' - '0.1.3' - '0.1.4' + - '0.1.5' # - title: 'Dzongkha' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dzo.traineddata' @@ -250,6 +270,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' - title: 'Greek, Modern (1453-)' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ell.traineddata' @@ -262,6 +283,7 @@ - '0.1.2' - '0.1.3' - '0.1.4' + - '0.1.5' - title: 'English' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eng.traineddata' @@ -274,6 +296,7 @@ - '0.1.2' - '0.1.3' - '0.1.4' + - '0.1.5' - title: 'English, Middle (1100-1500)' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/enm.traineddata' @@ -286,6 +309,7 @@ - '0.1.2' - '0.1.3' - '0.1.4' + - '0.1.5' # - title: 'Esperanto' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/epo.traineddata' @@ -298,6 +322,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Estonian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/est.traineddata' @@ -310,6 +335,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Basque' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eus.traineddata' @@ -322,6 +348,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Persian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fas.traineddata' @@ -334,6 +361,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Finnish' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fin.traineddata' @@ -346,6 +374,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' - title: 'French' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fra.traineddata' @@ -358,6 +387,7 @@ - '0.1.2' - '0.1.3' - '0.1.4' + - '0.1.5' - title: 'German Fraktur' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frk.traineddata' @@ -370,6 +400,7 @@ - '0.1.2' - '0.1.3' - '0.1.4' + - '0.1.5' - title: 'French, Middle (ca. 1400-1600)' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frm.traineddata' @@ -382,6 +413,7 @@ - '0.1.2' - '0.1.3' - '0.1.4' + - '0.1.5' # - title: 'Irish' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/gle.traineddata' @@ -394,6 +426,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Galician' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/glg.traineddata' @@ -406,6 +439,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' - title: 'Greek, Ancient (-1453)' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/grc.traineddata' @@ -418,6 +452,7 @@ - '0.1.2' - '0.1.3' - '0.1.4' + - '0.1.5' # - title: 'Gujarati' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/guj.traineddata' @@ -430,6 +465,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Haitian; Haitian Creole' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hat.traineddata' @@ -442,6 +478,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Hebrew' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/heb.traineddata' @@ -454,6 +491,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Hindi' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hin.traineddata' @@ -466,6 +504,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Croatian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hrv.traineddata' @@ -478,6 +517,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Hungarian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hun.traineddata' @@ -490,6 +530,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Inuktitut' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/iku.traineddata' @@ -502,6 +543,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Indonesian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ind.traineddata' @@ -514,6 +556,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Icelandic' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/isl.traineddata' @@ -526,6 +569,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' - title: 'Italian' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita.traineddata' @@ -538,6 +582,7 @@ - '0.1.2' - '0.1.3' - '0.1.4' + - '0.1.5' - title: 'Italian - Old' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita_old.traineddata' @@ -550,6 +595,7 @@ - '0.1.2' - '0.1.3' - '0.1.4' + - '0.1.5' # - title: 'Javanese' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jav.traineddata' @@ -562,6 +608,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Japanese' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jpn.traineddata' @@ -574,6 +621,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Kannada' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kan.traineddata' @@ -586,6 +634,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Georgian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat.traineddata' @@ -598,6 +647,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Georgian - Old' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat_old.traineddata' @@ -610,6 +660,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Kazakh' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kaz.traineddata' @@ -622,6 +673,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Central Khmer' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/khm.traineddata' @@ -634,6 +686,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Kirghiz; Kyrgyz' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kir.traineddata' @@ -646,6 +699,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Korean' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kor.traineddata' @@ -658,6 +712,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Kurdish' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kur.traineddata' @@ -670,6 +725,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Lao' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lao.traineddata' @@ -682,6 +738,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Latin' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lat.traineddata' @@ -694,6 +751,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Latvian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lav.traineddata' @@ -706,6 +764,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Lithuanian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lit.traineddata' @@ -718,6 +777,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Malayalam' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mal.traineddata' @@ -730,6 +790,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Marathi' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mar.traineddata' @@ -742,6 +803,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Macedonian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mkd.traineddata' @@ -754,6 +816,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Maltese' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mlt.traineddata' @@ -766,6 +829,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Malay' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/msa.traineddata' @@ -778,6 +842,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Burmese' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mya.traineddata' @@ -790,6 +855,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Nepali' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nep.traineddata' @@ -802,6 +868,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Dutch; Flemish' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nld.traineddata' @@ -814,6 +881,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Norwegian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nor.traineddata' @@ -826,6 +894,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Oriya' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ori.traineddata' @@ -838,6 +907,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Panjabi; Punjabi' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pan.traineddata' @@ -850,6 +920,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Polish' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pol.traineddata' @@ -862,6 +933,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' - title: 'Portuguese' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/por.traineddata' @@ -874,6 +946,7 @@ - '0.1.2' - '0.1.3' - '0.1.4' + - '0.1.5' # - title: 'Pushto; Pashto' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pus.traineddata' @@ -886,6 +959,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Romanian; Moldavian; Moldovan' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ron.traineddata' @@ -898,6 +972,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' - title: 'Russian' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/rus.traineddata' @@ -910,6 +985,7 @@ - '0.1.2' - '0.1.3' - '0.1.4' + - '0.1.5' # - title: 'Sanskrit' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/san.traineddata' @@ -922,6 +998,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Sinhala; Sinhalese' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sin.traineddata' @@ -934,6 +1011,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Slovak' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slk.traineddata' @@ -946,6 +1024,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Slovenian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slv.traineddata' @@ -958,6 +1037,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' - title: 'Spanish; Castilian' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa.traineddata' @@ -970,6 +1050,7 @@ - '0.1.2' - '0.1.3' - '0.1.4' + - '0.1.5' - title: 'Spanish; Castilian - Old' description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa_old.traineddata' @@ -982,6 +1063,7 @@ - '0.1.2' - '0.1.3' - '0.1.4' + - '0.1.5' # - title: 'Albanian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sqi.traineddata' @@ -994,6 +1076,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Serbian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp.traineddata' @@ -1006,6 +1089,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Serbian - Latin' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp_latn.traineddata' @@ -1018,6 +1102,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Swahili' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swa.traineddata' @@ -1030,6 +1115,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Swedish' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swe.traineddata' @@ -1042,6 +1128,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Syriac' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/syr.traineddata' @@ -1054,6 +1141,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Tamil' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tam.traineddata' @@ -1066,6 +1154,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Telugu' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tel.traineddata' @@ -1078,6 +1167,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Tajik' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgk.traineddata' @@ -1090,6 +1180,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Tagalog' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgl.traineddata' @@ -1102,6 +1193,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Thai' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tha.traineddata' @@ -1114,6 +1206,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Tigrinya' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tir.traineddata' @@ -1126,6 +1219,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Turkish' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tur.traineddata' @@ -1138,6 +1232,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Uighur; Uyghur' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uig.traineddata' @@ -1150,6 +1245,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Ukrainian' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ukr.traineddata' @@ -1162,6 +1258,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Urdu' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/urd.traineddata' @@ -1174,6 +1271,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Uzbek' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb.traineddata' @@ -1186,6 +1284,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Uzbek - Cyrillic' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb_cyrl.traineddata' @@ -1198,6 +1297,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Vietnamese' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/vie.traineddata' @@ -1210,6 +1310,7 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' # - title: 'Yiddish' # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/yid.traineddata' @@ -1222,3 +1323,4 @@ # - '0.1.2' # - '0.1.3' # - '0.1.4' +# - '0.1.5' diff --git a/app/services/forms.py b/app/services/forms.py index 5e184e35..c5a9c790 100644 --- a/app/services/forms.py +++ b/app/services/forms.py @@ -46,8 +46,12 @@ class AddTesseractOCRPipelineJobForm(AddJobForm): def validate_binarization(self, field): service_info = SERVICES['tesseract-ocr-pipeline']['versions'][self.version.data] - if field.data and 'binarization' not in service_info['methods']: - raise ValidationError('Binarization is not available') + if field.data: + if( + 'methods' not in service_info + or 'binarization' not in service_info['methods'] + ): + raise ValidationError('Binarization is not available') def validate_pdf(self, field): if field.data.mimetype != 'application/pdf': @@ -58,8 +62,13 @@ class AddTesseractOCRPipelineJobForm(AddJobForm): version = kwargs.pop('version', service_manifest['latest_version']) super().__init__(*args, **kwargs) service_info = service_manifest['versions'][version] - if 'binarization' not in service_info['methods']: - self.binarization.render_kw = {'disabled': True} + if self.binarization.render_kw is None: + self.binarization.render_kw = {} + self.binarization.render_kw['disabled'] = True + if 'methods' in service_info: + if 'binarization' in service_info['methods']: + if 'disabled' in self.binarization.render_kw: + del self.binarization.render_kw['disabled'] compatible_models = [ x for x in TesseractOCRModel.query.filter_by(shared=True).all() if version in x.compatible_service_versions @@ -83,8 +92,12 @@ class AddTranskribusHTRPipelineJobForm(AddJobForm): def validate_binarization(self, field): service_info = SERVICES['transkribus-htr-pipeline']['versions'][self.version.data] - if field.data and 'binarization' not in service_info['methods']: - raise ValidationError('Binarization is not available') + if field.data: + if( + 'methods' not in service_info + or 'binarization' not in service_info['methods'] + ): + raise ValidationError('Binarization is not available') def validate_pdf(self, field): if field.data.mimetype != 'application/pdf': @@ -95,8 +108,13 @@ class AddTranskribusHTRPipelineJobForm(AddJobForm): version = kwargs.pop('version', service_manifest['latest_version']) super().__init__(*args, **kwargs) service_info = service_manifest['versions'][version] - if 'binarization' not in service_info['methods']: - self.binarization.render_kw = {'disabled': True} + if self.binarization.render_kw is None: + self.binarization.render_kw = {} + self.binarization.render_kw['disabled'] = True + if 'methods' in service_info: + if 'binarization' in service_info['methods']: + if 'disabled' in self.binarization.render_kw: + del self.binarization.render_kw['disabled'] self.model.choices = [('', 'Choose your option')] self.model.choices += [ ('37569', 'Tim Model'), @@ -109,15 +127,18 @@ class AddTranskribusHTRPipelineJobForm(AddJobForm): class AddSpacyNLPPipelineJobForm(AddJobForm): - encoding_detection = BooleanField('Encoding detection') + encoding_detection = BooleanField('Encoding detection', render_kw={'disabled': True}) txt = FileField('File', validators=[FileRequired()]) model = SelectField('Model', validators=[DataRequired()]) def validate_encoding_detection(self, field): - service_manifest = SERVICES['spacy-nlp-pipeline'] - service_info = service_manifest['versions'][self.version.data] - if field.data and 'encoding_detection' not in service_info['methods']: - raise ValidationError('Encoding detection is not available!') + service_info = SERVICES['spacy-nlp-pipeline']['versions'][self.version.data] + if field.data: + if( + 'methods' not in service_info + or 'encoding_detection' not in service_info['methods'] + ): + raise ValidationError('Encoding detection is not available') def validate_txt(form, field): if field.data.mimetype != 'text/plain': @@ -128,8 +149,13 @@ class AddSpacyNLPPipelineJobForm(AddJobForm): version = kwargs.pop('version', service_manifest['latest_version']) super().__init__(*args, **kwargs) service_info = service_manifest['versions'][version] - if 'encoding_detection' not in service_info['methods']: - self.encoding_detection.render_kw = {'disabled': True} + if self.encoding_detection.render_kw is None: + self.encoding_detection.render_kw = {} + self.encoding_detection.render_kw['disabled'] = True + if 'methods' in service_info: + if 'encoding_detection' in service_info['methods']: + if 'disabled' in self.encoding_detection.render_kw: + del self.encoding_detection.render_kw['disabled'] self.model.choices = [('', 'Choose your option')] self.model.choices += [(x, y) for x, y in service_info['models'].items()] # noqa self.model.default = '' diff --git a/app/services/services.yml b/app/services/services.yml index c26f7fb7..b4804ff3 100644 --- a/app/services/services.yml +++ b/app/services/services.yml @@ -10,33 +10,28 @@ file-setup-pipeline: tesseract-ocr-pipeline: name: 'Tesseract OCR Pipeline' publisher: 'Bielefeld University - CRC 1288 - INF' - latest_version: '0.1.4' + latest_version: '0.1.5' versions: 0.1.0: - methods: - - 'binarization' publishing_year: 2022 url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.0' 0.1.1: - methods: - - 'binarization' publishing_year: 2022 url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1' 0.1.2: - methods: - - 'binarization' publishing_year: 2022 url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.2' 0.1.3: - methods: - - 'binarization' publishing_year: 2022 url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.3' 0.1.4: + publishing_year: 2022 + url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.4' + 0.1.5: methods: - 'binarization' publishing_year: 2022 - url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.4' + url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.5' transkribus-htr-pipeline: name: 'Transkribus HTR Pipeline' publisher: 'Bielefeld University - CRC 1288 - INF'