diff --git a/app/TesseractOCRModel.defaults.yml b/app/TesseractOCRModel.defaults.yml index 1d644fba..a6f703eb 100644 --- a/app/TesseractOCRModel.defaults.yml +++ b/app/TesseractOCRModel.defaults.yml @@ -2,6 +2,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/afr.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -15,6 +17,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/amh.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -28,6 +32,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ara.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -41,6 +47,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/asm.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -54,6 +62,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -67,6 +77,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze_cyrl.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -80,6 +92,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bel.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -93,6 +107,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ben.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -106,6 +122,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bod.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -119,6 +137,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bos.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -132,6 +152,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bul.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -145,6 +167,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cat.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -158,6 +182,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ceb.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -171,6 +197,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ces.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -184,6 +212,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_sim.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -197,6 +227,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_tra.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -210,6 +242,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chr.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -223,6 +257,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cym.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -236,6 +272,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dan.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -249,6 +287,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/deu.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -262,6 +302,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dzo.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -275,6 +317,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ell.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -288,6 +332,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eng.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -301,6 +347,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/enm.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -314,6 +362,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/epo.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -327,6 +377,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/est.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -340,6 +392,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eus.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -353,6 +407,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fas.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -366,6 +422,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fin.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -379,6 +437,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fra.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -392,6 +452,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frk.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -405,6 +467,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frm.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -418,6 +482,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/gle.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -431,6 +497,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/glg.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -444,6 +512,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/grc.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -457,6 +527,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/guj.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -470,6 +542,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hat.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -483,6 +557,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/heb.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -496,6 +572,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hin.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -509,6 +587,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hrv.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -522,6 +602,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hun.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -535,6 +617,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/iku.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -548,6 +632,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ind.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -561,6 +647,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/isl.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -574,6 +662,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -587,6 +677,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita_old.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -600,6 +692,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jav.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -613,6 +707,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jpn.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -626,6 +722,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kan.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -639,6 +737,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -652,6 +752,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat_old.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -665,6 +767,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kaz.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -678,6 +782,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/khm.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -691,6 +797,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kir.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -704,6 +812,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kor.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -717,6 +827,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kur.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -730,6 +842,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lao.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -743,6 +857,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lat.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -756,6 +872,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lav.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -769,6 +887,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lit.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -782,6 +902,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mal.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -795,6 +917,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mar.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -808,6 +932,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mkd.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -821,6 +947,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mlt.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -834,6 +962,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/msa.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -847,6 +977,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mya.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -860,6 +992,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nep.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -873,6 +1007,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nld.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -886,6 +1022,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nor.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -899,6 +1037,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ori.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -912,6 +1052,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pan.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -925,6 +1067,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pol.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -938,6 +1082,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/por.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -951,6 +1097,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pus.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -964,6 +1112,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ron.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -977,6 +1127,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/rus.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -990,6 +1142,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/san.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1003,6 +1157,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sin.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1016,6 +1172,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slk.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1029,6 +1187,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slv.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1042,6 +1202,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -1055,6 +1217,8 @@ description: '' url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa_old.traineddata' publisher: 'tesseract-ocr' + publisher_url: 'https://github.com/tesseract-ocr' + publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' publishing_year: 2021 version: '4.1.0' compatible_service_versions: @@ -1068,6 +1232,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sqi.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1081,6 +1247,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1094,6 +1262,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp_latn.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1107,6 +1277,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swa.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1120,6 +1292,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swe.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1133,6 +1307,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/syr.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1146,6 +1322,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tam.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1159,6 +1337,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tel.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1172,6 +1352,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgk.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1185,6 +1367,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgl.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1198,6 +1382,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tha.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1211,6 +1397,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tir.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1224,6 +1412,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tur.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1237,6 +1427,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uig.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1250,6 +1442,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ukr.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1263,6 +1457,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/urd.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1276,6 +1472,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1289,6 +1487,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb_cyrl.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1302,6 +1502,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/vie.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: @@ -1315,6 +1517,8 @@ # description: '' # url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/yid.traineddata' # publisher: 'tesseract-ocr' +# publisher_url: 'https://github.com/tesseract-ocr' +# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0' # publishing_year: 2021 # version: '4.1.0' # compatible_service_versions: diff --git a/app/TranskribusHTRModel.defaults.yml b/app/TranskribusHTRModel.defaults.yml deleted file mode 100644 index e69de29b..00000000 diff --git a/app/__init__.py b/app/__init__.py index 9db97d3f..304f937d 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -29,9 +29,9 @@ def create_app(config: Config = Config) -> Flask: ''' Creates an initialized Flask (WSGI Application) object. ''' app: Flask = Flask(__name__) app.config.from_object(config) + config.init_app(app) assets.init_app(app) - config.init_app(app) db.init_app(app) hashids.init_app(app) login.init_app(app) diff --git a/app/cli.py b/app/cli.py index 4bff77d3..64cf4fb7 100644 --- a/app/cli.py +++ b/app/cli.py @@ -1,7 +1,7 @@ from flask import current_app from flask_migrate import upgrade from . import db -from .models import Corpus, Job, Role, User, TesseractOCRModel +from .models import Corpus, Role, User, TesseractOCRModel, TranskribusHTRModel import click import os @@ -36,8 +36,10 @@ def register(app): Role.insert_defaults() current_app.logger.info('Insert/Update default users') User.insert_defaults() - current_app.logger.info('Insert/Update default tesseract ocr models') + current_app.logger.info('Insert/Update default TesseractOCRModels') TesseractOCRModel.insert_defaults() + current_app.logger.info('Insert/Update default TranskribusHTRModels') + TranskribusHTRModel.insert_defaults() @app.cli.group() def daemon(): diff --git a/app/daemon/job_utils.py b/app/daemon/job_utils.py index e56bafbc..d65fc3be 100644 --- a/app/daemon/job_utils.py +++ b/app/daemon/job_utils.py @@ -1,5 +1,11 @@ from app import db -from app.models import Job, JobResult, JobStatus, TesseractOCRModel +from app.models import ( + Job, + JobResult, + JobStatus, + TesseractOCRModel, + TranskribusHTRModel +) from datetime import datetime from flask import current_app from werkzeug.utils import secure_filename @@ -56,7 +62,8 @@ class CheckJobsMixin: if 'binarization' in job.service_args and job.service_args['binarization']: command += ' --binarize' elif job.service == 'transkribus-htr-pipeline': - command += f' -m {job.service_args["model"]}' + transkribus_htr_model = TranskribusHTRModel.query.get(job.service_args['model']) + command += f' -m {transkribus_htr_model.transkribus_model_id}' readcoop_username = current_app.config.get('NOPAQUE_READCOOP_USERNAME') command += f' --readcoop-username "{readcoop_username}"' readcoop_password = current_app.config.get('NOPAQUE_READCOOP_PASSWORD') diff --git a/app/models.py b/app/models.py index 1a857dd4..3c4bd94f 100644 --- a/app/models.py +++ b/app/models.py @@ -18,6 +18,10 @@ import xml.etree.ElementTree as ET import yaml +TRANSKRIBUS_HTR_MODELS = \ + json.loads(requests.get('https://transkribus.eu/TrpServer/rest/models/text').content)['trpModelMetadata'] # noqa + + class IntEnumColumn(db.TypeDecorator): impl = db.Integer @@ -187,6 +191,12 @@ class User(HashidMixin, UserMixin, db.Model): cascade='all, delete-orphan', lazy='dynamic' ) + transkribus_htr_models = db.relationship( + 'TranskribusHTRModel', + backref='user', + cascade='all, delete-orphan', + lazy='dynamic' + ) corpora = db.relationship( 'Corpus', backref='user', @@ -362,6 +372,8 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model): compatible_service_versions = db.Column(ContainerColumn(list, 255)) description = db.Column(db.String(255)) publisher = db.Column(db.String(128)) + publisher_url = db.Column(db.String(512)) + publishing_url = db.Column(db.String(512)) publishing_year = db.Column(db.Integer) shared = db.Column(db.Boolean, default=False) title = db.Column(db.String(64)) @@ -383,7 +395,10 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model): 'compatible_service_versions': self.compatible_service_versions, 'description': self.description, 'publisher': self.publisher, + 'publisher_url': self.publisher_url, + 'publishing_url': self.publishing_url, 'publishing_year': self.publishing_year, + 'shared': self.shared, 'title': self.title, **self.file_mixin_to_dict() } @@ -409,7 +424,10 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model): model.compatible_service_versions = m['compatible_service_versions'] model.description = m['description'] model.publisher = m['publisher'] + model.publisher_url = m['publisher_url'] + model.publishing_url = m['publishing_url'] model.publishing_year = m['publishing_year'] + model.shared = True model.title = m['title'] model.version = m['version'] continue @@ -417,6 +435,8 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model): compatible_service_versions=m['compatible_service_versions'], description=m['description'], publisher=m['publisher'], + publisher_url=m['publisher_url'], + publishing_url=m['publishing_url'], publishing_year=m['publishing_year'], shared=True, title=m['title'], @@ -445,6 +465,58 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model): db.session.commit() +class TranskribusHTRModel(HashidMixin, db.Model): + __tablename__ = 'transkribus_htr_models' + # Primary key + id = db.Column(db.Integer, primary_key=True) + # Foreign keys + user_id = db.Column(db.Integer, db.ForeignKey('users.id')) + # Fields + shared = db.Column(db.Boolean, default=False) + transkribus_model_id = db.Column(db.Integer) + transkribus_name = db.Column(db.String(64)) + # Backrefs: user: User + + def to_dict(self, backrefs=False, relationships=False): + dict_tesseract_ocr_model = { + 'id': self.hashid, + 'user_id': self.user.hashid, + 'shared': self.shared, + 'transkribus_model_id': self.transkribus_model_id, + 'transkribus_name': self.transkribus_name + } + if backrefs: + dict_tesseract_ocr_model['user'] = \ + self.user.to_dict(backrefs=True, relationships=False) + if relationships: + pass + return dict_tesseract_ocr_model + + @staticmethod + def insert_defaults(): + user = User.query.filter_by(username='nopaque').first() + models = [ + m for m in TRANSKRIBUS_HTR_MODELS if True + and 'creator' in m and m['creator'] == 'Transkribus Team' + and 'docType' in m and m['docType'] == 'handwritten' + ] + for m in models: + model = TranskribusHTRModel.query.filter_by(transkribus_model_id=m['modelId']).first() # noqa + if model is not None: + model.shared = True + model.transkribus_model_id = m['modelId'] + model.transkribus_name = m['name'] + continue + model = TranskribusHTRModel( + shared=True, + transkribus_name=m['name'], + transkribus_model_id=m['modelId'], + user=user, + ) + db.session.add(model) + db.session.commit() + + class JobInput(FileMixin, HashidMixin, db.Model): __tablename__ = 'job_inputs' # Primary key diff --git a/app/services/__init__.py b/app/services/__init__.py index e41a895d..73c78b59 100644 --- a/app/services/__init__.py +++ b/app/services/__init__.py @@ -3,11 +3,10 @@ import os import yaml -services_file = os.path.join( - os.path.dirname(os.path.abspath(__file__)), 'services.yml') +services_file = \ + os.path.join(os.path.dirname(os.path.abspath(__file__)), 'services.yml') with open(services_file, 'r') as f: SERVICES = yaml.safe_load(f) - bp = Blueprint('services', __name__) from . import routes # noqa diff --git a/app/services/forms.py b/app/services/forms.py index 9d7bf45d..c35621db 100644 --- a/app/services/forms.py +++ b/app/services/forms.py @@ -1,4 +1,4 @@ -from app.models import TesseractOCRModel +from app.models import TesseractOCRModel, TranskribusHTRModel from flask_login import current_user from flask_wtf import FlaskForm from flask_wtf.file import FileField, FileRequired @@ -115,11 +115,10 @@ class AddTranskribusHTRPipelineJobForm(AddJobForm): if 'binarization' in service_info['methods']: if 'disabled' in self.binarization.render_kw: del self.binarization.render_kw['disabled'] + models = TranskribusHTRModel.query.filter_by(shared=True).all() + models += TranskribusHTRModel.query.filter_by(shared=False, user=current_user).all() self.model.choices = [('', 'Choose your option')] - self.model.choices += [ - ('37569', 'Tim Model'), - ('29539', 'UCL–University of Toronto #7') - ] + self.model.choices += [(x.hashid, x.transkribus_name) for x in models] self.model.default = '' self.version.choices = [(x, x) for x in service_manifest['versions']] self.version.data = version diff --git a/app/services/routes.py b/app/services/routes.py index feecf39a..638ff1cf 100644 --- a/app/services/routes.py +++ b/app/services/routes.py @@ -1,5 +1,12 @@ from app import db, hashids -from app.models import Job, JobInput, JobStatus +from app.models import ( + Job, + JobInput, + JobStatus, + TesseractOCRModel, + TRANSKRIBUS_HTR_MODELS, + TranskribusHTRModel +) from flask import ( abort, current_app, @@ -74,7 +81,7 @@ def file_setup_pipeline(): flash(f'Job "{job.title}" added', 'job') return make_response({'redirect_url': url_for('jobs.job', job_id=job.id)}, 201) # noqa return render_template( - f'services/{service.replace("-", "_")}.html.j2', + 'services/file_setup_pipeline.html.j2', form=form, title=service_manifest['name'] ) @@ -133,9 +140,11 @@ def tesseract_ocr_pipeline(): db.session.commit() flash(f'Job "{job.title}" added', 'job') return make_response({'redirect_url': url_for('jobs.job', job_id=job.id)}, 201) # noqa + tesseract_ocr_models = TesseractOCRModel.query.all() return render_template( - f'services/{service.replace("-", "_")}.html.j2', + 'services/tesseract_ocr_pipeline.html.j2', form=form, + tesseract_ocr_models=tesseract_ocr_models, title=service_manifest['name'] ) @@ -155,7 +164,7 @@ def transkribus_htr_pipeline(): if not form.validate(): return make_response(form.errors, 400) service_args = {} - service_args['model'] = form.model.data + service_args['model'] = hashids.decode(form.model.data) if form.binarization.data: service_args['binarization'] = True job = Job( @@ -195,10 +204,14 @@ def transkribus_htr_pipeline(): db.session.commit() flash(f'Job "{job.title}" added', 'job') return make_response({'redirect_url': url_for('jobs.job', job_id=job.id)}, 201) # noqa + transkribus_htr_models = TranskribusHTRModel.query.filter_by(shared=True).all() + transkribus_htr_models += TranskribusHTRModel.query.filter_by(shared=False, user=current_user).all() return render_template( - f'services/{service.replace("-", "_")}.html.j2', + f'services/transkribus_htr_pipeline.html.j2', form=form, - title=service_manifest['name'] + title=service_manifest['name'], + TRANSKRIBUS_HTR_MODELS=TRANSKRIBUS_HTR_MODELS, + transkribus_htr_models=transkribus_htr_models ) @@ -256,7 +269,7 @@ def spacy_nlp_pipeline(): flash(f'Job "{job.title}" added', 'job') return make_response({'redirect_url': url_for('jobs.job', job_id=job.id)}, 201) # noqa return render_template( - f'services/{service.replace("-", "_")}.html.j2', + 'services/spacy_nlp_pipeline.html.j2', form=form, title=service_manifest['name'] ) diff --git a/app/templates/services/tesseract_ocr_pipeline.html.j2 b/app/templates/services/tesseract_ocr_pipeline.html.j2 index 723e7758..129b74aa 100644 --- a/app/templates/services/tesseract_ocr_pipeline.html.j2 +++ b/app/templates/services/tesseract_ocr_pipeline.html.j2 @@ -53,7 +53,17 @@ {{ wtf.render_field(form.pdf, accept='application/pdf', placeholder='Choose a PDF file') }}