Integrate TranskribusHTRModels

This commit is contained in:
Patrick Jentsch 2022-04-22 15:27:52 +02:00
parent 9d4001f469
commit ccdd0d3faa
13 changed files with 409 additions and 25 deletions

View File

@ -2,6 +2,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/afr.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -15,6 +17,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/amh.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -28,6 +32,8 @@
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ara.traineddata'
publisher: 'tesseract-ocr'
publisher_url: 'https://github.com/tesseract-ocr'
publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
publishing_year: 2021
version: '4.1.0'
compatible_service_versions:
@ -41,6 +47,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/asm.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -54,6 +62,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -67,6 +77,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze_cyrl.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -80,6 +92,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bel.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -93,6 +107,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ben.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -106,6 +122,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bod.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -119,6 +137,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bos.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -132,6 +152,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bul.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -145,6 +167,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cat.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -158,6 +182,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ceb.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -171,6 +197,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ces.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -184,6 +212,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_sim.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -197,6 +227,8 @@
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_tra.traineddata'
publisher: 'tesseract-ocr'
publisher_url: 'https://github.com/tesseract-ocr'
publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
publishing_year: 2021
version: '4.1.0'
compatible_service_versions:
@ -210,6 +242,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chr.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -223,6 +257,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cym.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -236,6 +272,8 @@
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dan.traineddata'
publisher: 'tesseract-ocr'
publisher_url: 'https://github.com/tesseract-ocr'
publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
publishing_year: 2021
version: '4.1.0'
compatible_service_versions:
@ -249,6 +287,8 @@
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/deu.traineddata'
publisher: 'tesseract-ocr'
publisher_url: 'https://github.com/tesseract-ocr'
publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
publishing_year: 2021
version: '4.1.0'
compatible_service_versions:
@ -262,6 +302,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dzo.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -275,6 +317,8 @@
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ell.traineddata'
publisher: 'tesseract-ocr'
publisher_url: 'https://github.com/tesseract-ocr'
publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
publishing_year: 2021
version: '4.1.0'
compatible_service_versions:
@ -288,6 +332,8 @@
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eng.traineddata'
publisher: 'tesseract-ocr'
publisher_url: 'https://github.com/tesseract-ocr'
publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
publishing_year: 2021
version: '4.1.0'
compatible_service_versions:
@ -301,6 +347,8 @@
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/enm.traineddata'
publisher: 'tesseract-ocr'
publisher_url: 'https://github.com/tesseract-ocr'
publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
publishing_year: 2021
version: '4.1.0'
compatible_service_versions:
@ -314,6 +362,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/epo.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -327,6 +377,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/est.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -340,6 +392,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eus.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -353,6 +407,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fas.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -366,6 +422,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fin.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -379,6 +437,8 @@
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fra.traineddata'
publisher: 'tesseract-ocr'
publisher_url: 'https://github.com/tesseract-ocr'
publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
publishing_year: 2021
version: '4.1.0'
compatible_service_versions:
@ -392,6 +452,8 @@
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frk.traineddata'
publisher: 'tesseract-ocr'
publisher_url: 'https://github.com/tesseract-ocr'
publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
publishing_year: 2021
version: '4.1.0'
compatible_service_versions:
@ -405,6 +467,8 @@
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frm.traineddata'
publisher: 'tesseract-ocr'
publisher_url: 'https://github.com/tesseract-ocr'
publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
publishing_year: 2021
version: '4.1.0'
compatible_service_versions:
@ -418,6 +482,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/gle.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -431,6 +497,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/glg.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -444,6 +512,8 @@
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/grc.traineddata'
publisher: 'tesseract-ocr'
publisher_url: 'https://github.com/tesseract-ocr'
publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
publishing_year: 2021
version: '4.1.0'
compatible_service_versions:
@ -457,6 +527,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/guj.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -470,6 +542,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hat.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -483,6 +557,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/heb.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -496,6 +572,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hin.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -509,6 +587,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hrv.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -522,6 +602,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hun.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -535,6 +617,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/iku.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -548,6 +632,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ind.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -561,6 +647,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/isl.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -574,6 +662,8 @@
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita.traineddata'
publisher: 'tesseract-ocr'
publisher_url: 'https://github.com/tesseract-ocr'
publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
publishing_year: 2021
version: '4.1.0'
compatible_service_versions:
@ -587,6 +677,8 @@
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita_old.traineddata'
publisher: 'tesseract-ocr'
publisher_url: 'https://github.com/tesseract-ocr'
publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
publishing_year: 2021
version: '4.1.0'
compatible_service_versions:
@ -600,6 +692,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jav.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -613,6 +707,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jpn.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -626,6 +722,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kan.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -639,6 +737,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -652,6 +752,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat_old.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -665,6 +767,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kaz.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -678,6 +782,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/khm.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -691,6 +797,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kir.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -704,6 +812,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kor.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -717,6 +827,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kur.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -730,6 +842,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lao.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -743,6 +857,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lat.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -756,6 +872,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lav.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -769,6 +887,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lit.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -782,6 +902,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mal.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -795,6 +917,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mar.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -808,6 +932,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mkd.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -821,6 +947,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mlt.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -834,6 +962,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/msa.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -847,6 +977,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mya.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -860,6 +992,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nep.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -873,6 +1007,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nld.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -886,6 +1022,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nor.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -899,6 +1037,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ori.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -912,6 +1052,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pan.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -925,6 +1067,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pol.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -938,6 +1082,8 @@
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/por.traineddata'
publisher: 'tesseract-ocr'
publisher_url: 'https://github.com/tesseract-ocr'
publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
publishing_year: 2021
version: '4.1.0'
compatible_service_versions:
@ -951,6 +1097,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pus.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -964,6 +1112,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ron.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -977,6 +1127,8 @@
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/rus.traineddata'
publisher: 'tesseract-ocr'
publisher_url: 'https://github.com/tesseract-ocr'
publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
publishing_year: 2021
version: '4.1.0'
compatible_service_versions:
@ -990,6 +1142,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/san.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -1003,6 +1157,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sin.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -1016,6 +1172,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slk.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -1029,6 +1187,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slv.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -1042,6 +1202,8 @@
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa.traineddata'
publisher: 'tesseract-ocr'
publisher_url: 'https://github.com/tesseract-ocr'
publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
publishing_year: 2021
version: '4.1.0'
compatible_service_versions:
@ -1055,6 +1217,8 @@
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa_old.traineddata'
publisher: 'tesseract-ocr'
publisher_url: 'https://github.com/tesseract-ocr'
publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
publishing_year: 2021
version: '4.1.0'
compatible_service_versions:
@ -1068,6 +1232,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sqi.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -1081,6 +1247,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -1094,6 +1262,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp_latn.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -1107,6 +1277,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swa.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -1120,6 +1292,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swe.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -1133,6 +1307,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/syr.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -1146,6 +1322,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tam.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -1159,6 +1337,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tel.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -1172,6 +1352,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgk.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -1185,6 +1367,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgl.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -1198,6 +1382,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tha.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -1211,6 +1397,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tir.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -1224,6 +1412,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tur.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -1237,6 +1427,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uig.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -1250,6 +1442,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ukr.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -1263,6 +1457,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/urd.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -1276,6 +1472,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -1289,6 +1487,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb_cyrl.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -1302,6 +1502,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/vie.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:
@ -1315,6 +1517,8 @@
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/yid.traineddata'
# publisher: 'tesseract-ocr'
# publisher_url: 'https://github.com/tesseract-ocr'
# publishing_url: 'https://github.com/tesseract-ocr/tessdata/releases/tag/4.1.0'
# publishing_year: 2021
# version: '4.1.0'
# compatible_service_versions:

View File

@ -29,9 +29,9 @@ def create_app(config: Config = Config) -> Flask:
''' Creates an initialized Flask (WSGI Application) object. '''
app: Flask = Flask(__name__)
app.config.from_object(config)
config.init_app(app)
assets.init_app(app)
config.init_app(app)
db.init_app(app)
hashids.init_app(app)
login.init_app(app)

View File

@ -1,7 +1,7 @@
from flask import current_app
from flask_migrate import upgrade
from . import db
from .models import Corpus, Job, Role, User, TesseractOCRModel
from .models import Corpus, Role, User, TesseractOCRModel, TranskribusHTRModel
import click
import os
@ -36,8 +36,10 @@ def register(app):
Role.insert_defaults()
current_app.logger.info('Insert/Update default users')
User.insert_defaults()
current_app.logger.info('Insert/Update default tesseract ocr models')
current_app.logger.info('Insert/Update default TesseractOCRModels')
TesseractOCRModel.insert_defaults()
current_app.logger.info('Insert/Update default TranskribusHTRModels')
TranskribusHTRModel.insert_defaults()
@app.cli.group()
def daemon():

View File

@ -1,5 +1,11 @@
from app import db
from app.models import Job, JobResult, JobStatus, TesseractOCRModel
from app.models import (
Job,
JobResult,
JobStatus,
TesseractOCRModel,
TranskribusHTRModel
)
from datetime import datetime
from flask import current_app
from werkzeug.utils import secure_filename
@ -56,7 +62,8 @@ class CheckJobsMixin:
if 'binarization' in job.service_args and job.service_args['binarization']:
command += ' --binarize'
elif job.service == 'transkribus-htr-pipeline':
command += f' -m {job.service_args["model"]}'
transkribus_htr_model = TranskribusHTRModel.query.get(job.service_args['model'])
command += f' -m {transkribus_htr_model.transkribus_model_id}'
readcoop_username = current_app.config.get('NOPAQUE_READCOOP_USERNAME')
command += f' --readcoop-username "{readcoop_username}"'
readcoop_password = current_app.config.get('NOPAQUE_READCOOP_PASSWORD')

View File

@ -18,6 +18,10 @@ import xml.etree.ElementTree as ET
import yaml
# Metadata of the text recognition models published by Transkribus, taken from
# the 'trpModelMetadata' entry of the REST response.
# NOTE(review): this request runs at import time, so the module cannot be
# imported while the Transkribus server is unreachable. The explicit timeout
# at least keeps an unresponsive server from blocking startup indefinitely
# (requests waits forever when no timeout is given).
TRANSKRIBUS_HTR_MODELS = json.loads(
    requests.get(
        'https://transkribus.eu/TrpServer/rest/models/text',
        timeout=30
    ).content
)['trpModelMetadata']
class IntEnumColumn(db.TypeDecorator):
impl = db.Integer
@ -187,6 +191,12 @@ class User(HashidMixin, UserMixin, db.Model):
cascade='all, delete-orphan',
lazy='dynamic'
)
transkribus_htr_models = db.relationship(
'TranskribusHTRModel',
backref='user',
cascade='all, delete-orphan',
lazy='dynamic'
)
corpora = db.relationship(
'Corpus',
backref='user',
@ -362,6 +372,8 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model):
compatible_service_versions = db.Column(ContainerColumn(list, 255))
description = db.Column(db.String(255))
publisher = db.Column(db.String(128))
publisher_url = db.Column(db.String(512))
publishing_url = db.Column(db.String(512))
publishing_year = db.Column(db.Integer)
shared = db.Column(db.Boolean, default=False)
title = db.Column(db.String(64))
@ -383,7 +395,10 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model):
'compatible_service_versions': self.compatible_service_versions,
'description': self.description,
'publisher': self.publisher,
'publisher_url': self.publisher_url,
'publishing_url': self.publishing_url,
'publishing_year': self.publishing_year,
'shared': self.shared,
'title': self.title,
**self.file_mixin_to_dict()
}
@ -409,7 +424,10 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model):
model.compatible_service_versions = m['compatible_service_versions']
model.description = m['description']
model.publisher = m['publisher']
model.publisher_url = m['publisher_url']
model.publishing_url = m['publishing_url']
model.publishing_year = m['publishing_year']
model.shared = True
model.title = m['title']
model.version = m['version']
continue
@ -417,6 +435,8 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model):
compatible_service_versions=m['compatible_service_versions'],
description=m['description'],
publisher=m['publisher'],
publisher_url=m['publisher_url'],
publishing_url=m['publishing_url'],
publishing_year=m['publishing_year'],
shared=True,
title=m['title'],
@ -445,6 +465,58 @@ class TesseractOCRModel(FileMixin, HashidMixin, db.Model):
db.session.commit()
class TranskribusHTRModel(HashidMixin, db.Model):
    '''
    Database record that references a Transkribus HTR model by its
    Transkribus model id and name.
    '''
    __tablename__ = 'transkribus_htr_models'
    # Primary key
    id = db.Column(db.Integer, primary_key=True)
    # Foreign keys
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'))
    # Fields
    shared = db.Column(db.Boolean, default=False)
    transkribus_model_id = db.Column(db.Integer)
    transkribus_name = db.Column(db.String(64))
    # Backrefs: user: User

    def to_dict(self, backrefs=False, relationships=False):
        '''
        Returns a dict representation of this record.

        With backrefs=True the dict of the owning user is included under the
        'user' key. The relationships flag is accepted but adds nothing.
        '''
        serialized = {
            'id': self.hashid,
            'user_id': self.user.hashid,
            'shared': self.shared,
            'transkribus_model_id': self.transkribus_model_id,
            'transkribus_name': self.transkribus_name
        }
        if backrefs:
            serialized['user'] = self.user.to_dict(
                backrefs=True,
                relationships=False
            )
        return serialized

    @staticmethod
    def insert_defaults():
        '''
        Inserts or updates one shared record per handwritten model created by
        the 'Transkribus Team', as listed in TRANSKRIBUS_HTR_MODELS.

        New records are assigned to the user named 'nopaque'.
        '''
        owner = User.query.filter_by(username='nopaque').first()
        # Select the relevant metadata entries before touching the database.
        defaults = []
        for metadata in TRANSKRIBUS_HTR_MODELS:
            if 'creator' not in metadata or metadata['creator'] != 'Transkribus Team':  # noqa
                continue
            if 'docType' not in metadata or metadata['docType'] != 'handwritten':  # noqa
                continue
            defaults.append(metadata)
        for metadata in defaults:
            record = TranskribusHTRModel.query.filter_by(
                transkribus_model_id=metadata['modelId']
            ).first()
            if record is None:
                record = TranskribusHTRModel(
                    shared=True,
                    transkribus_name=metadata['name'],
                    transkribus_model_id=metadata['modelId'],
                    user=owner,
                )
                db.session.add(record)
            else:
                # Already known: refresh the stored values.
                record.shared = True
                record.transkribus_model_id = metadata['modelId']
                record.transkribus_name = metadata['name']
        db.session.commit()
class JobInput(FileMixin, HashidMixin, db.Model):
__tablename__ = 'job_inputs'
# Primary key

View File

@ -3,11 +3,10 @@ import os
import yaml
services_file = os.path.join(
os.path.dirname(os.path.abspath(__file__)), 'services.yml')
services_file = \
os.path.join(os.path.dirname(os.path.abspath(__file__)), 'services.yml')
with open(services_file, 'r') as f:
SERVICES = yaml.safe_load(f)
bp = Blueprint('services', __name__)
from . import routes # noqa

View File

@ -1,4 +1,4 @@
from app.models import TesseractOCRModel
from app.models import TesseractOCRModel, TranskribusHTRModel
from flask_login import current_user
from flask_wtf import FlaskForm
from flask_wtf.file import FileField, FileRequired
@ -115,11 +115,10 @@ class AddTranskribusHTRPipelineJobForm(AddJobForm):
if 'binarization' in service_info['methods']:
if 'disabled' in self.binarization.render_kw:
del self.binarization.render_kw['disabled']
models = TranskribusHTRModel.query.filter_by(shared=True).all()
models += TranskribusHTRModel.query.filter_by(shared=False, user=current_user).all()
self.model.choices = [('', 'Choose your option')]
self.model.choices += [
('37569', 'Tim Model'),
('29539', 'UCLUniversity of Toronto #7')
]
self.model.choices += [(x.hashid, x.transkribus_name) for x in models]
self.model.default = ''
self.version.choices = [(x, x) for x in service_manifest['versions']]
self.version.data = version

View File

@ -1,5 +1,12 @@
from app import db, hashids
from app.models import Job, JobInput, JobStatus
from app.models import (
Job,
JobInput,
JobStatus,
TesseractOCRModel,
TRANSKRIBUS_HTR_MODELS,
TranskribusHTRModel
)
from flask import (
abort,
current_app,
@ -74,7 +81,7 @@ def file_setup_pipeline():
flash(f'Job "{job.title}" added', 'job')
return make_response({'redirect_url': url_for('jobs.job', job_id=job.id)}, 201) # noqa
return render_template(
f'services/{service.replace("-", "_")}.html.j2',
'services/file_setup_pipeline.html.j2',
form=form,
title=service_manifest['name']
)
@ -133,9 +140,11 @@ def tesseract_ocr_pipeline():
db.session.commit()
flash(f'Job "{job.title}" added', 'job')
return make_response({'redirect_url': url_for('jobs.job', job_id=job.id)}, 201) # noqa
tesseract_ocr_models = TesseractOCRModel.query.all()
return render_template(
f'services/{service.replace("-", "_")}.html.j2',
'services/tesseract_ocr_pipeline.html.j2',
form=form,
tesseract_ocr_models=tesseract_ocr_models,
title=service_manifest['name']
)
@ -155,7 +164,7 @@ def transkribus_htr_pipeline():
if not form.validate():
return make_response(form.errors, 400)
service_args = {}
service_args['model'] = form.model.data
service_args['model'] = hashids.decode(form.model.data)
if form.binarization.data:
service_args['binarization'] = True
job = Job(
@ -195,10 +204,14 @@ def transkribus_htr_pipeline():
db.session.commit()
flash(f'Job "{job.title}" added', 'job')
return make_response({'redirect_url': url_for('jobs.job', job_id=job.id)}, 201) # noqa
transkribus_htr_models = TranskribusHTRModel.query.filter_by(shared=True).all()
transkribus_htr_models += TranskribusHTRModel.query.filter_by(shared=False, user=current_user).all()
return render_template(
f'services/{service.replace("-", "_")}.html.j2',
f'services/transkribus_htr_pipeline.html.j2',
form=form,
title=service_manifest['name']
title=service_manifest['name'],
TRANSKRIBUS_HTR_MODELS=TRANSKRIBUS_HTR_MODELS,
transkribus_htr_models=transkribus_htr_models
)
@ -256,7 +269,7 @@ def spacy_nlp_pipeline():
flash(f'Job "{job.title}" added', 'job')
return make_response({'redirect_url': url_for('jobs.job', job_id=job.id)}, 201) # noqa
return render_template(
f'services/{service.replace("-", "_")}.html.j2',
'services/spacy_nlp_pipeline.html.j2',
form=form,
title=service_manifest['name']
)

View File

@ -53,7 +53,17 @@
{{ wtf.render_field(form.pdf, accept='application/pdf', placeholder='Choose a PDF file') }}
</div>
<div class="col s12 l4">
{{ wtf.render_field(form.model, material_icon='language') }}
<div class="input-field">
<i class="material-icons prefix">language</i>
{{ form.model() }}
{{ form.model.label }}
<span class="helper-text">
<a class="modal-trigger" href="#models-modal">More details about models</a>
</span>
{% for error in form.model.errors %}
<span class="helper-text error-color-text">{{ error }}</span>
{% endfor %}
</div>
</div>
<div class="col s12 l3">
{{ wtf.render_field(form.version, material_icon='apps') }}
@ -138,6 +148,37 @@
{% block modals %}
{{ super() }}
{# Modal with one table row per entry of `tesseract_ocr_models`. #}
<div id="models-modal" class="modal">
  <div class="modal-content">
    <h4>Tesseract OCR Pipeline models</h4>
    <table>
      <thead>
        <tr>
          <th>Title</th>
          <th>Description</th>
          <th>Biblio</th>
        </tr>
      </thead>
      <tbody>
        {% for m in tesseract_ocr_models %}
        <tr id="tesseract-ocr-model-{{ m.hashid }}">
          <td>{{ m.title }}</td>
          {# The description column is nullable: treat a missing value like an empty one instead of rendering "None". #}
          {% if m.description %}
          <td>{{ m.description }}</td>
          {% else %}
          <td>Description is not available.</td>
          {% endif %}
          <td><a href="{{ m.publisher_url }}">{{ m.publisher }}</a> ({{ m.publishing_year }}), {{ m.title }} {{ m.version }}, <a href="{{ m.publishing_url }}">{{ m.publishing_url }}</a></td>
        </tr>
        {% endfor %}
      </tbody>
    </table>
  </div>
  <div class="modal-footer">
    <a href="#!" class="modal-close waves-effect waves-light btn">Close</a>
  </div>
</div>
<div id="progress-modal" class="modal">
<div class="modal-content">
<h4><i class="material-icons left">file_upload</i>Uploading files...</h4>

View File

@ -58,7 +58,17 @@
{{ wtf.render_field(form.pdf, accept='application/pdf', placeholder='Choose a PDF file') }}
</div>
<div class="col s12 l4">
{{ wtf.render_field(form.model, material_icon='language') }}
<div class="input-field">
<i class="material-icons prefix">language</i>
{{ form.model() }}
{{ form.model.label }}
<span class="helper-text">
<a class="modal-trigger" href="#models-modal">More details about models</a>
</span>
{% for error in form.model.errors %}
<span class="helper-text error-color-text">{{ error }}</span>
{% endfor %}
</div>
</div>
<div class="col s12 l3">
{{ wtf.render_field(form.version, material_icon='apps') }}
@ -143,6 +153,29 @@
{% block modals %}
{{ super() }}
{# Modal with one collapsible entry per item of `transkribus_htr_models`. #}
<div id="models-modal" class="modal">
<div class="modal-content">
<h4>Transkribus HTR Pipeline models</h4>
<ul class="collapsible popout" id="transkribus-htr-models">
{% for m in transkribus_htr_models %}
<li id="transkribus-htr-model-{{ m.hashid }}">
<div class="collapsible-header"><i class="material-icons">widgets</i>{{ m.transkribus_name }}</div>
<div class="collapsible-body">
{# Look up the metadata entry whose modelId matches this record and print it as raw JSON. #}
{% for m_info in TRANSKRIBUS_HTR_MODELS %}
{% if m_info['modelId'] == m.transkribus_model_id %}
{{ m_info|tojson }}
{% endif %}
{% endfor %}
</div>
</li>
{% endfor %}
</ul>
</div>
<div class="modal-footer">
<a href="#!" class="modal-close waves-effect waves-light btn">Close</a>
</div>
</div>
<div id="progress-modal" class="modal">
<div class="modal-content">
<h4><i class="material-icons left">file_upload</i>Uploading files...</h4>

View File

@ -1,8 +1,8 @@
"""empty message
Revision ID: aa855b80cf1d
Revision ID: 9e8d7d15d950
Revises:
Create Date: 2022-04-01 12:14:42.606685
Create Date: 2022-04-22 09:38:49.527498
"""
from alembic import op
@ -10,7 +10,7 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = 'aa855b80cf1d'
revision = '9e8d7d15d950'
down_revision = None
branch_labels = None
depends_on = None
@ -83,6 +83,8 @@ def upgrade():
sa.Column('compatible_service_versions', sa.String(length=255), nullable=True),
sa.Column('description', sa.String(length=255), nullable=True),
sa.Column('publisher', sa.String(length=128), nullable=True),
sa.Column('publisher_url', sa.String(length=512), nullable=True),
sa.Column('publishing_url', sa.String(length=512), nullable=True),
sa.Column('publishing_year', sa.Integer(), nullable=True),
sa.Column('shared', sa.Boolean(), nullable=True),
sa.Column('title', sa.String(length=64), nullable=True),
@ -90,6 +92,15 @@ def upgrade():
sa.ForeignKeyConstraint(['user_id'], ['users.id'], ),
sa.PrimaryKeyConstraint('id')
)
op.create_table('transkribus_htr_models',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('user_id', sa.Integer(), nullable=True),
sa.Column('shared', sa.Boolean(), nullable=True),
sa.Column('transkribus_model_id', sa.Integer(), nullable=True),
sa.Column('transkribus_name', sa.String(length=64), nullable=True),
sa.ForeignKeyConstraint(['user_id'], ['users.id'], ),
sa.PrimaryKeyConstraint('id')
)
op.create_table('corpus_files',
sa.Column('creation_date', sa.DateTime(), nullable=True),
sa.Column('filename', sa.String(length=255), nullable=True),
@ -141,6 +152,7 @@ def downgrade():
op.drop_table('job_results')
op.drop_table('job_inputs')
op.drop_table('corpus_files')
op.drop_table('transkribus_htr_models')
op.drop_table('tesseract_ocr_models')
op.drop_table('jobs')
op.drop_table('corpora')

View File

@ -13,6 +13,7 @@ from app.models import (
Permission,
Role,
TesseractOCRModel,
TranskribusHTRModel,
User
) # noqa
from flask import Flask # noqa
@ -42,5 +43,6 @@ def make_shell_context() -> Dict[str, Any]:
'Permission': Permission,
'Role': Role,
'TesseractOCRModel': TesseractOCRModel,
'TranskribusHTRModel': TranskribusHTRModel,
'User': User
}