Compare commits

..

No commits in common. "fa28c875e1dbb144314fe154c5ea732c718b9f18" and "9be5ce60145a74efff588776ea42ed7ecf6554b4" have entirely different histories.

6 changed files with 108 additions and 231 deletions

View File

@ -120,6 +120,7 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'German'
description: 'German pipeline optimized for CPU. Components: tok2vec, tagger, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner.'
url: 'https://github.com/explosion/spacy-models/releases/download/de_core_news_md-3.4.0/de_core_news_md-3.4.0.tar.gz'
@ -131,6 +132,7 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Greek'
description: 'Greek pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), senter, ner, attribute_ruler.'
url: 'https://github.com/explosion/spacy-models/releases/download/el_core_news_md-3.4.0/el_core_news_md-3.4.0.tar.gz'
@ -142,6 +144,7 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'English'
description: 'English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.4.1/en_core_web_md-3.4.1.tar.gz'
@ -153,6 +156,7 @@
version: '3.4.1'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Spanish'
description: 'Spanish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/es_core_news_md-3.4.0/es_core_news_md-3.4.0.tar.gz'
@ -164,6 +168,7 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'French'
description: 'French pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/fr_core_news_md-3.4.0/fr_core_news_md-3.4.0.tar.gz'
@ -175,6 +180,7 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Italian'
description: 'Italian pipeline optimized for CPU. Components: tok2vec, morphologizer, tagger, parser, lemmatizer (trainable_lemmatizer), senter, ner'
url: 'https://github.com/explosion/spacy-models/releases/download/it_core_news_md-3.4.0/it_core_news_md-3.4.0.tar.gz'
@ -186,6 +192,7 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Polish'
description: 'Polish pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, lemmatizer (trainable_lemmatizer), tagger, senter, ner.'
url: 'https://github.com/explosion/spacy-models/releases/download/pl_core_news_md-3.4.0/pl_core_news_md-3.4.0.tar.gz'
@ -197,6 +204,7 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Russian'
description: 'Russian pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.'
url: 'https://github.com/explosion/spacy-models/releases/download/ru_core_news_md-3.4.0/ru_core_news_md-3.4.0.tar.gz'
@ -208,6 +216,7 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'
- title: 'Chinese'
description: 'Chinese pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler.'
url: 'https://github.com/explosion/spacy-models/releases/download/zh_core_web_md-3.4.0/zh_core_web_md-3.4.0.tar.gz'
@ -219,3 +228,4 @@
version: '3.4.0'
compatible_service_versions:
- '0.1.1'
- '0.1.2'

View File

@ -9,7 +9,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Amharic'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/amh.traineddata'
@ -21,7 +20,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
- title: 'Arabic'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ara.traineddata'
@ -33,7 +31,6 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
# - title: 'Assamese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/asm.traineddata'
@ -45,7 +42,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Azerbaijani'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze.traineddata'
@ -57,7 +53,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Azerbaijani - Cyrillic'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/aze_cyrl.traineddata'
@ -69,7 +64,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Belarusian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bel.traineddata'
@ -81,7 +75,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Bengali'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ben.traineddata'
@ -93,7 +86,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Tibetan'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bod.traineddata'
@ -105,7 +97,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Bosnian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bos.traineddata'
@ -117,7 +108,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Bulgarian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/bul.traineddata'
@ -129,7 +119,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Catalan; Valencian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cat.traineddata'
@ -141,7 +130,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Cebuano'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ceb.traineddata'
@ -153,7 +141,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Czech'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ces.traineddata'
@ -165,7 +152,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Chinese - Simplified'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_sim.traineddata'
@ -177,7 +163,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
- title: 'Chinese - Traditional'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chi_tra.traineddata'
@ -189,7 +174,6 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
# - title: 'Cherokee'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/chr.traineddata'
@ -201,7 +185,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Welsh'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/cym.traineddata'
@ -213,7 +196,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
- title: 'Danish'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dan.traineddata'
@ -225,7 +207,6 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
- title: 'German'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/deu.traineddata'
@ -237,7 +218,6 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
# - title: 'Dzongkha'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/dzo.traineddata'
@ -249,7 +229,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
- title: 'Greek, Modern (1453-)'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ell.traineddata'
@ -261,7 +240,6 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
- title: 'English'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eng.traineddata'
@ -273,7 +251,6 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
- title: 'English, Middle (1100-1500)'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/enm.traineddata'
@ -285,7 +262,6 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
# - title: 'Esperanto'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/epo.traineddata'
@ -297,7 +273,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Estonian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/est.traineddata'
@ -309,7 +284,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Basque'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/eus.traineddata'
@ -321,7 +295,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Persian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fas.traineddata'
@ -333,7 +306,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Finnish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fin.traineddata'
@ -345,7 +317,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
- title: 'French'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/fra.traineddata'
@ -357,7 +328,6 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
- title: 'German Fraktur'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frk.traineddata'
@ -369,7 +339,6 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
- title: 'French, Middle (ca. 1400-1600)'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/frm.traineddata'
@ -381,7 +350,6 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
# - title: 'Irish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/gle.traineddata'
@ -393,7 +361,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Galician'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/glg.traineddata'
@ -405,7 +372,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
- title: 'Greek, Ancient (-1453)'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/grc.traineddata'
@ -417,7 +383,6 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
# - title: 'Gujarati'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/guj.traineddata'
@ -429,7 +394,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Haitian; Haitian Creole'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hat.traineddata'
@ -441,7 +405,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Hebrew'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/heb.traineddata'
@ -453,7 +416,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Hindi'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hin.traineddata'
@ -465,7 +427,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Croatian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hrv.traineddata'
@ -477,7 +438,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Hungarian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/hun.traineddata'
@ -489,7 +449,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Inuktitut'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/iku.traineddata'
@ -501,7 +460,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Indonesian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ind.traineddata'
@ -513,7 +471,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Icelandic'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/isl.traineddata'
@ -525,7 +482,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
- title: 'Italian'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita.traineddata'
@ -537,7 +493,6 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
- title: 'Italian - Old'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ita_old.traineddata'
@ -549,7 +504,6 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
# - title: 'Javanese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jav.traineddata'
@ -561,7 +515,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Japanese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/jpn.traineddata'
@ -573,7 +526,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Kannada'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kan.traineddata'
@ -585,7 +537,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Georgian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat.traineddata'
@ -597,7 +548,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Georgian - Old'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kat_old.traineddata'
@ -609,7 +559,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Kazakh'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kaz.traineddata'
@ -621,7 +570,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Central Khmer'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/khm.traineddata'
@ -633,7 +581,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Kirghiz; Kyrgyz'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kir.traineddata'
@ -645,7 +592,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Korean'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kor.traineddata'
@ -657,7 +603,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Kurdish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/kur.traineddata'
@ -669,7 +614,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Lao'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lao.traineddata'
@ -681,7 +625,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Latin'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lat.traineddata'
@ -693,7 +636,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Latvian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lav.traineddata'
@ -705,7 +647,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Lithuanian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/lit.traineddata'
@ -717,7 +658,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Malayalam'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mal.traineddata'
@ -729,7 +669,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Marathi'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mar.traineddata'
@ -741,7 +680,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Macedonian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mkd.traineddata'
@ -753,7 +691,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Maltese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mlt.traineddata'
@ -765,7 +702,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Malay'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/msa.traineddata'
@ -777,7 +713,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Burmese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/mya.traineddata'
@ -789,7 +724,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Nepali'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nep.traineddata'
@ -801,7 +735,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Dutch; Flemish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nld.traineddata'
@ -813,7 +746,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Norwegian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/nor.traineddata'
@ -825,7 +757,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Oriya'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ori.traineddata'
@ -837,7 +768,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Panjabi; Punjabi'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pan.traineddata'
@ -849,7 +779,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Polish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pol.traineddata'
@ -861,7 +790,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
- title: 'Portuguese'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/por.traineddata'
@ -873,7 +801,6 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
# - title: 'Pushto; Pashto'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/pus.traineddata'
@ -885,7 +812,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Romanian; Moldavian; Moldovan'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ron.traineddata'
@ -897,7 +823,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
- title: 'Russian'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/rus.traineddata'
@ -909,7 +834,6 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
# - title: 'Sanskrit'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/san.traineddata'
@ -921,7 +845,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Sinhala; Sinhalese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sin.traineddata'
@ -933,7 +856,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Slovak'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slk.traineddata'
@ -945,7 +867,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Slovenian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/slv.traineddata'
@ -957,7 +878,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
- title: 'Spanish; Castilian'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa.traineddata'
@ -969,7 +889,6 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
- title: 'Spanish; Castilian - Old'
description: ''
url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/spa_old.traineddata'
@ -981,7 +900,6 @@
compatible_service_versions:
- '0.1.0'
- '0.1.1'
- '0.1.2'
# - title: 'Albanian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/sqi.traineddata'
@ -993,7 +911,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Serbian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp.traineddata'
@ -1005,7 +922,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Serbian - Latin'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/srp_latn.traineddata'
@ -1017,7 +933,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Swahili'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swa.traineddata'
@ -1029,7 +944,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Swedish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/swe.traineddata'
@ -1041,7 +955,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Syriac'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/syr.traineddata'
@ -1053,7 +966,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Tamil'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tam.traineddata'
@ -1065,7 +977,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Telugu'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tel.traineddata'
@ -1077,7 +988,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Tajik'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgk.traineddata'
@ -1089,7 +999,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Tagalog'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tgl.traineddata'
@ -1101,7 +1010,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Thai'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tha.traineddata'
@ -1113,7 +1021,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Tigrinya'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tir.traineddata'
@ -1125,7 +1032,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Turkish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/tur.traineddata'
@ -1137,7 +1043,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Uighur; Uyghur'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uig.traineddata'
@ -1149,7 +1054,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Ukrainian'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/ukr.traineddata'
@ -1161,7 +1065,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Urdu'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/urd.traineddata'
@ -1173,7 +1076,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Uzbek'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb.traineddata'
@ -1185,7 +1087,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Uzbek - Cyrillic'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/uzb_cyrl.traineddata'
@ -1197,7 +1098,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Vietnamese'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/vie.traineddata'
@ -1209,7 +1109,6 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'
# - title: 'Yiddish'
# description: ''
# url: 'https://github.com/tesseract-ocr/tessdata/raw/4.1.0/yid.traineddata'
@ -1221,4 +1120,3 @@
# compatible_service_versions:
# - '0.1.0'
# - '0.1.1'
# - '0.1.2'

View File

@ -953,7 +953,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
return self.user.hashid
@staticmethod
def insert_defaults(force_download=False):
def insert_defaults():
nopaque_user = User.query.filter_by(username='nopaque').first()
defaults_file = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
@ -966,7 +966,6 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
if model is not None:
model.compatible_service_versions = m['compatible_service_versions']
model.description = m['description']
model.filename = f'{model.id}.traineddata'
model.publisher = m['publisher']
model.publisher_url = m['publisher_url']
model.publishing_url = m['publishing_url']
@ -974,7 +973,7 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
model.is_public = True
model.title = m['title']
model.version = m['version']
else:
continue
model = TesseractOCRPipelineModel(
compatible_service_versions=m['compatible_service_versions'],
description=m['description'],
@ -991,7 +990,6 @@ class TesseractOCRPipelineModel(FileMixin, HashidMixin, db.Model):
db.session.flush(objects=[model])
db.session.refresh(model)
model.filename = f'{model.id}.traineddata'
if not os.path.exists(model.path) or force_download:
r = requests.get(m['url'], stream=True)
pbar = tqdm(
desc=f'{model.title} ({model.filename})',
@ -1082,7 +1080,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
return self.user.hashid
@staticmethod
def insert_defaults(force_download=False):
def insert_defaults():
nopaque_user = User.query.filter_by(username='nopaque').first()
defaults_file = os.path.join(
os.path.dirname(os.path.abspath(__file__)),
@ -1095,7 +1093,6 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
if model is not None:
model.compatible_service_versions = m['compatible_service_versions']
model.description = m['description']
model.filename = m['url'].split('/')[-1]
model.publisher = m['publisher']
model.publisher_url = m['publisher_url']
model.publishing_url = m['publishing_url']
@ -1104,11 +1101,10 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
model.title = m['title']
model.version = m['version']
model.pipeline_name = m['pipeline_name']
else:
continue
model = SpaCyNLPPipelineModel(
compatible_service_versions=m['compatible_service_versions'],
description=m['description'],
filename=m['url'].split('/')[-1],
publisher=m['publisher'],
publisher_url=m['publisher_url'],
publishing_url=m['publishing_url'],
@ -1122,7 +1118,7 @@ class SpaCyNLPPipelineModel(FileMixin, HashidMixin, db.Model):
db.session.add(model)
db.session.flush(objects=[model])
db.session.refresh(model)
if not os.path.exists(model.path) or force_download:
model.filename = m['url'].split('/')[-1]
r = requests.get(m['url'], stream=True)
pbar = tqdm(
desc=f'{model.title} ({model.filename})',

View File

@ -10,7 +10,7 @@ file-setup-pipeline:
tesseract-ocr-pipeline:
name: 'Tesseract OCR Pipeline'
publisher: 'Bielefeld University - CRC 1288 - INF'
latest_version: '0.1.2'
latest_version: '0.1.1'
versions:
0.1.0:
methods:
@ -23,12 +23,6 @@ tesseract-ocr-pipeline:
- 'ocropus_nlbin_threshold'
publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.1'
0.1.2:
methods:
- 'binarization'
- 'ocropus_nlbin_threshold'
publishing_year: 2023
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/tesseract-ocr-pipeline/-/releases/v0.1.2'
transkribus-htr-pipeline:
name: 'Transkribus HTR Pipeline'
publisher: 'Bielefeld University - CRC 1288 - INF'
@ -59,3 +53,8 @@ spacy-nlp-pipeline:
- 'encoding_detection'
publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.1'
0.1.2:
methods:
- 'encoding_detection'
publishing_year: 2022
url: 'https://gitlab.ub.uni-bielefeld.de/sfb1288inf/spacy-nlp-pipeline/-/releases/v0.1.2'

View File

@ -7,21 +7,13 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
this.addEventListenersToNAndMInputSubmit();
this.elements.deleteQueryButton.addEventListener('click', () => {this.resetQueryInputField()});
this.expertModeQueryBuilderSwitchHandler();
this.extensions = {
structuralAttributeBuilderFunctions: new nopaque.corpus_analysis.query_builder.StructuralAttributeBuilderFunctions(this),
tokenAttributeBuilderFunctions: new nopaque.corpus_analysis.query_builder.TokenAttributeBuilderFunctions(this),
};
this.dropdown = M.Dropdown.init(
document.querySelector('.dropdown-trigger[data-toggle-area="token-incidence-modifiers"]'),
{
onCloseStart: () => {
this.unselectChipElement(this.elements.queryInputField.querySelector('.chip.teal'));
}
}
)
}
addEventListenersToQueryElementTarget() {
@ -116,7 +108,7 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
prettyQueryText = nopaque.Utils.escape(prettyQueryText);
let queryChipElement = nopaque.Utils.HTMLToElement(
`
<span class="chip query-component" data-type="${dataType}" data-query="${queryText}" draggable="true"">
<span class="chip query-component" data-type="${dataType}" data-query="${queryText}" draggable="true" data-closing-tag="${isClosingTag}">
${prettyQueryText}${isEditable ? '<i class="material-icons chip-action-button" data-chip-action="edit" style="padding-left:5px; font-size:18px; cursor:pointer;">edit</i>': ''}
${isClosingTag ? '' : '<i class="material-icons close chip-action-button" data-chip-action="delete">close</i>'}
</span>
@ -160,6 +152,8 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
this.deleteChipElement(queryChipElement);
} else if (event.target.dataset.chipAction === 'edit') {
this.editChipElement(queryChipElement);
} else if (event.target.dataset.chipAction === 'lock') {
this.lockClosingChipElement(queryChipElement);
}
});
});
@ -298,36 +292,24 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
}
selectChipElement(attr) {
if (attr.classList.contains('teal')) {
return;
document.querySelectorAll('.chip.teal').forEach(element => {
if (element !== attr) {
element.classList.remove('teal', 'lighten-2');
this.toggleClass(['token-incidence-modifiers'], 'disabled', 'add');
}
});
this.toggleClass(['token-incidence-modifiers'], 'disabled', 'toggle');
attr.classList.toggle('teal');
attr.classList.toggle('lighten-5');
M.Dropdown.getInstance(document.querySelector('.dropdown-trigger[data-toggle-area="token-incidence-modifiers"]')).open();
}
unselectChipElement(attr) {
let nModalInstance = M.Modal.getInstance(document.querySelector('#corpus-analysis-concordance-exactly-n-token-modal'));
let nmModalInstance = M.Modal.getInstance(document.querySelector('#corpus-analysis-concordance-between-nm-token-modal'));
if (nModalInstance.isOpen || nmModalInstance.isOpen) {
return;
}
attr.classList.remove('teal', 'lighten-5');
this.toggleClass(['token-incidence-modifiers'], 'disabled', 'add');
}
tokenIncidenceModifierHandler(incidenceModifier, incidenceModifierPretty, nOrNM = false) {
tokenIncidenceModifierHandler(incidenceModifier, incidenceModifierPretty) {
// Adds a token incidence modifier to the query input field.
let selectedChip = this.elements.queryInputField.querySelector('.chip.teal');
let selectedChipIndex = Array.from(this.elements.queryChipElements).indexOf(selectedChip);
if (nOrNM) {
this.unselectChipElement(selectedChip);
}
this.submitQueryChipElement('token-incidence-modifier', incidenceModifierPretty, incidenceModifier, selectedChipIndex);
this.selectChipElement(selectedChip);
}
tokenNMSubmitHandler(modalId) {
@ -345,7 +327,7 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
let instance = M.Modal.getInstance(modal);
instance.close();
this.tokenIncidenceModifierHandler(input, pretty_input, true);
this.tokenIncidenceModifierHandler(input, pretty_input);
}
expertModeQueryBuilderSwitchHandler() {
@ -386,13 +368,15 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
this.resetQueryInputField();
let expertModeInputFieldValue = document.querySelector('#corpus-analysis-concordance-form-query').value;
let chipElements = this.parseTextToChip(expertModeInputFieldValue);
let closingTagElements = ['end-sentence', 'end-entity'];
let editableElements = ['start-entity', 'token'];
for (let chipElement of chipElements) {
let isClosingTag = closingTagElements.includes(chipElement['type']);
let isEditable = editableElements.includes(chipElement['type']);
if (chipElement['query'] === '[]'){
isEditable = false;
}
this.submitQueryChipElement(chipElement['type'], chipElement['pretty'], chipElement['query'], null, false, isEditable);
this.submitQueryChipElement(chipElement['type'], chipElement['pretty'], chipElement['query'], null, isClosingTag, isEditable);
}
}

View File

@ -52,14 +52,14 @@ nopaque.corpus_analysis.query_builder.TokenAttributeBuilderFunctions = class Tok
let input = this.tokenInputCheck(this.elements.tokenBuilderContent);
switch (elem) {
case 'option-group':
this.cursorPositionInputfieldHandler(input, '(option1|option2)');
input.value += '(option1|option2)';
let firstIndex = input.value.indexOf('option1');
let lastIndex = firstIndex + 'option1'.length;
input.focus();
input.setSelectionRange(firstIndex, lastIndex);
break;
case 'wildcard-char':
this.cursorPositionInputfieldHandler(input, '.');
input.focus();
input.value += '.';
break;
case 'and':
this.conditionHandler('and');
@ -73,19 +73,9 @@ nopaque.corpus_analysis.query_builder.TokenAttributeBuilderFunctions = class Tok
this.optionToggleHandler();
}
cursorPositionInputfieldHandler(input, addedInput) {
let cursorPosition = input.selectionStart;
let textBeforeCursor = input.value.substring(0, cursorPosition);
let textAfterCursor = input.value.substring(cursorPosition);
let newInputValue = textBeforeCursor + addedInput + textAfterCursor;
input.value = newInputValue;
let newCursorPosition = cursorPosition + addedInput.length;
input.setSelectionRange(newCursorPosition, newCursorPosition);
}
characterIncidenceModifierHandler(elem) {
let input = this.tokenInputCheck(this.elements.tokenBuilderContent);
this.cursorPositionInputfieldHandler(input, elem.dataset.token);
input.value += elem.dataset.token;
}
characterNMSubmitHandler(modalId) {
@ -93,12 +83,12 @@ nopaque.corpus_analysis.query_builder.TokenAttributeBuilderFunctions = class Tok
let input_n = modal.querySelector('.n-m-input[data-value-type="n"]').value;
let input_m = modal.querySelector('.n-m-input[data-value-type="m"]') || undefined;
input_m = input_m !== undefined ? ',' + input_m.value : '';
let addedInput = `${input_n}${input_m}`;
let input = `${input_n}${input_m}`;
let instance = M.Modal.getInstance(modal);
instance.close();
let input = this.tokenInputCheck(this.elements.tokenBuilderContent);
this.cursorPositionInputfieldHandler(input, `{${addedInput}}`);
let tokenInput = this.tokenInputCheck(this.elements.tokenBuilderContent);
tokenInput.value += '{' + input + '}';
}
conditionHandler(conditionText) {