diff --git a/web/app/corpora/events.py b/web/app/corpora/events.py index a0ae9322..9eb9c20f 100644 --- a/web/app/corpora/events.py +++ b/web/app/corpora/events.py @@ -8,7 +8,8 @@ from ..events import connected_sessions from ..models import Corpus, User import cqi import math -import logging +import os +import shutil ''' @@ -23,6 +24,29 @@ corpus_analysis_sessions = {} corpus_analysis_clients = {} +@socketio.on('corpus_create_zip') +@socketio_login_required +def corpus_create_zip(corpus_id): + corpus = Corpus.query.get_or_404(corpus_id) + # delete old corpus archive if it exists/has been build before + if corpus.archive_file is not None: + if (os.path.isfile(corpus.archive_file)): + os.remove(corpus.archive_file) + root_dir = os.path.join(current_app.config['DATA_DIR'], + str(current_user.id), + 'corpora') + base_dir = os.path.join(root_dir, str(corpus.id)) + zip_name = corpus.title + zip_path = os.path.join(root_dir, zip_name) + corpus.archive_file = os.path.join(base_dir, zip_name) + '.zip' + db.session.commit() + shutil.make_archive(zip_path, + 'zip', + base_dir) + shutil.move(zip_path + '.zip', corpus.archive_file) + socketio.emit('corpus_zip_created', room=request.sid) + + @socketio.on('corpus_analysis_init') @socketio_login_required def init_corpus_analysis(corpus_id): @@ -125,10 +149,6 @@ def corpus_analysis_query(query): chunk_start = 0 context = 50 progress = 0 - # for attr in corpus.structural_attributes.list(): - # if attr.attrs['name'] == 'text': - # text_attr = attr - # logging.warning(results.fdist_1(15, results.attrs['fields']['match'], text_attr)) client.status = 'running' while chunk_start <= results.attrs['size']: if client.status == 'abort': diff --git a/web/app/corpora/forms.py b/web/app/corpora/forms.py index f25c6b64..252668d7 100644 --- a/web/app/corpora/forms.py +++ b/web/app/corpora/forms.py @@ -69,6 +69,26 @@ class AddCorpusForm(FlaskForm): title = StringField('Title', validators=[DataRequired(), Length(1, 32)]) +class ImportCorpusForm(FlaskForm): + ''' + Form to import a corpus. + ''' + description = StringField('Description', + validators=[DataRequired(), Length(1, 255)]) + file = FileField('File', validators=[DataRequired()]) + submit = SubmitField() + title = StringField('Title', validators=[DataRequired(), Length(1, 32)]) + + def __init__(self, *args, **kwargs): + super(ImportCorpusForm, self).__init__(*args, **kwargs) + + def validate_file(self, field): + if not field.data.filename.lower().endswith('.zip'): + raise ValidationError('File does not have an approved extension: ' + '.zip') + field.data.filename = secure_filename(field.data.filename) + + class QueryForm(FlaskForm): ''' Form to submit a query to the server which is executed via cqi-py. diff --git a/web/app/corpora/import_corpus.py b/web/app/corpora/import_corpus.py new file mode 100644 index 00000000..a78f6f26 --- /dev/null +++ b/web/app/corpora/import_corpus.py @@ -0,0 +1,89 @@ +check_zip_contents = ['data/', + 'merged/', + 'registry/', + 'registry/corpus', + 'data/corpus/', + 'data/corpus/text_editor.avs', + 'data/corpus/pos.lexicon', + 'data/corpus/simple_pos.huf', + 'data/corpus/word.huf', + 'data/corpus/text_booktitle.avs', + 'data/corpus/word.lexicon.srt', + 'data/corpus/word.lexicon.idx', + 'data/corpus/simple_pos.crx', + 'data/corpus/text_pages.rng', + 'data/corpus/simple_pos.crc', + 'data/corpus/ner.lexicon', + 'data/corpus/lemma.huf', + 'data/corpus/text_title.rng', + 'data/corpus/text_chapter.avx', + 'data/corpus/lemma.lexicon.srt', + 'data/corpus/lemma.lexicon.idx', + 'data/corpus/text_school.rng', + 'data/corpus/text_journal.avs', + 'data/corpus/simple_pos.lexicon', + 'data/corpus/pos.huf', + 'data/corpus/text_editor.avx', + 'data/corpus/lemma.crc', + 'data/corpus/lemma.lexicon', + 'data/corpus/pos.hcd', + 'data/corpus/text_title.avx', + 'data/corpus/text_institution.avs', + 'data/corpus/text_address.avx', + 'data/corpus/lemma.corpus.cnt', + 'data/corpus/word.crx', + 'data/corpus/simple_pos.hcd', + 'data/corpus/simple_pos.huf.syn', + 'data/corpus/simple_pos.lexicon.srt', + 'data/corpus/text_author.avx', + 'data/corpus/text_publisher.avs', + 'data/corpus/text_chapter.avs', + 'data/corpus/ner.corpus.cnt', + 'data/corpus/pos.huf.syn', + 'data/corpus/text_booktitle.rng', + 'data/corpus/lemma.huf.syn', + 'data/corpus/pos.corpus.cnt', + 'data/corpus/word.lexicon', + 'data/corpus/text_publishing_year.avs', + 'data/corpus/lemma.hcd', + 'data/corpus/text_school.avs', + 'data/corpus/text_journal.rng', + 'data/corpus/word.corpus.cnt', + 'data/corpus/text_school.avx', + 'data/corpus/text_journal.avx', + 'data/corpus/pos.lexicon.srt', + 'data/corpus/text_title.avs', + 'data/corpus/word.hcd', + 'data/corpus/text_chapter.rng', + 'data/corpus/text_address.rng', + 'data/corpus/ner.hcd', + 'data/corpus/text_publisher.avx', + 'data/corpus/text_institution.rng', + 'data/corpus/lemma.crx', + 'data/corpus/pos.crc', + 'data/corpus/text_author.rng', + 'data/corpus/text_address.avs', + 'data/corpus/pos.lexicon.idx', + 'data/corpus/ner.huf', + 'data/corpus/ner.huf.syn', + 'data/corpus/text_pages.avs', + 'data/corpus/text_publishing_year.avx', + 'data/corpus/ner.lexicon.idx', + 'data/corpus/text.rng', + 'data/corpus/word.crc', + 'data/corpus/ner.crc', + 'data/corpus/text_publisher.rng', + 'data/corpus/text_editor.rng', + 'data/corpus/text_author.avs', + 'data/corpus/s.rng', + 'data/corpus/text_publishing_year.rng', + 'data/corpus/simple_pos.corpus.cnt', + 'data/corpus/simple_pos.lexicon.idx', + 'data/corpus/word.huf.syn', + 'data/corpus/ner.lexicon.srt', + 'data/corpus/text_pages.avx', + 'data/corpus/text_booktitle.avx', + 'data/corpus/pos.crx', + 'data/corpus/ner.crx', + 'data/corpus/text_institution.avx', + 'merged/corpus.vrt'] diff --git a/web/app/corpora/views.py b/web/app/corpora/views.py index 1e98b679..7c3a49d9 100644 --- a/web/app/corpora/views.py +++ b/web/app/corpora/views.py @@ -5,12 +5,18 @@ from . import corpora from . import tasks from .forms import (AddCorpusFileForm, AddCorpusForm, AddQueryResultForm, EditCorpusFileForm, QueryDownloadForm, QueryForm, - DisplayOptionsForm, InspectDisplayOptionsForm) + DisplayOptionsForm, InspectDisplayOptionsForm, + ImportCorpusForm) +from jsonschema import validate from .. import db from ..models import Corpus, CorpusFile, QueryResult import json -from jsonschema import validate import os +import shutil +import glob +import xml.etree.ElementTree as ET +from zipfile import ZipFile +from .import_corpus import check_zip_contents @corpora.route('/add', methods=['GET', 'POST']) @@ -40,6 +46,85 @@ def add_corpus(): title='Add corpus') +@corpora.route('/import', methods=['GET', 'POST']) +@login_required +def import_corpus(): + import_corpus_form = ImportCorpusForm() + if import_corpus_form.is_submitted(): + if not import_corpus_form.validate(): + return make_response(import_corpus_form.errors, 400) + corpus = Corpus(creator=current_user, + description=import_corpus_form.description.data, + status='unprepared', + title=import_corpus_form.title.data) + db.session.add(corpus) + db.session.commit() + dir = os.path.join(current_app.config['DATA_DIR'], + str(corpus.user_id), 'corpora', str(corpus.id)) + try: + os.makedirs(dir) + except OSError: + flash('[ERROR]: Could not import corpus!', 'corpus') + corpus.delete() + else: + # Upload zip + archive_file = os.path.join(current_app.config['DATA_DIR'], dir, + import_corpus_form.file.data.filename) + corpus_dir = os.path.dirname(archive_file) + import_corpus_form.file.data.save(archive_file) + # Some checks to verify it is a valid exported corpus + with ZipFile(archive_file, 'r') as zip: + contents = zip.namelist() + if set(check_zip_contents).issubset(contents): + # Unzip + shutil.unpack_archive(archive_file, corpus_dir) + # Register vrt files to corpus + vrts = glob.glob(corpus_dir + '/*.vrt') + for file in vrts: + element_tree = ET.parse(file) + text_node = element_tree.find('text') + corpus_file = CorpusFile( + address=text_node.get('address', 'NULL'), + author=text_node.get('author', 'NULL'), + booktitle=text_node.get('booktitle', 'NULL'), + chapter=text_node.get('chapter', 'NULL'), + corpus=corpus, + dir=dir, + editor=text_node.get('editor', 'NULL'), + filename=os.path.basename(file), + institution=text_node.get('institution', 'NULL'), + journal=text_node.get('journal', 'NULL'), + pages=text_node.get('pages', 'NULL'), + publisher=text_node.get('publisher', 'NULL'), + publishing_year=text_node.get('publishing_year', ''), + school=text_node.get('school', 'NULL'), + title=text_node.get('title', 'NULL')) + db.session.add(corpus_file) + # finish import and got to imported corpus + url = url_for('corpora.corpus', corpus_id=corpus.id) + corpus.status = 'prepared' + db.session.commit() + os.remove(archive_file) + flash('[{}] imported'.format(url, + corpus.title), + 'corpus') + return make_response( + {'redirect_url': url_for('corpora.corpus', + corpus_id=corpus.id)}, + 201) + else: + # If imported zip is not valid delete corpus and give feedback + corpus.delete() + db.session.commit() + flash('Imported corpus is not valid.', 'error') + return make_response( + {'redirect_url': url_for('corpora.import_corpus')}, + 201) + return render_template('corpora/import_corpus.html.j2', + import_corpus_form=import_corpus_form, + title='Import Corpus') + + @corpora.route('/') @login_required def corpus(corpus_id): @@ -60,6 +145,20 @@ def corpus(corpus_id): title='Corpus') +@corpora.route('//export') +@login_required +def export_corpus(corpus_id): + corpus = Corpus.query.get_or_404(corpus_id) + if not (corpus.creator == current_user or current_user.is_administrator()): + abort(403) + dir = os.path.dirname(corpus.archive_file) + filename = os.path.basename(corpus.archive_file) + return send_from_directory(directory=dir, + filename=filename, + mimetype='zip', + as_attachment=True) + + @corpora.route('//analyse') @login_required def analyse_corpus(corpus_id): diff --git a/web/app/decorators.py b/web/app/decorators.py index 4bd2f731..de0189ad 100644 --- a/web/app/decorators.py +++ b/web/app/decorators.py @@ -26,6 +26,7 @@ def background(f): @wraps(f) def wrapped(*args, **kwargs): kwargs['app'] = current_app._get_current_object() + kwargs['current_user'] = current_user._get_current_object() thread = socketio.start_background_task(f, *args, **kwargs) return thread return wrapped diff --git a/web/app/models.py b/web/app/models.py index e28a5b06..00c83245 100644 --- a/web/app/models.py +++ b/web/app/models.py @@ -555,6 +555,7 @@ class Corpus(db.Model): max_nr_of_tokens = db.Column(db.BigInteger, default=2147483647) status = db.Column(db.String(16)) title = db.Column(db.String(32)) + archive_file = db.Column(db.String(255)) # Relationships files = db.relationship('CorpusFile', backref='corpus', lazy='dynamic', cascade='save-update, merge, delete') diff --git a/web/app/static/css/nopaque.css b/web/app/static/css/nopaque.css index d61138bc..45f9aec9 100644 --- a/web/app/static/css/nopaque.css +++ b/web/app/static/css/nopaque.css @@ -2,7 +2,7 @@ * ### Start sticky footer ### * Force the footer to always stay on the bottom of the page regardless of how * little content is on the page. -*/ + */ body { display: flex; min-height: 100vh; @@ -34,6 +34,16 @@ main { height: 19.5px !important; } +/* + * changes preoloader size etc. to fit visually better with the chip status + * indicator of jobs + */ +.status-spinner { + margin-bottom: -10px; + width: 30px !important; + height: 30px !important; +} + /* flat-interaction addition to show background color */ .flat-interaction { diff --git a/web/app/static/js/modules/corpus_analysis/view/ResultsView.js b/web/app/static/js/modules/corpus_analysis/view/ResultsView.js index adab3ee7..9394208e 100644 --- a/web/app/static/js/modules/corpus_analysis/view/ResultsView.js +++ b/web/app/static/js/modules/corpus_analysis/view/ResultsView.js @@ -64,7 +64,6 @@ class ResultsList extends List { * hase been issued by the user. */ resetFields() { - this.addToSubResultsStatus = {}; this.subResultsIndexes = {}; } @@ -216,17 +215,21 @@ class ResultsList extends List { btn.textContent = "add"; } /** - * Either adds or removes a match to the sub-results. For this it checks - * onclick if the current button has been checked or not. For this the - * function checks if its status in addToSubResultsStatus is either flase or - * true. Adds match to sub-results if status is false if status is true it - * removes it. + * This function is invoked when the users adds or removes a match using the + * add-btn (+ button/or green checkmark) to/from sub-results. When the button + * is clicked the function checks if the current dataIndex ID is already + * saved in subResultsIndexes or not. If it is not the dataIndex will be used + * as a key in subResultsIndexes with the value true. If it is already added + * the entry with the key dataIndex will be deleted from subResultsIndexes. + * Visual feedback (green checkmark if a match has been added etc.) is also + * handled on the basis of the information stored in subResultsIndexes. */ addToSubResults(dataIndex, client, tableCall=true) { let toShowArray; dataIndex = parseInt(dataIndex); if (!this.subResultsIndexes[dataIndex] - || this.subResultsIndexes === undefined) { + || this.subResultsIndexes[dataIndex] === undefined) { + // add button is activated because status is false or undefined this.helperActivateAddBtn(event.target); this.subResultsIndexes[dataIndex] = true; toShowArray = Object.keys(this.subResultsIndexes).map(index => parseInt(index)); @@ -273,7 +276,7 @@ class ResultsList extends List { this.getHTMLElements(['#query-results-table']); let container = this.queryResultsTable.querySelector(`[data-index="${dataIndex}"]`); let tableAddBtn = container.querySelector('.add-btn'); // gets the add button from the list view - if (this.addToSubResultsStatus[dataIndex]) { + if (this.subResultsIndexes[dataIndex]) { this.helperActivateAddBtn(tableAddBtn); } else { this.helperDeactivateAddBtn(tableAddBtn); diff --git a/web/app/static/js/modules/corpus_analysis/view/listeners.js b/web/app/static/js/modules/corpus_analysis/view/listeners.js index 812c125b..a7c6c675 100644 --- a/web/app/static/js/modules/corpus_analysis/view/listeners.js +++ b/web/app/static/js/modules/corpus_analysis/view/listeners.js @@ -281,9 +281,9 @@ function exportFullContextSwitch(resultsList) { function createFullResults(resultsList, results) { resultsList.fullResultsCreate.onclick = (event) => { resultsList.fullResultsCreate.querySelector('i').classList.toggle('hide'); - resultsList.fullResultsCreate.innerText = 'Creating...'; + resultsList.fullResultsCreate.textContent = 'Creating...'; resultsList.fullResultsCreate.insertAdjacentHTML('afterbegin', - loadingSpinnerHTML); + loadingSpinnerHTML); // .keys() is for a zero based array. I think... let dataIndexes = [...Array(results.data.match_count).keys()]; // Empty fullResultsData so that no previous data is used. @@ -302,7 +302,7 @@ function createSubResults(resultsList, results) { dataIndexes.push(id); }); resultsList.subResultsCreate.querySelector('i').classList.toggle('hide'); - resultsList.subResultsCreate.innerText = 'Creating...'; + resultsList.subResultsCreate.textContent = 'Creating...'; resultsList.subResultsCreate.insertAdjacentHTML('afterbegin', loadingSpinnerHTML); // Empty subResultsData so that no previous data is used. diff --git a/web/app/templates/corpora/corpus.html.j2 b/web/app/templates/corpora/corpus.html.j2 index 8f3b7fc5..bf082304 100644 --- a/web/app/templates/corpora/corpus.html.j2 +++ b/web/app/templates/corpora/corpus.html.j2 @@ -4,7 +4,9 @@

{{ corpus.title }}

{{ corpus.description }}

-
+ +
@@ -17,7 +19,6 @@
-
@@ -109,98 +111,127 @@
{% endblock %} diff --git a/web/app/templates/corpora/import_corpus.html.j2 b/web/app/templates/corpora/import_corpus.html.j2 new file mode 100644 index 00000000..dd64265e --- /dev/null +++ b/web/app/templates/corpora/import_corpus.html.j2 @@ -0,0 +1,46 @@ +{% extends "nopaque.html.j2" %} + +{% block page_content %} +
+

Fill out the following form to import a corpus.

+ arrow_backBack to dashboard +
+ +
+
+
+
+ {{ import_corpus_form.hidden_tag() }} +
+
+ {{ M.render_field(import_corpus_form.title, data_length='32', material_icon='title') }} +
+
+ {{ M.render_field(import_corpus_form.description, data_length='255', material_icon='description') }} +
+
+
+
+ {{ M.render_field(import_corpus_form.file, accept='.zip', placeholder='Choose your exported .zip file') }} +
+
+
+
+ {{ M.render_field(import_corpus_form.submit, material_icon='send') }} +
+ +
+
+ + +{% endblock %} diff --git a/web/app/templates/corpora/interactions/infos.html.j2 b/web/app/templates/corpora/interactions/infos.html.j2 index 7a9eafe8..83959864 100644 --- a/web/app/templates/corpora/interactions/infos.html.j2 +++ b/web/app/templates/corpora/interactions/infos.html.j2 @@ -6,20 +6,18 @@ result.-->
Infos
-
- +
diff --git a/web/app/templates/jobs/job.html.j2 b/web/app/templates/jobs/job.html.j2 index 70cf4736..5bb77297 100644 --- a/web/app/templates/jobs/job.html.j2 +++ b/web/app/templates/jobs/job.html.j2 @@ -32,7 +32,8 @@
-
+
diff --git a/web/app/templates/main/dashboard.html.j2 b/web/app/templates/main/dashboard.html.j2 index 194bdecb..13bd579d 100644 --- a/web/app/templates/main/dashboard.html.j2 +++ b/web/app/templates/main/dashboard.html.j2 @@ -37,6 +37,7 @@
    diff --git a/web/app/templates/services/corpus_analysis.html.j2 b/web/app/templates/services/corpus_analysis.html.j2 index e50cab77..f7eee8d5 100644 --- a/web/app/templates/services/corpus_analysis.html.j2 +++ b/web/app/templates/services/corpus_analysis.html.j2 @@ -45,7 +45,8 @@
      diff --git a/web/migrations/versions/befe5326787e_.py b/web/migrations/versions/befe5326787e_.py new file mode 100644 index 00000000..11839d5c --- /dev/null +++ b/web/migrations/versions/befe5326787e_.py @@ -0,0 +1,30 @@ +"""empty message + +Revision ID: befe5326787e +Revises: ecaf75fece7b +Create Date: 2020-10-16 13:32:09.620960 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'befe5326787e' +down_revision = 'ecaf75fece7b' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('corpora', sa.Column('archive_file', sa.String(length=255), nullable=True)) + op.drop_column('corpora', 'archive_dir') + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('corpora', sa.Column('archive_dir', sa.VARCHAR(length=255), autoincrement=False, nullable=True)) + op.drop_column('corpora', 'archive_file') + # ### end Alembic commands ### diff --git a/web/migrations/versions/ecaf75fece7b_.py b/web/migrations/versions/ecaf75fece7b_.py new file mode 100644 index 00000000..5e258a2c --- /dev/null +++ b/web/migrations/versions/ecaf75fece7b_.py @@ -0,0 +1,28 @@ +"""empty message + +Revision ID: ecaf75fece7b +Revises: c3827cddea6e +Create Date: 2020-10-16 13:31:30.681269 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'ecaf75fece7b' +down_revision = 'c3827cddea6e' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('corpora', sa.Column('archive_dir', sa.String(length=255), nullable=True)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('corpora', 'archive_dir') + # ### end Alembic commands ###