Add result json import validation

This commit is contained in:
Stephan Porada 2020-07-09 15:41:25 +02:00
parent f86f3f4fd5
commit d4eb9e7663
4 changed files with 360 additions and 16 deletions

View File

@ -11,6 +11,7 @@ from flask_login import current_user, login_required
import json
import os
from .. import logger
from jsonschema import validate
@results.route('/import_results', methods=['GET', 'POST'])
@ -32,6 +33,7 @@ def import_results():
if not (result.creator == current_user
or current_user.is_administrator()):
abort(403)
# create paths to save the uploaded json file
dir = os.path.join(str(result.user_id),
'results',
'corpus_analysis_results',
@ -40,23 +42,43 @@ def import_results():
abs_file_path = os.path.join(abs_dir,
import_results_form.file.data.filename)
os.makedirs(abs_dir)
# save the json file
import_results_form.file.data.save(abs_file_path)
# Saves all needed metadata entries in one json field
with open(abs_file_path, 'r') as f:
corpus_metadata = json.load(f)
del corpus_metadata['matches']
del corpus_metadata['cpos_lookup']
result_file = ResultFile(
result_id=result.id,
dir=dir,
filename=import_results_form.file.data.filename)
result.corpus_metadata = corpus_metadata
# Create ResultFile db entry
result_file = ResultFile(result_id=result.id,
dir=dir,
filename=import_results_form.file.data.filename) # noqa
db.session.add(result_file)
db.session.commit()
flash('Result file added!', 'result')
return make_response(
{'redirect_url': url_for('results.results_overview')},
201)
# reads uploaded json file
with open(abs_file_path, 'r') as f:
corpus_metadata = json.load(f)
try:
# open json schema to validate against it
with open('app/static/json_schema/nopaque_cqi_py_results_schema.json', # noqa
'r') as s:
schema = json.load(s)
# validate if imported json is actually a json result file
validate(instance=corpus_metadata, schema=schema)
# if validated continue
# delete matches and cpos_lookup from read json file
del corpus_metadata['matches']
del corpus_metadata['cpos_lookup']
# save metadata directly as json into one field
result.corpus_metadata = corpus_metadata
flash('Result file added!', 'result')
db.session.commit()
return make_response(
{'redirect_url': url_for('results.results_overview')},
201)
except Exception as e:
# this runs if validation fails
flash('Uploaded file was not a valid result JSON!', 'result')
# deletes the previously created Result and ResultFile db entries
tasks.delete_result(result.id)
return make_response(
{'redirect_url': url_for('results.import_results')},
201)
return render_template('results/import_results.html.j2',
import_results_form=import_results_form,
title='Add corpus file')
@ -70,7 +92,6 @@ def results_overview():
'''
# get all results of current user
results = User.query.get(current_user.id).results
logger.warning(results)
def __p_time(time_str):
# helper to convert the datetime into a nice readable string

View File

@ -0,0 +1,322 @@
{
"$schema": "http://json-schema.org/draft-06/schema#",
"$ref": "#/definitions/NopaqueCQIPYResults",
"definitions": {
"NopaqueCQIPYResults": {
"type": "object",
"additionalProperties": false,
"properties": {
"matches": {
"type": "array",
"items": {
"$ref": "#/definitions/Match"
}
},
"cpos_lookup": {
"type": "object",
"additionalProperties": {
"$ref": "#/definitions/CposLookup"
}
},
"text_lookup": {
"type": "object",
"additionalProperties": {
"$ref": "#/definitions/CorpusAllText"
}
},
"match_count": {
"type": "integer"
},
"corpus_type": {
"type": "string"
},
"query": {
"type": "string"
},
"corpus_name": {
"type": "string"
},
"corpus_description": {
"type": "string"
},
"corpus_creation_date": {
"type": "string",
"format": "date-time"
},
"corpus_last_edited_date": {
"type": "string",
"format": "date-time"
},
"corpus_properties": {
"type": "array",
"items": {}
},
"corpus_size_tokens": {
"type": "integer"
},
"corpus_all_texts": {
"type": "object",
"additionalProperties": {
"$ref": "#/definitions/CorpusAllText"
}
},
"corpus_analysis_date": {
"type": "string",
"format": "date-time"
},
"corpus_cqi_py_protocol_version": {
"type": "string"
},
"corpus_cqi_py_package_version": {
"type": "string"
},
"corpus_cqpserver_version": {
"type": "string"
},
"cpos_ranges": {
"type": "boolean"
}
},
"required": [
"corpus_all_texts",
"corpus_analysis_date",
"corpus_cqi_py_package_version",
"corpus_cqi_py_protocol_version",
"corpus_cqpserver_version",
"corpus_creation_date",
"corpus_description",
"corpus_last_edited_date",
"corpus_name",
"corpus_properties",
"corpus_size_tokens",
"corpus_type",
"cpos_lookup",
"cpos_ranges",
"match_count",
"matches",
"query",
"text_lookup"
],
"title": "NopaqueCQIPYResults"
},
"CorpusAllText": {
"type": "object",
"additionalProperties": false,
"properties": {
"address": {
"type": "string"
},
"author": {
"type": "string"
},
"booktitle": {
"type": "string"
},
"chapter": {
"type": "string"
},
"editor": {
"type": "string"
},
"institution": {
"type": "string"
},
"journal": {
"type": "string"
},
"pages": {
"type": "string"
},
"publisher": {
"type": "string"
},
"publishing_year": {
"type": "string",
"format": "integer"
},
"school": {
"type": "string"
},
"title": {
"type": "string"
}
},
"required": [
"address",
"author",
"booktitle",
"chapter",
"editor",
"institution",
"journal",
"pages",
"publisher",
"publishing_year",
"school",
"title"
],
"title": "CorpusAllText"
},
"CposLookup": {
"type": "object",
"additionalProperties": false,
"properties": {
"word": {
"type": "string"
},
"lemma": {
"type": "string"
},
"simple_pos": {
"$ref": "#/definitions/SimplePos"
},
"pos": {
"type": "string"
},
"ner": {
"$ref": "#/definitions/Ner"
},
"text": {
"type": "integer"
},
"s": {
"type": "integer"
}
},
"required": [
"lemma",
"ner",
"pos",
"s",
"simple_pos",
"text",
"word"
],
"title": "CposLookup"
},
"Match": {
"type": "object",
"additionalProperties": false,
"properties": {
"lc": {
"type": "array",
"items": {
"type": "integer"
}
},
"c": {
"type": "array",
"items": {
"type": "integer"
}
},
"rc": {
"type": "array",
"items": {
"type": "integer"
}
}
},
"required": [
"c",
"lc",
"rc"
],
"title": "Match"
},
"Ner": {
"type": "string",
"enum": [
"NULL",
"LOC",
"MISC",
"ORG",
"PER",
"EVENT",
"GPE",
"LOC",
"ORG",
"PERSON",
"PRODUCT",
"CARDINAL",
"DATE",
"EVENT",
"FAC",
"GPE",
"LANGUAGE",
"LAW",
"LOC",
"MONEY",
"NORP",
"ORDINAL",
"ORG",
"PERCENT",
"PERSON",
"PRODUCT",
"QUANTITY",
"TIME",
"WORK_OF_ART",
"LOC",
"MISC",
"ORG",
"PER",
"LOC",
"MISC",
"ORG",
"PER",
"LOC",
"MISC",
"ORG",
"PER",
"CARDINAL",
"DATE",
"EVENT",
"FAC",
"GPE",
"LANGUAGE",
"LAW",
"LOC",
"MONEY",
"NORP",
"ORDINAL",
"ORG",
"PERCENT",
"PERSON",
"PRODUCT",
"QUANTITY",
"TIME",
"WORK_OF_ART",
"LOC",
"MISC",
"ORG",
"PER"
],
"title": "Ner"
},
"SimplePos": {
"type": "string",
"enum": [
"ADJ",
"ADP",
"ADV",
"AUX",
"CONJ",
"CCONJ",
"DET",
"INTJ",
"NOUN",
"NUM",
"PART",
"PRON",
"PROPN",
"PUNCT",
"SCONJ",
"SYM",
"VERB",
"X",
"SPACE"
],
"title": "SimplePos"
}
}
}

View File

@ -33,7 +33,7 @@
<tr class="show-if-only-child">
<td colspan="5">
<span class="card-title"><i class="material-icons left">folder</i>Nothing here...</span>
<p>No results yet improted.</p>
<p>No results yet imported.</p>
</td>
</tr>
</tbody>

View File

@ -12,3 +12,4 @@ Flask-WTF
jsonpatch
psycopg2
redis
jsonschema