Compare commits

...

3 Commits

Author SHA1 Message Date
Patrick Jentsch
ab132746e7 Add TODO in migration scripts 2023-11-17 10:42:55 +01:00
Inga Kirschnick
ae5646512d Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-11-17 10:15:50 +01:00
Inga Kirschnick
fc66327920 Make double quotation marks escapable again 2023-11-17 10:15:39 +01:00
4 changed files with 11 additions and 26 deletions

View File

@ -99,18 +99,14 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
// Ensures that metadata is always at the end of the query and if an index is given, inserts the query chip at the given index and if there is a closing tag, inserts the query chip before the closing tag. // Ensures that metadata is always at the end of the query and if an index is given, inserts the query chip at the given index and if there is a closing tag, inserts the query chip before the closing tag.
this.removePlaceholder(); this.removePlaceholder();
let lastChild = this.elements.queryInputField.lastChild;
let isLastChildTextAnnotation = lastChild && lastChild.dataset.type === 'text-annotation';
if (!index) { if (!index) {
let closingTagElement = this.elements.queryInputField.querySelector('[data-closing-tag="true"]'); let closingTagElement = this.elements.queryInputField.querySelector('[data-closing-tag="true"]');
if (closingTagElement) { if (closingTagElement) {
index = Array.from(this.elements.queryInputField.children).indexOf(closingTagElement); index = Array.from(this.elements.queryInputField.children).indexOf(closingTagElement);
} }
} }
if (dataType !== 'text-annotation' && index) { if (index) {
this.elements.queryInputField.insertBefore(queryChipElement, this.elements.queryChipElements[index]); this.elements.queryInputField.insertBefore(queryChipElement, this.elements.queryChipElements[index]);
} else if (dataType !== 'text-annotation' && isLastChildTextAnnotation) {
this.elements.queryInputField.insertBefore(queryChipElement, lastChild);
} else { } else {
this.elements.queryInputField.appendChild(queryChipElement); this.elements.queryInputField.appendChild(queryChipElement);
} }
@ -149,9 +145,6 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
case 'start-entity': case 'start-entity':
this.extensions.structuralAttributeBuilderFunctions.editStartEntityChipElement(queryChipElement); this.extensions.structuralAttributeBuilderFunctions.editStartEntityChipElement(queryChipElement);
break; break;
case 'text-annotation':
this.extensions.structuralAttributeBuilderFunctions.editTextAnnotationChipElement(queryChipElement);
break;
case 'token': case 'token':
let queryElementsContent = this.extensions.tokenAttributeBuilderFunctions.prepareTokenQueryElementsContent(queryChipElement); let queryElementsContent = this.extensions.tokenAttributeBuilderFunctions.prepareTokenQueryElementsContent(queryChipElement);
this.extensions.tokenAttributeBuilderFunctions.editTokenChipElement(queryElementsContent); this.extensions.tokenAttributeBuilderFunctions.editTokenChipElement(queryElementsContent);
@ -329,7 +322,7 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
if (dropdownId === 'corpus-analysis-concordance-token-incidence-modifiers-dropdown') { if (dropdownId === 'corpus-analysis-concordance-token-incidence-modifiers-dropdown') {
button.addEventListener('click', () => this.tokenIncidenceModifierHandler(button.dataset.token, button.innerHTML)); button.addEventListener('click', () => this.tokenIncidenceModifierHandler(button.dataset.token, button.innerHTML));
} else if (dropdownId === 'corpus-analysis-concordance-character-incidence-modifiers-dropdown') { } else if (dropdownId === 'corpus-analysis-concordance-character-incidence-modifiers-dropdown') {
button.addEventListener('click', () => this.characterIncidenceModifierHandler(button)); button.addEventListener('click', () => this.extensions.tokenAttributeBuilderFunctions.characterIncidenceModifierHandler(button));
} }
}); });
} }
@ -360,7 +353,7 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
let expertModeInputFieldValue = document.querySelector('#corpus-analysis-concordance-form-query').value; let expertModeInputFieldValue = document.querySelector('#corpus-analysis-concordance-form-query').value;
let chipElements = this.parseTextToChip(expertModeInputFieldValue); let chipElements = this.parseTextToChip(expertModeInputFieldValue);
let closingTagElements = ['end-sentence', 'end-entity']; let closingTagElements = ['end-sentence', 'end-entity'];
let editableElements = ['start-entity', 'text-annotation', 'token']; let editableElements = ['start-entity', 'token'];
for (let chipElement of chipElements) { for (let chipElement of chipElements) {
let isClosingTag = closingTagElements.includes(chipElement['type']); let isClosingTag = closingTagElements.includes(chipElement['type']);
let isEditable = editableElements.includes(chipElement['type']); let isEditable = editableElements.includes(chipElement['type']);
@ -393,11 +386,7 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
pretty: 'Entity End', pretty: 'Entity End',
type: 'end-entity' type: 'end-entity'
}, },
':: ?match\\.text_[A-Za-z]+="[^"]+"': { '\\[(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?((\\&|\\|) ?(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?)*\\]': {
pretty: '',
type: 'text-annotation'
},
'\\[(word|lemma|pos|simple_pos)=(("[^"]+")|(\\u0027[^\\u0027]+\\u0027)) ?(%c)? ?((\\&|\\|) ?(word|lemma|pos|simple_pos)=(("[^"]+")|(\\u0027[^\\u0027]+\\u0027)) ?(%c)? ?)*\\]': {
pretty: '', pretty: '',
type: 'token' type: 'token'
}, },
@ -450,14 +439,8 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
case ':: ?match\\.text_[A-Za-z]+="[^"]+"': case ':: ?match\\.text_[A-Za-z]+="[^"]+"':
prettyText = stringElement.replace(/:: ?match\.text_|"|"/g, ''); prettyText = stringElement.replace(/:: ?match\.text_|"|"/g, '');
break; break;
case '\\[(word|lemma|pos|simple_pos)=(("[^"]+")|(\\u0027[^\\u0027]+\\u0027)) ?(%c)? ?((\\&|\\|) ?(word|lemma|pos|simple_pos)=(("[^"]+")|(\\u0027[^\\u0027]+\\u0027)) ?(%c)? ?)*\\]': case '\\[(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?((\\&|\\|) ?(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?)*\\]':
let doubleQuotes = /(word|lemma|pos|simple_pos)="[^"]+"/gi; prettyText = stringElement.replace(/^\[|\]$|(?<!\\)"/g, '');
let singleQuotes = /(word|lemma|pos|simple_pos)='[^']+'/gi;
if (doubleQuotes.exec(stringElement)) {
prettyText = stringElement.replace(/^\[|\]$|"/g, '');
} else if (singleQuotes.exec(stringElement)) {
prettyText = stringElement.replace(/^\[|\]$|'/g, '');
}
prettyText = prettyText.replace(/\&/g, ' and ').replace(/\|/g, ' or '); prettyText = prettyText.replace(/\&/g, ' and ').replace(/\|/g, ' or ');
break; break;
case '(?<!\\[) ?\\{[0-9]+} ?(?![^\\]]\\])': case '(?<!\\[) ?\\{[0-9]+} ?(?![^\\]]\\])':

View File

@ -285,8 +285,8 @@ nopaque.corpus_analysis.query_builder.TokenAttributeBuilderFunctions = class Tok
} }
prepareTokenQueryElementsContent(queryChipElement) { prepareTokenQueryElementsContent(queryChipElement) {
//this regex searches for word or lemma or pos or simple_pos="any string within single or double quotes" followed by one or no ignore case markers, followed by one or no condition characters. //this regex searches for word or lemma or pos or simple_pos="any string (also quotation marks escaped by backslash) within double quotes" followed by one or no ignore case markers, followed by one or no condition characters.
let regex = new RegExp('(word|lemma|pos|simple_pos)=(("[^"]+")|(\\\\u0027[^\\\\u0027]+\\\\u0027)) ?(%c)? ?(\\&|\\|)?', 'gm'); let regex = new RegExp('(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?(\\&|\\|)?', 'gm');
let m; let m;
let queryElementsContent = []; let queryElementsContent = [];
while ((m = regex.exec(queryChipElement.dataset.query)) !== null) { while ((m = regex.exec(queryChipElement.dataset.query)) !== null) {
@ -299,7 +299,7 @@ nopaque.corpus_analysis.query_builder.TokenAttributeBuilderFunctions = class Tok
if (tokenAttr === 'pos') { if (tokenAttr === 'pos') {
tokenAttr = 'english-pos'; tokenAttr = 'english-pos';
} }
let tokenValue = m[2].replace(/"|'/g, ''); let tokenValue = m[2].replace(/(?<!\\)"/g, '');
let ignoreCase = false; let ignoreCase = false;
let condition = undefined; let condition = undefined;
m.forEach((match) => { m.forEach((match) => {

View File

@ -20,6 +20,7 @@ depends_on = None
def upgrade(): def upgrade():
# TODO: Add error handling for sqlalchemy.exc.ProgrammingError
for user in User.query.all(): for user in User.query.all():
spacy_nlp_pipeline_models_dir = os.path.join(user.path, 'spacy_nlp_pipeline_models') spacy_nlp_pipeline_models_dir = os.path.join(user.path, 'spacy_nlp_pipeline_models')
if os.path.exists(spacy_nlp_pipeline_models_dir): if os.path.exists(spacy_nlp_pipeline_models_dir):

View File

@ -17,6 +17,7 @@ depends_on = None
def upgrade(): def upgrade():
# TODO: Add error handling for sqlalchemy.exc.ProgrammingError
for user in User.query.all(): for user in User.query.all():
old_tesseract_ocr_pipeline_model_path = os.path.join(user.path, 'tesseract_ocr_models') old_tesseract_ocr_pipeline_model_path = os.path.join(user.path, 'tesseract_ocr_models')
new_tesseract_ocr_pipeline_model_path = os.path.join(user.path, 'tesseract_ocr_pipeline_models') new_tesseract_ocr_pipeline_model_path = os.path.join(user.path, 'tesseract_ocr_pipeline_models')