Compare commits

...

3 Commits

Author SHA1 Message Date
Patrick Jentsch
ab132746e7 Add TODO in migration scripts 2023-11-17 10:42:55 +01:00
Inga Kirschnick
ae5646512d Merge branch 'query-builder' of gitlab.ub.uni-bielefeld.de:sfb1288inf/nopaque into query-builder 2023-11-17 10:15:50 +01:00
Inga Kirschnick
fc66327920 Make double quotation marks escapable again 2023-11-17 10:15:39 +01:00
4 changed files with 11 additions and 26 deletions

View File

@ -99,18 +99,14 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
// Ensures that metadata is always at the end of the query and if an index is given, inserts the query chip at the given index and if there is a closing tag, inserts the query chip before the closing tag.
this.removePlaceholder();
let lastChild = this.elements.queryInputField.lastChild;
let isLastChildTextAnnotation = lastChild && lastChild.dataset.type === 'text-annotation';
if (!index) {
let closingTagElement = this.elements.queryInputField.querySelector('[data-closing-tag="true"]');
if (closingTagElement) {
index = Array.from(this.elements.queryInputField.children).indexOf(closingTagElement);
}
}
if (dataType !== 'text-annotation' && index) {
if (index) {
this.elements.queryInputField.insertBefore(queryChipElement, this.elements.queryChipElements[index]);
} else if (dataType !== 'text-annotation' && isLastChildTextAnnotation) {
this.elements.queryInputField.insertBefore(queryChipElement, lastChild);
} else {
this.elements.queryInputField.appendChild(queryChipElement);
}
@ -149,9 +145,6 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
case 'start-entity':
this.extensions.structuralAttributeBuilderFunctions.editStartEntityChipElement(queryChipElement);
break;
case 'text-annotation':
this.extensions.structuralAttributeBuilderFunctions.editTextAnnotationChipElement(queryChipElement);
break;
case 'token':
let queryElementsContent = this.extensions.tokenAttributeBuilderFunctions.prepareTokenQueryElementsContent(queryChipElement);
this.extensions.tokenAttributeBuilderFunctions.editTokenChipElement(queryElementsContent);
@ -329,7 +322,7 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
if (dropdownId === 'corpus-analysis-concordance-token-incidence-modifiers-dropdown') {
button.addEventListener('click', () => this.tokenIncidenceModifierHandler(button.dataset.token, button.innerHTML));
} else if (dropdownId === 'corpus-analysis-concordance-character-incidence-modifiers-dropdown') {
button.addEventListener('click', () => this.characterIncidenceModifierHandler(button));
button.addEventListener('click', () => this.extensions.tokenAttributeBuilderFunctions.characterIncidenceModifierHandler(button));
}
});
}
@ -360,7 +353,7 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
let expertModeInputFieldValue = document.querySelector('#corpus-analysis-concordance-form-query').value;
let chipElements = this.parseTextToChip(expertModeInputFieldValue);
let closingTagElements = ['end-sentence', 'end-entity'];
let editableElements = ['start-entity', 'text-annotation', 'token'];
let editableElements = ['start-entity', 'token'];
for (let chipElement of chipElements) {
let isClosingTag = closingTagElements.includes(chipElement['type']);
let isEditable = editableElements.includes(chipElement['type']);
@ -393,11 +386,7 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
pretty: 'Entity End',
type: 'end-entity'
},
':: ?match\\.text_[A-Za-z]+="[^"]+"': {
pretty: '',
type: 'text-annotation'
},
'\\[(word|lemma|pos|simple_pos)=(("[^"]+")|(\\u0027[^\\u0027]+\\u0027)) ?(%c)? ?((\\&|\\|) ?(word|lemma|pos|simple_pos)=(("[^"]+")|(\\u0027[^\\u0027]+\\u0027)) ?(%c)? ?)*\\]': {
'\\[(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?((\\&|\\|) ?(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?)*\\]': {
pretty: '',
type: 'token'
},
@ -450,14 +439,8 @@ nopaque.corpus_analysis.query_builder.QueryBuilder = class QueryBuilder {
case ':: ?match\\.text_[A-Za-z]+="[^"]+"':
prettyText = stringElement.replace(/:: ?match\.text_|"|"/g, '');
break;
case '\\[(word|lemma|pos|simple_pos)=(("[^"]+")|(\\u0027[^\\u0027]+\\u0027)) ?(%c)? ?((\\&|\\|) ?(word|lemma|pos|simple_pos)=(("[^"]+")|(\\u0027[^\\u0027]+\\u0027)) ?(%c)? ?)*\\]':
let doubleQuotes = /(word|lemma|pos|simple_pos)="[^"]+"/gi;
let singleQuotes = /(word|lemma|pos|simple_pos)='[^']+'/gi;
if (doubleQuotes.exec(stringElement)) {
prettyText = stringElement.replace(/^\[|\]$|"/g, '');
} else if (singleQuotes.exec(stringElement)) {
prettyText = stringElement.replace(/^\[|\]$|'/g, '');
}
case '\\[(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?((\\&|\\|) ?(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?)*\\]':
prettyText = stringElement.replace(/^\[|\]$|(?<!\\)"/g, '');
prettyText = prettyText.replace(/\&/g, ' and ').replace(/\|/g, ' or ');
break;
case '(?<!\\[) ?\\{[0-9]+} ?(?![^\\]]\\])':

View File

@ -285,8 +285,8 @@ nopaque.corpus_analysis.query_builder.TokenAttributeBuilderFunctions = class Tok
}
prepareTokenQueryElementsContent(queryChipElement) {
//this regex searches for word or lemma or pos or simple_pos="any string within single or double quotes" followed by one or no ignore case markers, followed by one or no condition characters.
let regex = new RegExp('(word|lemma|pos|simple_pos)=(("[^"]+")|(\\\\u0027[^\\\\u0027]+\\\\u0027)) ?(%c)? ?(\\&|\\|)?', 'gm');
//this regex searches for word or lemma or pos or simple_pos="any string (also quotation marks escaped by backslash) within double quotes" followed by one or no ignore case markers, followed by one or no condition characters.
let regex = new RegExp('(word|lemma|pos|simple_pos)=("(?:[^"\\\\]|\\\\")*") ?(%c)? ?(\\&|\\|)?', 'gm');
let m;
let queryElementsContent = [];
while ((m = regex.exec(queryChipElement.dataset.query)) !== null) {
@ -299,7 +299,7 @@ nopaque.corpus_analysis.query_builder.TokenAttributeBuilderFunctions = class Tok
if (tokenAttr === 'pos') {
tokenAttr = 'english-pos';
}
let tokenValue = m[2].replace(/"|'/g, '');
let tokenValue = m[2].replace(/(?<!\\)"/g, '');
let ignoreCase = false;
let condition = undefined;
m.forEach((match) => {

View File

@ -20,6 +20,7 @@ depends_on = None
def upgrade():
# TODO: Add error handling for sqlalchemy.exc.ProgrammingError
for user in User.query.all():
spacy_nlp_pipeline_models_dir = os.path.join(user.path, 'spacy_nlp_pipeline_models')
if os.path.exists(spacy_nlp_pipeline_models_dir):

View File

@ -17,6 +17,7 @@ depends_on = None
def upgrade():
# TODO: Add error handling for sqlalchemy.exc.ProgrammingError
for user in User.query.all():
old_tesseract_ocr_pipeline_model_path = os.path.join(user.path, 'tesseract_ocr_models')
new_tesseract_ocr_pipeline_model_path = os.path.join(user.path, 'tesseract_ocr_pipeline_models')