First version of the parser from query text to query chips

This commit is contained in:
Inga Kirschnick 2023-09-18 17:05:01 +02:00
parent 45369d4c84
commit 86947e2cf8
7 changed files with 149 additions and 13 deletions

View File

@ -1,9 +1,6 @@
#corpus-analysis-concordance-query-builder-input-field-container {
border-bottom: #9E9E9E 1px solid;
height: 60px;
}
#corpus-analysis-concordance-query-builder-input-field {
border-bottom: #9E9E9E 1px solid;
min-height: 38px;
margin-top: 23px;
}

View File

@ -23,7 +23,7 @@ class ConcordanceQueryBuilder {
if (modalId === 'corpus-analysis-concordance-exactly-n-token-modal' || modalId === 'corpus-analysis-concordance-between-nm-token-modal') {
button.addEventListener('click', () => this.generalFunctions.tokenNMSubmitHandler(modalId));
} else if (modalId === 'corpus-analysis-concordance-exactly-n-character-modal' || modalId === 'corpus-analysis-concordance-between-nm-character-modal') {
button.addEventListener('click', () => this.generalFunctions.characterNMSubmitHandler(modalId));
button.addEventListener('click', () => this.tokenAttributeBuilderFunctions.characterNMSubmitHandler(modalId));
}
});

View File

@ -9,6 +9,13 @@ class GeneralFunctionsQueryBuilder {
});
}
resetQueryInputField() {
this.elements.queryInputField.innerHTML = '';
this.addPlaceholder();
this.updateChipList();
this.queryPreviewBuilder();
}
updateChipList() {
this.elements.queryChipElements = this.elements.queryInputField.querySelectorAll('.query-component');
}
@ -210,7 +217,7 @@ class GeneralFunctionsQueryBuilder {
let input_n = modal.querySelector('.n-m-input[data-value-type="n"]').value;
let input_m = modal.querySelector('.n-m-input[data-value-type="m"]') || undefined;
input_m = input_m !== undefined ? input_m.value : '';
let input = `{${input_n},${input_m}}`;
let input = `{${input_n}${input_m !== '' ? ',' : ''}${input_m}}`;
let pretty_input = `between ${input_n} and ${input_m} (${input})`;
if (input_m === '') {
pretty_input = `exactly ${input_n} (${input})`;
@ -220,7 +227,137 @@ class GeneralFunctionsQueryBuilder {
instance.close();
this.tokenIncidenceModifierHandler(input, pretty_input);
}
switchToExpertModeParser() {
let expertModeInputField = document.querySelector('#corpus-analysis-concordance-form-query');
expertModeInputField.value = '';
let queryBuilderInputFieldValue = Utils.unescape(document.querySelector('#corpus-analysis-concordance-query-preview').innerHTML.trim());
if (queryBuilderInputFieldValue !== "") {
expertModeInputField.value = queryBuilderInputFieldValue;
}
}
switchToQueryBuilderParser() {
this.resetQueryInputField();
let expertModeInputFieldValue = document.querySelector('#corpus-analysis-concordance-form-query').value;
let chipElements = this.parseTextToChip(expertModeInputFieldValue);
for (let chipElement of chipElements) {
this.queryChipFactory(chipElement['type'], chipElement['pretty'], chipElement['query']);
}
}
parseTextToChip(query) {
const parsingElementDict = {
'<s>': {
pretty: 'Sentence Start',
type: 'start-sentence'
},
'<\/s>': {
pretty: 'Sentence End',
type: 'end-sentence'
},
'<ent>': {
pretty: 'Entity Start',
type: 'start-empty-entity'
},
'<ent_type="([A-Z]+)">': {
pretty: '',
type: 'start-entity'
},
'<\\\/ent(_type)?>': {
pretty: 'Entity End',
type: 'end-entity'
},
':: ?match\\.text_[A-Za-z]+="[^"]+"': {
pretty: '',
type: 'text-annotation'
},
'\\[(word|lemma|pos|simple_pos)=(("[^"]+")|(\\u0027[^\\u0027]+\\u0027)) ?(%c)? ?((\\&|\\|) ?(word|lemma|pos|simple_pos)=(("[^"]+")|(\\u0027[^\\u0027]+\\u0027)) ?(%c)? ?)*\\]': {
pretty: '',
type: 'token'
},
'\\[\\]': {
pretty: 'Empty Token',
type: 'token'
},
'(?<!\\[) ?\\+ ?(?![^\\]]\\])': {
pretty: ' one or more (+)',
type: 'token-incidence-modifier'
},
'(?<!\\[) ?\\* ?(?![^\\]]\\])': {
pretty: 'zero or more (*)',
type: 'token-incidence-modifier'
},
'(?<!\\[) ?\\? ?(?![^\\]]\\])': {
pretty: 'zero or one (?)',
type: 'token-incidence-modifier'
},
'(?<!\\[) ?\\{[0-9]+} ?(?![^\\]]\\])': {
pretty: '',
type: 'token-incidence-modifier'
},
'(?<!\\[) ?\\{[0-9]+(,[0-9]+)?} ?(?![^\\]]\\])': {
pretty: '',
type: 'token-incidence-modifier'
}
}
let chipElements = [];
const regex = new RegExp(`<s>|<\/s>|<ent>|<ent_type="([A-Z]+)">|<\\\/ent(_type)?>|\\[(word|lemma|pos|simple_pos)=(("[^"]+")|(\\u0027[^\\u0027]+\\u0027)) ?(%c)? ?((\\&|\\|) ?(word|lemma|pos|simple_pos)=(("[^"]+")|(\\u0027[^\\u0027]+\\u0027)) ?(%c)? ?)*\\]|:: ?match\\.text_[A-Za-z]+="[^"]+"|(?<!\\[) ?(\\+|\\?|\\*|{[0-9]+(,[0-9]+)?}) ?(?![^\\]]\\])`, 'gi');
let match;
while ((match = regex.exec(query)) !== null) {
// This is necessary to avoid infinite loops with zero-width matches
if (match.index === regex.lastIndex) {
regex.lastIndex++;
}
let stringElement = match[0];
for (let [pattern, chipElement] of Object.entries(parsingElementDict)) {
const parsingRegex = new RegExp(pattern, 'gi');
if (parsingRegex.exec(stringElement)) {
let prettyText;
switch (pattern) {
case '<ent_type="([A-Z]+)">':
prettyText = `Entity Type=${stringElement.replace(/<ent_type="|">/g, '')}`;
break;
case ':: ?match\\.text_[A-Za-z]+="[^"]+"':
prettyText = stringElement.replace(/:: ?match\.text_|"|"/g, '');
break;
case '\\[(word|lemma|pos|simple_pos)=(("[^"]+")|(\\u0027[^\\u0027]+\\u0027)) ?(%c)? ?((\\&|\\|) ?(word|lemma|pos|simple_pos)=(("[^"]+")|(\\u0027[^\\u0027]+\\u0027)) ?(%c)? ?)*\\]':
let doubleQuotes = /(word|lemma|pos|simple_pos)="[^"]+"/gi;
let singleQuotes = /(word|lemma|pos|simple_pos)='[^']+'/gi;
if (doubleQuotes.exec(stringElement)) {
prettyText = stringElement.replace(/^\[|\]$|"/g, '');
} else if (singleQuotes.exec(stringElement)) {
prettyText = stringElement.replace(/^\[|\]$|'/g, '');
}
prettyText = prettyText.replace(/\&/g, ' and ').replace(/\|/g, ' or ');
break;
case '(?<!\\[) ?\\{[0-9]+} ?(?![^\\]]\\])':
prettyText = `exactly ${stringElement.replace(/{|}/g, '')} (${stringElement})`;
break;
case '(?<!\\[) ?\\{[0-9]+(,[0-9]+)?} ?(?![^\\]]\\])':
prettyText = `between${stringElement.replace(/{|}/g, ' ').replace(',', ' and ')}(${stringElement})`;
break;
default:
prettyText = chipElement.pretty;
break;
}
chipElements.push({
type: chipElement.type,
pretty: prettyText,
query: stringElement
});
break;
}
}
}
return chipElements;
}
}

View File

@ -141,7 +141,7 @@ class TokenAttributeBuilderFunctionsQueryBuilder extends GeneralFunctionsQueryBu
this.disableTokenSubmit();
} else {
tokenQueryPrettyText += `simple_pos=${this.elements.simplePosSelection.value}`;
tokenQueryCQLText += `simple_pos='${this.elements.simplePosSelection.value}'`;
tokenQueryCQLText += `simple_pos="${this.elements.simplePosSelection.value}"`;
this.elements.simplePosSelection.value = '';
}
break;

View File

@ -129,6 +129,7 @@
{% macro scripts() %}
<script>
const corpusAnalysisConcordance = new CorpusAnalysisConcordance(corpusAnalysisApp);
const concordanceQueryBuilder = new ConcordanceQueryBuilder();
let queryBuilderDisplay = document.getElementById("corpus-analysis-concordance-query-builder-display");
let expertModeDisplay = document.getElementById("corpus-analysis-concordance-expert-mode-display");
@ -138,11 +139,12 @@
if (this.checked) {
queryBuilderDisplay.classList.add("hide");
expertModeDisplay.classList.remove("hide");
concordanceQueryBuilder.generalFunctions.switchToExpertModeParser();
} else {
queryBuilderDisplay.classList.remove("hide");
expertModeDisplay.classList.add("hide");
concordanceQueryBuilder.generalFunctions.switchToQueryBuilderParser();
}
});
</script>
{{ query_builder.scripts(id_prefix) }}
{% endmacro %}

View File

@ -3,7 +3,7 @@
<form id="corpus-analysis-concordance-expert-mode-form">
<div class="input-field col s12 m9">
<i class="material-icons prefix">search</i>
<input class="validate corpus-analysis-action" id="corpus-analysis-concordance-form-query" name="query" type="text" required pattern=".*\S+.*" placeholder="Type in your query or use the Query Builder on the right"></input>
<input class="validate corpus-analysis-action" id="corpus-analysis-concordance-form-query" name="query" type="text" required pattern=".*\S+.*" placeholder="Type in your query via CQL"></input>
<span class="error-color-text helper-text hide" id="corpus-analysis-concordance-error"></span>
<a class="modal-trigger" data-manual-modal-chapter="manual-modal-cqp-query-language" href="#manual-modal" style="margin-left: 40px;"><i class="material-icons" style="font-size: inherit;">help</i> Corpus Query Language tutorial</a>
<span> | </span>

View File

@ -434,8 +434,8 @@
</div>
{% endmacro %}
{% macro scripts(id_prefix) %}
{# {% macro scripts(id_prefix) %}
<script>
const concordanceQueryBuilder = new ConcordanceQueryBuilder();
</script>
{% endmacro %}
{% endmacro %} #}