Add a manual page

This commit is contained in:
Patrick Jentsch 2022-05-17 15:31:05 +02:00
parent 6b8d4d87bb
commit 8136cedccb
19 changed files with 580 additions and 2 deletions

View File

@ -33,6 +33,11 @@ def dashboard():
return render_template('main/dashboard.html.j2', title='Dashboard') return render_template('main/dashboard.html.j2', title='Dashboard')
@bp.route('/user_manual')
def user_manual():
    """Render the user manual page."""
    page = render_template('main/user_manual.html.j2', title='User manual')
    return page
@bp.route('/news') @bp.route('/news')
def news(): def news():
return render_template('main/news.html.j2', title='News') return render_template('main/news.html.j2', title='News')

Binary file not shown.

After

Width:  |  Height:  |  Size: 91 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 125 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 77 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 155 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 122 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1018 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 309 KiB

View File

@ -27,6 +27,7 @@
</div> </div>
<ul class="dropdown-content" id="nav-more-dropdown"> <ul class="dropdown-content" id="nav-more-dropdown">
<li><a href="{{ url_for('main.user_manual') }}"><i class="material-icons left">help</i>Manual</a></li>
{% if current_user.is_authenticated %} {% if current_user.is_authenticated %}
<li><a href="{{ url_for('settings.index') }}"><i class="material-icons left">settings</i>Settings</a></li> <li><a href="{{ url_for('settings.index') }}"><i class="material-icons left">settings</i>Settings</a></li>
<li class="divider" tabindex="-1"></li> <li class="divider" tabindex="-1"></li>

View File

@ -8,7 +8,7 @@
</li> </li>
<li><a href="{{ url_for('main.index') }}">nopaque</a></li> <li><a href="{{ url_for('main.index') }}">nopaque</a></li>
<li><a href="{{ url_for('main.news') }}"><i class="material-icons left">email</i>News</a></li> <li><a href="{{ url_for('main.news') }}"><i class="material-icons left">email</i>News</a></li>
<li><a href="#"><i class="material-icons">linear_scale</i>Workflow</a></li> <li><a href="{{ url_for('main.user_manual') }}"><i class="material-icons">help</i>Manual</a></li>
<li><a href="{{ url_for('main.dashboard') }}"><i class="material-icons">dashboard</i>Dashboard</a></li> <li><a href="{{ url_for('main.dashboard') }}"><i class="material-icons">dashboard</i>Dashboard</a></li>
<li><a href="{{ url_for('main.dashboard', _anchor='corpora') }}" style="padding-left: 47px;"><i class="nopaque-icons">I</i>My Corpora</a></li> <li><a href="{{ url_for('main.dashboard', _anchor='corpora') }}" style="padding-left: 47px;"><i class="nopaque-icons">I</i>My Corpora</a></li>
<li><a href="{{ url_for('main.dashboard', _anchor='jobs') }}" style="padding-left: 47px;"><i class="nopaque-icons">J</i>My Jobs</a></li> <li><a href="{{ url_for('main.dashboard', _anchor='jobs') }}" style="padding-left: 47px;"><i class="nopaque-icons">J</i>My Jobs</a></li>

View File

@ -6,9 +6,12 @@
<div class="row"> <div class="row">
<div class="input-field col s12 m9"> <div class="input-field col s12 m9">
<i class="material-icons prefix">search</i> <i class="material-icons prefix">search</i>
<input class="validate corpus-analysis-action" id="concordance-extension-form-query" name="query" type="text" required pattern=".*\S+.*"></input> <input class="validate corpus-analysis-action" id="concordance-extension-form-query" name="query" type="text" required pattern=".*\S+.*" placeholder="&lt;ent_type=&quot;PERSON&quot;&gt; []* &lt;/ent_type&gt; []* [simple_pos=&quot;VERB&quot;] :: match.text_publishing_year=&quot;1991&quot;;"></input>
<label for="concordance-extension-form-query">Query</label> <label for="concordance-extension-form-query">Query</label>
<span class="error-color-text helper-text hide" id="concordance-extension-error"></span> <span class="error-color-text helper-text hide" id="concordance-extension-error"></span>
<a class="modal-trigger" href="#cql-tutorial-modal" style="margin-left: 40px;"><i class="material-icons" style="font-size: inherit;">help</i> Corpus Query Language tutorial</a>
<span> | </span>
<a class="modal-trigger" href="#tagsets-modal"><i class="material-icons" style="font-size: inherit;">info</i> Tagsets</a>
</div> </div>
<div class="input-field col s12 m3"> <div class="input-field col s12 m3">
<i class="material-icons prefix">arrow_forward</i> <i class="material-icons prefix">arrow_forward</i>

View File

@ -53,6 +53,198 @@
<p class="error-color-text hide" id="corpus-analysis-app-init-error"></p> <p class="error-color-text hide" id="corpus-analysis-app-init-error"></p>
</div> </div>
</div> </div>
<div class="modal" id="cql-tutorial-modal">
<div class="modal-content">
{% with headline_num=4 %}
{% include "main/manual/_08_cqp_query_language.html.j2" %}
{% endwith %}
</div>
</div>
<div class="modal" id="tagsets-modal">
<div class="modal-content">
<h4>Tagsets</h4>
<ul class="tabs">
<li class="tab"><a class="active" href="#simple_pos-tagset">simple_pos</a></li>
<li class="tab"><a href="#english-ent_type-tagset">English ent_type</a></li>
<li class="tab"><a href="#english-pos-tagset">English pos</a></li>
<li class="tab"><a href="#german-ent_type-tagset">German ent_type</a></li>
<li class="tab"><a href="#german-pos-tagset">German pos</a></li>
</ul>
<div id="simple_pos-tagset">
<h5>simple_pos tagset</h5>
<ul>
<li>ADJ: adjective</li>
<li>ADP: adposition</li>
<li>ADV: adverb</li>
<li>AUX: auxiliary verb</li>
<li>CONJ: coordinating conjunction</li>
<li>DET: determiner</li>
<li>INTJ: interjection</li>
<li>NOUN: noun</li>
<li>NUM: numeral</li>
<li>PART: particle</li>
<li>PRON: pronoun</li>
<li>PROPN: proper noun</li>
<li>PUNCT: punctuation</li>
<li>SCONJ: subordinating conjunction</li>
<li>SYM: symbol</li>
<li>VERB: verb</li>
<li>X: other</li>
</ul>
</div>
<div id="english-ent_type-tagset">
<h5>English ent_type tagset</h5>
<ul>
<li>CARDINAL: Numerals that do not fall under another type</li>
<li>DATE: Absolute or relative dates or periods</li>
<li>EVENT: Named hurricanes, battles, wars, sports events, etc.</li>
<li>FAC: Buildings, airports, highways, bridges, etc.</li>
<li>GPE: Countries, cities, states</li>
<li>LANGUAGE: Any named language</li>
<li>LAW: Named documents made into laws.</li>
<li>LOC: Non-GPE locations, mountain ranges, bodies of water</li>
<li>MONEY: Monetary values, including unit</li>
<li>NORP: Nationalities or religious or political groups</li>
<li>ORDINAL: "first" "second" etc.</li>
<li>ORG: Companies, agencies, institutions, etc.</li>
<li>PERCENT: Percentage, including "%"</li>
<li>PERSON: People, including fictional</li>
<li>PRODUCT: Objects, vehicles, foods, etc. (not services)</li>
<li>QUANTITY: Measurements, as of weight or distance</li>
<li>TIME: Times smaller than a day</li>
<li>WORK_OF_ART: Titles of books, songs, etc.</li>
</ul>
</div>
<div id="english-pos-tagset">
<h5>English pos tagset</h5>
<ul>
<li>ADD: email</li>
<li>AFX: affix</li>
<li>CC: conjunction, coordinating</li>
<li>CD: cardinal number</li>
<li>DT: determiner</li>
<li>EX: existential there</li>
<li>FW: foreign word</li>
<li>HYPH: punctuation mark, hyphen</li>
<li>IN: conjunction, subordinating or preposition</li>
<li>JJ: adjective</li>
<li>JJR: adjective, comparative</li>
<li>JJS: adjective, superlative</li>
<li>LS: list item marker</li>
<li>MD: verb, modal auxiliary</li>
<li>NFP: superfluous punctuation</li>
<li>NN: noun, singular or mass</li>
<li>NNP: noun, proper singular</li>
<li>NNPS: noun, proper plural</li>
<li>NNS: noun, plural</li>
<li>PDT: predeterminer</li>
<li>POS: possessive ending</li>
<li>PRP: pronoun, personal</li>
<li>PRP$: pronoun, possessive</li>
<li>RB: adverb</li>
<li>RBR: adverb, comparative</li>
<li>RBS: adverb, superlative</li>
<li>RP: adverb, particle</li>
<li>SYM: symbol</li>
<li>TO: infinitival "to"</li>
<li>UH: interjection</li>
<li>VB: verb, base form</li>
<li>VBD: verb, past tense</li>
<li>VBG: verb, gerund or present participle</li>
<li>VBN: verb, past participle</li>
<li>VBP: verb, non-3rd person singular present</li>
<li>VBZ: verb, 3rd person singular present</li>
<li>WDT: wh-determiner</li>
<li>WP: wh-pronoun, personal</li>
<li>WP$: wh-pronoun, possessive</li>
<li>WRB: wh-adverb</li>
<li>XX: unknown</li>
<li>``: opening quotation mark</li>
<li>$: symbol, currency</li>
<li>'': closing quotation mark</li>
<li>,: punctuation mark, comma</li>
<li>-LRB-: left round bracket</li>
<li>-RRB-: right round bracket</li>
<li>.: punctuation mark, sentence closer</li>
<li>:: punctuation mark, colon or ellipsis</li>
</ul>
</div>
<div id="german-ent_type-tagset">
<h5>German ent_type tagset</h5>
<ul>
<li>LOC: Non-GPE locations, mountain ranges, bodies of water</li>
<li>MISC: Miscellaneous entities, e.g. events, nationalities, products or works of art</li>
<li>ORG: Companies, agencies, institutions, etc.</li>
<li>PER: Named person or family.</li>
</ul>
</div>
<div id="german-pos-tagset">
<h5>German pos tagset</h5>
<ul>
<li>ADJA: adjective, attributive</li>
<li>ADJD: adjective, adverbial or predicative</li>
<li>ADV: adverb</li>
<li>APPO: postposition</li>
<li>APPR: preposition; circumposition left</li>
<li>APPRART: preposition with article</li>
<li>APZR: circumposition right</li>
<li>ART: definite or indefinite article</li>
<li>CARD: cardinal number</li>
<li>FM: foreign language material</li>
<li>ITJ: interjection</li>
<li>KOKOM: comparative conjunction</li>
<li>KON: coordinate conjunction</li>
<li>KOUI: subordinate conjunction with "zu" and infinitive</li>
<li>KOUS: subordinate conjunction with sentence</li>
<li>NE: proper noun</li>
<li>NN: noun, singular or mass</li>
<li>NNE: proper noun</li>
<li>PDAT: attributive demonstrative pronoun</li>
<li>PDS: substituting demonstrative pronoun</li>
<li>PIAT: attributive indefinite pronoun without determiner</li>
<li>PIS: substituting indefinite pronoun</li>
<li>PPER: non-reflexive personal pronoun</li>
<li>PPOSAT: attributive possessive pronoun</li>
<li>PPOSS: substituting possessive pronoun</li>
<li>PRELAT: attributive relative pronoun</li>
<li>PRELS: substituting relative pronoun</li>
<li>PRF: reflexive personal pronoun</li>
<li>PROAV: pronominal adverb</li>
<li>PTKA: particle with adjective or adverb</li>
<li>PTKANT: answer particle</li>
<li>PTKNEG: negative particle</li>
<li>PTKVZ: separable verbal particle</li>
<li>PTKZU: "zu" before infinitive</li>
<li>PWAT: attributive interrogative pronoun</li>
<li>PWAV: adverbial interrogative or relative pronoun</li>
<li>PWS: substituting interrogative pronoun</li>
<li>TRUNC: word remnant</li>
<li>VAFIN: finite verb, auxiliary</li>
<li>VAIMP: imperative, auxiliary</li>
<li>VAINF: infinitive, auxiliary</li>
<li>VAPP: perfect participle, auxiliary</li>
<li>VMFIN: finite verb, modal</li>
<li>VMINF: infinitive, modal</li>
<li>VMPP: perfect participle, modal</li>
<li>VVFIN: finite verb, full</li>
<li>VVIMP: imperative, full</li>
<li>VVINF: infinitive, full</li>
<li>VVIZU: infinitive with "zu", full</li>
<li>VVPP: perfect participle, full</li>
<li>XY: non-word containing non-letter</li>
<li>$(: other sentence-internal punctuation mark</li>
<li>$,: comma</li>
<li>$.: sentence-final punctuation mark</li>
</ul>
</div>
</div>
</div>
{% endblock modals %} {% endblock modals %}
{% block scripts %} {% block scripts %}

View File

@ -0,0 +1,9 @@
<h2>Introduction</h2>
<p>
nopaque is a web-based digital working environment. It implements a
workflow based on the research process in the humanities and supports its
users in processing their data in order to subsequently apply digital
analysis methods to them. All processes are implemented in a specially
provided cloud environment with established open source software. This
always ensures that no personal data of the users is disclosed.
</p>

View File

@ -0,0 +1,18 @@
<h2>Registration and Log in</h2>
<div class="row">
<div class="col s12 m4">
<img alt="Registration and Log in" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/registration-and-log-in.png') }}">
</div>
<div class="col s12 m8">
<p>
Before you can start using the web platform, you need to create a user
account. This requires only a few details: just a user name, an e-mail
address and a password are needed. In order to register yourself, fill out
the form on the <a href="{{ url_for('auth.register') }}">registration page</a>. After successful registration, the
created account must be verified. To do this, follow the instructions
given in the automatically sent e-mail. Afterwards, you can log in as
usual with your username/email address and password in the log-in form
located next to the registration button.
</p>
</div>
</div>

View File

@ -0,0 +1,46 @@
<h2>Dashboard</h2>
<div class="row">
<div class="col s12 m4">
<img alt="Dashboard" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/dashboard.png') }}">
</div>
<div class="col s12 m8">
<p>
The <a href="{{ url_for('main.dashboard') }}">dashboard</a> provides a central overview of all resources assigned to the
user. These are <a href="{{ url_for('main.dashboard', _anchor='corpora') }}">corpora</a> and created <a href="{{ url_for('main.dashboard', _anchor='jobs') }}">jobs</a>. Corpora are freely composable
annotated text collections and jobs are the initiated file processing
procedures. Both the job and the corpus listings can be searched using
the search field displayed above them.
</p>
</div>
<div class="col s12">&nbsp;</div>
<div class="col s12 m6">
<div class="card">
<div class="card-content">
<span class="card-title"><i class="nopaque-icons">I</i> Corpus</span>
<p>
A corpus is a collection of texts that can be analyzed using the
Corpus Analysis service. All texts must be in the verticalized text
file format, which can be obtained via the Natural Language
Processing service. It contains, in addition to the actual text,
further annotations that are searchable in combination with optionally
added metadata during your analysis.
</p>
</div>
</div>
</div>
<div class="col s12 m6">
<div class="card">
<div class="card-content">
<span class="card-title"><i class="nopaque-icons">J</i> Job</span>
<p>
A job is a construct that represents the execution of a service.
It stores input files, output files, processing status, and options
selected during creation. After submitting a job, you get redirected
to a job overview page. This can be accessed again via the job list
on the dashboard. Jobs will be deleted three months after creation,
so we encourage you to download the results after a job is completed.
</p>
</div>
</div>
</div>
</div>

View File

@ -0,0 +1,52 @@
<h2>Services</h2>
<div class="row">
<div class="col s12 m4">
<img alt="Services" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/services.png') }}">
</div>
<div class="col s12 m8">
<p>
nopaque was designed from the ground up to be modular. This modularity
means that the offered workflow provides variable entry and exit points,
so that different starting points and goals can be flexibly addressed.
Each of these modules is implemented in a self-contained service, each of
which represents a step in the workflow. The services are coordinated in
such a way that they can be used consecutively. The order can either be
taken from the listing of the services in the left sidebar or from the
roadmap (accessible via the pink compass in the upper right corner). All
services are versioned, so the data generated with nopaque is always
reproducible.
</p>
</div>
</div>
<h3>File Setup</h3>
<p>
The <a href="{{ url_for('services.file_setup_pipeline') }}">File Setup Service</a> bundles image data, such as scans and photos,
together in a handy PDF file. To use this service, use the job form to
select the images to be bundled, choose the desired service version, and
specify a title and description. Please note that the service sorts the
images into the resulting PDF file based on the file names. So naming the
images correctly is of great importance. It has proven to be a good practice
to name the files according to the following scheme:
page-01.png, page-02.jpg, page-03.tiff, etc. In general, you can assume
that the images will be sorted in the order in which the file explorer of
your operating system lists them when you view the files in a folder
sorted in ascending order by file name.
</p>
<h3>Optical Character Recognition (OCR)</h3>
<p>Coming soon...</p>
<h3>Handwritten Text Recognition (HTR)</h3>
<p>Coming soon...</p>
<h3>Natural Language Processing (NLP)</h3>
<p>Coming soon...</p>
<h3>Corpus Analysis</h3>
<p>
With the corpus analysis service, it is possible to create a text corpus
and then explore it in an analysis session. The analysis session is realized
on the server side by the Open Corpus Workbench software, which enables
efficient and complex searches with the help of the CQP Query Language.
</p>

View File

@ -0,0 +1,47 @@
<h2>A closer look at the Corpus Analysis</h2>
<h3>Create a corpus</h3>
<div class="row">
<div class="col s12 m4">
<img alt="Create a Corpus" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/create-a-corpus.png') }}">
</div>
<div class="col s12 m8">
<p>
To <a href="{{ url_for('corpora.add_corpus') }}">create a corpus</a>, you
can use the "New Corpus" button, which can be found on both the Corpus
Analysis Service page and the Dashboard below the corpus list. Fill in the input
mask to create a corpus. After you have completed the input mask, you will
be automatically taken to the corpus overview page (which can be called up
again via the corpus lists) of your new and accordingly still empty corpus.
</p>
</div>
<div class="col s12">&nbsp;</div>
<div class="col s12 m4">
<img alt="Add a Corpus File" class="materialboxed responsive-img" src="{{ url_for('static', filename='images/manual/add-corpus-file.png') }}">
</div>
<div class="col s12 m8">
<p>
Now you can add texts in vrt format (results of the NLP service) to your new
corpus. To do this, use the "Add Corpus File" button and fill in the form
that appears. You will get the possibility to add metadata to each text.
After you have added all the desired texts to the corpus, the corpus must be
prepared for the analysis; this process can be initiated by clicking on the
"Build" button. On the corpus overview page you can always see information
about the current status of the corpus in the upper right corner. After the
build process the status should be "built".
</p>
</div>
</div>
<h3>Analyze a corpus</h3>
<p>
After you have created and built a corpus, it can be analyzed. To do this,
use the button labeled Analyze. The corpus analysis currently offers two
modules, the Reader and the Concordance module. The reader module can be
used to read your tokenized corpus in different ways. You can select a token
representation option, which determines the property of a token to be shown.
You can, for example, read your text completely lemmatized. You can also change
the way of how a token is displayed, by using the text style switch. The
concordance module offers some more options regarding the context size of
search results. If the context does not provide enough information you can
hop into the reader module by using the magnifying glass icon next to a match.
</p>

View File

@ -0,0 +1,161 @@
<h2>CQP Query Language</h2>
<p>Within the Corpus Query Language, a distinction is made between two types of annotations: positional attributes and structural attributes. Positional attributes refer to a token, e.g. the word "book" is assigned the part-of-speech tag "NN", the lemma "book" and the simplified part-of-speech tag "NOUN" within the token structure. Structural attributes refer to text structure-giving elements such as sentence and entity markup. For example, the markup of a sentence is represented in the background as follows:</p>
<pre>
<code>
<span class="green-text">&lt;s&gt; structural attribute</span>
<span class="blue-text">word pos lemma simple_pos positional attribute</span>
<span class="green-text">&lt;ent type="PERSON"&gt; structural attribute</span>
<span class="blue-text">word pos lemma simple_pos positional attribute</span>
<span class="blue-text">word pos lemma simple_pos positional attribute</span>
<span class="green-text">&lt;/ent&gt; structural attribute</span>
<span class="blue-text">word pos lemma simple_pos positional attribute</span>
<span class="green-text">&lt;/s&gt; structural attribute</span>
</code>
</pre>
<h3>Positional attributes</h3>
<p>Before you can start searching for positional attributes (also called tokens), it is necessary to know what properties they contain.</p>
<ol>
<li><span class="blue-text"><b>word</b></span>: The string as it is also found in the original text</li>
<li>
<span class="blue-text"><b>pos</b></span>: A code for the word type, also called POS tag
<ol>
<li><span class="red-text"><b>IMPORTANT</b></span>: POS tags are language-dependent to best reflect language-specific properties.</li>
<li>The codes (= tagsets) can be taken from the Corpus Analysis Concordance page.</li>
</ol>
</li>
<li><span class="blue-text"><b>lemma</b></span>: The lemmatized representation of the word</li>
<li>
<span class="blue-text"><b>simple_pos</b></span>: A simplified code for the word type that covers fewer categories than the <span class="blue-text"><b>pos</b></span> property, but is the same across languages.
<ol>
<li>The codes (= tagsets) can be taken from the Corpus Analysis Concordance page.</li>
</ol>
</li>
</ol>
<h4>Searching for positional attributes</h4>
<div>
<p>
<b>Token with no condition on any property (also called <span class="blue-text">wildcard token</span>)</b><br>
</p>
<pre><code>[]; Each token matches this pattern</code></pre>
</div>
<div>
<p>
<b>Token with a condition on its <span class="blue-text">word</span> property</b>
</p>
<pre><code>[word="begin"]; “begin”</code></pre>
<pre><code>[word="begin" %c]; same as above but ignores case</code></pre>
</div>
<div>
<p>
<b>Token with a condition on its <span class="blue-text">lemma</span> property</b>
</p>
<pre><code>[lemma="begin"]; “begin”, “began”, “beginning”, …</code></pre>
<pre><code>[lemma="begin" %c]; same as above but ignores case</code></pre>
</div>
<div>
<p>
<b>Token with a condition on its <span class="blue-text">simple_pos</span> property</b>
</p>
<pre><code>[simple_pos="VERB"]; “begin”, “began”, “beginning”, …</code></pre>
</div>
<div>
<p>
<b>Token with a condition on its <span class="blue-text">pos</span> property</b>
</p>
<pre><code>[pos="VBG"]; “beginning”, “going”, …</code></pre>
</div>
<div>
<p>
<b>Look for words with a variable character (also called <span class="blue-text">wildcard character</span>)</b><br>
</p>
<pre style="margin-bottom: 0;"><code>[word="beg.n"]; “begin”, “began”, “begun”</code></pre>
<pre style="margin-top: 0;" ><code> ^ the dot represents the wildcard character</code></pre>
</div>
<div>
<p><b>Token with two conditions on its properties, where both must be fulfilled (<span class="blue-text">AND</span> operation)</b></p>
<pre style="margin-bottom: 0;"><code>[lemma="be" & simple_pos="VERB"]; Lemma “be” and simple_pos is Verb</code></pre>
<pre style="margin-top: 0;" ><code> ^ the ampersand represents the and operation</code></pre>
</div>
<div>
<p><b>Token with two conditions on its properties, where at least one must be fulfilled (<span class="blue-text">OR</span> operation)</b></p>
<pre style="margin-bottom: 0;"><code>[simple_pos="VERB" | simple_pos="ADJ"]; simple_pos VERB or simple_pos ADJ (adjective)</code></pre>
<pre style="margin-top: 0;"><code> ^ the line represents the or operation</code></pre>
</div>
<div>
<p><b>Sequences</b></p>
<pre><code>[simple_pos="NOUN"] [simple_pos="VERB"]; NOUN -> VERB</code></pre>
<pre><code>[simple_pos="NOUN"] [] [simple_pos="VERB"]; NOUN -> wildcard token -> VERB</code></pre>
</div>
<div>
<p>
<b>Incidence modifiers</b><br>
Incidence modifiers are special characters or patterns that control how often the character/token preceding them should occur.
</p>
<ol>
<li><span class="blue-text"><b>+</b></span>: <span class="blue-text">One or more</span> occurrences of the character/token before</li>
<li><span class="blue-text"><b>*</b></span>: <span class="blue-text">Zero or more occurrences</span> of the character/token before</li>
<li><span class="blue-text"><b>?</b></span>: <span class="blue-text">Zero or one occurrences</span> of the character/token before</li>
<li><span class="blue-text"><b>{n}</b></span>: <span class="blue-text">Exactly n occurrences</span> of the character/token before</li>
<li><span class="blue-text"><b>{n,m}</b></span>: <span class="blue-text">Between n and m occurrences</span> of the character/token before</li>
</ol>
<pre><code>[word="beg.+"]; “begging”, “begin”, “began”, “begun”, …</code></pre>
<pre><code>[word="beg.*"]; “beg”, “begging”, “begin”, “begun”, …</code></pre>
<pre><code>[word="beg?"]; “be”, “beg”</code></pre>
<pre><code>[word="beg.{2}"]; “begin”, “begun”, …</code></pre>
<pre><code>[word="beg.{2,4}"]; “begging”, “begin”, “begun”, …</code></pre>
<pre><code>[word="beg{2}.*"]; “begged”, “beggar”, …</code></pre>
<pre><code>[simple_pos="NOUN"] []? [simple_pos="VERB"]; NOUN -> wildcard token (x0 or x1) -> VERB</code></pre>
<pre><code>[simple_pos="NOUN"] []* [simple_pos="VERB"]; NOUN -> wildcard token (x0 or more) -> VERB</code></pre>
</div>
<div>
<p>
<b>Option groups</b><br>
Find character sequences from a list of options.
</p>
<pre style="margin-bottom: 0;"><code>[word="be(g|gin|gan|gun)"]; “beg”, “begin”, “began”, “begun”</code></pre>
<pre style="margin-top: 0;" ><code> ^ ^ the parentheses indicate the start and end of an option group</code></pre>
</div>
<h3>Structural attributes</h3>
<p>nopaque provides several structural attributes for query. A distinction is made between attributes with and without value.</p>
<ol>
<li><span class="green-text"><b>s</b></span>: Annotates a sentence</li>
<li>
<span class="green-text"><b>ent</b></span>: Annotates an entity
<ol>
<li>
<span class="green-text"><b>*ent_type</b></span>: Annotates an entity and has as value a code that identifies the type of the entity.
<ol>
<li>The codes (= tagsets) can be taken from the Corpus Analysis Concordance page.</li>
</ol>
</li>
</ol>
</li>
<li>
<span class="green-text"><b>text</b></span>: Annotates a text
<ol>
<li>Note that all the following attributes have the data entered during the corpus creation as value.</li>
<li><span class="green-text"><b>*text_address</b></span></li>
<li><span class="green-text"><b>*text_author</b></span></li>
<li><span class="green-text"><b>*text_booktitle</b></span></li>
<li><span class="green-text"><b>*text_chapter</b></span></li>
<li><span class="green-text"><b>*text_editor</b></span></li>
<li><span class="green-text"><b>*text_institution</b></span></li>
<li><span class="green-text"><b>*text_journal</b></span></li>
<li><span class="green-text"><b>*text_pages</b></span></li>
<li><span class="green-text"><b>*text_publisher</b></span></li>
<li><span class="green-text"><b>*text_publishing_year</b></span></li>
<li><span class="green-text"><b>*text_school</b></span></li>
<li><span class="green-text"><b>*text_title</b></span></li>
</ol>
</li>
</ol>
<h4>Searching for structural attributes</h4>
<pre><code>&lt;ent&gt; [] &lt;/ent&gt;; A one token long entity of any type</code></pre>
<pre><code>&lt;ent_type="PERSON"&gt; [] &lt;/ent_type&gt;; A one token long entity of type PERSON</code></pre>
<pre><code>&lt;ent_type="PERSON"&gt; []* &lt;/ent_type&gt;; Entity of any length of type PERSON</code></pre>
<pre style="margin-bottom: 0;"><code>&lt;ent_type="PERSON"&gt; []* &lt;/ent_type&gt; []* [simple_pos="VERB"] :: match.text_publishing_year="1991";</code></pre>
<pre style="margin-top: 0;"><code>Arbitrarily long entity of type PERSON -> Arbitrarily many tokens -> VERB but only within texts with publication year 1991</code></pre>

View File

@ -0,0 +1,44 @@
{% extends "base.html.j2" %}
{% from "main/_breadcrumbs.html.j2" import breadcrumbs with context %}
{% block page_content %}
<div class="container">
<div class="row">
<div class="col s12">
<h1 id="title">{{ title }}</h1>
</div>
<div class="col s12 m10">
<div class="section scrollspy" id="introduction">
{% include "main/manual/_01_introduction.html.j2" %}
</div>
<div class="section scrollspy" id="registration-and-log-in">
{% include "main/manual/_02_registration_and_log_in.html.j2" %}
</div>
<div class="section scrollspy" id="dashboard">
{% include "main/manual/_03_dashboard.html.j2" %}
</div>
<div class="section scrollspy" id="services">
{% include "main/manual/_06_services.html.j2" %}
</div>
<div class="section scrollspy" id="a-closer-look-at-the-corpus-analysis">
{% include "main/manual/_07_a_closer_look_at_the_corpus_analysis.html.j2" %}
</div>
<div class="section scrollspy" id="cqp-query-language">
{% include "main/manual/_08_cqp_query_language.html.j2" %}
</div>
</div>
<div class="col m2 hide-on-small-only">
<ul class="section table-of-contents" style="position: fixed !important;">
<li><a href="#introduction">Introduction</a></li>
<li><a href="#registration-and-log-in">Registration and Log in</a></li>
<li><a href="#dashboard">Dashboard</a></li>
<li><a href="#services">Services</a></li>
<li><a href="#a-closer-look-at-the-corpus-analysis">A closer look at the Corpus Analysis</a></li>
<li><a href="#cqp-query-language">CQP Query Language</a></li>
</ul>
</div>
</div>
</div>
{% endblock page_content %}