Added some documentation.
This commit is contained in:
@ -20,6 +20,10 @@ class TimeChart(Chart):
|
||||
self.data_sets = None
|
||||
|
||||
def get_datasets(self, **kwargs):
|
||||
"""
|
||||
Takes n number of data sets as an input and creates one data-line per
|
||||
data set.
|
||||
"""
|
||||
if kwargs is not None:
|
||||
for key, value in kwargs.items():
|
||||
self.data_sets = value
|
||||
@ -42,8 +46,6 @@ class TimeChart(Chart):
|
||||
class BarChart(Chart):
|
||||
"""
|
||||
Class to configure the Ngram Viewer bar chart per speaker.
|
||||
The class function get_datasets() is used to get the data sets and creates
|
||||
one data set for each.
|
||||
"""
|
||||
chart_type = "horizontalBar"
|
||||
responsive = True
|
||||
@ -57,6 +59,9 @@ class BarChart(Chart):
|
||||
self.bar_names = []
|
||||
|
||||
def get_labels(self):
|
||||
"""
|
||||
Creates labels for the bar chart entries.
|
||||
"""
|
||||
try:
|
||||
tmp_list = self.lable_names
|
||||
self.lable_names = sum(tmp_list, [])[:self.speaker_range]
|
||||
@ -65,6 +70,10 @@ class BarChart(Chart):
|
||||
return self.lable_names
|
||||
|
||||
def create_data(self, **kwargs):
|
||||
"""
|
||||
Takes n number of data sets but only one is passed because the
|
||||
Ngram Viewer per speaker is capped at one query at a time.
|
||||
"""
|
||||
if kwargs is not None:
|
||||
for key, value in kwargs.items():
|
||||
self.data_sets = value
|
||||
@ -83,6 +92,10 @@ class BarChart(Chart):
|
||||
self.bar_data.append(entry_bar_data[:self.speaker_range])
|
||||
|
||||
def get_datasets(self):
|
||||
"""
|
||||
Takes the data sets from self.bar_data plus self.bar_names and creates
|
||||
one bar per speaker from this.
|
||||
"""
|
||||
data_set_objects = []
|
||||
for bar_data, bar_name in zip(self.bar_data, self.bar_names):
|
||||
data_set_objects.append(DataSet(type="horizontalBar",
|
||||
|
@ -6,7 +6,7 @@ class NgramForm(forms.Form):
|
||||
Describes and configures the input html form for the Ngram Viewer per year.
|
||||
"""
|
||||
CORPUS_CHOICE = [('lm_ns_year', 'Lemmatisiert ohne Stoppwörter'),
|
||||
('tk_ws_year', 'Nicht lemmatisiert mit Stoppwörter'),]
|
||||
('tk_ws_year', 'Nicht lemmatisiert mit Stoppwörtern'),]
|
||||
query = forms.CharField(label="Suche Ngramme", max_length="200")
|
||||
case_sensitive = forms.BooleanField(label="case-sensitive", required=False)
|
||||
search_plus = forms.BooleanField(label="search-plus", required=False)
|
||||
@ -19,7 +19,7 @@ class NgramFormSpeaker(forms.Form):
|
||||
Describes and configures the input html form for the Ngram Viewer per speaker.
|
||||
"""
|
||||
CORPUS_CHOICE = [('lm_ns_speaker', 'Lemmatisiert ohne Stoppwörter'),
|
||||
('tk_ws_speaker', 'Nicht lemmatisiert mit Stoppwörter'),]
|
||||
('tk_ws_speaker', 'Nicht lemmatisiert mit Stoppwörtern'),]
|
||||
query = forms.CharField(label="Suche Ngramm", max_length="200")
|
||||
case_sensitive = forms.BooleanField(label="case-sensitive", required=False)
|
||||
search_plus = forms.BooleanField(label="search-plus", required=False)
|
||||
|
@ -13,26 +13,29 @@ class Command(BaseCommand):
|
||||
" syntax. N-grams will be added from csv files with three columns."
|
||||
" First column is the n-gram string, second column is the key "
|
||||
" (e.g. year or speaker) and the third column is the counter."
|
||||
" Input is a path pointing to one n-gram file. The user must specify"
|
||||
" if the csv is containing 1-grams, 2-grams ... 5-grams with the"
|
||||
" parameter 'n_grams'.")
|
||||
" Input (input_path) is a path pointing to one folder containing all"
|
||||
" 37 alphabetical sorted n-gram csv-files for one kind of n-gram."
|
||||
" Thus the user must specify with the parameter n_grams if the"
|
||||
" csv-files in the folder are 1-grams, 2-grams etc."
|
||||
" parameter 'n_grams'. The user also need to specifiy the corpus_type.")
|
||||
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument("n_grams",
|
||||
type=int,
|
||||
choices=[1, 2, 3, 4, 5],
|
||||
help="Tells the script to either import given input\
|
||||
csv as 1-grams 2-grams etc.")
|
||||
csv-files as 1-grams 2-grams etc.")
|
||||
parser.add_argument("input_folder",
|
||||
type=str,
|
||||
help="File path to the csv containing one kind of \
|
||||
ngrams.")
|
||||
help="File path to the csv-files containing one \
|
||||
kind of ngrams.")
|
||||
parser.add_argument("corpus_type",
|
||||
choices=["lm_ns_year", "tk_ws_year", "lm_ns_speaker",
|
||||
"tk_ws_speaker"],
|
||||
help="user has to choose what kind of ngrams will \
|
||||
be imported. lm_ns: Lemmatized without stopwords or\
|
||||
tk_ws not lemmatized with stopwords.",
|
||||
help="User has to choose what kind of ngrams will \
|
||||
be imported. lm_ns_year: Lemmatized without \
|
||||
stopwords per year, tk_ws_year: not lemmatized \
|
||||
with stopwords per year etc.",
|
||||
type=str)
|
||||
parser.add_argument(
|
||||
"--batch_size",
|
||||
@ -41,7 +44,8 @@ class Command(BaseCommand):
|
||||
default=1000000,
|
||||
required=False,
|
||||
help="Int to set how many rows(entries) should be \
|
||||
inserted via bulk at once. Default is 1 million.")
|
||||
inserted via bulk at once. Default is 1 million. \
|
||||
Optional parameter.")
|
||||
|
||||
def handle(self, *args, **options):
|
||||
start_time = datetime.now()
|
||||
|
@ -6,16 +6,16 @@ automatically generated with the utils/create_ngram_models.py script. One model
|
||||
holds one kind of ngram. The name of the model follows a pattern describing the
|
||||
specific kind of ngram.
|
||||
For example: KeyA_TwoGram_lm_ns_year --> This model will create a table
|
||||
contianing all lemmatized (lm) 2-grams without stopwords (ns) per year starting with the
|
||||
letter "A" or "a".
|
||||
containing all lemmatized (lm) 2-grams without stopwords (ns) per year starting
|
||||
with the letter "A" or "a".
|
||||
For example: Key_Non_ASCII_ThreeGram_tk_ws_speaker --> This model will create a
|
||||
table containing all tokenized (tk) 3-grams with stopwords (ws) per speaker
|
||||
starting with any non ASCII letter like ü, ö, ä or é.
|
||||
|
||||
The idea behind these splits and a single table for every kind of ngram is to
|
||||
minimize search times for the user. It would have been possible to create a table
|
||||
for every 1-gram, 2-gram etc. But these would have benn pretty long (millions of)
|
||||
rows.
|
||||
for every 1-gram, 2-gram etc. But these would have been pretty long (100 millions
|
||||
of) rows.
|
||||
"""
|
||||
|
||||
|
||||
|
@ -10,9 +10,8 @@ class NgramSearch(object):
|
||||
"""
|
||||
Class that handles the search for ngrams per year. Inputs are the user query
|
||||
and search options. User query will be split and every part will be used
|
||||
as a single query. Every singel query returns a QuerySet which will be
|
||||
searched again with a regex to either match full words or partial words.
|
||||
New regex evaluated QuerySets will be returned. Data from those will be
|
||||
as a single query.
|
||||
Every single query returns a QuerySet. Data from those will be
|
||||
retrieved and converted to valid chart.js data sets. Besides the query the
|
||||
user can pass some search options to the class like case sensitive and case
|
||||
insensitive. This class handles search per year which is kind of the default.
|
||||
@ -163,8 +162,8 @@ class NgramSearch(object):
|
||||
def query_sets_to_data(self):
|
||||
"""
|
||||
Converts QuerySets to data dictionaries. Fills missing years with zero
|
||||
value counts for ngrams. Also sums upper and lower case n-grams to one ngram
|
||||
with one count.
|
||||
value counts for ngrams. Also sums upper and lower case n-grams to one
|
||||
ngram with one count.
|
||||
"""
|
||||
data = []
|
||||
for key, query_sets in self.filtered_sets_dict.items():
|
||||
@ -216,11 +215,9 @@ class NgramSearch(object):
|
||||
class NgramSearchSpeaker(NgramSearch):
|
||||
"""
|
||||
Class that handles the search for ngrams per speaker. Inputs are the user
|
||||
query and search options. User query will be splitted and every split will
|
||||
be used as a single query. Every singel query returns a QuerySet which will
|
||||
be searched again with a regex to either match full words or partial words.
|
||||
New regex evaluated QuerySets will be returned. Data from those will be
|
||||
retrived and converted to valid chart.js data sets. Besides the query the
|
||||
query and search options. User query can only contain one n-gram.
|
||||
The query returns a QuerySet. Data from this will be
|
||||
retrieved and converted to a valid chart.js data set. Besides the query the
|
||||
user can pass some search options to the class like case sensitive and case
|
||||
insensitive. Inherits from NgramSearch.
|
||||
"""
|
||||
@ -261,8 +258,8 @@ class NgramSearchSpeaker(NgramSearch):
|
||||
|
||||
def query_sets_to_data(self):
|
||||
"""
|
||||
Converts QuerySets to data dictionaries. Fills missing years with zero
|
||||
value counts for ngrams. Also sums upper and lower case n-grams to one ngram
|
||||
Converts QuerySets to data dictionaries.
|
||||
Also sums upper and lower case n-grams to one ngram
|
||||
with one count.
|
||||
"""
|
||||
data = []
|
||||
|
@ -1,5 +1,8 @@
|
||||
{% extends "blog/base.html" %}
|
||||
|
||||
<!-- This template creates the ngram viewer page for the user according to the
|
||||
query. This creates the ngram viewer per speaker. -->
|
||||
|
||||
{% block nav-tabs %}
|
||||
<div class="nav-content">
|
||||
<ul class="tabs tabs-transparent">
|
||||
@ -33,7 +36,7 @@
|
||||
</div>
|
||||
<br />
|
||||
<br />
|
||||
Corpus:{{form.corpus_choice}}
|
||||
Korpus:{{form.corpus_choice}}
|
||||
<div class="section">
|
||||
<div class="switch section ">
|
||||
<span>Case-sensitive Suche:</span>
|
||||
|
@ -1,5 +1,8 @@
|
||||
{% extends "blog/base.html" %}
|
||||
|
||||
<!-- This template creates the ngram viewer page for the user according to the
|
||||
query. This creates the ngram viewer per year. -->
|
||||
|
||||
{% block nav-tabs %}
|
||||
<div class="nav-content">
|
||||
<ul class="tabs tabs-transparent">
|
||||
@ -28,7 +31,7 @@
|
||||
</div>
|
||||
<br />
|
||||
<br />
|
||||
Corpus:{{form.corpus_choice}}
|
||||
Korpus:{{form.corpus_choice}}
|
||||
<div class="section">
|
||||
<div class="switch section ">
|
||||
<span>Case-sensitive Suche:</span>
|
||||
|
@ -1,6 +1,10 @@
|
||||
from django.urls import path
|
||||
from . import views
|
||||
|
||||
"""
|
||||
Url paths for all ngram_viewer views.
|
||||
"""
|
||||
|
||||
urlpatterns = [
|
||||
path("pro-jahr/", views.ngram_viewer_year, name="ngram-viewer-jahr"),
|
||||
path("pro-mdb/", views.ngram_viewer_speaker, name="ngram-viewer-sprecher")
|
||||
|
@ -6,6 +6,9 @@ from .ngram_search import NgramSearch, NgramSearchSpeaker
|
||||
|
||||
|
||||
def ngram_viewer_year(request):
|
||||
"""
|
||||
This view creates the Ngram Viewer page per year.
|
||||
"""
|
||||
# logger = logging.getLogger(__name__)
|
||||
if(request.method == "GET"):
|
||||
form = NgramForm(request.GET)
|
||||
@ -49,6 +52,9 @@ def ngram_viewer_year(request):
|
||||
|
||||
|
||||
def ngram_viewer_speaker(request):
|
||||
"""
|
||||
This view creates the Ngram Viewer page per speaker.
|
||||
"""
|
||||
if(request.method == "GET"):
|
||||
form = NgramFormSpeaker(request.GET)
|
||||
if(form.is_valid()):
|
||||
|
Reference in New Issue
Block a user