Added some documentation.

2019-03-01 20:55:41 +01:00
parent 96e84d083d
commit 27aa61d91a
37 changed files with 277 additions and 115 deletions
--- a/app/ngram_viewer/charts.py
+++ b/app/ngram_viewer/charts.py
@ -20,6 +20,10 @@ class TimeChart(Chart):
        self.data_sets = None

    def get_datasets(self, **kwargs):
+        """
+        Takes n number of data sets as an input and creates one data-line per
+        data set.
+        """
        if kwargs is not None:
            for key, value in kwargs.items():
                self.data_sets = value
@ -42,8 +46,6 @@ class TimeChart(Chart):
 class BarChart(Chart):
    """
    Class to configure the N-Gramm Viewer bar chart per speaker.
-    The class function get_datasets() is used to get the data sets and creates
-    one data set for each.
    """
    chart_type = "horizontalBar"
    responsive = True
@ -57,6 +59,9 @@ class BarChart(Chart):
        self.bar_names = []

    def get_labels(self):
+        """
+        Creates labels for the bar chart entries.
+        """
        try:
            tmp_list = self.lable_names
            self.lable_names = sum(tmp_list, [])[:self.speaker_range]
@ -65,6 +70,10 @@ class BarChart(Chart):
        return self.lable_names

    def create_data(self, **kwargs):
+        """
+        Takes n number of data sets but only one is passed because the
+        Ngram Viewer per speaker is capped at one query at a time.
+        """
        if kwargs is not None:
            for key, value in kwargs.items():
                self.data_sets = value
@ -83,6 +92,10 @@ class BarChart(Chart):
                self.bar_data.append(entry_bar_data[:self.speaker_range])

    def get_datasets(self):
+        """
+        Takes the data sets from self.bar_data plus self.bar_names and creates
+        one bar per speaker from this.
+        """
        data_set_objects = []
        for bar_data, bar_name in zip(self.bar_data, self.bar_names):
            data_set_objects.append(DataSet(type="horizontalBar",
--- a/app/ngram_viewer/forms.py
+++ b/app/ngram_viewer/forms.py
@ -6,7 +6,7 @@ class NgramForm(forms.Form):
    Describes and configures the input html form for the Ngram Viewer per year.
    """
    CORPUS_CHOICE = [('lm_ns_year', 'Lemmatisiert ohne Stoppwörter'),
-                     ('tk_ws_year', 'Nicht lemmatisiert mit Stoppwörter'),]
+                     ('tk_ws_year', 'Nicht lemmatisiert mit Stoppwörtern'),]
    query = forms.CharField(label="Suche Ngramme", max_length="200")
    case_sensitive = forms.BooleanField(label="case-sensitive", required=False)
    search_plus = forms.BooleanField(label="search-plus", required=False)
@ -19,7 +19,7 @@ class NgramFormSpeaker(forms.Form):
    Describes and configures the input html form for the Ngram Viewer per speaker.
    """
    CORPUS_CHOICE = [('lm_ns_speaker', 'Lemmatisiert ohne Stoppwörter'),
-                     ('tk_ws_speaker', 'Nicht lemmatisiert mit Stoppwörter'),]
+                     ('tk_ws_speaker', 'Nicht lemmatisiert mit Stoppwörtern'),]
    query = forms.CharField(label="Suche Ngramm", max_length="200")
    case_sensitive = forms.BooleanField(label="case-sensitive", required=False)
    search_plus = forms.BooleanField(label="search-plus", required=False)
--- a/app/ngram_viewer/management/commands/import_ngrams_bulk.py
+++ b/app/ngram_viewer/management/commands/import_ngrams_bulk.py
@ -13,26 +13,29 @@ class Command(BaseCommand):
            " syntax. N-grams will be added from csv files with three columns."
            " First column is the n-gram string, second column is the key "
            " (e.g. year or speaker) and the third column is the counter."
-            " Input is a path pointing to one n-gram file. The user must specify"
-            " if the csv is containing 1-grams, 2-grams ... 5-grams with the"
-            " parameter 'n_grams'.")
+            " Input (input_path) is a path pointing to one folder containing all"
+            " 37 alphabetical sorted n-gram csv-files for one kind of n-gram."
+            " Thus the user must specify with the parameter n_grams if the"
+            " csv-files in the folder are 1-grams, 2-grams etc."
+            " parameter 'n_grams'. The user also need to specifiy the corpus_type.")

    def add_arguments(self, parser):
        parser.add_argument("n_grams",
                            type=int,
                            choices=[1, 2, 3, 4, 5],
                            help="Tells the script to either import given input\
-                            csv as 1-grams 2-grams etc.")
+                            csv-files as 1-grams 2-grams etc.")
        parser.add_argument("input_folder",
                            type=str,
-                            help="File path to the csv containing one kind of  \
-                            ngrams.")
+                            help="File path to the csv-files containing one    \
+                            kind of ngrams.")
        parser.add_argument("corpus_type",
                            choices=["lm_ns_year", "tk_ws_year", "lm_ns_speaker",
                                     "tk_ws_speaker"],
-                            help="user has to choose what kind of ngrams will  \
-                            be imported. lm_ns: Lemmatized without stopwords or\
-                            tk_ws not lemmatized with stopwords.",
+                            help="User has to choose what kind of ngrams will  \
+                            be imported. lm_ns_year: Lemmatized without        \
+                            stopwords per year, tk_ws_year: not lemmatized     \
+                            with stopwords per year etc.",
                            type=str)
        parser.add_argument(
                            "--batch_size",
@ -41,7 +44,8 @@ class Command(BaseCommand):
                            default=1000000,
                            required=False,
                            help="Int to set how many rows(entries) should be  \
-                            inserted via bulk at once. Default is 1 million.")
+                            inserted via bulk at once. Default is 1 million.   \
+                            Optional parameter.")

    def handle(self, *args, **options):
        start_time = datetime.now()
--- a/app/ngram_viewer/models.py
+++ b/app/ngram_viewer/models.py
@ -6,16 +6,16 @@ automatically generated with the utils/create_ngram_models.py script. One model
 holds one kind of ngram. The name of the model follows a pattern describing the
 specific kind of ngam.
 For example: KeyA_TwoGram_lm_ns_year --> This model will create a table
-contianing all lemmatized (lm) 2-grams without stopwords (ns) per year starting with the
-letter "A" or "a".
+containing all lemmatized (lm) 2-grams without stopwords (ns) per year starting
+with the letter "A" or "a".
 For example: Key_Non_ASCII_ThreeGram_tk_ws_speaker --> This model will create a
 table containing all tokenized (tk) 3-grams with stopwords (ws) per speaker
 starting with any non ASCII letter like ü, ö, ä or é.

 The Idea behind these splits and a single table for every kind of ngram is to
 minimize search times for the user. It would have been possible to create a table
-for every 1-gram, 2-gram etc. But these would have benn pretty long (millions of)
-rows.
+for every 1-gram, 2-gram etc. But these would have been pretty long (hundreds of
+millions of) rows.
 """


--- a/app/ngram_viewer/ngram_search.py
+++ b/app/ngram_viewer/ngram_search.py
@ -10,9 +10,8 @@ class NgramSearch(object):
    """
    Class that handles the search for ngrams per year. Inputs are the user query
    and search options. User query will be splitted and every split will be used
-    as a single query. Every singel query returns a QuerySet which will be
-    searched again with a regex to either match full words or partial words.
-    New regex evaluated QuerySets will be returned. Data from those will be
+    as a single query.
+    Every single query returns a QuerySet. Data from those will be
    retrived and converted to valid chart.js data sets. Besides the query the
    user can pass some search options to the class like case sensitive and case
    insensitve. This Class handles search per year which is kind of the default.
@ -163,8 +162,8 @@ class NgramSearch(object):
    def query_sets_to_data(self):
        """
        Converts QuerySets to data dictionaries. Fills missing years with zero
-        value counts for ngrams. Also sums upper and lower case n-grams to one ngram
-        with one count.
+        value counts for ngrams. Also sums upper and lower case n-grams to one
+        ngram with one count.
        """
        data = []
        for key, query_sets in self.filtered_sets_dict.items():
@ -216,11 +215,9 @@ class NgramSearch(object):
 class NgramSearchSpeaker(NgramSearch):
    """
    Class that handles the search for ngrams per speaker. Inputs are the user
-    query and search options. User query will be splitted and every split will
-    be used as a single query. Every singel query returns a QuerySet which will
-    be searched again with a regex to either match full words or partial words.
-    New regex evaluated QuerySets will be returned. Data from those will be
-    retrived and converted to valid chart.js data sets. Besides the query the
+    query and search options. User query can only contain one n-gram.
+    The query returns a QuerySet. Data from this will be retrieved
+    and converted to a valid chart.js data set. Besides the query the
    user can pass some search options to the class like case sensitive and case
    insensitve. Inherits from NgramSearch.
    """
@ -261,8 +258,8 @@ class NgramSearchSpeaker(NgramSearch):

    def query_sets_to_data(self):
        """
-        Converts QuerySets to data dictionaries. Fills missing years with zero
-        value counts for ngrams. Also sums upper and lower case n-grams to one ngram
+        Converts QuerySets to data dictionaries.
+        Also sums upper and lower case n-grams to one ngram
        with one count.
        """
        data = []
--- a/app/ngram_viewer/templates/ngram_viewer/ngram_viewer_speaker.html
+++ b/app/ngram_viewer/templates/ngram_viewer/ngram_viewer_speaker.html
@ -1,5 +1,8 @@
 {% extends "blog/base.html" %}

+<!-- This template creates the ngram viewer page for the user according to the
+query. This creates the ngram viewer per speaker. -->
+
 {% block nav-tabs %}
 <div class="nav-content">
      <ul class="tabs tabs-transparent">
@ -33,7 +36,7 @@
                        </div>
                        <br />
                        <br />
-                        Corpus:{{form.corpus_choice}}
+                        Korpus:{{form.corpus_choice}}
                        <div class="section">
                            <div class="switch section ">
                                <span>Case-sensitive Suche:</span>
--- a/app/ngram_viewer/templates/ngram_viewer/ngram_viewer_year.html
+++ b/app/ngram_viewer/templates/ngram_viewer/ngram_viewer_year.html
@ -1,5 +1,8 @@
 {% extends "blog/base.html" %}

+<!-- This template creates the ngram viewer page for the user according to the
+query. This creates the ngram viewer per year. -->
+
 {% block nav-tabs %}
 <div class="nav-content">
    <ul class="tabs tabs-transparent">
@ -28,7 +31,7 @@
                        </div>
                        <br />
                        <br />
-                        Corpus:{{form.corpus_choice}}
+                        Korpus:{{form.corpus_choice}}
                        <div class="section">
                            <div class="switch section ">
                                <span>Case-sensitive Suche:</span>
--- a/app/ngram_viewer/urls.py
+++ b/app/ngram_viewer/urls.py
@ -1,6 +1,10 @@
 from django.urls import path
 from . import views

+"""
+Url paths for all ngram_viewer views.
+"""
+
 urlpatterns = [
    path("pro-jahr/", views.ngram_viewer_year, name="ngram-viewer-jahr"),
    path("pro-mdb/", views.ngram_viewer_speaker, name="ngram-viewer-sprecher")
--- a/app/ngram_viewer/views.py
+++ b/app/ngram_viewer/views.py
@ -6,6 +6,9 @@ from .ngram_search import NgramSearch, NgramSearchSpeaker


 def ngram_viewer_year(request):
+    """
+    This view creates the Ngram Viewer page per year.
+    """
    # logger = logging.getLogger(__name__)
    if(request.method == "GET"):
        form = NgramForm(request.GET)
@ -49,6 +52,9 @@ def ngram_viewer_year(request):


 def ngram_viewer_speaker(request):
+    """
+    This view creates the Ngram Viewer page per speaker.
+    """
    if(request.method == "GET"):
        form = NgramFormSpeaker(request.GET)
        if(form.is_valid()):