Added some documentation.

This commit is contained in:
Stephan Porada 2019-03-01 20:55:41 +01:00
parent 96e84d083d
commit 27aa61d91a
37 changed files with 277 additions and 115 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 81 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 107 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 75 KiB

View File

@ -1,9 +1,99 @@
{% extends "blog/base.html" %} {% extends "blog/base.html" %}
{% load static %}
<!-- This template is used to create the about page. It mostly serves static text.-->
{% block content %} {% block content %}
<div class="container"> <div class="parallax-container">
<div class="row"> <div class="section white">
<h1>About page!</h1> <div class="row ">
<div class="container grey-text text-darken-3 lighten-3">
<h4 class="header black-text">Die Masterarbeit</h4>
<p>Diese Webseite sowie die für diese benötigten Daten sind im Rahmen einer Masterarbeit entstanden. In der Arbeit sind die Funktionsweise
der Software für die automatische Auszeichnung der
Bundestagsplenarprotokolle sowie die der Webanwendung
beschrieben. Die Arbeit kann <a href="">hier gelesen</a> werden.</p>
<p>Weiter unten sind einige grundlegende Aspekte des Projekts kurz beschrieben. Ebenfalls kann dort der Quellcode der Webanwendung und der Software für die automatische Auszeichnung heruntergeladen werden.</p>
</div> </div>
</div>
</div>
<div class="parallax"><img src="{% static "/blog/images/4094966.jpg" %}"></div>
</div>
<div class="parallax-container">
<div class="section white">
<div class="row ">
<div class="container grey-text text-darken-3 lighten-3">
<h4 class="header black-text">Datengrundlage</h4>
<p>Die Ausgangsdaten, welche für das Projekt genutzt wurden, sind für
alle Bürger und Bürgerinnen auf der
<a href="https://www.bundestag.de/service/opendata">Webseite des Bundestag</a>
frei zugänglich.</p>
<p>Im Rahmen einer Open
Data-Initiative stellt der deutsche Bundestag alle Plenarprotokolle
sowie die biografischen Daten aller Abgeordneten seit 1949 als
XML-Dateien zur Verfügung.</p>
<p>
Das Projekt Bundesdata umfasst alle XML-Protokolle der Wahlperioden 1.
bis 18. und deckt somit den Zeitraum von 1949 bis 2017 ab.</p>
</div>
</div>
</div>
<div class="parallax"><img src="{% static "/blog/images/4094966.jpg" %}"></div>
</div>
<div class="parallax-container">
<div class="section white">
<div class="row ">
<div class="container grey-text text-darken-3 lighten-3">
<h4 class="header black-text">Automatische Auszeichnung</h4>
<p>Da die von der Bundesregierung bereitgestellten XML-Protokolle nur wenig
bis keine maschinenlesbare Informationen dazu enthalten, welcher
Abgeordnete oder welche Abgeordnete zu welchem Zeitpunkt einen Redebeitrag
im Bundestag hatte, sind die Ausgangsdaten im Rahmen des Projekts
automatisch mit weiteren Informationen angereichert und strukturiert
worden. Hierfür wurde eine eigene Software entwickelt, die die öffentlich verfügbaren XML-Protokolle automatisch mit zusätzliche Metadaten auszeichnet. Diese Auszeichnung ermöglicht es die Protokolle auf der Website strukturiert darzustellen und durchsuchbar zu machen. Ebenfalls können so auch erst die N-Gramme für den Ngram Viewer berechnet werden.</p>
</div>
</div>
</div>
<div class="parallax"><img src="{% static "/blog/images/4094966.jpg" %}"></div>
</div>
<div class="parallax-container">
<div class="section white">
<div class="row ">
<div class="container grey-text text-darken-3 lighten-3">
<h4 class="header black-text">Quellcode für Software und Webanwendung</h4>
<p>
Der Quellcode für die eigens entwickelte Software, welche die automatische
Auszeichnung erstellt hat, kann auf <a href="https://gitlab.ub.uni-bielefeld.de/sporada/bundesdata_markup_nlp_software">GitLab</a> eingesehen und
heruntergeladen werden. Der Quellcode für die Webseite ist ebenfalls
auf <a href="https://gitlab.ub.uni-bielefeld.de/sporada/bundesdata_web_app">GitLab</a> verfügbar.</p>
</div>
</div>
</div>
<div class="parallax"><img src="{% static "/blog/images/4094966.jpg" %}"></div>
</div>
<div class="parallax-container">
<div class="section white">
<div class="row ">
<div class="container grey-text text-darken-3 lighten-3">
<h4 class="header black-text">Download der ausgezeichneten Daten</h4>
<p>
Die für das Projekt mittels der eigenen Software erstellten XML-Protokolle sowie weitere Forschungsdaten können <a href="https://gitlab.ub.uni-bielefeld.de/sporada/bundesdata_markup_nlp_data"> hier heruntergeladen werden</a>.</p>
</div>
</div>
</div>
<div class="parallax"><img src="{% static "/blog/images/4094966.jpg" %}"></div>
</div>
<div class="parallax-container">
<div class="section white">
<div class="row ">
<div class="container grey-text text-darken-3 lighten-3">
<h4 class="header black-text">Fehlerquoten und Probleme der Ausgangsdaten</h4>
<p>Die automatische Auszeichnung der Protokolle ist nicht gänzlich fehlerfrei.
Somit können Fehler bei der Darstellung der Reden auf der Website auftreten.
Wie hoch genau die einzelnen Fehlerquoten sind, ist in der <a href="#">Masterarbeit</a> beschrieben.</p>
</div>
</div>
</div>
<div class="parallax"><img src="{% static "/blog/images/4094966.jpg" %}"></div>
</div> </div>
{% endblock content %} {% endblock content %}

View File

@ -1,5 +1,10 @@
{% load static %} {% load static %}
<!-- This template is the base template from which all other templates inherit.
It defines the basic page structure (header, main, footer etc.) and also defines
the navigation menu for desktop and mobile devices.
All needed JavaScript and CSS is also loaded here. -->
<!DOCTYPE html> <!DOCTYPE html>
<html lang="ger"> <html lang="ger">
@ -84,6 +89,9 @@
document.querySelectorAll('.dropdown-trigger'), document.querySelectorAll('.dropdown-trigger'),
{"coverTrigger": false} {"coverTrigger": false}
) )
document.addEventListener('DOMContentLoaded', function() {
M.Slider.init(document.querySelectorAll('.slider'), {});
});
</script> </script>
<script src="{% static "blog/chartjs/Chart.bundle.js"%}"></script> <script src="{% static "blog/chartjs/Chart.bundle.js"%}"></script>
</body> </body>

View File

@ -1,5 +1,7 @@
{% extends "blog/base.html" %} {% extends "blog/base.html" %}
<!-- Template for blog pages; currently unused. -->
{% block content %} {% block content %}
<div class="container"> <div class="container">
<div class="row"> <div class="row">

View File

@ -1,6 +1,10 @@
{% extends "blog/base.html" %} {% extends "blog/base.html" %}
{% load static %} {% load static %}
<!-- This template creates the homepage of the web app. It contains an overview
of the different functions of the web app and some general info about the web
app.-->
{% block content %} {% block content %}
<div class="parallax-container"> <div class="parallax-container">
<div class="parallax"><img src="{% static "/blog/images/4116197.jpg" %}"></div> <div class="parallax"><img src="{% static "/blog/images/4116197.jpg" %}"></div>
@ -122,87 +126,44 @@
Metadaten gibt es auf der <a href="{% url "about-page" %}">Info-Seite</a>.</p> Metadaten gibt es auf der <a href="{% url "about-page" %}">Info-Seite</a>.</p>
</div> </div>
</div> </div>
</div>
<div class="parallax"><img src="{% static "/blog/images/4116197.jpg" %}"></div> <div class="parallax"><img src="{% static "/blog/images/4116197.jpg" %}"></div>
</div> </div>
<div class="parallax-container">
<div class="section white">
<div class="row ">
<div class="container grey-text text-darken-3 lighten-3">
<h4 class="header black-text">Datengrundlage</h4>
<p>Die Ausgangsdaten, welche für das Projekt genutzt wurden, sind für
alle Bürger und Bürgerinnen auf der
<a href="https://www.bundestag.de/service/opendata">Webseite des Bundestag</a>
frei zugänglich.</p>
<p>Im Rahmen einer Open
Data-Initiative stellt der deutsche Bundestag alle Plenarprotokolle
sowie die biografischen Daten aller Abgeordneten seit 1949 als
XML-Dateien zur Verfügung.</p>
<p>
Das Projekt Bundesdata umfasst alle XML-Protokolle der Wahlperioden 1.
bis 18. und deckt somit den Zeitraum von 1949 bis 2017 ab.<p>
</div>
</div>
</div>
<div class="parallax"><img src="{% static "/blog/images/4094966.jpg" %}"></div>
</div> </div>
<div class="parallax-container"> <div class="parallax-container">
<div class="section white"> <div class="section white">
<div class="row "> <div class="row">
<div class="container grey-text text-darken-3 lighten-3"> <div class="container grey-text text-darken-3 lighten-3">
<h4 class="header black-text">Automatische Auszeichnung</h4> <br />
<p>Da die von derBundesregierung bereitstellen XML-Protokolle nur wenig <div class="slider">
bis keine maschinenlesbare Informationen dazu enthalten, welcher <ul class="slides" style="background-color: inherit; text-align: center;">
Abgeordnete oder welche Abgeordnete zu welchem Zeitpunkt einen Redebeitrag <li>
im Bundestag hatte, sind die Ausgangsdaten im Rahmen des Projekts <img src="{% static "/blog/images/slide_ngram_viewer.png" %}"> <!-- random image -->
automatisch mit weiteren Informationen angereichert und strukturiert <div class="caption right-align">
worden. Hierfür wurde eine eigene Software entwickelt, die die öffentlich verfügbaren XML-Protokolle automatisch mit zusätzliche Metadaten auszeichnet. Diese Auszeichnung ermöglicht es die Protokolle auf der Website strukturiert darzustellen und durchsuchbar zu machen. Ebenfalls können so auch erst die N-Gramme für den Ngram Viewer berechnet werden.</p> </div>
</li>
<li>
<img src="{% static "/blog/images/slider_profile.png" %}"> <!-- random image -->
<div class="caption right-align">
</div>
</li>
<li>
<img src="{% static "/blog/images/one_speech.png" %}"> <!-- random image -->
<div class="caption right-align">
</div>
</li>
<li>
<img src="{% static "/blog/images/one_protocol.png" %}"> <!-- random image -->
<div class="caption right-align">
</div>
</li>
</ul>
</div> </div>
</div> </div>
</div> </div>
<div class="parallax"><img src="{% static "/blog/images/4094966.jpg" %}"></div> <div class="parallax"><img src="{% static "/blog/images/4116197.jpg" %}"></div>
</div>
</div> </div>
<div class="parallax-container"> <div class="parallax-container">
<div class="section white"> <div class="parallax"><img src="{% static "/blog/images/4116197.jpg" %}"></div>
<div class="row ">
<div class="container grey-text text-darken-3 lighten-3">
<h4 class="header black-text">Quellcode für Software und Webanwendung</h4>
<p>
Der Quellcode für die eigens entwickelte Software, welche die automatische
Auszeichnung erstellt hat, kann auf <a href="https://gitlab.ub.uni-bielefeld.de/sporada/bundesdata_markup_nlp_software">GitLab</a> eingesehen und
heruntergeladen werden. Der Quellcode für die Webseite ist ebenfalls
auf <a href="https://gitlab.ub.uni-bielefeld.de/sporada/bundesdata_web_app">GitLab</a> verfügbar.</p>
</div>
</div>
</div>
<div class="parallax"><img src="{% static "/blog/images/4094966.jpg" %}"></div>
</div>
<div class="parallax-container">
<div class="section white">
<div class="row ">
<div class="container grey-text text-darken-3 lighten-3">
<h4 class="header black-text">Download der ausgezeichneten Daten</h4>
<p>
Die für das Projekt mittels der eigenen Software erstellten XML-Protokolle sowie weitere Forschungsdaten können <a href="https://gitlab.ub.uni-bielefeld.de/sporada/bundesdata_markup_nlp_data"> hier heruntergeladen werden</a>.</p>
</div>
</div>
</div>
<div class="parallax"><img src="{% static "/blog/images/4094966.jpg" %}"></div>
</div>
<div class="parallax-container">
<div class="section white">
<div class="row ">
<div class="container grey-text text-darken-3 lighten-3">
<h4 class="header black-text">Fehlerquoten und Probleme der Ausgangsdaten</h4>
<p>Die automatische Auszeichnung der Protokolle ist nicht gänzlich fehlerfrei.
Somit können Fehler bei der Darstellung der Reden auf der Website auftreten.
Wie hoch genau die einzelen Fehlerqouten sind, sowie weitere Informationen zum
Projekt, der Arbeit, der Datengrundlage und der automatischen Auszeichnung mit
zusätzlichen Informationen und Metadaten gibt es auf der
<a href="{% url "about-page" %}">Info-Seite</a>.</p>
</div>
</div>
</div>
<div class="parallax"><img src="{% static "/blog/images/4094966.jpg" %}"></div>
</div> </div>
{% endblock content %} {% endblock content %}

View File

@ -1,5 +1,7 @@
{% extends "blog/base.html" %} {% extends "blog/base.html" %}
<!-- Template for the static Impressum page. -->
{% block content %} {% block content %}
<div class="container"> <div class="container">
<div class="row"> <div class="row">
@ -50,8 +52,8 @@
<br /> <br />
<b>Quelle der Bilder:</b> <b>Quelle der Bilder:</b>
<p> <p>
Bild 4094966.jpg auf der Homepage: (c) Deutscher Bundestag / Marc-Steffen Unger <br /> Bild 4094966.jpg auf der Homepage und Infoseite: (c) Deutscher Bundestag / Marc-Steffen Unger <br />
Bild 4116197.jpg auf der Homepage: (c) Deutscher Bundestag / Thomas Köhler/photothek.net <br /> Bild 4116197.jpg auf der Homepage und Infoseite: (c) Deutscher Bundestag / Thomas Köhler/photothek.net <br />
</p> </p>
</div> </div>

View File

@ -1,6 +1,10 @@
from django.urls import path from django.urls import path
from . import views from . import views
"""
Url paths for all blog views.
"""
urlpatterns = [ urlpatterns = [
path("blog/", views.blog, name="blog"), path("blog/", views.blog, name="blog"),
path("about/", views.about, name="about-page"), path("about/", views.about, name="about-page"),

View File

@ -2,16 +2,28 @@ from django.shortcuts import render
def home(request): def home(request):
"""
This view creates the homepage of the web app.
"""
return render(request, "blog/home.html", {"title": "Homepage"}) return render(request, "blog/home.html", {"title": "Homepage"})
def blog(request): def blog(request):
"""
This view creates a blog page. Not used right now.
"""
return render(request, "blog/blog.html") return render(request, "blog/blog.html")
def about(request): def about(request):
"""
This view creates the about/info page of the web app.
"""
return render(request, "blog/about.html", {"title": "About"}) return render(request, "blog/about.html", {"title": "About"})
def impressum(request): def impressum(request):
"""
This view creates the impressum page of the web app.
"""
return render(request, "blog/impressum.html", {"title": "Impressum"}) return render(request, "blog/impressum.html", {"title": "Impressum"})

View File

@ -20,6 +20,10 @@ class TimeChart(Chart):
self.data_sets = None self.data_sets = None
def get_datasets(self, **kwargs): def get_datasets(self, **kwargs):
"""
Takes n number of data sets as an input and creates one data-line per
data set.
"""
if kwargs is not None: if kwargs is not None:
for key, value in kwargs.items(): for key, value in kwargs.items():
self.data_sets = value self.data_sets = value
@ -42,8 +46,6 @@ class TimeChart(Chart):
class BarChart(Chart): class BarChart(Chart):
""" """
Class to configure the N-Gramm Viewer bar chart per speaker. Class to configure the N-Gramm Viewer bar chart per speaker.
The class function get_datasets() is used to get the data sets and creates
one data set for each.
""" """
chart_type = "horizontalBar" chart_type = "horizontalBar"
responsive = True responsive = True
@ -57,6 +59,9 @@ class BarChart(Chart):
self.bar_names = [] self.bar_names = []
def get_labels(self): def get_labels(self):
"""
Creates labels for the bar chart entries.
"""
try: try:
tmp_list = self.lable_names tmp_list = self.lable_names
self.lable_names = sum(tmp_list, [])[:self.speaker_range] self.lable_names = sum(tmp_list, [])[:self.speaker_range]
@ -65,6 +70,10 @@ class BarChart(Chart):
return self.lable_names return self.lable_names
def create_data(self, **kwargs): def create_data(self, **kwargs):
"""
Takes n number of data sets, but only one is passed because the
Ngram Viewer per speaker is capped at one query at a time.
"""
if kwargs is not None: if kwargs is not None:
for key, value in kwargs.items(): for key, value in kwargs.items():
self.data_sets = value self.data_sets = value
@ -83,6 +92,10 @@ class BarChart(Chart):
self.bar_data.append(entry_bar_data[:self.speaker_range]) self.bar_data.append(entry_bar_data[:self.speaker_range])
def get_datasets(self): def get_datasets(self):
"""
Takes the data sets from self.bar_data plus self.bar_names and creates
one bar per speaker from this.
"""
data_set_objects = [] data_set_objects = []
for bar_data, bar_name in zip(self.bar_data, self.bar_names): for bar_data, bar_name in zip(self.bar_data, self.bar_names):
data_set_objects.append(DataSet(type="horizontalBar", data_set_objects.append(DataSet(type="horizontalBar",

View File

@ -6,7 +6,7 @@ class NgramForm(forms.Form):
Describes and configures the input html form for the Ngram Viewer per year. Describes and configures the input html form for the Ngram Viewer per year.
""" """
CORPUS_CHOICE = [('lm_ns_year', 'Lemmatisiert ohne Stoppwörter'), CORPUS_CHOICE = [('lm_ns_year', 'Lemmatisiert ohne Stoppwörter'),
('tk_ws_year', 'Nicht lemmatisiert mit Stoppwörter'),] ('tk_ws_year', 'Nicht lemmatisiert mit Stoppwörtern'),]
query = forms.CharField(label="Suche Ngramme", max_length="200") query = forms.CharField(label="Suche Ngramme", max_length="200")
case_sensitive = forms.BooleanField(label="case-sensitive", required=False) case_sensitive = forms.BooleanField(label="case-sensitive", required=False)
search_plus = forms.BooleanField(label="search-plus", required=False) search_plus = forms.BooleanField(label="search-plus", required=False)
@ -19,7 +19,7 @@ class NgramFormSpeaker(forms.Form):
Describes and configures the input html form for the Ngram Viewer per speaker. Describes and configures the input html form for the Ngram Viewer per speaker.
""" """
CORPUS_CHOICE = [('lm_ns_speaker', 'Lemmatisiert ohne Stoppwörter'), CORPUS_CHOICE = [('lm_ns_speaker', 'Lemmatisiert ohne Stoppwörter'),
('tk_ws_speaker', 'Nicht lemmatisiert mit Stoppwörter'),] ('tk_ws_speaker', 'Nicht lemmatisiert mit Stoppwörtern'),]
query = forms.CharField(label="Suche Ngramm", max_length="200") query = forms.CharField(label="Suche Ngramm", max_length="200")
case_sensitive = forms.BooleanField(label="case-sensitive", required=False) case_sensitive = forms.BooleanField(label="case-sensitive", required=False)
search_plus = forms.BooleanField(label="search-plus", required=False) search_plus = forms.BooleanField(label="search-plus", required=False)

View File

@ -13,26 +13,29 @@ class Command(BaseCommand):
" syntax. N-grams will be added from csv files with three columns." " syntax. N-grams will be added from csv files with three columns."
" First column is the n-gram string, second column is the key " " First column is the n-gram string, second column is the key "
" (e.g. year or speaker) and the third column is the counter." " (e.g. year or speaker) and the third column is the counter."
" Input is a path pointing to one n-gram file. The user must specify" " Input (input_path) is a path pointing to one folder containing all"
" if the csv is containing 1-grams, 2-grams ... 5-grams with the" " 37 alphabetical sorted n-gram csv-files for one kind of n-gram."
" parameter 'n_grams'.") " Thus the user must specify with the parameter n_grams if the"
" csv-files in the folder are 1-grams, 2-grams etc."
" parameter 'n_grams'. The user also need to specifiy the corpus_type.")
def add_arguments(self, parser): def add_arguments(self, parser):
parser.add_argument("n_grams", parser.add_argument("n_grams",
type=int, type=int,
choices=[1, 2, 3, 4, 5], choices=[1, 2, 3, 4, 5],
help="Tells the script to either import given input\ help="Tells the script to either import given input\
csv as 1-grams 2-grams etc.") csv-files as 1-grams 2-grams etc.")
parser.add_argument("input_folder", parser.add_argument("input_folder",
type=str, type=str,
help="File path to the csv containing one kind of \ help="File path to the csv-files containing one \
ngrams.") kind of ngrams.")
parser.add_argument("corpus_type", parser.add_argument("corpus_type",
choices=["lm_ns_year", "tk_ws_year", "lm_ns_speaker", choices=["lm_ns_year", "tk_ws_year", "lm_ns_speaker",
"tk_ws_speaker"], "tk_ws_speaker"],
help="user has to choose what kind of ngrams will \ help="User has to choose what kind of ngrams will \
be imported. lm_ns: Lemmatized without stopwords or\ be imported. lm_ns_year: Lemmatized without \
tk_ws not lemmatized with stopwords.", stopwords per year, tk_ws_year: not lemmatized \
with stopwords per year etc.",
type=str) type=str)
parser.add_argument( parser.add_argument(
"--batch_size", "--batch_size",
@ -41,7 +44,8 @@ class Command(BaseCommand):
default=1000000, default=1000000,
required=False, required=False,
help="Int to set how many rows(entries) should be \ help="Int to set how many rows(entries) should be \
inserted via bulk at once. Default is 1 million.") inserted via bulk at once. Default is 1 million. \
Optional parameter.")
def handle(self, *args, **options): def handle(self, *args, **options):
start_time = datetime.now() start_time = datetime.now()

View File

@ -6,16 +6,16 @@ automatically generated with the utils/create_ngram_models.py script. One model
holds one kind of ngram. The name of the model follows a pattern describing the holds one kind of ngram. The name of the model follows a pattern describing the
specific kind of ngam. specific kind of ngam.
For example: KeyA_TwoGram_lm_ns_year --> This model will create a table For example: KeyA_TwoGram_lm_ns_year --> This model will create a table
contianing all lemmatized (lm) 2-grams without stopwords (ns) per year starting with the contianing all lemmatized (lm) 2-grams without stopwords (ns) per year starting
letter "A" or "a". with the letter "A" or "a".
For example: Key_Non_ASCII_ThreeGram_tk_ws_speaker --> This model will create a For example: Key_Non_ASCII_ThreeGram_tk_ws_speaker --> This model will create a
table containing all tokenized (tk) 3-grams with stopwords (ws) per speaker table containing all tokenized (tk) 3-grams with stopwords (ws) per speaker
starting with any non ASCII letter like ü, ö, ä or é. starting with any non ASCII letter like ü, ö, ä or é.
The Idea behind these splits and a single table for every kind of ngram is to The Idea behind these splits and a single table for every kind of ngram is to
minimize search times for the user. It would have been possible to create a table minimize search times for the user. It would have been possible to create a table
for every 1-gram, 2-gram etc. But these would have benn pretty long (millions of) for every 1-gram, 2-gram etc. But these would have benn pretty long (100 millions
rows. of) rows.
""" """

View File

@ -10,9 +10,8 @@ class NgramSearch(object):
""" """
Class that handles the search for ngrams per year. Inputs are the user query Class that handles the search for ngrams per year. Inputs are the user query
and search options. User query will be splitted and every split will be used and search options. User query will be splitted and every split will be used
as a single query. Every singel query returns a QuerySet which will be as a single query.
searched again with a regex to either match full words or partial words. Every singel query returns a QuerySet. Data from those will be
New regex evaluated QuerySets will be returned. Data from those will be
retrived and converted to valid chart.js data sets. Besides the query the retrived and converted to valid chart.js data sets. Besides the query the
user can pass some search options to the class like case sensitive and case user can pass some search options to the class like case sensitive and case
insensitve. This Class handles search per year which is kind of the default. insensitve. This Class handles search per year which is kind of the default.
@ -163,8 +162,8 @@ class NgramSearch(object):
def query_sets_to_data(self): def query_sets_to_data(self):
""" """
Converts QuerySets to data dictionaries. Fills missing years with zero Converts QuerySets to data dictionaries. Fills missing years with zero
value counts for ngrams. Also sums upper and lower case n-grams to one ngram value counts for ngrams. Also sums upper and lower case n-grams to one
with one count. ngram with one count.
""" """
data = [] data = []
for key, query_sets in self.filtered_sets_dict.items(): for key, query_sets in self.filtered_sets_dict.items():
@ -216,11 +215,9 @@ class NgramSearch(object):
class NgramSearchSpeaker(NgramSearch): class NgramSearchSpeaker(NgramSearch):
""" """
Class that handles the search for ngrams per speaker. Inputs are the user Class that handles the search for ngrams per speaker. Inputs are the user
query and search options. User query will be splitted and every split will query and search options. User query can only contain one n-gram.
be used as a single query. Every singel query returns a QuerySet which will The query returns a QuerySet. Data from thise will be
be searched again with a regex to either match full words or partial words. retrived and converted to a valid chart.js data set. Besides the query the
New regex evaluated QuerySets will be returned. Data from those will be
retrived and converted to valid chart.js data sets. Besides the query the
user can pass some search options to the class like case sensitive and case user can pass some search options to the class like case sensitive and case
insensitve. Inherits from NgramSearch. insensitve. Inherits from NgramSearch.
""" """
@ -261,8 +258,8 @@ class NgramSearchSpeaker(NgramSearch):
def query_sets_to_data(self): def query_sets_to_data(self):
""" """
Converts QuerySets to data dictionaries. Fills missing years with zero Converts QuerySets to data dictionaries.
value counts for ngrams. Also sums upper and lower case n-grams to one ngram Also sums upper and lower case n-grams to one ngram
with one count. with one count.
""" """
data = [] data = []

View File

@ -1,5 +1,8 @@
{% extends "blog/base.html" %} {% extends "blog/base.html" %}
<!-- This template creates the ngram viewer page for the user according to the
query. This creates the ngram viewer per speaker. -->
{% block nav-tabs %} {% block nav-tabs %}
<div class="nav-content"> <div class="nav-content">
<ul class="tabs tabs-transparent"> <ul class="tabs tabs-transparent">
@ -33,7 +36,7 @@
</div> </div>
<br /> <br />
<br /> <br />
Corpus:{{form.corpus_choice}} Korpus:{{form.corpus_choice}}
<div class="section"> <div class="section">
<div class="switch section "> <div class="switch section ">
<span>Case-sensitive Suche:</span> <span>Case-sensitive Suche:</span>

View File

@ -1,5 +1,8 @@
{% extends "blog/base.html" %} {% extends "blog/base.html" %}
<!-- This template creates the ngram viewer page for the user according to the
query. This creates the ngram viewer per year. -->
{% block nav-tabs %} {% block nav-tabs %}
<div class="nav-content"> <div class="nav-content">
<ul class="tabs tabs-transparent"> <ul class="tabs tabs-transparent">
@ -28,7 +31,7 @@
</div> </div>
<br /> <br />
<br /> <br />
Corpus:{{form.corpus_choice}} Korpus:{{form.corpus_choice}}
<div class="section"> <div class="section">
<div class="switch section "> <div class="switch section ">
<span>Case-sensitive Suche:</span> <span>Case-sensitive Suche:</span>

View File

@ -1,6 +1,10 @@
from django.urls import path from django.urls import path
from . import views from . import views
"""
Url paths for all ngram_viewer views.
"""
urlpatterns = [ urlpatterns = [
path("pro-jahr/", views.ngram_viewer_year, name="ngram-viewer-jahr"), path("pro-jahr/", views.ngram_viewer_year, name="ngram-viewer-jahr"),
path("pro-mdb/", views.ngram_viewer_speaker, name="ngram-viewer-sprecher") path("pro-mdb/", views.ngram_viewer_speaker, name="ngram-viewer-sprecher")

View File

@ -6,6 +6,9 @@ from .ngram_search import NgramSearch, NgramSearchSpeaker
def ngram_viewer_year(request): def ngram_viewer_year(request):
"""
This view creates the Ngram Viewer page per year.
"""
# logger = logging.getLogger(__name__) # logger = logging.getLogger(__name__)
if(request.method == "GET"): if(request.method == "GET"):
form = NgramForm(request.GET) form = NgramForm(request.GET)
@ -49,6 +52,9 @@ def ngram_viewer_year(request):
def ngram_viewer_speaker(request): def ngram_viewer_speaker(request):
"""
This view creates the Ngram Viewer page per speaker.
"""
if(request.method == "GET"): if(request.method == "GET"):
form = NgramFormSpeaker(request.GET) form = NgramFormSpeaker(request.GET)
if(form.is_valid()): if(form.is_valid()):

View File

@ -6,7 +6,8 @@ from django_tables2.utils import A # alias for Accessor
class SpeakerTable(tables.Table): class SpeakerTable(tables.Table):
""" """
Configures the table showing all speakers. Inserts a column with links to Configures the table showing all speakers. Inserts a column with links to
the profile of one speaker. Also defines all shown columns. the profile of one speaker. Also defines all shown columns. The template
speakers/table.html is imported in line 19.
""" """
link = tables.LinkColumn("MdB", text="Profil", args=[A("id")], link = tables.LinkColumn("MdB", text="Profil", args=[A("id")],
orderable=False, orderable=False,

View File

@ -1,6 +1,8 @@
{% extends "blog/base.html" %} {% extends "blog/base.html" %}
{% load render_table from django_tables2 %} {% load render_table from django_tables2 %}
<!-- This template creates the profile page for one speaker. -->
{% block content %} {% block content %}
<div class="container"> <div class="container">
<div class="row"> <div class="row">

View File

@ -1,6 +1,9 @@
{% extends "blog/base.html" %} {% extends "blog/base.html" %}
{% load render_table from django_tables2 %} {% load render_table from django_tables2 %}
<!-- This template creates the searchable list of all speakers MdBs of the
Bundestag since 1949. -->
{% block content %} {% block content %}
<div class="container"> <div class="container">
<div class="row"> <div class="row">

View File

@ -1,5 +1,9 @@
{% load django_tables2 %} {% load django_tables2 %}
{% load i18n %} {% load i18n %}
<!-- This template creates a table template which is used by the different table
classes defined in tables.py. Mostly used to display search results. -->
{% block table-wrapper %} {% block table-wrapper %}
{% block table %} {% block table %}
<table {% render_attrs table.attrs %} class="highlight"> <table {% render_attrs table.attrs %} class="highlight">

View File

@ -1,6 +1,10 @@
from django.urls import path from django.urls import path
from . import views from . import views
"""
Url paths for all speakers views.
"""
urlpatterns = [ urlpatterns = [
path("", views.speakers, name="MdBs"), path("", views.speakers, name="MdBs"),
path("mdb/<int:id>", views.speaker, name="MdB"), path("mdb/<int:id>", views.speaker, name="MdB"),

View File

@ -11,6 +11,9 @@ from speeches.forms import SearchFormSpeech
def speakers(request): def speakers(request):
"""
This view creates the page for the searchable speakers list.
"""
if(request.method == "GET"): if(request.method == "GET"):
form = SearchForm(request.GET) form = SearchForm(request.GET)
if(form.is_valid()): if(form.is_valid()):
@ -30,6 +33,9 @@ def speakers(request):
def speaker(request, id): def speaker(request, id):
"""
This view creates the profile page of one speaker.
"""
try: try:
current_speaker = Speaker.objects.get(pk=id) current_speaker = Speaker.objects.get(pk=id)
speech_count = len(Speech.objects.filter(foreign_speaker=id)) speech_count = len(Speech.objects.filter(foreign_speaker=id))

View File

@ -13,8 +13,8 @@ class Command(BaseCommand):
" syntax. Protocols will be added from the xml protocol files." " syntax. Protocols will be added from the xml protocol files."
" Input is a path pointing to all/multiple protocols in one" " Input is a path pointing to all/multiple protocols in one"
" directory with one level of subdirectories. First imports" " directory with one level of subdirectories. First imports"
" toc, attachments and metadata with model Protocol. Speeches will be put into realtion with the model Speech." " toc, attachments and metadata with model Protocol. Speeches will"
" to the protocols later on.") " be put into realtion with the model Speech.")
def add_arguments(self, parser): def add_arguments(self, parser):
parser.add_argument("input_path", parser.add_argument("input_path",

View File

@ -6,7 +6,8 @@ from django_tables2.utils import A # alias for Accessor
class SpeechTable(tables.Table): class SpeechTable(tables.Table):
""" """
Configures the table showing all speeches. Inserts a column with links to Configures the table showing all speeches. Inserts a column with links to
the speeches. Also defines all shown columns. the speeches. Also defines all shown columns. The template
speeches/table.html is imported in line 21.
""" """
link = tables.LinkColumn("Rede", text="Rede", args=[A("speech_id")], link = tables.LinkColumn("Rede", text="Rede", args=[A("speech_id")],
orderable=False, orderable=False,
@ -25,6 +26,8 @@ class SpeakerSpeechTable(tables.Table):
""" """
Configures the table showing all speeches of one speaker in his profile. Configures the table showing all speeches of one speaker in his profile.
Inserts a column with links to the speeches. Also defines all shown columns. Inserts a column with links to the speeches. Also defines all shown columns.
The template
speeches/table.html is imported in line 39.
""" """
link = tables.LinkColumn("Rede", text="Rede", args=[A("speech_id")], link = tables.LinkColumn("Rede", text="Rede", args=[A("speech_id")],
orderable=False, orderable=False,
@ -41,6 +44,8 @@ class ProtocolTable(tables.Table):
""" """
Configures the table showing all protocols. Configures the table showing all protocols.
Inserts a column with links to the protocols. Also defines all shown columns. Inserts a column with links to the protocols. Also defines all shown columns.
The template
speeches/table.html is imported in line 57.
""" """
link = tables.LinkColumn("Protokoll", text="Protokoll", args=[A("protocol_id")], link = tables.LinkColumn("Protokoll", text="Protokoll", args=[A("protocol_id")],
orderable=False, orderable=False,

View File

@ -1,6 +1,8 @@
{% extends "blog/base.html" %} {% extends "blog/base.html" %}
{% load render_table from django_tables2 %} {% load render_table from django_tables2 %}
<!-- This template is used to create the page of one protocol. -->
{% block content %} {% block content %}
<div class="container"> <div class="container">
<div class="row"> <div class="row">

View File

@ -1,6 +1,8 @@
{% extends "blog/base.html" %} {% extends "blog/base.html" %}
{% load render_table from django_tables2 %} {% load render_table from django_tables2 %}
<!-- This template is used to create the page for the searchable protocol list. -->
{% block content %} {% block content %}
<div class="container"> <div class="container">
<div class="row"> <div class="row">

View File

@ -1,6 +1,8 @@
{% extends "blog/base.html" %} {% extends "blog/base.html" %}
{% load render_table from django_tables2 %} {% load render_table from django_tables2 %}
<!-- This template is used to create the page of one speech. -->
{% block content %} {% block content %}
<div class="container"> <div class="container">
<div class="row"> <div class="row">

View File

@ -1,6 +1,8 @@
{% extends "blog/base.html" %} {% extends "blog/base.html" %}
{% load render_table from django_tables2 %} {% load render_table from django_tables2 %}
<!-- This template is used to create the page for the searchable speeches list. -->
{% block content %} {% block content %}
<div class="container"> <div class="container">
<div class="row"> <div class="row">

View File

@ -1,5 +1,9 @@
{% load django_tables2 %} {% load django_tables2 %}
{% load i18n %} {% load i18n %}
<!-- This template defines the table markup used by the different table
classes defined in tables.py. Mostly used to display search results. -->
{% block table-wrapper %} {% block table-wrapper %}
{% block table %} {% block table %}
<table {% render_attrs table.attrs %} class="highlight"> <table {% render_attrs table.attrs %} class="highlight">

View File

@ -1,6 +1,10 @@
from django.urls import path from django.urls import path
from . import views from . import views
"""
URL paths for all speech views.
"""
urlpatterns = [ urlpatterns = [
path("reden/", views.speeches, name="Reden"), path("reden/", views.speeches, name="Reden"),
path("liste-protokolle/", views.protocols, name="Protokoll-list"), path("liste-protokolle/", views.protocols, name="Protokoll-list"),

View File

@ -4,7 +4,7 @@ from lxml import etree
def create_html_speech(speech_content_xml_string): def create_html_speech(speech_content_xml_string):
""" """
COnverts the XML speech content into styled html. Also counts the words and Converts the XML speech content into styled html. Also counts the words and
shows the vocabulary. shows the vocabulary.
""" """
speech_html = "<div>" + speech_content_xml_string + "</div>" speech_html = "<div>" + speech_content_xml_string + "</div>"

View File

@ -10,6 +10,9 @@ from collections import Counter
def speech(request, speech_id): def speech(request, speech_id):
"""
This view creates the page of one speech.
"""
try: try:
current_speech = Speech.objects.get(pk=speech_id) current_speech = Speech.objects.get(pk=speech_id)
if(current_speech.previous_speech_id is not None): if(current_speech.previous_speech_id is not None):
@ -50,6 +53,9 @@ def speech(request, speech_id):
def speeches(request): def speeches(request):
"""
This view creates the searchable list of all speeches.
"""
if(request.method == "GET"): if(request.method == "GET"):
form = SearchFormSpeech(request.GET) form = SearchFormSpeech(request.GET)
if(form.is_valid()): if(form.is_valid()):
@ -69,6 +75,9 @@ def speeches(request):
def protocol(request, protocol_id): def protocol(request, protocol_id):
"""
This view creates the page of one protocol.
"""
try: try:
current_protocol = Protocol.objects.get(pk=protocol_id) current_protocol = Protocol.objects.get(pk=protocol_id)
related_speeches = Speech.objects.filter(foreign_protocol=protocol_id).order_by("speech_id") related_speeches = Speech.objects.filter(foreign_protocol=protocol_id).order_by("speech_id")
@ -91,6 +100,9 @@ def protocol(request, protocol_id):
def protocols(request): def protocols(request):
"""
This view creates the searchable list of all protocols.
"""
if(request.method == "GET"): if(request.method == "GET"):
form = SearchForm(request.GET) form = SearchForm(request.GET)
if(form.is_valid()): if(form.is_valid()):

View File

@ -1,5 +1,5 @@
""" """
Small script creating the models for the N-Gramm Viewer holding containing all Small script creating the models for the N-Gramm Viewer containing all
the different n-gramm data. the different n-gramm data.
""" """