Initial commit

This commit is contained in:
Stephan Porada
2019-02-28 14:09:53 +01:00
commit 96e84d083d
97 changed files with 66293 additions and 0 deletions

View File

@@ -0,0 +1,115 @@
from django.core.management.base import BaseCommand
from speakers.models import Speaker, LegislativeInfo, LegislativeInstitution
import datetime
from lxml import etree
from tqdm import tqdm
class Command(BaseCommand):
    """Management command that imports speakers (MdBs) from an XML file.

    Every ``MDB`` element of the input document becomes one ``Speaker`` row.
    Each of its ``WAHLPERIODE`` children becomes one ``LegislativeInfo`` row,
    and each ``INSTITUTION`` inside a period becomes one
    ``LegislativeInstitution`` row.

    Periods and institutions are read element by element instead of zipping
    parallel XPath result lists. Zipping stops at the shortest list and pairs
    values purely by position, so a period without institutions (or an
    element lacking one child) could drop or misalign records.
    """

    help = ("Adds speakers (MdBs) to the database using the django models"
            " syntax. Speakers will be added from the official"
            " Stammdatenbank.xml. Input is the Stammdatenbank.xml specified"
            " by a path.")

    # Date layout parsed from the XML and the layout written to the models.
    _SOURCE_DATE_FORMAT = "%d.%m.%Y"
    _TARGET_DATE_FORMAT = "%Y-%m-%d"

    # (model attribute, XPath relative to the MDB element) for every speaker
    # field that is copied verbatim from the text of the first match.
    _SPEAKER_FIELDS = (
        ("last_name", "./NAMEN/NAME/NACHNAME"),
        ("first_name", "./NAMEN/NAME/VORNAME"),
        ("nobility", "./NAMEN/NAME/ADEL"),
        ("name_prefix", "./NAMEN/NAME/PRAEFIX"),
        ("title", "./NAMEN/NAME/ANREDE_TITEL"),
        ("birthday", "./BIOGRAFISCHE_ANGABEN/GEBURTSDATUM"),
        ("birthplace", "./BIOGRAFISCHE_ANGABEN/GEBURTSORT"),
        ("country_of_birth", "./BIOGRAFISCHE_ANGABEN/GEBURTSLAND"),
        ("day_of_death", "./BIOGRAFISCHE_ANGABEN/STERBEDATUM"),
        ("occupation", "./BIOGRAFISCHE_ANGABEN/BERUF"),
        ("short_vita", "./BIOGRAFISCHE_ANGABEN/VITA_KURZ"),
        ("party", "./BIOGRAFISCHE_ANGABEN/PARTEI_KURZ"),
    )

    def add_arguments(self, parser):
        """Register the positional ``input_path`` argument (path to the XML)."""
        parser.add_argument("input_path",
                            type=str)

    def handle(self, *args, **options):
        """Parse the XML file at ``options["input_path"]`` and store its data.

        Raises whatever ``etree.parse`` raises for an unreadable or malformed
        file, ``IndexError`` if an ``MDB`` element has no ``ID`` child, and
        ``ValueError`` if a non-empty date does not match ``dd.mm.yyyy``.
        """
        file_path = options["input_path"]
        tree = etree.parse(file_path)
        speakers = tree.xpath("//MDB")
        for speaker_element in tqdm(speakers, desc="Importing speaker data"):
            speaker = self._import_speaker(speaker_element)
            for period in speaker_element.xpath("./WAHLPERIODEN/WAHLPERIODE"):
                self._import_legislative_info(speaker, period)
                self._import_institutions(speaker, period)

    @staticmethod
    def _first_text(element, path):
        """Return the text of the first node matching ``path``, else None.

        An empty element and a missing element both yield ``None``.
        """
        matches = element.xpath(path)
        return matches[0].text if matches else None

    @classmethod
    def _to_iso_date(cls, text):
        """Convert a ``dd.mm.yyyy`` string to ``yyyy-mm-dd``; None stays None."""
        if text is None:
            return None
        parsed = datetime.datetime.strptime(text, cls._SOURCE_DATE_FORMAT)
        return parsed.strftime(cls._TARGET_DATE_FORMAT)

    def _import_speaker(self, speaker_element):
        """Create, save and return the ``Speaker`` for one ``MDB`` element."""
        speaker = Speaker()
        # The ID is mandatory: index strictly so a record without one fails
        # loudly instead of being saved under an auto-generated key.
        speaker.id = speaker_element.xpath("./ID")[0].text
        for attribute, path in self._SPEAKER_FIELDS:
            setattr(speaker, attribute,
                    self._first_text(speaker_element, path))
        speaker.save()
        return speaker

    def _import_legislative_info(self, speaker, period):
        """Save one ``LegislativeInfo`` for a single ``WAHLPERIODE`` element."""
        info = LegislativeInfo()
        info.foreign_speaker = speaker
        info.legislative_period = self._first_text(period, "./WP")
        # Dates are only assigned when present, so an empty element leaves
        # the model field at its default.
        start_date = self._to_iso_date(self._first_text(period, "./MDBWP_VON"))
        if start_date is not None:
            info.legislative_period_start_date = start_date
        end_date = self._to_iso_date(self._first_text(period, "./MDBWP_BIS"))
        if end_date is not None:
            info.legislative_period_end_date = end_date
        info.mandate_type = self._first_text(period, "./MANDATSART")
        info.save()

    def _import_institutions(self, speaker, period):
        """Save one ``LegislativeInstitution`` per ``INSTITUTION`` in a period."""
        current_period = self._first_text(period, "./WP")
        for institution in period.xpath("./INSTITUTIONEN/INSTITUTION"):
            record = LegislativeInstitution()
            record.foreign_speaker = speaker
            record.current_period = current_period
            record.institution = self._first_text(institution, "./INS_LANG")
            start_date = self._to_iso_date(
                self._first_text(institution, "./MDBINS_VON"))
            if start_date is not None:
                record.institution_start_date = start_date
            end_date = self._to_iso_date(
                self._first_text(institution, "./MDBINS_BIS"))
            if end_date is not None:
                record.institution_end_date = end_date
            record.save()