bundesdata_web_app/app/utils/create_ngram_models.py

37 lines
1.2 KiB
Python
Raw Normal View History

2019-02-28 13:09:53 +00:00
"""
2019-03-01 19:55:41 +00:00
Small script that creates the models for the N-Gram Viewer, containing all
2019-02-28 13:09:53 +00:00
the different n-gram data.
"""
# Corpus variants; each one becomes the trailing part of a generated class name.
# NOTE(review): the abbreviations (lm/tk, ns/ws) are not explained in this
# script - confirm their meaning against the data import code.
corpus_type_list = ["lm_ns_year", "tk_ws_year", "lm_ns_speaker", "tk_ws_speaker"]

# Sort keys: the integers 0-9, the letters A-Z and one "_Non_ASCII" entry.
# Each key becomes the "Key<key>" prefix of a generated class name.
# NOTE(review): "_Non_ASCII" is presumably the bucket for every n-gram that
# does not start with one of the other keys - verify against the importer.
sort_key_list = (list(range(10))
                 + list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
                 + ["_Non_ASCII"])

# Spelled-out n-gram sizes used in the class name and the field's verbose name.
ngram_kinds = ["One", "Two", "Three", "Four", "Five"]

# Source text of one generated Django model class.
# Fields: {0} = sort key, {1} = n-gram kind, {2} = corpus type.
# Indexed fields let the n-gram kind be reused without passing it twice;
# surplus positional arguments to str.format() are ignored, so the older
# four-argument call style still produces the same text.
template_class = """
class Key{0}_{1}Gram_{2}(models.Model):
    ngram = models.CharField(verbose_name='{1}Gram',
                             max_length=255,
                             default=None,
                             null=True,
                             blank=True)
    key = models.CharField(max_length=255)
    count = models.IntegerField()
    def __str__(self):
        return str(self.ngram) + " " + str(self.key)
"""

# One class definition per (corpus type, n-gram kind, sort key) combination,
# in that nesting order.
classes = []
for corpus_type in corpus_type_list:
    for ngram_kind in ngram_kinds:
        for key in sort_key_list:
            cls = template_class.format(key, ngram_kind, corpus_type)
            classes.append(cls)

# Write every generated class, each followed by a newline, to "classes.txt"
# in the current working directory. The encoding is explicit so the output
# does not depend on the platform default.
with open("classes.txt", "w", encoding="utf-8") as out_file:
    out_file.writelines(class_source + "\n" for class_source in classes)