Added some documentation.

This commit is contained in:
Stephan Porada
2019-03-01 20:55:41 +01:00
parent 96e84d083d
commit 27aa61d91a
37 changed files with 277 additions and 115 deletions

View File

@ -13,26 +13,29 @@ class Command(BaseCommand):
" syntax. N-grams will be added from csv files with three columns."
" First column is the n-gram string, second column is the key "
" (e.g. year or speaker) and the third column is the counter."
" Input is a path pointing to one n-gram file. The user must specify"
" if the csv is containing 1-grams, 2-grams ... 5-grams with the"
" parameter 'n_grams'.")
" Input (input_path) is a path pointing to one folder containing all"
" 37 alphabetical sorted n-gram csv-files for one kind of n-gram."
" Thus the user must specify with the parameter n_grams if the"
" csv-files in the folder are 1-grams, 2-grams etc."
" The user also needs to specify the corpus_type.")
def add_arguments(self, parser):
parser.add_argument("n_grams",
type=int,
choices=[1, 2, 3, 4, 5],
help="Tells the script to either import given input\
csv as 1-grams 2-grams etc.")
csv-files as 1-grams 2-grams etc.")
parser.add_argument("input_folder",
type=str,
help="File path to the csv containing one kind of \
ngrams.")
help="File path to the csv-files containing one \
kind of ngrams.")
parser.add_argument("corpus_type",
choices=["lm_ns_year", "tk_ws_year", "lm_ns_speaker",
"tk_ws_speaker"],
help="user has to choose what kind of ngrams will \
be imported. lm_ns: Lemmatized without stopwords or\
tk_ws not lemmatized with stopwords.",
help="User has to choose what kind of ngrams will \
be imported. lm_ns_year: Lemmatized without \
stopwords per year, tk_ws_year: not lemmatized \
with stopwords per year etc.",
type=str)
parser.add_argument(
"--batch_size",
@ -41,7 +44,8 @@ class Command(BaseCommand):
default=1000000,
required=False,
help="Int to set how many rows(entries) should be \
inserted via bulk at once. Default is 1 million.")
inserted via bulk at once. Default is 1 million. \
Optional parameter.")
def handle(self, *args, **options):
start_time = datetime.now()