Initial commit
This commit is contained in:
0
bundesdata_markup_nlp/samples/__init__.py
Executable file
0
bundesdata_markup_nlp/samples/__init__.py
Executable file
95
bundesdata_markup_nlp/samples/create_samples.py
Executable file
95
bundesdata_markup_nlp/samples/create_samples.py
Executable file
@ -0,0 +1,95 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
import fnmatch
|
||||
import argparse
|
||||
import random
|
||||
import shutil
|
||||
|
||||
"""
|
||||
This is just a quick script to get randomized samples from the protocols.
|
||||
"""
|
||||
|
||||
|
||||
def parse_arguments(argv=None):
    """Build the command line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. When left at None,
            argparse falls back to the arguments of the running process.

    Returns:
        The argparse namespace with the attributes ``path``, ``size``,
        ``number_of_samples`` and ``file_type``.
    """
    # Adjacent string literals are used instead of backslash continuations so
    # that no source indentation leaks into the help texts.
    parser = argparse.ArgumentParser(
        description="Creates samples of the given size from the files in "
                    "the given directory. The created samples do not "
                    "overlap.")
    parser.add_argument("-p",
                        "--path",
                        help="Path to data files to create sample from.",
                        required=True,
                        type=str,
                        metavar="")
    parser.add_argument("-s",
                        "--size",
                        help="Size of sample.",
                        required=True,
                        type=int,
                        metavar="")
    parser.add_argument("-n", "--number_of_samples",
                        help="How many samples should be created?",
                        required=True,
                        type=int,
                        metavar="")
    parser.add_argument("-t",
                        "--file_type",
                        help="What file types should be used as the base "
                             "for the sample? Accepts wildcards.",
                        required=True,
                        type=str)
    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def get_files(path, file_type):
    """Return the full paths of all matching files below a directory.

    The directory ``path`` is walked recursively with os.walk. Every file
    whose name matches the fnmatch pattern ``file_type`` is returned as the
    join of its containing directory and its name, in walk order.
    """
    return [
        os.path.join(current_dir, file_name)
        for current_dir, _subdirs, file_names in os.walk(path)
        for file_name in file_names
        if fnmatch.fnmatch(file_name, file_type)
    ]
|
||||
|
||||
|
||||
def get_files_to_copy(list_of_files, sample_size):
    """Draw a random sample of file paths and remove it from the pool.

    The drawn paths are removed from ``list_of_files`` in place, so that
    repeated calls on the same list yield independent, non-overlapping
    samples.

    Args:
        list_of_files: Mutable list of file paths to draw from. It is
            modified in place.
        sample_size: Number of paths to draw. Values of zero or below yield
            an empty sample.

    Returns:
        A tuple ``(list_of_files, sample_list)``: the same (now shortened)
        input list and the list of drawn paths.

    Raises:
        ValueError: If ``sample_size`` is larger than the number of
            available paths. The input list is left untouched in that case.
    """
    # Validate before drawing anything; otherwise the pool would be partly
    # consumed before the random draw fails on the emptied list.
    if sample_size > len(list_of_files):
        raise ValueError(
            "Cannot draw a sample of size {} from only {} files.".format(
                sample_size, len(list_of_files)))
    sample_list = []
    for _ in range(sample_size):
        random_index = random.randrange(len(list_of_files))
        sample_list.append(list_of_files.pop(random_index))
    return list_of_files, sample_list
|
||||
|
||||
|
||||
def copy_files(path, sample_list, step_int):
    """Copy the files of one sample into a new directory.

    The target directory is ``path`` joined with ``str(step_int)``. Its path
    is printed, the directory is created with os.mkdir (which fails if it
    already exists), and every file in ``sample_list`` is copied into it
    with shutil.copy2.
    """
    target_dir = os.path.join(path, str(step_int))
    print(target_dir)
    os.mkdir(target_dir)
    for source in sample_list:
        shutil.copy2(source, target_dir)
|
||||
|
||||
|
||||
def main():
    """Create the requested number of non-overlapping random samples.

    Collects all files matching the requested file type below the given
    path, then draws one sample per step and copies it into a directory
    named after the step number (starting at 1) inside that path.

    Raises:
        SystemExit: If there are fewer matching files than all requested
            samples need together.
    """
    args = parse_arguments()
    path = args.path
    file_list = get_files(path, args.file_type)

    # Fail before any directory is created instead of midway through.
    files_needed = args.size * args.number_of_samples
    if files_needed > len(file_list):
        raise SystemExit(
            "Not enough files: {} needed for {} sample(s) of size {}, "
            "but only {} found.".format(files_needed,
                                        args.number_of_samples,
                                        args.size,
                                        len(file_list)))

    for step in range(1, args.number_of_samples + 1):
        # Draw exactly once per step: every call removes the drawn files
        # from the pool, so extra calls would throw files away unused.
        file_list, sample_list = get_files_to_copy(file_list, args.size)
        copy_files(path, sample_list, step)
|
||||
|
||||
|
||||
# Run the sampling only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
Reference in New Issue
Block a user