mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-06-14 01:50:40 +00:00
Add compression to static corpus data, use chunked computation, hide read corpus ids in corpus analysis
This commit is contained in:
@ -28,19 +28,19 @@ def _create_build_corpus_service(corpus):
|
||||
''' ## Command ## '''
|
||||
command = ['bash', '-c']
|
||||
command.append(
|
||||
f'mkdir /corpora/data/nopaque_{corpus.id}'
|
||||
f'mkdir /corpora/data/nopaque-{corpus.hashid.lower()}'
|
||||
' && '
|
||||
'cwb-encode'
|
||||
' -c utf8'
|
||||
f' -d /corpora/data/nopaque_{corpus.id}'
|
||||
f' -d /corpora/data/nopaque-{corpus.hashid.lower()}'
|
||||
' -f /root/files/corpus.vrt'
|
||||
f' -R /usr/local/share/cwb/registry/nopaque_{corpus.id}'
|
||||
f' -R /usr/local/share/cwb/registry/nopaque-{corpus.hashid.lower()}'
|
||||
' -P pos -P lemma -P simple_pos'
|
||||
' -S ent:0+type -S s:0'
|
||||
' -S text:0+address+author+booktitle+chapter+editor+institution+journal+pages+publisher+publishing_year+school+title'
|
||||
' -xsB -9'
|
||||
' && '
|
||||
f'cwb-make -V NOPAQUE_{corpus.id}'
|
||||
f'cwb-make -V NOPAQUE-{corpus.hashid.upper()}'
|
||||
)
|
||||
''' ## Constraints ## '''
|
||||
constraints = ['node.role==worker']
|
||||
@ -149,11 +149,15 @@ def _create_cqpserver_container(corpus):
|
||||
''' ### Corpus data volume ### '''
|
||||
data_volume_source = os.path.join(corpus.path, 'cwb', 'data')
|
||||
data_volume_target = '/corpora/data'
|
||||
# data_volume_source = os.path.join(corpus.path, 'cwb', 'data', f'nopaque_{corpus.id}')
|
||||
# data_volume_target = f'/corpora/data/nopaque_{corpus.hashid.lower()}'
|
||||
data_volume = f'{data_volume_source}:{data_volume_target}:rw'
|
||||
volumes.append(data_volume)
|
||||
''' ### Corpus registry volume ### '''
|
||||
registry_volume_source = os.path.join(corpus.path, 'cwb', 'registry')
|
||||
registry_volume_target = '/usr/local/share/cwb/registry'
|
||||
# registry_volume_source = os.path.join(corpus.path, 'cwb', 'registry', f'nopaque_{corpus.id}')
|
||||
# registry_volume_target = f'/usr/local/share/cwb/registry/nopaque_{corpus.hashid.lower()}'
|
||||
registry_volume = f'{registry_volume_source}:{registry_volume_target}:rw'
|
||||
volumes.append(registry_volume)
|
||||
# Check if a cqpserver container already exists. If this is the case,
|
||||
|
Reference in New Issue
Block a user