mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-11-04 12:22:47 +00:00 
			
		
		
		
	Add compression to static corpus data, use chunked computation, hide read corpus ids in corpus analysis
This commit is contained in:
		@@ -28,19 +28,19 @@ def _create_build_corpus_service(corpus):
 | 
			
		||||
    ''' ## Command ## '''
 | 
			
		||||
    command = ['bash', '-c']
 | 
			
		||||
    command.append(
 | 
			
		||||
        f'mkdir /corpora/data/nopaque_{corpus.id}'
 | 
			
		||||
        f'mkdir /corpora/data/nopaque-{corpus.hashid.lower()}'
 | 
			
		||||
        ' && '
 | 
			
		||||
        'cwb-encode'
 | 
			
		||||
        ' -c utf8'
 | 
			
		||||
        f' -d /corpora/data/nopaque_{corpus.id}'
 | 
			
		||||
        f' -d /corpora/data/nopaque-{corpus.hashid.lower()}'
 | 
			
		||||
        ' -f /root/files/corpus.vrt'
 | 
			
		||||
        f' -R /usr/local/share/cwb/registry/nopaque_{corpus.id}'
 | 
			
		||||
        f' -R /usr/local/share/cwb/registry/nopaque-{corpus.hashid.lower()}'
 | 
			
		||||
        ' -P pos -P lemma -P simple_pos'
 | 
			
		||||
        ' -S ent:0+type -S s:0'
 | 
			
		||||
        ' -S text:0+address+author+booktitle+chapter+editor+institution+journal+pages+publisher+publishing_year+school+title'
 | 
			
		||||
        ' -xsB -9'
 | 
			
		||||
        ' && '
 | 
			
		||||
        f'cwb-make -V NOPAQUE_{corpus.id}'
 | 
			
		||||
        f'cwb-make -V NOPAQUE-{corpus.hashid.upper()}'
 | 
			
		||||
    )
 | 
			
		||||
    ''' ## Constraints ## '''
 | 
			
		||||
    constraints = ['node.role==worker']
 | 
			
		||||
@@ -149,11 +149,15 @@ def _create_cqpserver_container(corpus):
 | 
			
		||||
    ''' ### Corpus data volume ### '''
 | 
			
		||||
    data_volume_source = os.path.join(corpus.path, 'cwb', 'data')
 | 
			
		||||
    data_volume_target = '/corpora/data'
 | 
			
		||||
    # data_volume_source = os.path.join(corpus.path, 'cwb', 'data', f'nopaque_{corpus.id}')
 | 
			
		||||
    # data_volume_target = f'/corpora/data/nopaque_{corpus.hashid.lower()}'
 | 
			
		||||
    data_volume = f'{data_volume_source}:{data_volume_target}:rw'
 | 
			
		||||
    volumes.append(data_volume)
 | 
			
		||||
    ''' ### Corpus registry volume ### '''
 | 
			
		||||
    registry_volume_source = os.path.join(corpus.path, 'cwb', 'registry')
 | 
			
		||||
    registry_volume_target = '/usr/local/share/cwb/registry'
 | 
			
		||||
    # registry_volume_source = os.path.join(corpus.path, 'cwb', 'registry', f'nopaque_{corpus.id}')
 | 
			
		||||
    # registry_volume_target = f'/usr/local/share/cwb/registry/nopaque_{corpus.hashid.lower()}'
 | 
			
		||||
    registry_volume = f'{registry_volume_source}:{registry_volume_target}:rw'
 | 
			
		||||
    volumes.append(registry_volume)
 | 
			
		||||
    # Check if a cqpserver container already exists. If this is the case,
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user