nopaque/app/jobs/handle_corpora.py
2024-11-28 10:02:27 +01:00

228 lines
9.1 KiB
Python

from flask import current_app
import docker
import os
import shutil
from app import db, docker_client, scheduler
from app.models import Corpus, CorpusStatus
def handle_corpora():
    """Run ``_handle_corpora`` inside the application context of ``scheduler.app``."""
    app = scheduler.app
    with app.app_context():
        _handle_corpora()
def _handle_corpora():
    """Walk all corpora through one pass of the status state machine.

    The phases run in a fixed order and each phase selects its corpora by
    their *current* status, so a corpus whose status was changed by an earlier
    phase can already be picked up by a later phase of the same pass. All
    status changes are committed together at the end.
    """
    all_corpora = Corpus.query.all()

    def having(*statuses):
        # Evaluated eagerly at call time: the returned list reflects the
        # statuses as they are right before the phase starts.
        return [c for c in all_corpora if c.status in statuses]

    # Build phase: start new builds, then poll the ones in flight.
    for corpus in having(CorpusStatus.SUBMITTED):
        _create_build_corpus_service(corpus)
    for corpus in having(CorpusStatus.QUEUED, CorpusStatus.BUILDING):
        _checkout_build_corpus_service(corpus)

    # Analysis phase: derive the wanted transition from the session counter.
    for corpus in having(CorpusStatus.BUILT):
        if corpus.num_analysis_sessions > 0:
            corpus.status = CorpusStatus.STARTING_ANALYSIS_SESSION
    for corpus in having(CorpusStatus.RUNNING_ANALYSIS_SESSION):
        if corpus.num_analysis_sessions == 0:
            corpus.status = CorpusStatus.CANCELING_ANALYSIS_SESSION

    # Analysis phase: poll, start and stop the cqpserver containers.
    for corpus in having(CorpusStatus.RUNNING_ANALYSIS_SESSION):
        _checkout_cqpserver_container(corpus)
    for corpus in having(CorpusStatus.STARTING_ANALYSIS_SESSION):
        _create_cqpserver_container(corpus)
    for corpus in having(CorpusStatus.CANCELING_ANALYSIS_SESSION):
        _remove_cqpserver_container(corpus)

    db.session.commit()
def _create_build_corpus_service(corpus: Corpus):
    """Create the Docker service that builds ``corpus`` with the CWB tools.

    The ``cwb/data`` and ``cwb/registry`` directories below ``corpus.path``
    are removed and recreated empty, then a service named
    ``build-corpus_<corpus.id>`` is created. It runs ``cwb-encode`` on the
    mounted ``corpus.vrt`` followed by ``cwb-make``.

    On success ``corpus.status`` is set to ``CorpusStatus.QUEUED``. If Docker
    raises a ``DockerException`` the error is logged and the status is left
    unchanged.
    """
    hashid = corpus.hashid
    cwb_name = f'nopaque-{hashid.lower()}'

    # Shell script executed inside the service: mkdir, encode, index.
    encode_call = ' '.join([
        'cwb-encode',
        '-c utf8',
        f'-d /corpora/data/{cwb_name}',
        '-f /root/files/corpus.vrt',
        f'-R /usr/local/share/cwb/registry/{cwb_name}',
        '-P pos -P lemma -P simple_pos',
        '-S ent:0+type -S s:0',
        '-S text:0+address+author+booktitle+chapter+editor+institution+journal+pages+publisher+publishing_year+school+title',
        '-xsB -9'
    ])
    shell_script = ' && '.join([
        f'mkdir /corpora/data/{cwb_name}',
        encode_call,
        f'cwb-make -V NOPAQUE-{hashid.upper()}'
    ])

    image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1887'
    labels = {'nopaque.server_name': current_app.config['SERVER_NAME']}

    # Host side locations of everything that gets mounted into the service.
    cwb_dir = os.path.join(corpus.path, 'cwb')
    data_dir = os.path.join(cwb_dir, 'data')
    vrt_file = os.path.join(cwb_dir, 'corpus.vrt')
    registry_dir = os.path.join(cwb_dir, 'registry')

    # Make sure that there is no data left in the data and registry
    # directories from a previous build.
    for stale_dir in (data_dir, registry_dir):
        shutil.rmtree(stale_dir, ignore_errors=True)
        os.makedirs(stale_dir)

    name = f'build-corpus_{corpus.id}'
    service_options = {
        'command': ['bash', '-c', shell_script],
        'constraints': ['node.role==worker'],
        'labels': labels,
        'mounts': [
            f'{data_dir}:/corpora/data:rw',
            f'{vrt_file}:/root/files/corpus.vrt:ro',
            f'{registry_dir}:/usr/local/share/cwb/registry:rw'
        ],
        'name': name,
        'restart_policy': docker.types.RestartPolicy(),
        'user': '0:0'
    }

    try:
        docker_client.services.create(image, **service_options)
    except docker.errors.DockerException as e:
        current_app.logger.error(f'Create service "{name}" failed: {e}')
    else:
        corpus.status = CorpusStatus.QUEUED
def _checkout_build_corpus_service(corpus: Corpus):
    """Poll the build service of ``corpus`` and update its status accordingly.

    Status transitions, based on the state of the service's first task:

    * ``QUEUED`` and task state other than ``pending`` -> ``BUILDING``
    * ``BUILDING`` and task state ``complete`` -> ``BUILT``
    * ``BUILDING`` and task state ``failed`` -> ``FAILED``

    After a transition to ``BUILT`` or ``FAILED`` the service is removed. If
    the service does not exist the corpus is marked ``FAILED``. Any other
    Docker error is logged and the corpus is left untouched, so the next
    pass can try again.
    """
    service_name = f'build-corpus_{corpus.id}'
    try:
        service = docker_client.services.get(service_name)
    except docker.errors.NotFound as e:
        current_app.logger.error(f'Get service "{service_name}" failed: {e}')
        corpus.status = CorpusStatus.FAILED
        return
    except docker.errors.DockerException as e:
        current_app.logger.error(f'Get service "{service_name}" failed: {e}')
        # There is no service object to inspect; falling through would raise
        # UnboundLocalError on ``service`` below.
        return
    try:
        service_tasks = service.tasks()
    except docker.errors.DockerException as e:
        # Same policy as for the lookup above: log and retry on the next pass
        # instead of aborting the handling of all remaining corpora.
        current_app.logger.error(
            f'Get tasks of service "{service_name}" failed: {e}'
        )
        return
    if not service_tasks:
        # No task has been scheduled yet, nothing to evaluate.
        return
    task_state = service_tasks[0].get('Status').get('State')
    if corpus.status == CorpusStatus.QUEUED and task_state != 'pending':
        corpus.status = CorpusStatus.BUILDING
        return
    elif corpus.status == CorpusStatus.BUILDING and task_state == 'complete':
        corpus.status = CorpusStatus.BUILT
    elif corpus.status == CorpusStatus.BUILDING and task_state == 'failed':
        corpus.status = CorpusStatus.FAILED
    else:
        return
    # Only reached after a terminal transition: the service is done.
    try:
        service.remove()
    except docker.errors.DockerException as e:
        current_app.logger.error(f'Remove service "{service_name}" failed: {e}')
def _create_cqpserver_container(corpus: Corpus):
    """Start a fresh ``cqpserver`` container for ``corpus``.

    A container named ``nopaque-cqpserver-<corpus.id>`` that already exists
    is force-removed first. The new container mounts the corpus' ``cwb/data``
    and ``cwb/registry`` directories, writes a ``cqpserver.init`` file and
    launches ``cqpserver`` with it.

    On success ``corpus.status`` becomes ``RUNNING_ANALYSIS_SESSION``. A
    missing image marks the corpus ``FAILED``. Any other Docker error is
    logged and leaves the status unchanged.
    """
    # Executed through the ``bash -c`` entrypoint as a single script.
    init_script = ' && '.join([
        'echo "host *;" > cqpserver.init',
        'echo "user anonymous \\"\\";" >> cqpserver.init',
        'cqpserver -I cqpserver.init'
    ])
    image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1887'
    name = f'nopaque-cqpserver-{corpus.id}'
    network = f'{current_app.config["NOPAQUE_DOCKER_NETWORK_NAME"]}'

    cwb_dir = os.path.join(corpus.path, 'cwb')
    data_dir = os.path.join(cwb_dir, 'data')
    registry_dir = os.path.join(cwb_dir, 'registry')
    run_options = {
        'command': [init_script],
        'detach': True,
        'entrypoint': ['bash', '-c'],
        'name': name,
        'network': network,
        'user': '0:0',
        'volumes': [
            f'{data_dir}:/corpora/data:rw',
            f'{registry_dir}:/usr/local/share/cwb/registry:rw'
        ]
    }

    # Check if a cqpserver container already exists. If this is the case,
    # remove it so that a new one can be created under the same name.
    try:
        leftover = docker_client.containers.get(name)
    except docker.errors.NotFound:
        leftover = None
    except docker.errors.DockerException as e:
        current_app.logger.error(f'Get container "{name}" failed: {e}')
        return
    if leftover is not None:
        try:
            leftover.remove(force=True)
        except docker.errors.DockerException as e:
            current_app.logger.error(f'Remove container "{name}" failed: {e}')
            return

    try:
        docker_client.containers.run(image, **run_options)
    except docker.errors.ImageNotFound as e:
        current_app.logger.error(
            f'Run container "{name}" failed '
            f'due to "docker.errors.ImageNotFound" error: {e}'
        )
        corpus.status = CorpusStatus.FAILED
    except docker.errors.DockerException as e:
        current_app.logger.error(f'Run container "{name}" failed: {e}')
    else:
        corpus.status = CorpusStatus.RUNNING_ANALYSIS_SESSION
def _checkout_cqpserver_container(corpus: Corpus):
    """Verify that the ``cqpserver`` container of ``corpus`` can be looked up.

    Every Docker error is logged. If the container does not exist, the
    session counter is additionally reset to ``0`` and the corpus falls back
    to ``CorpusStatus.BUILT``.
    """
    container_name = f'nopaque-cqpserver-{corpus.id}'
    try:
        docker_client.containers.get(container_name)
    except docker.errors.DockerException as e:
        current_app.logger.error(f'Get container "{container_name}" failed: {e}')
        # ``NotFound`` is a ``DockerException`` subclass; only that case
        # means the analysis session is really gone.
        if isinstance(e, docker.errors.NotFound):
            corpus.num_analysis_sessions = 0
            corpus.status = CorpusStatus.BUILT
def _remove_cqpserver_container(corpus: Corpus):
    """Force-remove the ``cqpserver`` container of ``corpus``.

    The status only changes to ``CorpusStatus.BUILT`` once the container can
    no longer be found. A successful removal does not touch the status
    itself. Other Docker errors are logged and nothing else happens.
    """
    container_name = f'nopaque-cqpserver-{corpus.id}'
    try:
        container = docker_client.containers.get(container_name)
    except docker.errors.NotFound:
        corpus.status = CorpusStatus.BUILT
    except docker.errors.DockerException as e:
        current_app.logger.error(f'Get container "{container_name}" failed: {e}')
    else:
        try:
            container.remove(force=True)
        except docker.errors.DockerException as e:
            current_app.logger.error(f'Remove container "{container_name}" failed: {e}')