mirror of
				https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
				synced 2025-11-04 04:12:45 +00:00 
			
		
		
		
	move blueprints in dedicated folder
This commit is contained in:
		@@ -1,18 +1,2 @@
 | 
			
		||||
from flask import Blueprint
 | 
			
		||||
from flask_login import login_required
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
bp = Blueprint('jobs', __name__)


@bp.before_request
@login_required
def before_request():
    """Reject unauthenticated visitors for every route in this package."""
    pass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
from . import routes, json_routes
 | 
			
		||||
from .handle_corpora import job as handle_corpora
 | 
			
		||||
from .handle_jobs import job as handle_jobs
 | 
			
		||||
 
 | 
			
		||||
@@ -1,138 +0,0 @@
 | 
			
		||||
from flask import current_app
 | 
			
		||||
from flask_login import current_user
 | 
			
		||||
from flask_socketio import Namespace
 | 
			
		||||
from app import db, hashids, socketio
 | 
			
		||||
from app.extensions.flask_socketio import admin_required, login_required
 | 
			
		||||
from app.models import Job, JobStatus
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class JobsNamespace(Namespace):
    '''
    Socket.IO namespace for job events. Every handler takes the job's
    hashid and answers with an HTTP-like {'code': int, 'body': str} dict.
    '''

    def _load_job(self, job_hashid: str, check_owner: bool = True):
        '''
        Decode `job_hashid`, load the job and (optionally) check that the
        current user owns it or is an administrator.

        Returns a (job, error) tuple where exactly one element is None.
        '''
        # Decode the job hashid
        job_id = hashids.decode(job_hashid)
        # Validate job_id
        if not isinstance(job_id, int):
            return None, {'code': 400, 'body': 'job_id is invalid'}
        # Load job from database
        job = Job.query.get(job_id)
        if job is None:
            return None, {'code': 404, 'body': 'Job not found'}
        # Ownership check (skipped by admin-only handlers)
        if check_owner and not (job.user == current_user or current_user.is_administrator):
            return None, {'code': 403, 'body': 'Forbidden'}
        return job, None

    @login_required
    def on_delete(self, job_hashid: str):
        '''Mark a job for deletion; the actual delete runs in the background.'''
        job, error = self._load_job(job_hashid)
        if error is not None:
            return error

        # TODO: This should be a method in the Job model
        def _delete_job(app, job_id):
            with app.app_context():
                job = Job.query.get(job_id)
                job.delete()
                db.session.commit()

        # Delete the job in a background task so this handler returns fast
        socketio.start_background_task(
            target=_delete_job,
            app=current_app._get_current_object(),
            job_id=job.id
        )

        return {
            'code': 202,
            'body': f'Job "{job.title}" marked for deletion'
        }

    @admin_required
    def on_get_log(self, job_hashid: str):
        '''Return the pyflow log of a finished job (admins only).'''
        # Admin access is enforced by the decorator, so skip the owner check
        job, error = self._load_job(job_hashid, check_owner=False)
        if error is not None:
            return error

        # Logs are only complete once the job stopped processing
        if job.status not in [JobStatus.COMPLETED, JobStatus.FAILED]:
            return {
                'code': 409,
                'body': 'Job is not done processing'
            }

        # Read the log file
        with open(job.path / 'pipeline_data' / 'logs' / 'pyflow_log.txt') as log_file:
            job_log = log_file.read()

        return {
            'code': 200,
            'body': job_log
        }

    @login_required
    def on_restart(self, job_hashid: str):
        '''Restart a job; the actual restart runs in the background.'''
        job, error = self._load_job(job_hashid)
        if error is not None:
            return error

        # TODO: This should be a method in the Job model
        def _restart_job(app, job_id):
            with app.app_context():
                job = Job.query.get(job_id)
                job.restart()
                db.session.commit()

        # Restart the job in a background task
        socketio.start_background_task(
            target=_restart_job,
            app=current_app._get_current_object(),
            job_id=job.id
        )

        return {
            'code': 202,
            'body': f'Job "{job.title}" restarted'
        }
 | 
			
		||||
							
								
								
									
										230
									
								
								app/jobs/handle_corpora.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										230
									
								
								app/jobs/handle_corpora.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,230 @@
 | 
			
		||||
from app import db, docker_client, scheduler
 | 
			
		||||
from app.models import Corpus, CorpusStatus
 | 
			
		||||
from flask import current_app
 | 
			
		||||
import docker
 | 
			
		||||
import os
 | 
			
		||||
import shutil
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def job():
    """Scheduler entry point: run one corpus-handling pass in an app context."""
    with scheduler.app.app_context():
        _handle_corpora()
 | 
			
		||||
 | 
			
		||||
def _handle_corpora():
    """
    Single scheduler pass over all corpora: drive each corpus through its
    build / analysis-session lifecycle and persist the status changes.
    """
    corpora = Corpus.query.all()

    def matching(predicate):
        # Snapshot the matching corpora right before each loop runs, with
        # the same timing as an inline list comprehension would have.
        return [c for c in corpora if predicate(c)]

    # Freshly submitted corpora get a build service started.
    for corpus in matching(lambda c: c.status == CorpusStatus.SUBMITTED):
        _create_build_corpus_service(corpus)
    # Track build progress of queued/building corpora.
    for corpus in matching(lambda c: c.status in [CorpusStatus.QUEUED, CorpusStatus.BUILDING]):
        _checkout_build_corpus_service(corpus)
    # Built corpora with waiting analysis sessions move towards a CQPserver.
    for corpus in matching(lambda c: c.status == CorpusStatus.BUILT and c.num_analysis_sessions > 0):
        corpus.status = CorpusStatus.STARTING_ANALYSIS_SESSION
    # Running sessions nobody uses anymore are scheduled for shutdown.
    for corpus in matching(lambda c: c.status == CorpusStatus.RUNNING_ANALYSIS_SESSION and c.num_analysis_sessions == 0):
        corpus.status = CorpusStatus.CANCELING_ANALYSIS_SESSION
    # Verify containers of still-running sessions.
    for corpus in matching(lambda c: c.status == CorpusStatus.RUNNING_ANALYSIS_SESSION):
        _checkout_analysing_corpus_container(corpus)
    # Start CQPserver containers for sessions flagged above.
    for corpus in matching(lambda c: c.status == CorpusStatus.STARTING_ANALYSIS_SESSION):
        _create_cqpserver_container(corpus)
    # Tear down containers of canceled sessions.
    for corpus in matching(lambda c: c.status == CorpusStatus.CANCELING_ANALYSIS_SESSION):
        _remove_cqpserver_container(corpus)
    db.session.commit()
 | 
			
		||||
 | 
			
		||||
def _create_build_corpus_service(corpus):
    '''
    Create a Docker Swarm service that encodes the corpus' VRT file with
    cwb-encode and indexes it with cwb-make.

    On success the corpus status is set to QUEUED; on failure the error is
    logged and the status is left unchanged so the next pass retries.
    '''
    ''' # Docker service settings # '''
    ''' ## Command ## '''
    command = ['bash', '-c']
    command.append(
        f'mkdir /corpora/data/nopaque-{corpus.hashid.lower()}'
        ' && '
        'cwb-encode'
        ' -c utf8'
        f' -d /corpora/data/nopaque-{corpus.hashid.lower()}'
        ' -f /root/files/corpus.vrt'
        f' -R /usr/local/share/cwb/registry/nopaque-{corpus.hashid.lower()}'
        ' -P pos -P lemma -P simple_pos'
        ' -S ent:0+type -S s:0'
        ' -S text:0+address+author+booktitle+chapter+editor+institution+journal+pages+publisher+publishing_year+school+title'
        ' -xsB -9'
        ' && '
        f'cwb-make -V NOPAQUE-{corpus.hashid.upper()}'
    )
    ''' ## Constraints ## '''
    # Builds run on worker nodes only
    constraints = ['node.role==worker']
    ''' ## Image ## '''
    image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1879'
    ''' ## Labels ## '''
    # Labels let other components find and classify this service
    labels = {
        'origin': current_app.config['SERVER_NAME'],
        'type': 'corpus.build',
        'corpus_id': str(corpus.id)
    }
    ''' ## Mounts ## '''
    mounts = []
    ''' ### Data mount ### '''
    data_mount_source = os.path.join(corpus.path, 'cwb', 'data')
    data_mount_target = '/corpora/data'
    data_mount = f'{data_mount_source}:{data_mount_target}:rw'
    # Make sure that there is no data in the data directory
    shutil.rmtree(data_mount_source, ignore_errors=True)
    os.makedirs(data_mount_source)
    mounts.append(data_mount)
    ''' ### File mount ### '''
    # The VRT input is mounted read-only
    file_mount_source = os.path.join(corpus.path, 'cwb', 'corpus.vrt')
    file_mount_target = '/root/files/corpus.vrt'
    file_mount = f'{file_mount_source}:{file_mount_target}:ro'
    mounts.append(file_mount)
    ''' ### Registry mount ### '''
    registry_mount_source = os.path.join(corpus.path, 'cwb', 'registry')
    registry_mount_target = '/usr/local/share/cwb/registry'
    registry_mount = f'{registry_mount_source}:{registry_mount_target}:rw'
    # Make sure that there is no data in the registry directory
    shutil.rmtree(registry_mount_source, ignore_errors=True)
    os.makedirs(registry_mount_source)
    mounts.append(registry_mount)
    ''' ## Name ## '''
    name = f'build-corpus_{corpus.id}'
    ''' ## Restart policy ## '''
    # Default policy: the one-shot build task is not restarted
    restart_policy = docker.types.RestartPolicy()
    try:
        docker_client.services.create(
            image,
            command=command,
            constraints=constraints,
            labels=labels,
            mounts=mounts,
            name=name,
            restart_policy=restart_policy,
            user='0:0'
        )
    except docker.errors.DockerException as e:
        current_app.logger.error(f'Create service "{name}" failed: {e}')
        return
    corpus.status = CorpusStatus.QUEUED
 | 
			
		||||
 | 
			
		||||
def _checkout_build_corpus_service(corpus):
    '''
    Poll the corpus' build service and advance the corpus status
    (QUEUED -> BUILDING -> BUILT/FAILED); remove the service once its
    task reached a terminal state.
    '''
    service_name = f'build-corpus_{corpus.id}'
    try:
        service = docker_client.services.get(service_name)
    except docker.errors.NotFound as e:
        # Service vanished without reaching a terminal state: build failed
        current_app.logger.error(f'Get service "{service_name}" failed: {e}')
        corpus.status = CorpusStatus.FAILED
        return
    except docker.errors.DockerException as e:
        current_app.logger.error(f'Get service "{service_name}" failed: {e}')
        # Bug fix: without this return, `service` is unbound below and the
        # next statement raises NameError (mirrors _checkout_job_service).
        return
    service_tasks = service.tasks()
    if not service_tasks:
        # No task scheduled yet; check again on the next pass
        return
    task_state = service_tasks[0].get('Status').get('State')
    if corpus.status == CorpusStatus.QUEUED and task_state != 'pending':
        corpus.status = CorpusStatus.BUILDING
        return
    elif corpus.status == CorpusStatus.BUILDING and task_state == 'complete':
        corpus.status = CorpusStatus.BUILT
    elif corpus.status == CorpusStatus.BUILDING and task_state == 'failed':
        corpus.status = CorpusStatus.FAILED
    else:
        # No applicable transition; keep the service running
        return
    # Terminal state reached: clean up the build service
    try:
        service.remove()
    except docker.errors.DockerException as e:
        current_app.logger.error(f'Remove service "{service_name}" failed: {e}')
 | 
			
		||||
 | 
			
		||||
def _create_cqpserver_container(corpus):
    '''
    Start a CQPserver container serving the corpus' CWB data for an
    analysis session. Any stale container with the same name is removed
    first. On success the corpus status becomes RUNNING_ANALYSIS_SESSION.
    '''
    ''' # Docker container settings # '''
    ''' ## Command ## '''
    # Write a minimal cqpserver.init (allow any host, anonymous user)
    # and then launch cqpserver with it
    command = []
    command.append(
        'echo "host *;" > cqpserver.init'
        ' && '
        'echo "user anonymous \\"\\";" >> cqpserver.init'
        ' && '
        'cqpserver -I cqpserver.init'
    )
    ''' ## Detach ## '''
    detach = True
    ''' ## Entrypoint ## '''
    entrypoint = ['bash', '-c']
    ''' ## Image ## '''
    image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}cwb:r1879'
    ''' ## Name ## '''
    name = f'cqpserver_{corpus.id}'
    ''' ## Network ## '''
    network = f'{current_app.config["NOPAQUE_DOCKER_NETWORK_NAME"]}'
    ''' ## Volumes ## '''
    volumes = []
    ''' ### Corpus data volume ### '''
    data_volume_source = os.path.join(corpus.path, 'cwb', 'data')
    data_volume_target = '/corpora/data'
    # data_volume_source = os.path.join(corpus.path, 'cwb', 'data', f'nopaque_{corpus.id}')
    # data_volume_target = f'/corpora/data/nopaque_{corpus.hashid.lower()}'
    data_volume = f'{data_volume_source}:{data_volume_target}:rw'
    volumes.append(data_volume)
    ''' ### Corpus registry volume ### '''
    registry_volume_source = os.path.join(corpus.path, 'cwb', 'registry')
    registry_volume_target = '/usr/local/share/cwb/registry'
    # registry_volume_source = os.path.join(corpus.path, 'cwb', 'registry', f'nopaque_{corpus.id}')
    # registry_volume_target = f'/usr/local/share/cwb/registry/nopaque_{corpus.hashid.lower()}'
    registry_volume = f'{registry_volume_source}:{registry_volume_target}:rw'
    volumes.append(registry_volume)
    # Check if a cqpserver container already exists. If this is the case,
    # remove it and create a new one
    try:
        container = docker_client.containers.get(name)
    except docker.errors.NotFound:
        # No stale container; nothing to clean up
        pass
    except docker.errors.DockerException as e:
        current_app.logger.error(f'Get container "{name}" failed: {e}')
        return
    else:
        try:
            container.remove(force=True)
        except docker.errors.DockerException as e:
            current_app.logger.error(f'Remove container "{name}" failed: {e}')
            return
    try:
        docker_client.containers.run(
            image,
            command=command,
            detach=detach,
            entrypoint=entrypoint,
            name=name,
            network=network,
            user='0:0',
            volumes=volumes
        )
    except docker.errors.ImageNotFound as e:
        current_app.logger.error(
            f'Run container "{name}" failed '
            f'due to "docker.errors.ImageNotFound" error: {e}'
        )
        # A missing image will not heal on retry; fail the corpus
        corpus.status = CorpusStatus.FAILED
        return
    except docker.errors.DockerException as e:
        # Other Docker errors may be transient: leave status for a retry
        current_app.logger.error(f'Run container "{name}" failed: {e}')
        return
    corpus.status = CorpusStatus.RUNNING_ANALYSIS_SESSION
 | 
			
		||||
 | 
			
		||||
def _checkout_analysing_corpus_container(corpus):
    '''
    Verify that the CQPserver container of a corpus in
    RUNNING_ANALYSIS_SESSION state still exists; if it vanished, reset
    the corpus to BUILT and drop its session counter.
    '''
    container_name = f'cqpserver_{corpus.id}'
    try:
        docker_client.containers.get(container_name)
    except docker.errors.NotFound as e:
        current_app.logger.error(f'Get container "{container_name}" failed: {e}')
        # Container is gone: no sessions can be attached to it anymore
        corpus.num_analysis_sessions = 0
        corpus.status = CorpusStatus.BUILT
    except docker.errors.DockerException as e:
        # Transient Docker error: log and retry on the next scheduler pass
        current_app.logger.error(f'Get container "{container_name}" failed: {e}')
 | 
			
		||||
 | 
			
		||||
def _remove_cqpserver_container(corpus):
    '''
    Force-remove the CQPserver container of a corpus whose analysis
    session is being canceled; mark the corpus BUILT once it is gone.
    '''
    container_name = f'cqpserver_{corpus.id}'
    try:
        container = docker_client.containers.get(container_name)
    except docker.errors.NotFound:
        # Already removed: cancellation is complete
        corpus.status = CorpusStatus.BUILT
        return
    except docker.errors.DockerException as e:
        current_app.logger.error(f'Get container "{container_name}" failed: {e}')
        return
    try:
        container.remove(force=True)
    except docker.errors.DockerException as e:
        # Removal failed: status unchanged so the next pass retries
        current_app.logger.error(f'Remove container "{container_name}" failed: {e}')
 | 
			
		||||
							
								
								
									
										239
									
								
								app/jobs/handle_jobs.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										239
									
								
								app/jobs/handle_jobs.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,239 @@
 | 
			
		||||
from app import db, docker_client, hashids, scheduler
 | 
			
		||||
from app.models import (
 | 
			
		||||
    Job,
 | 
			
		||||
    JobResult,
 | 
			
		||||
    JobStatus,
 | 
			
		||||
    TesseractOCRPipelineModel,
 | 
			
		||||
    SpaCyNLPPipelineModel
 | 
			
		||||
)
 | 
			
		||||
from datetime import datetime
 | 
			
		||||
from flask import current_app
 | 
			
		||||
from werkzeug.utils import secure_filename
 | 
			
		||||
import docker
 | 
			
		||||
import json
 | 
			
		||||
import os
 | 
			
		||||
import shutil
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def job():
    """Scheduler entry point: run one job-handling pass in an app context."""
    with scheduler.app.app_context():
        _handle_jobs()
 | 
			
		||||
 | 
			
		||||
def _handle_jobs():
    """
    Single scheduler pass over all jobs: create services for submitted
    jobs, poll active ones, tear down canceled ones, then persist changes.
    """
    jobs = Job.query.all()

    # Submitted jobs get a Swarm service created for them.
    submitted = [j for j in jobs if j.status == JobStatus.SUBMITTED]
    for current in submitted:
        _create_job_service(current)

    # Queued/running jobs are polled for state changes. This filter is
    # evaluated after the loop above, so freshly queued jobs are included.
    active = [j for j in jobs if j.status in [JobStatus.QUEUED, JobStatus.RUNNING]]
    for current in active:
        _checkout_job_service(current)

    # Jobs flagged for cancellation have their services removed.
    canceling = [j for j in jobs if j.status == JobStatus.CANCELING]
    for current in canceling:
        _remove_job_service(current)

    db.session.commit()
 | 
			
		||||
 | 
			
		||||
def _create_job_service(job):
    '''
    Create a Docker Swarm service that runs the processing pipeline for
    `job`.

    Builds the service command, labels, mounts and resource reservations
    from the job's service name, version and arguments, then submits the
    service. On success the job status becomes QUEUED; on an invalid
    service/model the job is marked FAILED; on a Docker error the status
    is left unchanged so the next pass retries.
    '''
    ''' # Docker service settings # '''
    ''' ## Service specific settings ## '''
    if job.service == 'file-setup-pipeline':
        mem_mb = 512
        n_cores = 2
        executable = 'file-setup-pipeline'
        image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}file-setup-pipeline:v{job.service_version}'
    elif job.service == 'tesseract-ocr-pipeline':
        mem_mb = 1024
        n_cores = 4
        executable = 'tesseract-ocr-pipeline'
        image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}tesseract-ocr-pipeline:v{job.service_version}'
    elif job.service == 'transkribus-htr-pipeline':
        mem_mb = 1024
        n_cores = 4
        executable = 'transkribus-htr-pipeline'
        image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}transkribus-htr-pipeline:v{job.service_version}'
    elif job.service == 'spacy-nlp-pipeline':
        mem_mb = 1024
        n_cores = 1
        executable = 'spacy-nlp-pipeline'
        image = f'{current_app.config["NOPAQUE_DOCKER_IMAGE_PREFIX"]}spacy-nlp-pipeline:v{job.service_version}'
    else:
        # Bug fix: an unknown service previously fell through the chain and
        # crashed below with a NameError on `executable`; fail explicitly.
        current_app.logger.error(f'Unknown service "{job.service}" for job {job.id}')
        job.status = JobStatus.FAILED
        return
    ''' ## Command ## '''
    command = f'{executable} -i /input -o /output'
    command += ' --log-dir /logs'
    command += f' --mem-mb {mem_mb}'
    command += f' --n-cores {n_cores}'
    if job.service == 'spacy-nlp-pipeline':
        model_id = hashids.decode(job.service_args['model'])
        model = SpaCyNLPPipelineModel.query.get(model_id)
        if model is None:
            job.status = JobStatus.FAILED
            return
        command += f' -m {model.pipeline_name}'
        if 'encoding_detection' in job.service_args and job.service_args['encoding_detection']:
            command += ' --check-encoding'
    elif job.service == 'tesseract-ocr-pipeline':
        command += f' -m {job.service_args["model"]}'
        if 'binarization' in job.service_args and job.service_args['binarization']:
            command += ' --binarize'
        if 'ocropus_nlbin_threshold' in job.service_args and job.service_args['ocropus_nlbin_threshold']:
            value = job.service_args['ocropus_nlbin_threshold']
            command += f' --ocropus-nlbin-threshold {value}'
    elif job.service == 'transkribus-htr-pipeline':
        transkribus_htr_pipeline_model_id = job.service_args['model']
        command += f' -m {transkribus_htr_pipeline_model_id}'
        readcoop_username = current_app.config.get('NOPAQUE_READCOOP_USERNAME')
        command += f' --readcoop-username "{readcoop_username}"'
        readcoop_password = current_app.config.get('NOPAQUE_READCOOP_PASSWORD')
        command += f' --readcoop-password "{readcoop_password}"'
        if 'binarization' in job.service_args and job.service_args['binarization']:
            command += ' --binarize'
    ''' ## Constraints ## '''
    # Pipelines run on worker nodes only
    constraints = ['node.role==worker']
    ''' ## Labels ## '''
    labels = {
        'origin': current_app.config['SERVER_NAME'],
        'type': 'job',
        'job_id': str(job.id)
    }
    ''' ## Mounts ## '''
    mounts = []
    ''' ### Input mount(s) ### '''
    input_mount_target_base = '/input'
    if job.service == 'file-setup-pipeline':
        input_mount_target_base += f'/{secure_filename(job.title)}'
    for job_input in job.inputs:
        input_mount_source = job_input.path
        input_mount_target = f'{input_mount_target_base}/{job_input.filename}'
        input_mount = f'{input_mount_source}:{input_mount_target}:ro'
        mounts.append(input_mount)
    if job.service == 'tesseract-ocr-pipeline':
        # 'model' may be a hashid string or a raw integer id
        if isinstance(job.service_args['model'], str):
            model_id = hashids.decode(job.service_args['model'])
        elif isinstance(job.service_args['model'], int):
            model_id = job.service_args['model']
        else:
            job.status = JobStatus.FAILED
            return
        model = TesseractOCRPipelineModel.query.get(model_id)
        if model is None:
            job.status = JobStatus.FAILED
            return
        models_mount_source = model.path
        models_mount_target = f'/usr/local/share/tessdata/{model.id}.traineddata'
        models_mount = f'{models_mount_source}:{models_mount_target}:ro'
        mounts.append(models_mount)
    elif job.service == 'spacy-nlp-pipeline':
        model_id = hashids.decode(job.service_args['model'])
        model = SpaCyNLPPipelineModel.query.get(model_id)
        if model is None:
            job.status = JobStatus.FAILED
            return
        models_mount_source = model.path
        models_mount_target = f'/usr/local/share/spacy/models/{model.filename}'
        models_mount = f'{models_mount_source}:{models_mount_target}:ro'
        mounts.append(models_mount)
    ''' ### Output mount ### '''
    output_mount_source = os.path.join(job.path, 'results')
    output_mount_target = '/output'
    output_mount = f'{output_mount_source}:{output_mount_target}:rw'
    # Make sure that there is no data in the output directory
    shutil.rmtree(output_mount_source, ignore_errors=True)
    os.makedirs(output_mount_source)
    mounts.append(output_mount)
    ''' ### Pipeline data mount ### '''
    pyflow_data_mount_source = os.path.join(job.path, 'pipeline_data')
    pyflow_data_mount_target = '/logs/pyflow.data'
    pyflow_data_mount = f'{pyflow_data_mount_source}:{pyflow_data_mount_target}:rw'
    # Make sure that there is no data in the pipeline data directory
    shutil.rmtree(pyflow_data_mount_source, ignore_errors=True)
    os.makedirs(pyflow_data_mount_source)
    mounts.append(pyflow_data_mount)
    ''' ## Name ## '''
    name = f'job_{job.id}'
    ''' ## Resources ## '''
    # Docker expects CPU in nano-CPUs and memory in bytes
    resources = docker.types.Resources(
        cpu_reservation=n_cores * (10 ** 9),
        mem_reservation=mem_mb * (10 ** 6)
    )
    ''' ## Restart policy ## '''
    # Default policy: the one-shot pipeline task is not restarted
    restart_policy = docker.types.RestartPolicy()
    try:
        docker_client.services.create(
            image,
            command=command,
            constraints=constraints,
            labels=labels,
            mounts=mounts,
            name=name,
            resources=resources,
            restart_policy=restart_policy,
            user='0:0'
        )
    except docker.errors.DockerException as e:
        current_app.logger.error(f'Create service "{name}" failed: {e}')
        return
    job.status = JobStatus.QUEUED
 | 
			
		||||
 | 
			
		||||
def _checkout_job_service(job):
    '''
    Inspect the Docker Swarm service of a queued/running job and update
    the job status. On completion, register the produced result files;
    once a terminal state is reached, remove the service.
    '''
    service_name = f'job_{job.id}'
    try:
        service = docker_client.services.get(service_name)
    except docker.errors.NotFound as e:
        # Service vanished without reaching a terminal state: job failed
        current_app.logger.error(f'Get service "{service_name}" failed: {e}')
        job.status = JobStatus.FAILED
        return
    except docker.errors.DockerException as e:
        # Transient Docker error: log and retry on the next pass
        current_app.logger.error(f'Get service "{service_name}" failed: {e}')
        return
    service_tasks = service.tasks()
    if not service_tasks:
        # No task scheduled yet; check again on the next pass
        return
    task_state = service_tasks[0].get('Status').get('State')
    if job.status == JobStatus.QUEUED and task_state != 'pending':
        job.status = JobStatus.RUNNING
        return
    elif job.status == JobStatus.RUNNING and task_state == 'complete':
        job.status = JobStatus.COMPLETED
        # The pipeline writes an outputs.json manifest describing its
        # result files; turn each entry into a JobResult row.
        results_dir = os.path.join(job.path, 'results')
        with open(os.path.join(results_dir, 'outputs.json')) as f:
            outputs = json.load(f)
        for output in outputs:
            filename = os.path.basename(output['file'])
            job_result = JobResult(
                filename=filename,
                job=job,
                mimetype=output['mimetype']
            )
            if 'description' in output:
                job_result.description = output['description']
            db.session.add(job_result)
            # Flush + refresh so job_result gets its database id, which
            # job_result.path presumably depends on — TODO confirm
            db.session.flush(objects=[job_result])
            db.session.refresh(job_result)
            os.rename(
                os.path.join(results_dir, output['file']),
                job_result.path
            )
    elif job.status == JobStatus.RUNNING and task_state == 'failed':
        job.status = JobStatus.FAILED
    else:
        # No applicable transition; keep the service running
        return
    # NOTE(review): naive UTC timestamp; datetime.utcnow() is deprecated
    # since Python 3.12 — consider datetime.now(timezone.utc) if the DB
    # schema tolerates aware datetimes
    job.end_date = datetime.utcnow()
    # Terminal state reached: clean up the service
    try:
        service.remove()
    except docker.errors.DockerException as e:
        current_app.logger.error(f'Remove service "{service_name}" failed: {e}')
 | 
			
		||||
 | 
			
		||||
def _remove_job_service(job):
    '''
    Tear down the Docker service that backs `job`, if it still exists.

    Looks up the service named `job_<id>`, detaches its mounts, then
    removes it. A missing service means there is nothing left to stop,
    so the job is marked as canceled. Any other Docker error is logged
    and aborts the teardown early.
    '''
    service_name = f'job_{job.id}'
    try:
        service = docker_client.services.get(service_name)
    # NOTE: NotFound must be handled before DockerException (its base
    # class), otherwise the canceled path would never be reached.
    except docker.errors.NotFound:
        job.status = JobStatus.CANCELED
        return
    except docker.errors.DockerException as e:
        current_app.logger.error(f'Get service "{service_name}" failed: {e}')
        return
    try:
        # Detach mounts before removal — presumably so the job's data
        # volumes are released cleanly; TODO confirm against docker-py docs.
        service.update(mounts=None)
    except docker.errors.DockerException as e:
        current_app.logger.error(f'Update service "{service_name}" failed: {e}')
        return
    try:
        service.remove()
    except docker.errors.DockerException as e:
        # Consistency fix: use the same 'Remove service "..." failed'
        # wording as the sibling teardown helper in this module.
        current_app.logger.error(f'Remove service "{service_name}" failed: {e}')
 | 
			
		||||
@@ -1,72 +0,0 @@
 | 
			
		||||
from flask import abort, current_app
 | 
			
		||||
from flask_login import current_user
 | 
			
		||||
from threading import Thread
 | 
			
		||||
from app import db
 | 
			
		||||
from app.decorators import admin_required, content_negotiation
 | 
			
		||||
from app.models import Job, JobStatus
 | 
			
		||||
from . import bp
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/<hashid:job_id>', methods=['DELETE'])
@content_negotiation(produces='application/json')
def delete_job(job_id):
    '''
    Mark a job for deletion and perform the actual removal in a
    background thread, responding immediately with 202 Accepted.

    Only the owning user or an administrator may delete a job.
    '''
    def _delete_job(app, job_id):
        # Runs outside the request; push an application context manually
        # before touching the database.
        with app.app_context():
            Job.query.get(job_id).delete()
            db.session.commit()

    job = Job.query.get_or_404(job_id)
    if not (job.user == current_user or current_user.is_administrator):
        abort(403)
    worker = Thread(
        target=_delete_job,
        args=(current_app._get_current_object(), job_id)
    )
    worker.start()
    return {'message': f'Job "{job.title}" marked for deletion'}, 202
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/<hashid:job_id>/log')
@admin_required
@content_negotiation(produces='application/json')
def job_log(job_id):
    '''
    Return the pyFlow log of a finished job as JSON (admins only).

    Responds with 409 if the job has not yet reached a terminal state
    (completed or failed), since the log is only complete then.
    '''
    job = Job.query.get_or_404(job_id)
    if job.status not in (JobStatus.COMPLETED, JobStatus.FAILED):
        response = {'errors': {'message': 'Job status is not completed or failed'}}
        return response, 409
    log_path = job.path / 'pipeline_data' / 'logs' / 'pyflow_log.txt'
    with open(log_path) as f:
        log_content = f.read()
    return {'jobLog': log_content}, 200
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/<hashid:job_id>/restart', methods=['POST'])
@content_negotiation(produces='application/json')
def restart_job(job_id):
    '''
    Restart a failed job in a background thread and respond with 202.

    Only the owning user or an administrator may restart a job, and only
    jobs in the FAILED state are eligible; anything else yields 409.
    '''
    def _restart_job(app, job_id):
        # Runs outside the request; push an application context manually
        # before touching the database.
        with app.app_context():
            job = Job.query.get(job_id)
            job.restart()
            db.session.commit()

    job = Job.query.get_or_404(job_id)
    if not (job.user == current_user or current_user.is_administrator):
        abort(403)
    # BUG FIX: the guard was inverted (`== JobStatus.FAILED`), which
    # rejected exactly the failed jobs this endpoint exists to restart
    # while letting non-failed jobs through. Only non-failed jobs must
    # be refused, matching the error message below.
    if job.status != JobStatus.FAILED:
        response = {'errors': {'message': 'Job status is not "failed"'}}
        return response, 409
    thread = Thread(
        target=_restart_job,
        args=(current_app._get_current_object(), job_id)
    )
    thread.start()
    response_data = {
        'message': f'Job "{job.title}" marked for restarting'
    }
    return response_data, 202
 | 
			
		||||
@@ -1,55 +0,0 @@
 | 
			
		||||
from flask import (
 | 
			
		||||
    abort,
 | 
			
		||||
    redirect,
 | 
			
		||||
    render_template,
 | 
			
		||||
    send_from_directory,
 | 
			
		||||
    url_for
 | 
			
		||||
)
 | 
			
		||||
from flask_login import current_user
 | 
			
		||||
from app.models import Job, JobInput, JobResult
 | 
			
		||||
from . import bp
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('')
def jobs():
    '''Redirect to the jobs section of the user dashboard.'''
    dashboard_url = url_for('main.dashboard', _anchor='jobs')
    return redirect(dashboard_url)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/<hashid:job_id>')
def job(job_id):
    '''
    Render the detail page for a single job.

    Visible only to the job's owner or an administrator; 404 for
    unknown ids, 403 otherwise.
    '''
    job = Job.query.get_or_404(job_id)
    is_owner = job.user == current_user
    if not (is_owner or current_user.is_administrator):
        abort(403)
    return render_template('jobs/job.html.j2', title='Job', job=job)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/<hashid:job_id>/inputs/<hashid:job_input_id>/download')
def download_job_input(job_id, job_input_id):
    '''
    Send a job's input file to the client as an attachment download.

    404 if the input does not belong to the given job; 403 unless the
    requester owns the job or is an administrator.
    '''
    job_input = (
        JobInput.query
        .filter_by(job_id=job_id, id=job_input_id)
        .first_or_404()
    )
    owner = job_input.job.user
    if not (owner == current_user or current_user.is_administrator):
        abort(403)
    file_path = job_input.path
    return send_from_directory(
        file_path.parent,
        file_path.name,
        as_attachment=True,
        download_name=job_input.filename,
        mimetype=job_input.mimetype
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@bp.route('/<hashid:job_id>/results/<hashid:job_result_id>/download')
def download_job_result(job_id, job_result_id):
    '''
    Send a job's result file to the client as an attachment download.

    404 if the result does not belong to the given job; 403 unless the
    requester owns the job or is an administrator.
    '''
    job_result = (
        JobResult.query
        .filter_by(job_id=job_id, id=job_result_id)
        .first_or_404()
    )
    owner = job_result.job.user
    if not (owner == current_user or current_user.is_administrator):
        abort(403)
    file_path = job_result.path
    return send_from_directory(
        file_path.parent,
        file_path.name,
        as_attachment=True,
        download_name=job_result.filename,
        mimetype=job_result.mimetype
    )
 | 
			
		||||
		Reference in New Issue
	
	Block a user