mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2024-12-25 02:44:18 +00:00
Add prototype
This commit is contained in:
parent
b8fa8f47ab
commit
86557443a2
81
ask_for_jobs
Executable file
81
ask_for_jobs
Executable file
@ -0,0 +1,81 @@
|
||||
#!/usr/bin/env python3
|
||||
# coding=utf-8
|
||||
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
from threading import Thread
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
|
||||
|
||||
# URL-encoded German error text that is stored as the report of a failed job.
ERRORRMESSAGE = urllib.parse.quote(
    "Bei der Verarbeitung der Daten ist ein Fehler aufgetreten."
)
# Job collection endpoint of the manager's REST API.
VRE_MANAGER = "http://localhost:5000/vre/jobs"
# Docker image of the compute nodes, keyed by service name.
IMAGES = {
    "nlp": "gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_nlp_node",
    "ocr": "gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_ocr_node",
}
|
||||
|
||||
|
||||
def manage_jobs(service):
    """Hand queued jobs of ``service`` to idle compute nodes.

    Asks the manager for the queued jobs of the service, lists the running
    containers created from the service's image, keeps only the containers
    in which the marker file ``pyflow.data/active_pyflow_process.txt`` does
    not exist and starts one thread per (job, container) pair. Jobs that do
    not get a container are left untouched and stay queued.

    Args:
        service: Key into ``IMAGES`` ("nlp" or "ocr"). It is also sent as
            the ``service`` filter to the manager and passed on to
            ``start_job``.
    """
    # Get queued jobs. The context manager closes the HTTP response, which
    # the previous json.load(urlopen(...)) form never did.
    query = urllib.parse.urlencode({"service": service, "status": "queued"})
    with urllib.request.urlopen(VRE_MANAGER + "?" + query) as response:
        queued = json.load(response)

    # Return if no jobs are available
    if not queued:
        return

    # Get a list of compute nodes (names of the running containers)
    compute_nodes = subprocess.run(
        ["docker", "ps", "--filter", "ancestor=" + IMAGES[service], "--format", "{{.Names}}"],
        encoding="utf-8",
        stdout=subprocess.PIPE
    ).stdout.split()

    # Filter occupied compute nodes out: "test -f" exits with 0 when the
    # marker file exists, so only a non-zero return code means "idle".
    idle_nodes = [
        node for node in compute_nodes
        if subprocess.run(
            ["docker", "exec", node, "test", "-f", "pyflow.data/active_pyflow_process.txt"]
        ).returncode != 0
    ]

    # Return if no compute nodes are available
    if not idle_nodes:
        return

    # Assign jobs to compute nodes; zip() stops at the shorter list, so
    # surplus jobs are simply not started in this run.
    for job, node in zip(queued, idle_nodes):
        Thread(target=start_job, args=(service, node, job)).start()
|
||||
|
||||
|
||||
def start_job(service, compute_node, job):
    """Run one job inside one compute node and report the result.

    The job is first marked "running" at the manager, then ``service`` is
    executed in the container with the job directory as both input and
    output directory. Depending on the exit code the job is finally marked
    "finished" or "failed" (with ``ERRORRMESSAGE`` as report).

    Args:
        service: Name of the command to execute in the container.
        compute_node: Name of the Docker container to run the command in.
        job: Job dict as returned by the manager; ``job["id"]`` and
            ``job["language"]`` are read.
    """
    def set_status(status, report=None):
        # PUT the new status to the manager. ``report`` must already be
        # URL-encoded (ERRORRMESSAGE is). The response is closed right away.
        url = VRE_MANAGER + "/" + urllib.parse.quote(job["id"], safe="") + "?status=" + status
        if report is not None:
            url += "&report=" + report
        with urllib.request.urlopen(urllib.request.Request(url=url, method="PUT")):
            pass

    # Update job status to "running"
    set_status("running")

    # Start the service
    job_directory = "/root/vre_files/jobs/" + job["id"]
    try:
        process = subprocess.run(
            ["docker", "exec", compute_node, service, "-i", job_directory, "-o", job_directory, "-l", job["language"]],
            stderr=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL
        )
    except Exception:
        # The command could not even be launched. Without this the job
        # would stay "running" forever; the error itself is still raised.
        set_status("failed", ERRORRMESSAGE)
        raise

    if process.returncode != 0:
        set_status("failed", ERRORRMESSAGE)
        return

    # Update job status to "finished"
    set_status("finished")

    # TODO: Send E-Mail to user
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # One scheduling pass per service; importing the module no longer
    # triggers HTTP requests and docker calls as a side effect.
    manage_jobs("ocr")
    manage_jobs("nlp")
|
@ -1,39 +0,0 @@
|
||||
## Manager Service:
|
||||
- Joblist
|
||||
- Thread safe
|
||||
- REST API (https://github.com/zalando/connexion)
|
||||
- CreateNewJob
|
||||
- DeleteJobs
|
||||
- GetJobInfo
|
||||
- AlterJob
|
||||
- Scheduling
|
||||
- Resource management
|
||||
- manages also files on file server
|
||||
|
||||
## Web Server
|
||||
- serves content.
|
||||
- serves forms for user request and inputs
|
||||
- has copy of Joblist to display those for the user according to requests etc.
|
||||
- talks to the Manager service
|
||||
- Users CANNOT talk directly with the manager
|
||||
- has list of all currently running user sessions (maybe used for authentication)
|
||||
|
||||
## Authentication via LDAP (not sure if we need that)
|
||||
- university internal authentication with LDAP
|
||||
- LDAP and connexion can use (OAuth 2)
|
||||
- https://ldapwiki.com/wiki/Best%20Practices%20for%20LDAP%20Security
|
||||
- https://connexion.readthedocs.io/en/latest/security.html?highlight=authentication
|
||||
- university external authentication with something else
|
||||
|
||||
|
||||
## Compute pool: Docker Cluster
|
||||
- How to handle job and resource management for user Jobs and processes.
|
||||
- gets requests and tasks from the manager
|
||||
|
||||
## File Server
|
||||
- stores user input and
|
||||
- output files
|
||||
- Maybe WebDAV
|
||||
|
||||
## Database
|
||||
- document-based database like MongoDB preferred
|
@ -1 +0,0 @@
|
||||
<mxfile modified="2019-04-17T12:49:28.623Z" host="www.draw.io" agent="Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:66.0) Gecko/20100101 Firefox/66.0" etag="JmPgRII5KerP40fNuYKp" version="10.6.3" type="device"><diagram id="u0ifZDnm1cNV0CSORuqK" name="Page-1">3VpZc9o6FP41zKQPyXhhCY+U5ba905lOaSfNU0fYAruRJSrJYPLr75Et45XgsIUbXpCOtZzlO8efBC17GET/cLT0vjIXk5ZluFHLHrUsyzLsNnwpySaRmKbdSSQL7rtalgmm/jPWQkNLQ9/FojBQMkakvywKHUYpdmRBhjhn6+KwOSPFXZdogSuCqYNIVfrgu9JLpPdWL5N/wv7CS3c2u/3kSYDSwdoS4SGXrXMie9yyh5wxmbSCaIiJ8l7ql2TeZMfTrWIcU9lkwvSPYfwcPM+eJ/aPaCxR/9Ye3+pVVoiE2uCHhwetr9ykThBrPyCIQu+jHo+5xNFORcyteQAMzAIs+QaG6Alt7ZBNsbvO3LuVeTnX2qkjkQ7pYrtyZjU0tOGvcIJVcULL6hLY9qPrr6C5kLHhiWiWCiYEiadUCvvOKiN5WQKjalasEc0ZuDIfgu7fkKUPbkWcJQMYYFrLKHsYpwFhvDCjZdn9+JMXpbvZMOUPA80N4gsZz1+qSN1wjFz4ZpRsPuS0T/T6Hxi1wFLAF8dzjoWHlS0YQKtsExhqhbvHqANit0I88ebvxJdHg+B4/BzgOI7/hjiBAnI8/KIup0DD4bGdk/CQyDaMo/bD74IXrjt0AgvhMypO44Ca1a7Y9lBgpbRPJebIkaC4QkhI4+Y+jxygsVpaVU228OlNSFEAGBkqPZZIiDXj7ofXLgWo/sJm4mbuE7BBNJ9/d3f3tsVm++IbLJNkmVDwSpRbp/JqvIhae6eWWA4wGiX3ZADsYWRCU0jOnvBQw5GymAFBgEhJhIi/oNAleB4nELAjHyjkQIslW4J07fkST8FFars10GWQcRZSFytuYmw12sOtdtO4KuHSDMtuyLCs/rkYVqfCsL4iCtSZn5lqdqxr45rdJlwTOItgIQekFOnZ+ZMnv7OqqeIiibi/tAzV8S6Ky/oRVeX7ePoDDBt8+xwfBfnF3Vux699whjnFEovjDMuFLYgzK8D0FRz0/VTCzpVXwl4l/yc+UdG/aB3c9t+uEN43KYSxbzBfqRdFw1SYTj+pg6vK7TWeuWj1/jHfu3LM96uXTIw/vfzyFxJxqa8DgeGfJAtMw7g2OpBec9akwfYtMGLOUyEBZu8RxP0rB7FZrdyaTjgsCEIK3kqOnWVMe2ipmnOCo4G6kAYvYerq5sghcGD0nRTxVXEuluBAvvmlPR13HlXnrpN2R1H+4WiT9iJf/krXgHZuFvSySaqzKQQSu5Xb8VKyga0J8dh7vQrmwRl3HzWuIiAX8k5NxFMZxwQCsCqqW4cCvcM35seXDfWX0n2zhKPESj0pf8teWqdfXOe+W1on8UJlnRiSW6OPuMqu1pPkpBVfjgDEqFNHNk6I0QOwdiiuT4jRbkOMmkbrDUF6bxbRZZntA1Ha3bPQuWFa/cnle/KLA1IXuMaaw4tGndHqufE5SmqGtscC2F4uqRm0H3OgP3tJ7TSE6w5meBm0dkoga5drYVO03pdp26WLavX6qvYMvhvAjU4gZ6jCRqEK95qWYT2yaTKcENhpgbtuZHc7RUDavQOR3SsV9HaZvp4b2TWkNoHxkZW3rsSaBUzlMLa3xlr5Imtcqsj2GkLReksoXj1vhW72H5dkePZXIXv8Hw==</diagram></mxfile>
|
Binary file not shown.
Before Width: | Height: | Size: 53 KiB |
@ -1,13 +0,0 @@
|
||||
##### books:
|
||||
- Flask By Example
|
||||
http://web.b.ebscohost.com/ehost/detail/detail?vid=3&sid=88df57d6-b0af-42aa-aeda-3f96f2d80c8e%40sessionmgr120&bdata=JnNpdGU9ZWhvc3QtbGl2ZQ%3d%3d#AN=1215157&db=nlebk
|
||||
|
||||
#### videos:
|
||||
- Learning Flask
|
||||
- https://www.lynda.com/Flask-tutorials/Learning-Flask/704154-2.html
|
||||
- Building Web APIs with Flask
|
||||
- https://www.lynda.com/Flask-tutorials/Web-API-Development-Flask/521200-2.html?
|
||||
|
||||
#### online:
|
||||
- The Flask Mega-Tutorial
|
||||
- https://blog.miguelgrinberg.com/post/the-flask-mega-tutorial-part-i-hello-world
|
84
start.sh
Executable file
84
start.sh
Executable file
@ -0,0 +1,84 @@
|
||||
#!/bin/bash
#
# Builds the prototype's container images, starts all prototype containers
# and then asks for jobs every 30 seconds.
#
# Usage:
#   start.sh          build images, start containers, poll for jobs
#   start.sh -clean   remove the prototype's containers, images and volume

COLOR_BLUE="\033[0;34m"
COLOR_END="\033[0m"
COLOR_GREEN="\033[0;32m"
PROTOTYPE_URL="http://$(hostname -I | cut -d' ' -f1)/vre"
# Directory of this script. The image directories and ask_for_jobs are
# resolved relative to it, so the script works from any working directory.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

if [ "$1" == "-clean" ]; then
    echo -e "${COLOR_BLUE}Remove existing prototype related containers...${COLOR_END}"
    docker rm -f \
        vre_manager \
        vre_nlp_node \
        vre_ocr_node \
        vre_www
    echo -e "${COLOR_GREEN}Done!${COLOR_END}"

    echo -e "${COLOR_BLUE}Remove existing prototype related images...${COLOR_END}"
    docker rmi \
        gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_manager \
        gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_nlp_node \
        gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_ocr_node \
        gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_www
    echo -e "${COLOR_GREEN}Done!${COLOR_END}"

    echo -e "${COLOR_BLUE}Remove the existing prototype related volume...${COLOR_END}"
    docker volume rm vre_files
    echo -e "${COLOR_GREEN}Done!${COLOR_END}"

    exit 0
fi

echo -e "${COLOR_BLUE}Build container images from corresponding directories...${COLOR_END}"
# Every directory next to this script is built and tagged with its own name.
for dir in "$SCRIPT_DIR"/*/; do
    # The -d test also covers the case where the glob matched nothing.
    if [ -d "$dir" ]; then
        docker build -t gitlab.ub.uni-bielefeld.de:4567/pjentsch/"$(basename "$dir")" "$dir"
    fi
done
echo -e "${COLOR_GREEN}Done!${COLOR_END}"

echo -e "${COLOR_BLUE}Create prototype files volume...${COLOR_END}"
docker volume create vre_files
echo -e "${COLOR_GREEN}Done!${COLOR_END}"

echo -e "${COLOR_BLUE}Start prototype manager container...${COLOR_END}"
docker run \
    --name vre_manager \
    -d \
    -p 5000:5000 \
    -v vre_files:/root/vre_files \
    gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_manager:latest
echo -e "${COLOR_GREEN}Done!${COLOR_END}"

echo -e "${COLOR_BLUE}Start prototype nlp node container...${COLOR_END}"
docker run \
    --name vre_nlp_node \
    -dit \
    -v vre_files:/root/vre_files \
    gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_nlp_node:latest
echo -e "${COLOR_GREEN}Done!${COLOR_END}"

echo -e "${COLOR_BLUE}Start prototype ocr node container...${COLOR_END}"
docker run \
    --name vre_ocr_node \
    -dit \
    -v vre_files:/root/vre_files \
    gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_ocr_node:latest
echo -e "${COLOR_GREEN}Done!${COLOR_END}"

echo -e "${COLOR_BLUE}Start prototype www container...${COLOR_END}"
docker run \
    --name vre_www \
    -d \
    -p 80:80 \
    -v vre_files:/usr/share/nginx/html/vre_files \
    gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_www:latest
echo -e "${COLOR_GREEN}Done!${COLOR_END}"

echo -e "${COLOR_GREEN}The prototype is now completly loaded and reachable under ${PROTOTYPE_URL}!${COLOR_END}"

# Scheduling loop: runs until the script is interrupted.
while true; do
    sleep 30s
    echo -e "${COLOR_BLUE}Ask for jobs...${COLOR_END}"
    # Quoted so that a script path containing spaces is not word-split.
    "$SCRIPT_DIR"/ask_for_jobs
done
|
23
vre_manager/Dockerfile
Normal file
23
vre_manager/Dockerfile
Normal file
@ -0,0 +1,23 @@
|
||||
# Image of the job manager: a connexion/Flask REST service on port 5000.
FROM python:3-slim

# MAINTAINER is deprecated; a maintainer label carries the same information.
LABEL maintainer="Patrick Jentsch <p.jentsch@uni-bielefeld.de>"

ENV PYTHONDONTWRITEBYTECODE=1

EXPOSE 5000

# The extra is quoted so the shell cannot treat the brackets as a glob.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir \
        "connexion[swagger-ui]" \
        flask-cors

RUN mkdir -p \
        /root/vre_manager/request_handlers \
        /root/vre_manager/swagger

WORKDIR /root/vre_manager

# Trailing slashes make it explicit that the destinations are directories.
COPY vre_manager.py /root/vre_manager/
COPY swagger/vre_manager.yml /root/vre_manager/swagger/
COPY request_handlers/jobs.py /root/vre_manager/request_handlers/

ENTRYPOINT ["python", "/root/vre_manager/vre_manager.py"]
|
114
vre_manager/request_handlers/jobs.py
Normal file
114
vre_manager/request_handlers/jobs.py
Normal file
@ -0,0 +1,114 @@
|
||||
import flask
|
||||
import os
|
||||
|
||||
|
||||
# Directory below which every job gets a folder named after its id.
UPLOAD_DIRECTORY = os.path.abspath("/root/vre_files/jobs")

# Job store of this process: plain module-level state, nothing is persisted.
job_counter = 0  # number of jobs created so far; the next id is derived from it
jobs = []  # one dict per job, in creation order
|
||||
|
||||
|
||||
def create_job(file, language, name, service, user):
    """Create a queued job and store its uploaded file.

    Args:
        file: Uploaded file object; ``content_type``, ``filename`` and
            ``save()`` are used.
        language: Language code stored with the job.
        name: Display name of the job.
        service: "nlp" (requires a text/plain upload) or "ocr" (requires
            an application/pdf upload).
        user: Owner of the job.

    Returns:
        A 201 response whose body is the new job as JSON.

    Aborts with 415 when the upload's content type does not fit the
    service and with 400 when the upload has no usable file name.
    """
    global job_counter

    if service == "nlp" and file.content_type != "text/plain":
        flask.abort(415)
    if service == "ocr" and file.content_type != "application/pdf":
        flask.abort(415)

    # The client controls the file name. Keep only its last path component
    # so the upload cannot be written outside of the job directory.
    filename = os.path.basename((file.filename or "").replace("\\", "/"))
    if filename in ("", ".", ".."):
        flask.abort(400)

    job = {
        "file": filename,
        "id": "job-" + str(job_counter + 1),
        "language": language,
        "name": name,
        "report": "",
        "service": service,
        "status": "queued",
        "user": user,
    }

    job_directory = os.path.join(UPLOAD_DIRECTORY, job["id"])
    os.makedirs(job_directory, exist_ok=True)
    file.save(os.path.join(job_directory, filename))

    # The job is registered only after its file was stored successfully.
    jobs.append(job)
    job_counter += 1

    return flask.make_response(flask.jsonify(job), 201)
|
||||
|
||||
|
||||
def delete_job(id):
    """Delete the uploaded file of a queued job and mark the job deleted.

    The job dict stays in ``jobs``: its "id" and "user" are kept, every
    other field is set to None and the status becomes "deleted".

    Args:
        id: Job identifier. The name shadows the builtin but has to match
            the ``id`` path parameter of the API specification.

    Returns:
        An empty 204 response.

    Aborts with 404 for an unknown id and with 423 when the job is not
    (or no longer) in status "queued".
    """
    job = next((candidate for candidate in jobs if candidate["id"] == id), None)
    if job is None:
        flask.abort(404)
    if job["status"] != "queued":
        flask.abort(423)

    job_directory = os.path.join(UPLOAD_DIRECTORY, id)
    os.remove(os.path.join(job_directory, job["file"]))
    os.rmdir(job_directory)

    for key in ("file", "language", "name", "report", "service"):
        job[key] = None
    job["status"] = "deleted"

    return flask.make_response("", 204)
|
||||
|
||||
|
||||
def get_job(id):
    """Return the job dict with the given id; abort with 404 if unknown.

    Args:
        id: Job identifier (named after the ``id`` path parameter of the
            API specification).
    """
    job = next((candidate for candidate in jobs if candidate["id"] == id), None)
    if job is None:
        flask.abort(404)
    return job
|
||||
|
||||
|
||||
def get_jobs(name=None, service=None, status=None, user=None):
    """Return all jobs that match every given filter.

    A filter that is None (or otherwise falsy) is ignored; without any
    filter all jobs are returned.

    Args:
        name: Required value of the job's "name".
        service: Required value of the job's "service".
        status: Required value of the job's "status".
        user: Required value of the job's "user".

    Returns:
        A new list with the matching job dicts, in creation order.
    """
    wanted = {"name": name, "service": service, "status": status, "user": user}
    active_filters = [(key, value) for key, value in wanted.items() if value]

    return [
        job for job in jobs
        if all(job[key] == value for key, value in active_filters)
    ]
|
||||
|
||||
|
||||
def update_job(id, file=None, language=None, name=None, report=None, status=None):
    """Update a job.

    Two kinds of update exist:

    * With ``status``: the status (and ``report``, if given) is set and
      the job is returned immediately. This path works in every job state
      and ignores ``file``, ``language`` and ``name``.
    * Without ``status``: the job must still be "queued"; ``file``,
      ``language``, ``report`` and ``name`` are applied when given.

    Args:
        id: Job identifier (named after the ``id`` path parameter of the
            API specification).
        file: Replacement upload; must fit the job's service.
        language: New language code.
        name: New display name.
        report: New report text.
        status: New status.

    Returns:
        The updated job dict.

    Aborts with 404 for an unknown id, 423 when a non-status update hits
    a job that is not "queued", 415 for a wrong content type and 400 when
    the replacement upload has no usable file name.
    """
    job = next((candidate for candidate in jobs if candidate["id"] == id), None)
    if job is None:
        flask.abort(404)

    if status:
        job["status"] = status
        if report:
            job["report"] = report
        return job

    if job["status"] != "queued":
        flask.abort(423)

    if file:
        if job["service"] == "nlp" and file.content_type != "text/plain":
            flask.abort(415)
        if job["service"] == "ocr" and file.content_type != "application/pdf":
            flask.abort(415)
        # The client controls the file name. Keep only its last path
        # component so the upload cannot leave the job directory.
        filename = os.path.basename((file.filename or "").replace("\\", "/"))
        if filename in ("", ".", ".."):
            flask.abort(400)
        job_directory = os.path.join(UPLOAD_DIRECTORY, id)
        os.remove(os.path.join(job_directory, job["file"]))
        file.save(os.path.join(job_directory, filename))
        job["file"] = filename
    if language:
        job["language"] = language
    if report:
        job["report"] = report
    if name:
        job["name"] = name

    return job
|
225
vre_manager/swagger/vre_manager.yml
Normal file
225
vre_manager/swagger/vre_manager.yml
Normal file
@ -0,0 +1,225 @@
|
||||
swagger: "2.0"
|
||||
info:
|
||||
contact:
|
||||
email: "p.jentsch@uni-bielefeld.de"
|
||||
name: "Patrick Jentsch"
|
||||
title: "SFB 1288 | INF: Plattformname - ReST API"
|
||||
version: "1.0.0"
|
||||
consumes:
|
||||
- application/json
|
||||
produces:
|
||||
- application/json
|
||||
|
||||
|
||||
basePath: /vre
|
||||
|
||||
|
||||
definitions:
|
||||
# Enums
|
||||
Service:
|
||||
type: string
|
||||
enum: &SERVICES
|
||||
- ocr
|
||||
- nlp
|
||||
Status:
|
||||
type: string
|
||||
enum: &STATUSES
|
||||
- deleted
|
||||
- failed
|
||||
- finished
|
||||
- queued
|
||||
- running
|
||||
Job:
|
||||
type: object
|
||||
properties:
|
||||
file:
|
||||
type: string
|
||||
id:
|
||||
type: string
|
||||
language:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
report:
|
||||
type: string
|
||||
service:
|
||||
type: string
|
||||
enum: *SERVICES
|
||||
status:
|
||||
type: string
|
||||
enum: *STATUSES
|
||||
user:
|
||||
type: string
|
||||
Jobs:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/definitions/Job'
|
||||
|
||||
|
||||
paths:
|
||||
/jobs:
|
||||
get:
|
||||
operationId: request_handlers.jobs.get_jobs
|
||||
tags:
|
||||
- jobs
|
||||
summary: Get a list of all jobs matching the filter(s)
|
||||
description: Get a list of all jobs matching the filter(s)
|
||||
parameters:
|
||||
- name: name
|
||||
in: query
|
||||
type: string
|
||||
description: The name to filter for
|
||||
required: False
|
||||
- name: service
|
||||
in: query
|
||||
type: string
|
||||
enum: *SERVICES
|
||||
description: The service to filter for
|
||||
required: False
|
||||
- name: status
|
||||
in: query
|
||||
type: string
|
||||
description: The status to filter for
|
||||
required: False
|
||||
- name: user
|
||||
in: query
|
||||
type: string
|
||||
description: The user to filter for
|
||||
required: False
|
||||
responses:
|
||||
200:
|
||||
description: OK
|
||||
schema:
|
||||
$ref: '#/definitions/Jobs'
|
||||
post:
|
||||
operationId: request_handlers.jobs.create_job
|
||||
tags:
|
||||
- jobs
|
||||
summary: Create a new job
|
||||
description: Create a new job
|
||||
consumes:
|
||||
- multipart/form-data
|
||||
parameters:
|
||||
- name: file
|
||||
in: formData
|
||||
type: file
|
||||
description: File
|
||||
required: True
|
||||
- name: language
|
||||
in: query
|
||||
description: Language
|
||||
type: string
|
||||
required: True
|
||||
- name: name
|
||||
in: query
|
||||
description: Name
|
||||
type: string
|
||||
required: True
|
||||
- name: service
|
||||
in: query
|
||||
description: Service
|
||||
type: string
|
||||
enum: *SERVICES
|
||||
required: True
|
||||
- name: user
|
||||
in: query
|
||||
description: User
|
||||
type: string
|
||||
required: True
|
||||
responses:
|
||||
201:
|
||||
description: Created
|
||||
schema:
|
||||
$ref: '#/definitions/Job'
|
||||
415:
|
||||
description: Unsupported Media Type
|
||||
/jobs/{id}:
|
||||
delete:
|
||||
operationId: request_handlers.jobs.delete_job
|
||||
tags:
|
||||
- jobs
|
||||
summary: Delete job (specified by {id})
description: Delete job (specified by {id})
|
||||
parameters:
|
||||
- name: id
|
||||
in: path
|
||||
description: Job identifier
|
||||
type: string
|
||||
required: True
|
||||
responses:
|
||||
204:
|
||||
description: No Content
|
||||
404:
|
||||
description: Not Found
|
||||
423:
|
||||
description: Locked
|
||||
get:
|
||||
operationId: request_handlers.jobs.get_job
|
||||
tags:
|
||||
- jobs
|
||||
summary: Get job (specified by {id})
description: Get job (specified by {id})
|
||||
parameters:
|
||||
- name: id
|
||||
in: path
|
||||
description: Job identifier
|
||||
type: string
|
||||
required: True
|
||||
responses:
|
||||
200:
|
||||
description: OK
|
||||
schema:
|
||||
$ref: '#/definitions/Job'
|
||||
404:
|
||||
description: Not Found
|
||||
put:
|
||||
operationId: request_handlers.jobs.update_job
|
||||
tags:
|
||||
- jobs
|
||||
summary: Update job (specified by {id})
description: Update job (specified by {id})
|
||||
consumes:
|
||||
- multipart/form-data
|
||||
parameters:
|
||||
- name: file
|
||||
in: formData
|
||||
type: file
|
||||
description: File
|
||||
required: False
|
||||
- name: id
|
||||
in: path
|
||||
description: Job identifier
|
||||
type: string
|
||||
required: True
|
||||
- name: language
|
||||
in: query
|
||||
description: Language
|
||||
type: string
|
||||
required: False
|
||||
- name: name
|
||||
in: query
|
||||
description: Name
|
||||
type: string
|
||||
required: False
|
||||
- name: report
|
||||
in: query
|
||||
description: Report
|
||||
type: string
|
||||
required: False
|
||||
- name: status
|
||||
in: query
|
||||
description: Status
|
||||
type: string
|
||||
enum: *STATUSES
|
||||
required: False
|
||||
responses:
|
||||
200:
|
||||
description: OK
|
||||
schema:
|
||||
$ref: '#/definitions/Job'
|
||||
404:
|
||||
description: Not Found
|
||||
415:
|
||||
description: Unsupported Media Type
|
||||
423:
|
||||
description: Locked
|
7
vre_manager/vre_manager.py
Normal file
7
vre_manager/vre_manager.py
Normal file
@ -0,0 +1,7 @@
|
||||
import connexion
|
||||
import flask_cors
|
||||
|
||||
# The API is described in swagger/vre_manager.yml; its operationId entries
# name the handler functions in request_handlers/jobs.py.
app = connexion.FlaskApp(__name__, specification_dir='swagger/')
app.add_api('vre_manager.yml')
# Enable CORS on the underlying Flask application.
flask_cors.CORS(app.app)

if __name__ == "__main__":
    # Only serve when executed as a script, not when the module is imported.
    app.run(port=5000)
|
43
vre_nlp_node/Dockerfile
Normal file
43
vre_nlp_node/Dockerfile
Normal file
@ -0,0 +1,43 @@
|
||||
# Compute node image for the NLP service (pyFlow + spaCy).
FROM debian:stretch-slim

# MAINTAINER is deprecated; a maintainer label carries the same information.
LABEL maintainer="Patrick Jentsch <p.jentsch@uni-bielefeld.de>"

ENV LANG=C.UTF-8

# The package lists are removed in the same layer to keep the image small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        python2.7 \
        python3 \
        python3-dev \
        python3-pip \
        python3-setuptools \
        wget \
        zip && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /root

# Install pyFlow
ENV PYFLOW_VERSION=1.1.20
RUN wget -nv https://github.com/Illumina/pyflow/releases/download/v"$PYFLOW_VERSION"/pyflow-"$PYFLOW_VERSION".tar.gz && \
    tar -xzf pyflow-"$PYFLOW_VERSION".tar.gz && \
    rm pyflow-"$PYFLOW_VERSION".tar.gz && \
    cd pyflow-"$PYFLOW_VERSION" && \
    python2.7 setup.py build install

# Install spaCy and the language models
RUN pip3 install --no-cache-dir wheel && pip3 install --no-cache-dir -U spacy && \
    python3 -m spacy download de && \
    python3 -m spacy download en && \
    python3 -m spacy download es && \
    python3 -m spacy download fr && \
    python3 -m spacy download pt

RUN mkdir files_for_nlp files_from_nlp

COPY nlp /usr/local/bin/
COPY spacy_nlp /usr/local/bin/

CMD ["/bin/bash"]
|
38
vre_nlp_node/README.md
Normal file
38
vre_nlp_node/README.md
Normal file
@ -0,0 +1,38 @@
|
||||
# vre_nlp_node
|
||||
Dieses Repository stellt ein Dockerfile zur Erstellung eines Dockerimages zur linguistischen Datenverarbeitung (NLP) zur Verfügung. Es werden Textdateien entgegengenommen und verticalized text-Dateien ausgegeben.
|
||||
|
||||
## Dockerimage erstellen
|
||||
Die GitLab Registry stellt ein automatisch erstelltes Dockerimage zur Verfügung, das stets die neuesten Änderungen beinhaltet. Das Dockerimage kann aber auch lokal erstellt werden; dazu muss folgender Befehl ins Terminal eingegeben werden.
|
||||
```bash
|
||||
docker build -t gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_nlp_node .
|
||||
```
|
||||
|
||||
## Nutzung
|
||||
|
||||
### Starten eines Dockercontainers
|
||||
```bash
|
||||
docker run \
|
||||
--name <containername> \
|
||||
-dit \
|
||||
-v <datalocation>/files_for_nlp:/root/files_for_nlp \
|
||||
-v <datalocation>/files_from_nlp:/root/files_from_nlp \
|
||||
gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_nlp_node
|
||||
```
|
||||
|
||||
### Daten zur linguistischen Datenverarbeitung in das Eingabeverzeichnis kopieren
|
||||
```bash
|
||||
cp <textfile1> <textfile2> ... <textfilen> <datalocation>/files_for_nlp
|
||||
```
|
||||
|
||||
### Linguistische Datenverarbeitung starten
|
||||
```bash
|
||||
docker exec -it <containername> nlp -i /root/files_for_nlp -o /root/files_from_nlp -l <languagecode>
|
||||
```
|
||||
Valide Angaben für `<languagecode>` sind:
|
||||
* de (Deutsch)
|
||||
* en (Englisch)
|
||||
* es (Spanisch)
|
||||
* fr (Französisch)
|
||||
* pt (Portugiesisch)
|
||||
|
||||
Nach Beendigung des Vorgangs stehen die aufbereiteten Daten im Verzeichnis `<datalocation>/files_from_nlp` zur Verfügung.
|
131
vre_nlp_node/nlp
Executable file
131
vre_nlp_node/nlp
Executable file
@ -0,0 +1,131 @@
|
||||
#!/usr/bin/env python2.7
|
||||
# coding=utf-8
|
||||
|
||||
"""
|
||||
nlp
|
||||
|
||||
Usage: For usage instructions run with option --help
|
||||
Author: Patrick Jentsch <p.jentsch@uni-bielefeld.de>
|
||||
"""
|
||||
|
||||
|
||||
import argparse
|
||||
import multiprocessing
|
||||
import os
|
||||
import sys
|
||||
from pyflow import WorkflowRunner
|
||||
|
||||
|
||||
def parse_arguments():
    """Parse the command line of the nlp pipeline.

    Returns:
        The argparse namespace with ``inputDir``, ``lang``, ``outputDir``
        (all required) and ``nCores`` (int, defaults to the number of
        CPUs reported by ``multiprocessing.cpu_count()``).
    """
    # The text has to be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, so passing it positionally
    # replaced the program name in usage and error messages.
    parser = argparse.ArgumentParser(
        description="Performs NLP of documents utilizing spaCy. "
                    "Output is .vrt."
    )

    parser.add_argument("-i",
                        dest="inputDir",
                        help="Input directory.",
                        required=True)
    parser.add_argument("-l",
                        dest="lang",
                        help="Language for NLP",
                        required=True)
    parser.add_argument("-o",
                        dest="outputDir",
                        help="Output directory.",
                        required=True)
    parser.add_argument("--nCores",
                        default=multiprocessing.cpu_count(),
                        dest="nCores",
                        help="Total number of cores available.",
                        required=False,
                        type=int)
    return parser.parse_args()
|
||||
|
||||
|
||||
class NLPWorkflow(WorkflowRunner):
    """pyFlow workflow that turns every input text file into a zipped .vrt.

    For each job three tasks are added: create the output directory, run
    ``spacy_nlp`` on the input file and zip the output directory.
    """

    def __init__(self, jobs, lang, nCores):
        """Store the workflow parameters.

        Args:
            jobs: List of dicts with the keys "path" (input file) and
                "output_dir", as produced by ``analyze_jobs``.
            lang: Language code handed to ``spacy_nlp -l``.
            nCores: Number of cores; stored, but not read by ``workflow``.
        """
        self.jobs = jobs
        self.lang = lang
        self.nCores = nCores

    def workflow(self):
        """Add the mkdir, NLP and zip tasks for all jobs."""
        ###
        # Task "mkdir_job": create output directories
        # Dependencies: None
        ###
        mkdir_jobs = []
        for number, job in enumerate(self.jobs, 1):
            cmd = 'mkdir -p "%s"' % job["output_dir"]
            mkdir_jobs.append(self.addTask(label="mkdir_job_-_%i" % number, command=cmd))

        ###
        # Task "spacy_nlp_job": perform NLP
        # Dependencies: mkdir_jobs
        ###
        # NOTE(review): presumably blocks until the tasks added so far are
        # done - confirm against the pyFlow documentation.
        self.waitForTasks()
        nlp_jobs = []
        for number, job in enumerate(self.jobs, 1):
            base_name = os.path.basename(job["path"]).rsplit(".", 1)[0]
            cmd = 'spacy_nlp -i "%s" -o "%s" -l "%s"' % (
                job["path"],
                os.path.join(job["output_dir"], base_name + ".vrt"),
                self.lang
            )
            nlp_jobs.append(self.addTask(label="nlp_job_-_%i" % number, command=cmd, dependencies=mkdir_jobs))

        ###
        # Task "zip_job": compress output
        # Dependencies: nlp_jobs
        ###
        zip_jobs = []
        for number, job in enumerate(self.jobs, 1):
            # Paths are quoted like in the commands above, so directories
            # with spaces in their name no longer break the zip call.
            cmd = 'zip -jqr "%s" "%s"' % (
                job["output_dir"] + "_-_nlp",
                job["output_dir"]
            )
            zip_jobs.append(self.addTask(label="zip_job_-_%i" % number, command=cmd, dependencies=nlp_jobs))
|
||||
|
||||
|
||||
def analyze_jobs(inputDir, outputDir, level=1):
    """Collect one job per .txt file below ``inputDir``.

    Files directly in ``inputDir`` and in its immediate subdirectories are
    considered; deeper levels are ignored. Directory entries are visited
    in sorted order so that the job order (and with it the numbering of
    the workflow tasks) is the same on every run.

    Args:
        inputDir: Directory to scan.
        outputDir: Directory that mirrors ``inputDir`` for the results.
        level: Current depth, 1 for the top-level call. Any value above 2
            yields an empty list.

    Returns:
        A list of dicts with "path" (the input file) and "output_dir"
        (``outputDir`` joined with the file name without its extension).
    """
    jobs = []

    if level > 2:
        return jobs

    for entry in sorted(os.listdir(inputDir)):
        entry_path = os.path.join(inputDir, entry)
        if os.path.isdir(entry_path):
            jobs += analyze_jobs(
                entry_path,
                os.path.join(outputDir, entry),
                level + 1
            )
        elif entry.endswith(".txt"):
            jobs.append({
                "path": entry_path,
                "output_dir": os.path.join(outputDir, entry.rsplit(".", 1)[0])
            })

    return jobs
|
||||
|
||||
|
||||
def main():
    """Parse the arguments, build the workflow and exit with its result."""
    args = parse_arguments()

    jobs = analyze_jobs(args.inputDir, args.outputDir)
    wflow = NLPWorkflow(jobs, args.lang, args.nCores)

    # The return value of run() becomes the exit status of the process.
    retval = wflow.run(nCores=args.nCores)
    sys.exit(retval)


if __name__ == "__main__":
    main()
|
59
vre_nlp_node/spacy_nlp
Executable file
59
vre_nlp_node/spacy_nlp
Executable file
@ -0,0 +1,59 @@
|
||||
#!/usr/bin/env python3
|
||||
# coding=utf-8
|
||||
|
||||
|
||||
import argparse
import os
from xml.sax.saxutils import escape

import spacy


parser = argparse.ArgumentParser(
    description="Tag a .txt file with spaCy and save it in .vrt format"
)
parser.add_argument("-i",
                    dest="input",
                    help="Input file.",
                    required=True)
parser.add_argument("-l",
                    choices=["de", "en", "es", "fr", "pt"],
                    dest="lang",
                    help="Language for tagging",
                    required=True)
parser.add_argument("-o",
                    dest="output",
                    help="Output file.",
                    required=True)
args = parser.parse_args()


# spaCy model to load for each supported language code
SPACY_MODELS = {"de": "de_core_news_sm", "en": "en_core_web_sm",
                "es": "es_core_news_sm", "fr": "fr_core_news_sm",
                "pt": "pt_core_news_sm"}


# Set the language model for spacy
nlp = spacy.load(SPACY_MODELS[args.lang])

# Read text from the input file
# NOTE(review): assumes the input is UTF-8 encoded - confirm. The encoding
# is now explicit instead of depending on the locale.
with open(args.input, encoding="utf-8") as input_file:
    text = input_file.read()

# Run spacy nlp over the text
doc = nlp(text)

# Create and open the output file. The "with" block closes it even when
# writing fails. The encoding is fixed to UTF-8 because the XML
# declaration written below promises exactly that.
text_id = os.path.basename(args.input).rsplit(".", 1)[0]
with open(args.output, "w", encoding="utf-8") as output_file:
    # The id is an attribute value, so '"' has to be escaped as well.
    output_file.write('<?xml version="1.0" encoding="UTF-8"?>\n<corpus>\n<text id="'
                      + escape(text_id, {'"': "&quot;"}) + '">\n')
    for sent in doc.sents:
        output_file.write('<s>\n')
        for token in sent:
            # Skip whitespace tokens like "\n" or "\t"
            if token.text.isspace():
                continue
            # Write all information in .vrt style to the output file
            # text, lemma, simple_pos, pos, ner
            # Every column is escaped: a token such as "&" or "<" would
            # otherwise make the XML output malformed.
            output_file.write(escape(token.text) + "\t" + escape(token.lemma_) + "\t"
                              + escape(token.pos_) + "\t" + escape(token.tag_) + "\t"
                              + (token.ent_type_ if token.ent_type_ != "" else "NULL") + "\n")
        output_file.write('</s>\n')
    output_file.write('</text>\n</corpus>')
|
49
vre_ocr_node/Dockerfile
Normal file
49
vre_ocr_node/Dockerfile
Normal file
@ -0,0 +1,49 @@
|
||||
# Compute node image for the OCR service (pyFlow + Tesseract).
FROM debian:stretch-slim

# MAINTAINER is deprecated; a maintainer label carries the same information.
LABEL maintainer="Patrick Jentsch <p.jentsch@uni-bielefeld.de>"

ENV LANG=C.UTF-8

# The package lists are removed in the same layer to keep the image small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        apt-transport-https \
        ca-certificates \
        gnupg2 \
        pdftk \
        poppler-utils \
        python2.7 \
        python3 \
        wget \
        zip && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /root

# Install pyFlow
ENV PYFLOW_VERSION=1.1.20
RUN wget -nv https://github.com/Illumina/pyflow/releases/download/v"$PYFLOW_VERSION"/pyflow-"$PYFLOW_VERSION".tar.gz && \
    tar -xzf pyflow-"$PYFLOW_VERSION".tar.gz && \
    rm pyflow-"$PYFLOW_VERSION".tar.gz && \
    cd pyflow-"$PYFLOW_VERSION" && \
    python2.7 setup.py build install

# Install Tesseract OCR and Data Files
RUN echo "deb https://notesalexp.org/tesseract-ocr/stretch/ stretch main" >> /etc/apt/sources.list && \
    wget -O - https://notesalexp.org/debian/alexp_key.asc | apt-key add - && \
    apt-get update && \
    apt-get install -y --no-install-recommends tesseract-ocr && \
    rm -rf /var/lib/apt/lists/* && \
    wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/deu.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
    wget -nv https://github.com/tesseract-ocr/tessdata/raw/master/deu_frak.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
    wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/eng.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
    wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/enm.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
    wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/fra.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
    wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/frm.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
    wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/por.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
    wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/spa.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata

RUN mkdir files_for_ocr files_from_ocr

COPY hocrtotei /usr/local/bin/
COPY ocr /usr/local/bin/

CMD ["/bin/bash"]
|
47
vre_ocr_node/README.md
Normal file
47
vre_ocr_node/README.md
Normal file
@ -0,0 +1,47 @@
|
||||
# vre_ocr_node
|
||||
Dieses Repository stellt ein Dockerfile zur Erstellung eines Dockerimages zur automatischen Zeichenerkennung zur Verfügung. Es werden PDF-Dateien entgegengenommen und PDF-Dateien, TEI-konforme XML-Dateien und Textdateien ausgegeben.
|
||||
|
||||
## Funktionsweise
|
||||
Eingabedateien durchlaufen eine Verarbeitungsroutine, die in Form einer Pipeline realisiert wurde. Diese Pipeline besteht aus drei Schritten:
|
||||
1. Jede PDF-Datei aus dem Eingabeverzeichnis wird in einzelne Seiten aufgespalten.
|
||||
2. Die resultierenden Dateien aus Schritt 1 werden durch eine automatische Texterkennung (OCR) weiterverarbeitet.
|
||||
3. Die verarbeiteten Einzelseiten werden wieder zusammengeführt.
|
||||
|
||||
## Dockerimage erstellen
|
||||
Die GitLab Registry stellt ein automatisch erstelltes Dockerimage zur Verfügung, das stets die neuesten Änderungen beinhaltet. Das Dockerimage kann aber auch lokal erstellt werden; dazu muss folgender Befehl ins Terminal eingegeben werden.
|
||||
```bash
|
||||
docker build -t gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_ocr_node .
|
||||
```
|
||||
|
||||
## Nutzung
|
||||
|
||||
### Starten eines Dockercontainers
|
||||
```bash
|
||||
docker run \
|
||||
--name <containername> \
|
||||
-dit \
|
||||
-v <datalocation>/files_for_ocr:/root/files_for_ocr \
|
||||
-v <datalocation>/files_from_ocr:/root/files_from_ocr \
|
||||
gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_ocr_node
|
||||
```
|
||||
|
||||
### Daten zur Texterkennung in das Eingabeverzeichnis kopieren
|
||||
```bash
|
||||
cp <pdffile1> <pdffile2> ... <pdffilen> <datalocation>/files_for_ocr
|
||||
```
|
||||
|
||||
### Texterkennung starten
|
||||
```bash
|
||||
docker exec -it <containername> ocr -i /root/files_for_ocr -o /root/files_from_ocr -l <languagecode>
|
||||
```
|
||||
Valide Angaben für `<languagecode>` sind:
|
||||
* deu (Deutsch)
|
||||
* deu_frak (Deutsch Fraktur)
|
||||
* eng (Englisch)
|
||||
* enm (Mittelenglisch)
|
||||
* fra (Französisch)
|
||||
* frm (Mittelfranzösisch)
|
||||
* por (Portugiesisch)
|
||||
* spa (Spanisch)
|
||||
|
||||
Nach Beendigung des Vorgangs stehen die aufbereiteten Daten im Verzeichnis `<datalocation>/files_from_ocr` zur Verfügung.
|
44
vre_ocr_node/hocrtotei
Executable file
44
vre_ocr_node/hocrtotei
Executable file
@ -0,0 +1,44 @@
|
||||
#!/usr/bin/env python3
|
||||
# coding=utf-8
|
||||
|
||||
import xml.etree.ElementTree as ET
from xml.sax.saxutils import escape
import os
import sys


# Static TEI skeleton written before and after the page content.
TEI_HEADER = ('<?xml version="1.0" encoding="UTF-8"?>\n' +
              '<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="dtabf">\n' +
              ' <teiHeader>\n' +
              ' <fileDesc>\n' +
              ' <titleStmt/>\n' +
              ' <publicationStmt/>\n' +
              ' <sourceDesc/>\n' +
              ' </fileDesc>\n' +
              ' <encodingDesc/>\n' +
              ' <profileDesc/>\n' +
              ' </teiHeader>\n' +
              ' <text>\n' +
              ' <body>\n')
TEI_FOOTER = (' </body>\n' +
              ' </text>\n' +
              '</TEI>')

# Text emitted before the first word of a line and between later words.
LINE_INDENT = " "
WORD_SEPARATOR = " "


def hocr_to_tei(input_dir, output_path):
    """Merge all ``.hocr`` files of *input_dir* into one TEI file.

    The files are processed in sorted name order. Each file contributes a
    ``<pb/>`` element followed by one ``<p>`` per ``ocr_par`` element; every
    ``ocr_line`` with at least one non-empty word ends with ``<lb/>``.

    input_dir   -- directory containing files named ``<prefix>-<n>.hocr``;
                   ``<n>`` becomes the ``n`` attribute of ``<pb/>``
    output_path -- path of the TEI file to create (overwritten if present)

    Raises IndexError if a ``.hocr`` file name has no ``-`` before its first
    ``.``, and xml.etree.ElementTree.ParseError for malformed input.
    """
    input_files = [name for name in sorted(os.listdir(input_dir))
                   if name.endswith(".hocr")]

    # The XML declaration announces UTF-8, so the encoding must not depend on
    # the locale of the process; the context manager closes the file even if
    # parsing one of the inputs fails.
    with open(output_path, "w", encoding="utf-8") as output_file:
        output_file.write(TEI_HEADER)

        for input_file in input_files:
            tree = ET.parse(os.path.join(input_dir, input_file))
            page_number = input_file.split(".")[0].split("-")[1]
            output_file.write(' <pb n="%s"/>\n' % (page_number))
            for para in tree.findall(".//*[@class='ocr_par']"):
                output_file.write(' <p>\n')
                for line in para.findall(".//*[@class='ocr_line']"):
                    first_word_in_line = True
                    for word in line.findall(".//*[@class='ocrx_word']"):
                        # Words whose text sits in a nested element have no
                        # direct text and are skipped.
                        if word.text is not None:
                            prefix = LINE_INDENT if first_word_in_line else WORD_SEPARATOR
                            output_file.write(prefix + escape(word.text.strip()))
                            first_word_in_line = False
                    if not first_word_in_line:
                        output_file.write('<lb/>\n')
                output_file.write(' </p>\n')

        output_file.write(TEI_FOOTER)


def main():
    """Command line entry point: ``hocrtotei <input_dir> <output_file>``."""
    if len(sys.argv) < 3:
        sys.exit("usage: hocrtotei <input_dir> <output_file>")
    hocr_to_tei(sys.argv[1], sys.argv[2])


if __name__ == "__main__":
    main()
|
214
vre_ocr_node/ocr
Executable file
214
vre_ocr_node/ocr
Executable file
@ -0,0 +1,214 @@
|
||||
#!/usr/bin/env python2
|
||||
# coding=utf-8
|
||||
|
||||
"""
|
||||
ocr
|
||||
|
||||
Usage: For usage instructions run with option --help
|
||||
Author: Patrick Jentsch <p.jentsch@uni-bielefeld.de>
|
||||
"""
|
||||
|
||||
|
||||
import argparse
|
||||
import multiprocessing
|
||||
import os
|
||||
import sys
|
||||
from pyflow import WorkflowRunner
|
||||
|
||||
|
||||
|
||||
def parse_arguments():
    """Parse the command line of the ocr script.

    Returns an argparse.Namespace with the attributes inputDir, lang,
    outputDir, keepIntermediates and nCores. Exits the process with a usage
    message if a required option is missing.
    """
    # The text is passed as ``description``: the first positional parameter
    # of ArgumentParser is ``prog``, which would replace the program name in
    # the usage line.
    parser = argparse.ArgumentParser(
        description="Performs OCR of documents utilizing Tesseract OCR. "
                    "Outputs are .pdf and .txt."
    )

    parser.add_argument("-i",
                        dest="inputDir",
                        help="Input directory.",
                        required=True)
    parser.add_argument("-l",
                        dest='lang',
                        help="Language for OCR",
                        required=True)
    parser.add_argument("-o",
                        dest="outputDir",
                        help="Output directory.",
                        required=True)
    parser.add_argument("--keep-intermediates",
                        action='store_true',
                        default=False,
                        dest="keepIntermediates",
                        help="Keep intermediate files.",
                        required=False)
    parser.add_argument("--nCores",
                        default=multiprocessing.cpu_count(),
                        dest="nCores",
                        help="Total number of cores available.",
                        required=False,
                        type=int)
    return parser.parse_args()
|
||||
|
||||
|
||||
class OCRWorkflow(WorkflowRunner):
    """pyflow workflow that runs Tesseract OCR on a batch of PDF files.

    For every job the workflow creates the output directories, splits the
    PDF into one TIFF file per page, runs Tesseract on every page, merges the
    per-page results into one .xml, one .pdf and one .txt file, optionally
    removes the intermediate files and finally zips the output directory.
    """

    def __init__(self, jobs, keepIntermediates, lang, nCores):
        """Store the workflow configuration.

        jobs              -- list of dicts with the keys "path" (input PDF)
                             and "output_dir" (directory for the results)
        keepIntermediates -- if true, the "tmp" directories are kept
        lang              -- language code handed to ``tesseract -l``
        nCores            -- total number of cores available
        """
        self.jobs = jobs
        self.keepIntermediates = keepIntermediates
        self.lang = lang
        self.nCores = nCores

    @staticmethod
    def _output_path(job, extension):
        """Return "<output_dir>/<input file name without suffix><extension>"."""
        stem = os.path.basename(job["path"]).rsplit(".", 1)[0]
        return os.path.join(job["output_dir"], stem + extension)

    def workflow(self):
        """Register all tasks of the OCR pipeline with pyflow."""
        ###
        # Task "mkdir_job": create output directories
        # Dependencies: None
        ###
        mkdir_jobs = []
        for mkdir_job_number, job in enumerate(self.jobs, 1):
            cmd = 'mkdir -p "%s" "%s" "%s"' % (
                job["output_dir"],
                os.path.join(job["output_dir"], "tmp", "tesseract"),
                os.path.join(job["output_dir"], "tmp", "tiff_files")
            )
            mkdir_jobs.append(self.addTask(label="mkdir_job_-_%i" % (mkdir_job_number), command=cmd))

        ###
        # Task "split_job": split input file into one .tif file per page
        # Dependencies: mkdir_jobs
        ###
        split_jobs = []
        for split_job_number, job in enumerate(self.jobs, 1):
            cmd = 'pdftoppm "%s" "%s" -tiff -r 300 -tiffcompression lzw -cropbox' % (
                job["path"],
                os.path.join(job["output_dir"], "tmp", "tiff_files", "page")
            )
            # Each split task needs its own label (the mkdir counter was used
            # here before, giving every split task the same label) and must
            # not start before its target directory exists.
            split_jobs.append(self.addTask(label="split_job_-_%i" % (split_job_number), command=cmd, dependencies=mkdir_jobs))

        ###
        # Task "tesseract_job": perform OCR
        # Dependencies: split_jobs
        ###
        # The page files are only known once the split tasks have finished,
        # so block here before listing the tiff directories.
        self.waitForTasks()
        tesseract_jobs = []
        tesseract_job_number = 0
        for job in self.jobs:
            tiff_dir = os.path.join(job["output_dir"], "tmp", "tiff_files")
            for page_file in os.listdir(tiff_dir):
                tesseract_job_number += 1
                cmd = 'tesseract "%s" "%s" -l "%s" hocr pdf txt' % (
                    os.path.join(tiff_dir, page_file),
                    os.path.join(job["output_dir"], "tmp", "tesseract", page_file.rsplit(".", 1)[0]),
                    self.lang
                )
                tesseract_jobs.append(self.addTask(label="tesseract_job_-_%i" % (tesseract_job_number), command=cmd, dependencies=split_jobs, nCores=min(4, self.nCores)))

        ###
        # Task "hocr_to_tei_job": create TEI P5 file from hocr files
        # Dependencies: tesseract_jobs
        ###
        hocr_to_tei_jobs = []
        for hocr_to_tei_job_number, job in enumerate(self.jobs, 1):
            cmd = 'hocrtotei "%s" "%s"' % (
                os.path.join(job["output_dir"], "tmp", "tesseract"),
                self._output_path(job, ".xml")
            )
            hocr_to_tei_jobs.append(self.addTask(label="hocr_to_tei_job_-_%i" % (hocr_to_tei_job_number), command=cmd, dependencies=tesseract_jobs))

        ###
        # Task "pdf_merge_job": Merge .pdf files
        # Dependencies: tesseract_jobs
        ###
        pdf_merge_jobs = []
        for pdf_merge_job_number, job in enumerate(self.jobs, 1):
            cmd = 'pdftk "%s"/*.pdf cat output "%s"' % (
                os.path.join(job["output_dir"], "tmp", "tesseract"),
                self._output_path(job, ".pdf")
            )
            pdf_merge_jobs.append(self.addTask(label="pdf_merge_job_-_%i" % (pdf_merge_job_number), command=cmd, dependencies=tesseract_jobs))

        ###
        # Task "txt_merge_job": Merge .txt files
        # Dependencies: tesseract_jobs
        ###
        txt_merge_jobs = []
        for txt_merge_job_number, job in enumerate(self.jobs, 1):
            cmd = 'cat "%s"/*.txt > "%s"' % (
                os.path.join(job["output_dir"], "tmp", "tesseract"),
                self._output_path(job, ".txt")
            )
            txt_merge_jobs.append(self.addTask(label="txt_merge_job_-_%i" % (txt_merge_job_number), command=cmd, dependencies=tesseract_jobs))

        merge_jobs = hocr_to_tei_jobs + pdf_merge_jobs + txt_merge_jobs

        ###
        # Task "cleanup_job": remove temporary files
        # Dependencies: hocr_to_tei_jobs + pdf_merge_jobs + txt_merge_jobs
        ###
        cleanup_jobs = []
        if not self.keepIntermediates:
            for cleanup_job_number, job in enumerate(self.jobs, 1):
                cmd = 'rm -r "%s"' % (
                    os.path.join(job["output_dir"], "tmp")
                )
                cleanup_jobs.append(self.addTask(label="cleanup_job_-_%i" % (cleanup_job_number), command=cmd, dependencies=merge_jobs))

        ###
        # Task "zip_job": compress output
        # Dependencies: cleanup_jobs (merge jobs if intermediates are kept)
        ###
        # Without cleanup tasks the zip tasks would have no dependencies at
        # all and could run before the merged outputs exist.
        zip_dependencies = cleanup_jobs if cleanup_jobs else merge_jobs
        zip_jobs = []
        for zip_job_number, job in enumerate(self.jobs, 1):
            # Paths are quoted like in every other command so that directory
            # names containing spaces work.
            cmd = 'zip -jqr "%s" "%s"' % (
                job["output_dir"] + "_-_ocr",
                job["output_dir"]
            )
            zip_jobs.append(self.addTask(label="zip_job_-_%i" % (zip_job_number), command=cmd, dependencies=zip_dependencies))
|
||||
|
||||
|
||||
def analyze_jobs(inputDir, outputDir, level=1):
    """Collect one job description per PDF file below inputDir.

    Sub-directories are searched recursively and mirrored below outputDir;
    calls with a level greater than 2 return an empty list, so only inputDir
    and its direct sub-directories are scanned.

    Returns a list of dicts with the keys "path" (the PDF file) and
    "output_dir" (outputDir joined with the file name without its suffix).
    """
    if level > 2:
        return []

    found = []
    for entry in os.listdir(inputDir):
        source = os.path.join(inputDir, entry)
        if os.path.isdir(source):
            found.extend(
                analyze_jobs(source, os.path.join(outputDir, entry), level + 1)
            )
            continue
        if not entry.endswith(".pdf"):
            continue
        stem = entry.rsplit(".", 1)[0]
        found.append({"path": source, "output_dir": os.path.join(outputDir, stem)})

    return found
|
||||
|
||||
|
||||
def main():
    """Parse the command line, run the OCR workflow and exit with its status."""
    options = parse_arguments()
    jobs = analyze_jobs(options.inputDir, options.outputDir)
    workflow = OCRWorkflow(jobs, options.keepIntermediates, options.lang, options.nCores)

    # The return value of run() is used as the exit status of the process.
    sys.exit(workflow.run(nCores=options.nCores))


if __name__ == "__main__":
    main()
|
5
vre_www/Dockerfile
Normal file
5
vre_www/Dockerfile
Normal file
@ -0,0 +1,5 @@
|
||||
FROM nginx:1.14.2
|
||||
|
||||
RUN mkdir -p /usr/share/nginx/html/vre
|
||||
|
||||
COPY website /usr/share/nginx/html/vre
|
222
vre_www/website/account.html
Normal file
222
vre_www/website/account.html
Normal file
@ -0,0 +1,222 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="de">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Opaque | Benutzerverwaltung</title>
|
||||
<link href="/vre/images/sfb_1288.png" rel="icon" type="image/png">
|
||||
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
|
||||
<link rel="stylesheet" href="css/materialize.min.css">
|
||||
<link rel="stylesheet" href="css/style.css">
|
||||
<script src="js/vre_manager_rest.js"></script>
|
||||
<script src="js/vre.js"></script>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
</head>
|
||||
<body class="blue-grey lighten-5">
|
||||
<header>
|
||||
<ul id="main-nav-account-dropdown" class="dropdown-content">
|
||||
<li class="active">
|
||||
<a href="account.html">Benutzerverwaltung</a>
|
||||
</li>
|
||||
<li class="divider"></li>
|
||||
<li>
|
||||
<a href="javascript:logout();">Abmelden</a>
|
||||
</li>
|
||||
</ul>
|
||||
<nav class="blue-grey darken-2">
|
||||
<div class="container">
|
||||
<div class="nav-wrapper">
|
||||
<a href="portal.html" class="brand-logo">
|
||||
<i class="material-icons hide-on-small-only">cloud</i>
|
||||
Opaque
|
||||
</a>
|
||||
<a href="#" data-target="main-nav-mobile" class="sidenav-trigger">
|
||||
<i class="material-icons">menu</i>
|
||||
</a>
|
||||
<ul id="main-nav" class="right hide-on-med-and-down">
|
||||
<li>
|
||||
<a href="job_manager.html">
|
||||
<i class="material-icons left">work</i>
|
||||
Auftragsübersicht
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="nlp.html">
|
||||
<i class="material-icons left">format_textdirection_l_to_r</i>
|
||||
Linguistische Datenverarbeitung
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="ocr.html">
|
||||
<i class="material-icons left">find_in_page</i>
|
||||
Texterkennung
|
||||
</a>
|
||||
</li>
|
||||
<li class="active">
|
||||
<a id="main-nav-account" class="dropdown-trigger" href="#!" data-target="main-nav-account-dropdown">
|
||||
<i class="material-icons left">account_circle</i>
|
||||
Benutzer
|
||||
<i class="material-icons right">arrow_drop_down</i>
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
<ul class="sidenav" id="main-nav-mobile">
|
||||
<li>
|
||||
<a href="job_manager.html">
|
||||
<i class="material-icons left">work</i>
|
||||
Auftragsübersicht
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="nlp.html">
|
||||
<i class="material-icons left">format_textdirection_l_to_r</i>
|
||||
Linguistische Datenverarbeitung
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="ocr.html">
|
||||
<i class="material-icons left">find_in_page</i>
|
||||
Texterkennung
|
||||
</a>
|
||||
</li>
|
||||
<li class="active">
|
||||
<a href="account.html">
|
||||
<i class="material-icons">account_circle</i>
|
||||
Benutzer
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="javascript:logout();">
|
||||
<i class="material-icons">account_circle</i>
|
||||
Abmelden
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</header>
|
||||
|
||||
<main>
|
||||
<div class="container">
|
||||
<div class="row">
|
||||
<div class="col s12 m8">
|
||||
<div class="card">
|
||||
<div class="card-content">
|
||||
<span class="card-title">Benutzerverwaltung</span>
|
||||
<p> </p>
|
||||
<form id="user_form" class="row">
|
||||
<div class="input-field col s12 m6">
|
||||
<i class="material-icons prefix">account_circle</i>
|
||||
<input id="user-form-first-name" type="text">
|
||||
<label for="user-form-first-name">Vorname</label>
|
||||
</div>
|
||||
<div class="input-field col s12 m6">
|
||||
<input id="user-form-last-name" type="text">
|
||||
<label for="user-form-last-name">Nachname</label>
|
||||
</div>
|
||||
<div class="input-field col s12 m6">
|
||||
<i class="material-icons prefix">email</i>
|
||||
<input id="user-form-e-mail" type="text">
|
||||
<label for="user-form-e-mail">E-Mail</label>
|
||||
</div>
|
||||
<div class="input-field col s12 m6">
|
||||
<p style="height: 64px;">
|
||||
<label>
|
||||
<input id="user-form-notifications" type="checkbox" checked="checked">
|
||||
<span>Benachrichtigungen per E-Mail erhalten</span>
|
||||
</label>
|
||||
</p>
|
||||
</div>
|
||||
<div class="input-field col s12 m6">
|
||||
<i class="material-icons prefix">phone</i>
|
||||
<input id="user-form-phone" type="text">
|
||||
<label for="user-form-phone">Telefon</label>
|
||||
</div>
|
||||
<div class="input-field col s12 right-align">
|
||||
<a id="user-form-reset" class="waves-effect waves-light btn disabled">Verwerfen</a>
|
||||
<a id="user-form-submit" class="waves-effect waves-light btn disabled">Bestätigen</a>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col s12 m4">
|
||||
<div class="card">
|
||||
<div class="card-content">
|
||||
<span class="card-title">Texterkennung</span>
|
||||
<p>
|
||||
<i class="material-icons medium blue-grey-text text-darken-2 left">find_in_page</i>
|
||||
Textdaten werden aus Scans oder Fotos zur anschließenden
|
||||
Weiterverarbeitung extrahiert.
|
||||
</p>
|
||||
</div>
|
||||
<div class="card-action right-align">
|
||||
<a href="ocr.html" class="waves-effect waves-light btn">Zum Dienst</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<div class="card-content">
|
||||
<span class="card-title">Linguistische Datenverarbeitung</span>
|
||||
<p>
|
||||
<i class="material-icons medium blue-grey-text text-darken-2 left">format_textdirection_l_to_r</i>
|
||||
Mit Hilfe datenverarbeitender Methoden werden Textdaten mit
|
||||
weiteren Informationen angereichert.
|
||||
</p>
|
||||
</div>
|
||||
<div class="card-action right-align">
|
||||
<a href="nlp.html" class="waves-effect waves-light btn">Zum Dienst</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<div class="card-content">
|
||||
<span class="card-title">Auftragsübersicht</span>
|
||||
<p>
|
||||
<i class="material-icons medium blue-grey-text text-darken-2 left">work</i>
|
||||
Hier wird der aktuelle Status sämtlicher vom Benutzer erstellten
|
||||
Aufträge aufgelistet.
|
||||
</p>
|
||||
</div>
|
||||
<div class="card-action right-align">
|
||||
<a href="job_manager.html" class="waves-effect waves-light btn">Öffnen</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</main>
|
||||
|
||||
<footer class="page-footer blue-grey darken-2">
|
||||
<div class="container">
|
||||
<div class="row">
|
||||
<div class="col s12">
|
||||
<p class="grey-text text-lighten-4 left">
|
||||
<a href="https://www.uni-bielefeld.de/">
|
||||
<img src="images/universitaet_bielefeld.png" alt="Logo: Universität Bielefeld" style="height: 72.5px">
|
||||
</a>
|
||||
</p>
|
||||
<p class="grey-text text-lighten-4 right">
|
||||
Gefördert durch die<br>
|
||||
<a href="https://www.dfg.de/">
|
||||
<img src="images/dfg.png" alt="Logo: Deutsche Forschungsgemeinschaft" style="height: 50px;">
|
||||
</a>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="footer-copyright">
|
||||
<div class="container">
|
||||
© 2019 Copyright
|
||||
<a class="grey-text text-lighten-4" href="https://www.uni-bielefeld.de/sfb1288">Universität Bielefeld | SFB 1288</a>
|
||||
<a class="grey-text text-lighten-4 right" href="impressum.html">Impressum</a>
|
||||
</div>
|
||||
</div>
|
||||
</footer>
|
||||
<script src="js/materialize.min.js"></script>
|
||||
<script>
|
||||
M.AutoInit();
|
||||
</script>
|
||||
<script src="js/account.js"></script>
|
||||
</body>
|
||||
</html>
|
9067
vre_www/website/css/materialize.css
vendored
Normal file
9067
vre_www/website/css/materialize.css
vendored
Normal file
File diff suppressed because it is too large
Load Diff
13
vre_www/website/css/materialize.min.css
vendored
Normal file
13
vre_www/website/css/materialize.min.css
vendored
Normal file
File diff suppressed because one or more lines are too long
43
vre_www/website/css/style.css
Normal file
43
vre_www/website/css/style.css
Normal file
@ -0,0 +1,43 @@
|
||||
/* Column layout that fills at least the viewport height. */
body {
  display: flex;
  min-height: 100vh;
  flex-direction: column;
}

/* The main area takes up all space not used by its siblings. */
main {
  flex: 1 0 auto;
}

/* Tab colours. */
.tabs .tab {
  cursor: pointer;
}

.tabs .tab a {
  color: rgba(0,0,0,0.87);
}

.tabs .tab a:hover {
  background-color: #eceff1;
  color: rgba(0,0,0,0.87);
}

.tabs .tab a.active, .tabs .tab a:focus.active {
  background-color: #fff;
  color: rgba(0,0,0,0.87);
}

.tabs .indicator {
  background-color: #455a64;
}

/* Small utility classes. */
.no-padding {
  padding: 0;
}

.overflow-hidden {
  overflow: hidden;
}

.overflow-visible {
  overflow: visible;
}
|
BIN
vre_www/website/images/dfg.png
Normal file
BIN
vre_www/website/images/dfg.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.6 KiB |
BIN
vre_www/website/images/sfb_1288-alt.png
Normal file
BIN
vre_www/website/images/sfb_1288-alt.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 16 KiB |
BIN
vre_www/website/images/sfb_1288.png
Normal file
BIN
vre_www/website/images/sfb_1288.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 7.1 KiB |
BIN
vre_www/website/images/universitaet_bielefeld.png
Normal file
BIN
vre_www/website/images/universitaet_bielefeld.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 6.3 KiB |
BIN
vre_www/website/images/workflow.png
Normal file
BIN
vre_www/website/images/workflow.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.4 KiB |
1
vre_www/website/images/workflow.xml
Normal file
1
vre_www/website/images/workflow.xml
Normal file
File diff suppressed because one or more lines are too long
54
vre_www/website/index.html
Normal file
54
vre_www/website/index.html
Normal file
@ -0,0 +1,54 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="de">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Opaque | Anmeldung</title>
|
||||
<link href="/vre/images/sfb_1288.png" rel="icon" type="image/png">
|
||||
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
|
||||
<link rel="stylesheet" href="css/materialize.min.css">
|
||||
<link rel="stylesheet" href="css/style.css">
|
||||
<script src="js/vre_manager_rest.js"></script>
|
||||
<script src="js/vre.js"></script>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
</head>
|
||||
<body class="blue-grey lighten-5">
|
||||
<main class="valign-wrapper">
|
||||
<div class="container">
|
||||
<div class="row">
|
||||
<div class="col card s12 m6 offset-m3">
|
||||
<div class="card-content">
|
||||
<span class="card-title"><i class="material-icons left">cloud</i> Opaque</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col card s12 m6 offset-m3">
|
||||
<div class="card-content">
|
||||
<span class="card-title">Anmeldung</span>
|
||||
<form id="login_form" class="row" action="javascript:LoginFormSubmitHandler();">
|
||||
<div class="input-field col s12">
|
||||
<input id="login-form-user" type="text" class="validate" autofocus="autofocus" onfocus="this.select()">
|
||||
<label for="login-form-user">Benutzername</label>
|
||||
</div>
|
||||
<div class="input-field col s12">
|
||||
<input id="login-form-password" type="password" class="validate">
|
||||
<label for="login-form-password">Passwort</label>
|
||||
<span class="helper-text" data-error="Benutzername oder Passwort falsch"></span>
|
||||
</div>
|
||||
<div class="input-field col s12 hide">
|
||||
<input id="login_form_submit" type="submit">
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
<div class="card-action right-align">
|
||||
<a href="javascript:LoginFormSubmitHandler();" class="waves-effect waves-light btn">Anmelden</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</main>
|
||||
<script src="js/materialize.min.js"></script>
|
||||
<script>
|
||||
M.AutoInit();
|
||||
</script>
|
||||
<script src="js/index.js"></script>
|
||||
</body>
|
||||
</html>
|
197
vre_www/website/job_manager.html
Normal file
197
vre_www/website/job_manager.html
Normal file
@ -0,0 +1,197 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="de">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Opaque | Auftragsverwaltung</title>
|
||||
<link href="/vre/images/sfb_1288.png" rel="icon" type="image/png">
|
||||
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
|
||||
<link rel="stylesheet" href="css/materialize.min.css">
|
||||
<link rel="stylesheet" href="css/style.css">
|
||||
<script src="js/vre_manager_rest.js"></script>
|
||||
<script src="js/vre.js"></script>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
</head>
|
||||
<body class="blue-grey lighten-5">
|
||||
<header>
|
||||
<ul id="main-nav-account-dropdown" class="dropdown-content">
|
||||
<li>
|
||||
<a href="account.html">Benutzerverwaltung</a>
|
||||
</li>
|
||||
<li class="divider"></li>
|
||||
<li>
|
||||
<a href="javascript:logout();">Abmelden</a>
|
||||
</li>
|
||||
</ul>
|
||||
<nav class="blue-grey darken-2">
|
||||
<div class="container">
|
||||
<div class="nav-wrapper">
|
||||
<a href="portal.html" class="brand-logo">
|
||||
<i class="material-icons hide-on-small-only">cloud</i>
|
||||
Opaque
|
||||
</a>
|
||||
<a href="#" data-target="main-nav-mobile" class="sidenav-trigger">
|
||||
<i class="material-icons">menu</i>
|
||||
</a>
|
||||
<ul id="main-nav" class="right hide-on-med-and-down">
|
||||
<li class="active">
|
||||
<a href="job_manager.html">
|
||||
<i class="material-icons left">work</i>
|
||||
Auftragsübersicht
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="nlp.html">
|
||||
<i class="material-icons left">format_textdirection_l_to_r</i>
|
||||
Linguistische Datenverarbeitung
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="ocr.html">
|
||||
<i class="material-icons left">find_in_page</i>
|
||||
Texterkennung
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a id="main-nav-account" class="dropdown-trigger" href="#!" data-target="main-nav-account-dropdown">
|
||||
<i class="material-icons left">account_circle</i>
|
||||
Benutzer
|
||||
<i class="material-icons right">arrow_drop_down</i>
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
<ul class="sidenav" id="main-nav-mobile">
|
||||
<li class="active">
|
||||
<a href="job_manager.html">
|
||||
<i class="material-icons left">work</i>
|
||||
Auftragsübersicht
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="nlp.html">
|
||||
<i class="material-icons left">format_textdirection_l_to_r</i>
|
||||
Linguistische Datenverarbeitung
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="ocr.html">
|
||||
<i class="material-icons left">find_in_page</i>
|
||||
Texterkennung
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="account.html">
|
||||
<i class="material-icons">account_circle</i>
|
||||
Benutzer
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="javascript:logout();">
|
||||
<i class="material-icons">account_circle</i>
|
||||
Abmelden
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</header>
|
||||
|
||||
<main>
|
||||
<div class="container">
|
||||
<div class="row">
|
||||
<div class="col s12 m8">
|
||||
<div class="card">
|
||||
<div class="card-content">
|
||||
<span class="card-title">Auftragsübersicht</span>
|
||||
<p>
|
||||
Die Auftragsübersicht bietet eine Übersicht über Ihre
|
||||
Aufträge. Nach erfolgreichem Abschluss können die Ergebnisse
|
||||
hier heruntergeladen werden.
|
||||
</p>
|
||||
<p> </p>
|
||||
<div class="card-panel no-padding z-depth-0">
|
||||
<ul class="tabs">
|
||||
<li class="tab col s3">
|
||||
<a data-job-filter="" class="active">Alle</a>
|
||||
</li>
|
||||
<li class="tab col s3">
|
||||
<a data-job-filter="finished">Abgeschlossen</a>
|
||||
</li>
|
||||
<li class="tab col s3">
|
||||
<a data-job-filter="running">Laufend</a>
|
||||
</li>
|
||||
<li class="tab col s3">
|
||||
<a data-job-filter="queued">Wartend</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<ul id="jobs-collapsible" class="collapsible expandable z-depth-0"></ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col s12 m4">
|
||||
<div class="card">
|
||||
<div class="card-content">
|
||||
<span class="card-title">Linguistische Datenverarbeitung</span>
|
||||
<p>
|
||||
<i class="material-icons medium blue-grey-text text-darken-2 left">format_textdirection_l_to_r</i>
|
||||
Mit Hilfe datenverarbeitender Methoden werden Textdaten mit
|
||||
weiteren Informationen angereichert.
|
||||
</p>
|
||||
</div>
|
||||
<div class="card-action right-align">
|
||||
<a href="nlp.html" class="waves-effect waves-light btn">Zum Dienst</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<div class="card-content">
|
||||
<span class="card-title">Texterkennung</span>
|
||||
<p>
|
||||
<i class="material-icons medium blue-grey-text text-darken-2 left">find_in_page</i>
|
||||
Textdaten werden aus Scans oder Fotos zur anschließenden
|
||||
Weiterverarbeitung extrahiert.
|
||||
</p>
|
||||
</div>
|
||||
<div class="card-action right-align">
|
||||
<a href="ocr.html" class="waves-effect waves-light btn">Zum Dienst</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</main>
|
||||
|
||||
<footer class="page-footer blue-grey darken-2">
|
||||
<div class="container">
|
||||
<div class="row">
|
||||
<div class="col s12">
|
||||
<p class="grey-text text-lighten-4 left">
|
||||
<a href="https://www.uni-bielefeld.de/">
|
||||
<img src="images/universitaet_bielefeld.png" alt="Logo: Universität Bielefeld" style="height: 72.5px">
|
||||
</a>
|
||||
</p>
|
||||
<p class="grey-text text-lighten-4 right">
|
||||
Gefördert durch die<br>
|
||||
<a href="https://www.dfg.de/">
|
||||
<img src="images/dfg.png" alt="Logo: Deutsche Forschungsgemeinschaft" style="height: 50px;">
|
||||
</a>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="footer-copyright">
|
||||
<div class="container">
|
||||
© 2019 Copyright
|
||||
<a class="grey-text text-lighten-4" href="https://www.uni-bielefeld.de/sfb1288">Universität Bielefeld | SFB 1288</a>
|
||||
<a class="grey-text text-lighten-4 right" href="impressum.html">Impressum</a>
|
||||
</div>
|
||||
</div>
|
||||
</footer>
|
||||
<script src="js/materialize.min.js"></script>
|
||||
<script>
|
||||
M.AutoInit();
|
||||
</script>
|
||||
<script src="js/job_manager.js"></script>
|
||||
</body>
|
||||
</html>
|
110
vre_www/website/js/account.js
Normal file
110
vre_www/website/js/account.js
Normal file
@ -0,0 +1,110 @@
|
||||
/**
 * Fill the account form from the global `user` object and register the
 * listeners that track unsaved changes, reset the form and store edits.
 *
 * Relies on the global `user` object, the Materialize global `M` and the
 * form element variables defined at the end of this file.
 * NOTE(review): presumably invoked by vre.js once `user` is available;
 * confirm against the caller.
 */
function setUserCallback() {
  /** Copy the stored user data into the form fields. */
  function fillFormFromUser() {
    userFormEMailElement.value = user["e-mail"];
    userFormFirstNameElement.value = user["firstName"];
    userFormLastNameElement.value = user["lastName"];
    userFormNotificationsElement.checked = user["notifications"];
    userFormPhoneElement.value = user["phone"];
    // Values were set from script, so Materialize has to re-evaluate the
    // position of the floating labels.
    M.updateTextFields();
  }

  fillFormFromUser();

  // Each listener records whether its field differs from the stored value.
  userFormEMailElement.addEventListener("input", event => {
    eMailChanged = user["e-mail"] != userFormEMailElement.value;
    evaluateUserForm();
  });
  userFormFirstNameElement.addEventListener("input", event => {
    firstNameChanged = user["firstName"] != userFormFirstNameElement.value;
    evaluateUserForm();
  });
  userFormLastNameElement.addEventListener("input", event => {
    lastNameChanged = user["lastName"] != userFormLastNameElement.value;
    evaluateUserForm();
  });
  userFormNotificationsElement.addEventListener("change", event => {
    notificationsChanged = user["notifications"] != userFormNotificationsElement.checked;
    evaluateUserForm();
  });
  userFormPhoneElement.addEventListener("input", event => {
    phoneChanged = user["phone"] != userFormPhoneElement.value;
    evaluateUserForm();
  });

  // Discard all edits and show the stored values again.
  userFormResetElement.addEventListener("click", event => {
    eMailChanged = false;
    firstNameChanged = false;
    lastNameChanged = false;
    notificationsChanged = false;
    phoneChanged = false;
    fillFormFromUser();
    evaluateUserForm();
  });

  // Copy the changed fields into `user` and persist it in localStorage.
  userFormSubmitElement.addEventListener("click", event => {
    if (eMailChanged) {
      user["e-mail"] = userFormEMailElement.value;
      eMailChanged = false;
    }
    if (firstNameChanged) {
      user["firstName"] = userFormFirstNameElement.value;
      firstNameChanged = false;
    }
    if (lastNameChanged) {
      user["lastName"] = userFormLastNameElement.value;
      lastNameChanged = false;
    }
    if (notificationsChanged) {
      user["notifications"] = userFormNotificationsElement.checked;
      notificationsChanged = false;
    }
    if (phoneChanged) {
      user["phone"] = userFormPhoneElement.value;
      phoneChanged = false;
    }
    localStorage.setItem("user", JSON.stringify(user));
    M.toast({html: "Benutzerdaten wurden aktualisiert."});
    evaluateUserForm();
  });
}
|
||||
|
||||
|
||||
/**
 * Enable the reset and submit buttons of the user form while at least one of
 * the "changed" flags is set, and disable both buttons otherwise.
 */
function evaluateUserForm() {
  var isDirty = [
    eMailChanged,
    firstNameChanged,
    lastNameChanged,
    notificationsChanged,
    phoneChanged
  ].some(Boolean);

  // toggle(name, force) adds the class when force is true and removes it
  // when force is false.
  [userFormResetElement, userFormSubmitElement].forEach(function(button) {
    button.classList.toggle("disabled", !isDirty);
  });
}
|
||||
|
||||
|
||||
// One flag per user form field; a set flag means the field's current value
// differs from the value stored in the user object.
var eMailChanged = false;
var firstNameChanged = false;
var lastNameChanged = false;
var notificationsChanged = false;
var phoneChanged = false;

// References to the controls of the user form, looked up once by element id.
var userFormFirstNameElement = document.getElementById("user-form-first-name");
var userFormLastNameElement = document.getElementById("user-form-last-name");
var userFormEMailElement = document.getElementById("user-form-e-mail");
var userFormNotificationsElement = document.getElementById("user-form-notifications");
var userFormPhoneElement = document.getElementById("user-form-phone");
var userFormResetElement = document.getElementById("user-form-reset");
var userFormSubmitElement = document.getElementById("user-form-submit");
|
17
vre_www/website/js/index.js
Normal file
17
vre_www/website/js/index.js
Normal file
@ -0,0 +1,17 @@
|
||||
/**
 * Handle a submit of the login form: attempt a login with the entered
 * credentials and mark both inputs as invalid when login() throws.
 */
function LoginFormSubmitHandler() {
  var passwordInput = document.getElementById("login-form-password");
  var userInput = document.getElementById("login-form-user");

  try {
    login(passwordInput.value, userInput.value);
  } catch (loginError) {
    [passwordInput, userInput].forEach(function(input) {
      input.classList.add("invalid");
    });
  }
}
|
43
vre_www/website/js/job_form_handler.js
Normal file
43
vre_www/website/js/job_form_handler.js
Normal file
@ -0,0 +1,43 @@
|
||||
/**
 * Enable the submit button of the job form only while a file is selected and
 * both the language and the name field are non-empty.
 */
function jobFormChangeHandler() {
  var isComplete = jobFormFileElement.files.length !== 0
    && jobFormLanguageElement.value !== ""
    && jobFormNameElement.value !== "";

  jobFormSubmitElement.classList.toggle("disabled", !isComplete);
}
|
||||
|
||||
|
||||
/**
 * Create a job from the current contents of the job form on behalf of the
 * current user.
 */
function jobFormSubmitHandler() {
  var selectedFile = jobFormFileElement.files[0];
  var selectedLanguage = jobFormLanguageElement.value;
  var jobName = jobFormNameElement.value;
  var selectedService = jobFormServiceElement.value;

  createJob(selectedFile, selectedLanguage, jobName, selectedService, user["id"]);
}
|
||||
|
||||
|
||||
// References to the job form and its controls, looked up once by element id.
var jobFormElement = document.getElementById("job_form");
var jobFormFileElement = document.getElementById("job_form_file");
var jobFormLanguageElement = document.getElementById("job_form_language");
var jobFormNameElement = document.getElementById("job_form_name");
var jobFormServiceElement = document.getElementById("job_form_service");
var jobFormSubmitElement = document.getElementById("job_form_submit");

// Re-evaluate the submit button whenever one of the required fields changes.
[
  [jobFormFileElement, "change"],
  [jobFormLanguageElement, "change"],
  [jobFormNameElement, "input"]
].forEach(function(binding) {
  binding[0].addEventListener(binding[1], jobFormChangeHandler);
});

// Reset the form (and let Materialize refresh its text fields) every time
// the "create job" modal has finished closing.
M.Modal.init(document.getElementById("create_job"), {
  "onCloseEnd": function(modal) {
    jobFormElement.reset();
    M.updateTextFields();
  }
});
|
172
vre_www/website/js/job_manager.js
Normal file
172
vre_www/website/js/job_manager.js
Normal file
@ -0,0 +1,172 @@
|
||||
// Display names of the selectable languages, keyed first by service and then
// by the language code that service uses.
var JOBLANGUAGES = {
  "nlp": {
    "de": "Deutsch",
    "en": "Englisch",
    "fr": "Französisch",
    "pt": "Portugiesisch",
    "es": "Spanisch"
  },
  "ocr": {
    "deu": "Deutsch",
    "deu_frak": "Deutsch (Fraktur)",
    "eng": "Englisch",
    "fra": "Französisch",
    "por": "Portugiesisch",
    "spa": "Spanisch"
  }
};
|
||||
// Material icon name shown for each service.
var JOBSERVICEICONS = {
  nlp: "format_textdirection_l_to_r",
  ocr: "find_in_page"
};

// CSS color class of the status badge for each job status.
var JOBSTATUSCOLORS = {
  failed: "pink",
  finished: "teal",
  queued: "amber",
  running: "indigo"
};

// Short label shown inside the status badge for each job status.
var JOBSTATUSNAMES = {
  failed: "Fehlgeschlagen",
  finished: "Abgeschlossen",
  queued: "Wartend",
  running: "Laufend"
};

// Explanatory text shown in the body of a job entry for each job status.
var JOBSTATUSTEXTS = {
  failed: "Es ist ein Fehler bei der Bearbeitung des Auftrages aufgetreten. "
    + "Für weitere Informationen wenden Sie sich mit dem "
    + "Auftragskennzeichen an den Seitenbetreiber.",
  finished: "Der Auftrag ist abgeschlossen, die Ergebnisse können nun "
    + "heruntergeladen werden. Die Quelldatei bleibt verfügbar und "
    + "kann jederzeit abgerufen werden.",
  queued: "Der Auftrag befindet sich in der Warteschlange und wird vom "
    + "System bearbeitet, sobald Rechenkapazität verfügbar ist.",
  running: "Dieser Auftrag wird gerade bearbeitet, sobald der Vorgang "
    + "abgeschlossen wurde, können die Ergebnisse hier heruntergeladen "
    + "werden."
};
|
||||
|
||||
|
||||
// Collapsible list element that receives one entry per job.
var jobsCollapsibleElement = document.getElementById("jobs-collapsible");
// Job status currently used as list filter; the empty string shows all jobs.
var jobStatusFilter = '';

M.Collapsible.init(jobsCollapsibleElement, {accordion: false});

// Clicking an element that carries a data-job-filter attribute makes that
// attribute's value the active filter and refreshes the job list.
document.querySelectorAll('[data-job-filter]').forEach(function(jobFilterElement) {
  jobFilterElement.addEventListener("click", function(event) {
    jobStatusFilter = event.currentTarget.getAttribute("data-job-filter");
    jobListManager();
  });
});
|
||||
|
||||
|
||||
/**
 * Synchronise the job list shown in the collapsible element with the global
 * `jobs` array.
 *
 * For every job an element with id = job["id"] is looked up. If it exists,
 * its status badge, status text and result link are refreshed when the status
 * changed; otherwise a new list entry is built and appended. In both cases
 * the entry is hidden when it doesn't match the active status filter.
 *
 * Job name, file name and id come from the server response and are escaped
 * before they are inserted as markup.
 */
function jobListManager() {
  /**
   * Escape a value for use as HTML text or inside a double-quoted attribute.
   */
  function escapeHtml(value) {
    return String(value)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");
  }

  /**
   * Hide the entry when a filter is active and the job status doesn't match
   * it, unhide it otherwise.
   */
  function applyStatusFilter(jobElement, job) {
    var isFilteredOut = jobStatusFilter != "" && job["status"] != jobStatusFilter;

    jobElement.classList.toggle("hide", isFilteredOut);
  }

  /**
   * Refresh an already rendered entry after a status change.
   */
  function updateJobElement(jobElement, job) {
    var jobResultElement = jobElement.querySelector('.job-result');
    var jobStatusElement = jobElement.querySelector('.job-status');
    var jobStatusTextElement = jobElement.querySelector('.job-status-text');

    // Check whether the job status changed since it got printed
    if (jobStatusElement.innerHTML != JOBSTATUSNAMES[job["status"]]) {
      // Update the job status element (label and color)
      jobStatusElement.innerHTML = JOBSTATUSNAMES[job["status"]];
      jobStatusElement.classList.remove("pink", "teal", "amber", "indigo");
      jobStatusElement.classList.add(JOBSTATUSCOLORS[job["status"]]);
      // Update the job status text element
      jobStatusTextElement.innerHTML = JOBSTATUSTEXTS[job["status"]];
      // The result link only becomes visible once the job is finished
      if (job["status"] === "finished") {
        jobResultElement.classList.remove("hide");
      }
    }
    applyStatusFilter(jobElement, job);
  }

  /**
   * Build the list entry (collapsible header and body) for a job that is not
   * rendered yet.
   */
  function createJobElement(job) {
    var jobElement = document.createElement("li");
    var jobId = escapeHtml(job["id"]);
    var jobFile = escapeHtml(job["file"]);
    // Name of the result archive: file name without its last extension,
    // followed by "_-_<service>.zip"
    var resultFile = escapeHtml(
      job["file"].split(".").slice(0, -1).join(".") + '_-_' + job["service"] + '.zip'
    );

    jobElement.setAttribute("id", job["id"]);
    applyStatusFilter(jobElement, job);
    jobElement.innerHTML =
      '<div class="collapsible-header">'
      + '<i class="material-icons dropdown-indicator">arrow_drop_down</i>'
      + '<i class="material-icons">'
      + JOBSERVICEICONS[job["service"]]
      + '</i>'
      + escapeHtml(job["name"])
      + '<span class="job-status new badge ' + JOBSTATUSCOLORS[job["status"]] + '" data-badge-caption="">'
      + JOBSTATUSNAMES[job["status"]]
      + '</span>'
      + '</div>'
      + '<div class="collapsible-body">'
      + '<p class="job-status-text">' + JOBSTATUSTEXTS[job["status"]] + '</p>'
      + '<p> </p>'
      + '<p class="overflow-hidden">'
      + '<i class="material-icons blue-grey-text text-darken-2 left">bookmark</i>'
      + 'Auftragskennzeichen: '
      + jobId
      + '</p>'
      + '<p class="overflow-hidden">'
      + '<i class="material-icons blue-grey-text text-darken-2 left">insert_drive_file</i>'
      + 'Datei: '
      + '<a href="../vre_files/jobs/' + jobId + '/' + jobFile + '">'
      + jobFile
      + '</a>'
      + '</p>'
      + '<p class="overflow-hidden">'
      + '<i class="material-icons blue-grey-text text-darken-2 left">language</i>'
      + 'Sprache: '
      + JOBLANGUAGES[job["service"]][job["language"]]
      + '</p>'
      + '<p class="job-result' + (job['status'] != 'finished' ? ' hide' : '') + ' overflow-hidden">'
      + '<i class="material-icons blue-grey-text text-darken-2 left">archive</i>'
      + 'Ergebnisse: '
      + '<a href="../vre_files/jobs/' + jobId + '/' + resultFile + '">'
      + 'Download'
      + '</a>'
      + '</p>'
      + '</div>';

    // Flip the arrow icon in the header each time the header is clicked
    jobElement.querySelector('.collapsible-header').addEventListener("click", function(event) {
      var dropdownIndicatorElement = event.currentTarget.querySelector('.dropdown-indicator');

      if (dropdownIndicatorElement.innerHTML === "arrow_drop_down") {
        dropdownIndicatorElement.innerHTML = "arrow_drop_up";
      } else {
        dropdownIndicatorElement.innerHTML = "arrow_drop_down";
      }
    });

    return jobElement;
  }

  jobs.forEach(function(job) {
    var jobElement = document.getElementById(job["id"]);

    // Check whether an element with id = job['id'] is found
    if (jobElement) {
      updateJobElement(jobElement, job);
    } else {
      jobsCollapsibleElement.appendChild(createJobElement(job));
    }
  });
}
|
||||
|
||||
|
||||
// Register jobListManager under the name setJobsCallback, so that code which
// checks for a function of that name calls it to refresh the job list.
setJobsCallback = jobListManager;
|
12374
vre_www/website/js/materialize.js
vendored
Normal file
12374
vre_www/website/js/materialize.js
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6
vre_www/website/js/materialize.min.js
vendored
Normal file
6
vre_www/website/js/materialize.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
154
vre_www/website/js/vre.js
Normal file
154
vre_www/website/js/vre.js
Normal file
@ -0,0 +1,154 @@
|
||||
// Delay between two polls of the job list, handed to setInterval in main()
var JOBSPOLLINTERVALL = 1000;

// Known users, keyed by user id. login() compares the entered password
// against the "password" entry of the matching record.
var USERS = {
  testbenutzer: {
    "e-mail": "t.benutzer@uni-bielefeld.de",
    firstName: "Test",
    id: "testbenutzer",
    lastName: "Benutzer",
    notifications: true,
    password: "passwort",
    phone: "+49 521 106-XXXXX"
  }
};

// The job list of the current user
var jobs;
// The current user
var user;
|
||||
|
||||
|
||||
/**
 * Log a user in and redirect the visitor to the portal page.
 * @param {string} password - The password to use for login.
 * @param {string} user - The user to use for login.
 * @throws {string} When the user doesn't exist or the password is wrong.
 */
function login(password, user) {
  var account = USERS[user];
  var credentialsValid = Boolean(account) && account["password"] === password;

  // Unknown user and wrong password are reported the same way
  if (!credentialsValid) {
    throw "User doesn't exist or password was wrong!";
  }

  // Save the user data to the local storage
  localStorage.setItem("user", JSON.stringify(account));
  // Redirect the visitor to the portal page
  window.location = "/vre/portal.html";
}
|
||||
|
||||
|
||||
/**
 * Log the current user out: wipe the local storage, then send the visitor
 * back to the login page.
 */
function logout() {
  // Everything kept in the local storage is removed
  window.localStorage.clear();
  // Back to the login page
  window.location = "/vre/";
}
|
||||
|
||||
|
||||
/**
 * The main function, which is assigned to window.onload and therefore runs on
 * each page after it is completely loaded.
 *
 * It initializes the account dropdown, enforces the login state (visitors
 * without a stored user are sent to the login page, logged in visitors are
 * sent away from it), loads the user and the job list and starts polling the
 * job list.
 */
function main() {
  // Path of the current page
  var pathname;
  // Indicates whether the current page is the login page
  var isLoginPage;
  // Raw local storage entries, each read only once
  var storedUser;
  var storedJobs;

  M.Dropdown.init(
    document.getElementById("main-nav-account"),
    {"constrainWidth": false, "coverTrigger": false}
  );

  pathname = window.location.pathname;
  isLoginPage = pathname === "/vre/" || pathname === "/vre/index.html";

  // The visitor counts as logged in when the local storage contains a user
  storedUser = localStorage.getItem("user");
  if (!storedUser) {
    // Redirect to the login page if the current page isn't the login page
    if (!isLoginPage) window.location = "index.html";
    return;
  }

  // Redirect to the portal page if the current page is the login page
  if (isLoginPage) window.location = "portal.html";
  // Load user from the local storage
  user = JSON.parse(storedUser);
  // If a function named setUserCallback exists, call it
  if (typeof(setUserCallback) === "function") setUserCallback();

  // Check if the local storage contains a copy of the users job list
  storedJobs = localStorage.getItem("jobs");
  if (storedJobs) {
    // Load the job list from the local storage
    jobs = JSON.parse(storedJobs);
    // If a function named setJobsCallback exists, call it
    if (typeof(setJobsCallback) === "function") setJobsCallback();
  } else {
    // Load the job list from the vre server
    getJobs({"user": user["id"]}, function(newJobs) {
      jobs = newJobs;
      // If a function named setJobsCallback exists, call it
      if (typeof(setJobsCallback) === "function") setJobsCallback();
    });
  }

  // Set a continuous poll for the job list
  setInterval(getJobs, JOBSPOLLINTERVALL, {"user": user["id"]}, updateJobs);
}
|
||||
|
||||
|
||||
/**
 * Updates the job list in the local storage and the contents of the global
 * job list variable. It also informs the visitor about job status changes.
 *
 * Old and new jobs are matched by their id, so jobs that were added, removed
 * or reordered between two polls don't lead to wrong comparisons. Jobs that
 * have no previous entry produce no toast.
 * @param {Object[]} newJobs - The new job list.
 */
function updateJobs(newJobs) {
  // The new job list as JSON string
  var newJobsAsJSONString = JSON.stringify(newJobs);
  // Status of every previously known job, keyed by job id
  var previousStatusById;

  // Nothing to do when the new list equals the copy in the local storage
  if (localStorage.getItem("jobs") === newJobsAsJSONString) return;

  // `jobs` is still undefined when a poll finishes before the initial job
  // list was loaded, hence the fallback to an empty list
  previousStatusById = Object.create(null);
  (jobs || []).forEach(function(oldJob) {
    previousStatusById[oldJob["id"]] = oldJob["status"];
  });

  newJobs.forEach(function(newJob) {
    var previousStatus = previousStatusById[newJob["id"]];

    // Skip jobs without a previous entry and jobs whose status is unchanged
    if (previousStatus === undefined || previousStatus === newJob["status"]) return;

    // Inform the visitor with a toast message about the new status
    switch (newJob["status"]) {
      case "failed":
        M.toast({html: "Auftrag '" + newJob["name"] + "' ist fehlgeschlagen."});
        break;
      case "finished":
        M.toast({html: "Auftrag '" + newJob["name"] + "' wurde abgeschlossen."});
        break;
      case "running":
        M.toast({html: "Auftrag '" + newJob["name"] + "' wird bearbeitet."});
        break;
      default:
    }
  });

  // Save the new job list to the local storage
  localStorage.setItem("jobs", newJobsAsJSONString);
  // Replace the global job list variable with the new job list
  jobs = newJobs;
  // If a function named setJobsCallback exists, call it
  if (typeof(setJobsCallback) === "function") setJobsCallback();
}
|
||||
|
||||
|
||||
// Run main() once the window's load event fires.
window.onload = main;
|
180
vre_www/website/js/vre_manager_rest.js
Normal file
180
vre_www/website/js/vre_manager_rest.js
Normal file
@ -0,0 +1,180 @@
|
||||
// Base URL of the vre manager: the host name of the current page, port 5000.
var VREMANAGER = "http://" + window.location.hostname + ":5000";
|
||||
|
||||
|
||||
/**
 * Sends an asynchronous HTTP request to the vre manager in order to create a
 * new job. The file is sent as form data, all other values are sent as
 * URL-encoded query parameters.
 * @param {File} file - The file to process with the job.
 * @param {string} language - The language of the file contents.
 * @param {string} name - A name for the job, this doesn't have to be unique.
 * @param {string} service - The service this job belongs to.
 * @param {string} user - The user this job belongs to.
 * @param {function} [callback] - A function that is called with the parsed
 *                                response after the job was created
 *                                (HTTP status 201).
 */
function createJob(file, language, name, service, user, callback = console.log) {
  var formData;
  var oReq;
  var query;

  formData = new FormData();
  formData.append("file", file);

  oReq = new XMLHttpRequest();
  oReq.addEventListener("loadend", function(oEvent) {
    if (this.status === 201) {
      M.toast({html: "Auftrag '" + name + "' wurde erstellt."});
      callback(JSON.parse(this.responseText));
    } else if (this.status === 415) {
      M.toast({html: "Auftrag '" + name + "' konnte nicht erstellt werden.<br>(Unerwarteter Dateityp)"});
    } else {
      M.toast({html: "Auftrag '" + name + "' konnte nicht erstellt werden.<br>(Fehlercode: " + this.status + ")"});
    }
  });

  // Encode every value, otherwise characters like "&", "#", "+" or "%" in a
  // value (e.g. the job name) would change the meaning of the query string
  query = "?" + [
    ["language", language],
    ["name", name],
    ["service", service],
    ["user", user]
  ].map(function(parameter) {
    return parameter[0] + "=" + encodeURIComponent(parameter[1]);
  }).join("&");

  oReq.open("POST", VREMANAGER + "/vre/jobs" + query);
  oReq.send(formData);
}
|
||||
|
||||
|
||||
/**
 * Ask the vre manager, via an asynchronous HTTP DELETE request, to delete an
 * existing job. The visitor is informed about the outcome with a toast.
 * @param {string} id - The id of the job to be deleted.
 * @param {function} [callback] - A function that is called without arguments
 *                                after the job was deleted (HTTP status 204).
 */
function deleteJob(id, callback = console.log) {
  var request = new XMLHttpRequest();

  request.addEventListener("loadend", function() {
    if (request.status !== 204) {
      M.toast({html: "Auftrag '" + id + "' konnte nicht gelöscht werden.<br>(Fehlercode: " + request.status + ")"});
      return;
    }
    M.toast({html: "Auftrag '" + id + "' wurde gelöscht."});
    callback();
  });

  request.open("DELETE", VREMANAGER + "/vre/jobs/" + id);
  request.send();
}
|
||||
|
||||
|
||||
/**
 * Request all information about one existing job from the vre manager with an
 * asynchronous HTTP GET request.
 * @param {string} id - The id of the job to get the information from.
 * @param {function} [callback] - A function that is called with the parsed
 *                                response when the request succeeded
 *                                (HTTP status 200).
 */
function getJob(id, callback = console.log) {
  var request = new XMLHttpRequest();

  request.addEventListener("loadend", function() {
    // Any status other than 200 is ignored silently
    if (request.status !== 200) return;
    callback(JSON.parse(request.responseText));
  });

  request.open("GET", VREMANAGER + "/vre/jobs/" + id);
  request.send();
}
|
||||
|
||||
|
||||
/**
 * Sends an asynchronous HTTP request to the vre manager in order to get a
 * (filtered) list of all jobs.
 * @param {object} [filters=null] - An object containing filters. E.g.
 *                                  {'service': 'nlp', 'status': 'finished'}.
 *                                  The keys "name", "service", "status" and
 *                                  "user" are used; entries with a falsy
 *                                  value are skipped.
 * @param {function} [callback] - A function that is called with the parsed
 *                                response when the request succeeded
 *                                (HTTP status 200).
 */
function getJobs(filters = null, callback = console.log) {
  var oReq;
  var query;
  var queryParts;

  oReq = new XMLHttpRequest();
  oReq.addEventListener("loadend", function(oEvent) {
    if (this.status === 200) {
      callback(JSON.parse(this.responseText));
    }
  });

  queryParts = [];
  if (filters) {
    ["name", "service", "status", "user"].forEach(function(key) {
      // Encode the value, so that characters like "&" or "#" can't change
      // the meaning of the query string
      if (filters[key]) queryParts.push(key + "=" + encodeURIComponent(filters[key]));
    });
  }
  query = (queryParts.length === 0) ? "" : "?" + queryParts.join("&");

  oReq.open("GET", VREMANAGER + "/vre/jobs" + query);
  oReq.send();
}
|
||||
|
||||
|
||||
/**
 * Sends an asynchronous HTTP request to the vre manager in order to update the
 * values of a specific and existing job. The visitor is informed about the
 * outcome with a toast.
 * @param {string} id - The id of the job to update.
 * @param {object} [newValues=null] - An object containing the new values. E.g.
 *                                    {'status': 'finished'}. The keys "name",
 *                                    "language", "report" and "status" are
 *                                    used; entries with a falsy value are
 *                                    skipped.
 * @param {function} [callback] - A function that is called with the parsed
 *                                response when the request succeeded
 *                                (HTTP status 200).
 */
function updateJob(id, newValues = null, callback = console.log) {
  var oReq;
  var query;
  var queryParts;

  oReq = new XMLHttpRequest();
  oReq.addEventListener("loadend", function(oEvent) {
    if (this.status === 200) {
      M.toast({html: "Auftrag '" + id + "' wurde aktualisiert."});
      callback(JSON.parse(this.responseText));
    } else {
      M.toast({html: "Auftrag '" + id + "' konnte nicht aktualisiert werden.<br>(Fehlercode: " + this.status + ")"});
    }
  });

  queryParts = [];
  if (newValues) {
    ["name", "language", "report", "status"].forEach(function(key) {
      // Encode the value, so that characters like "&" or "#" can't change
      // the meaning of the query string
      if (newValues[key]) queryParts.push(key + "=" + encodeURIComponent(newValues[key]));
    });
  }
  query = (queryParts.length === 0) ? "" : "?" + queryParts.join("&");

  oReq.open("PUT", VREMANAGER + "/vre/jobs/" + id + query);
  oReq.send();
}
|
295
vre_www/website/nlp.html
Normal file
295
vre_www/website/nlp.html
Normal file
@ -0,0 +1,295 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="de">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Opaque | Linguistische Datenverarbeitung</title>
|
||||
<link href="/vre/images/sfb_1288.png" rel="icon" type="image/png">
|
||||
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
|
||||
<link rel="stylesheet" href="css/materialize.min.css">
|
||||
<link rel="stylesheet" href="css/style.css">
|
||||
<script src="js/vre_manager_rest.js"></script>
|
||||
<script src="js/vre.js"></script>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
</head>
|
||||
<body class="blue-grey lighten-5">
|
||||
<header>
|
||||
<ul id="main-nav-account-dropdown" class="dropdown-content">
|
||||
<li>
|
||||
<a href="account.html">Benutzerverwaltung</a>
|
||||
</li>
|
||||
<li class="divider"></li>
|
||||
<li>
|
||||
<a href="javascript:logout();">Abmelden</a>
|
||||
</li>
|
||||
</ul>
|
||||
<nav class="blue-grey darken-2">
|
||||
<div class="container">
|
||||
<div class="nav-wrapper">
|
||||
<a href="portal.html" class="brand-logo">
|
||||
<i class="material-icons hide-on-small-only">cloud</i>
|
||||
Opaque
|
||||
</a>
|
||||
<a href="#" data-target="main-nav-mobile" class="sidenav-trigger">
|
||||
<i class="material-icons">menu</i>
|
||||
</a>
|
||||
<ul id="main-nav" class="right hide-on-med-and-down">
|
||||
<li>
|
||||
<a href="job_manager.html">
|
||||
<i class="material-icons left">work</i>
|
||||
Auftragsübersicht
|
||||
</a>
|
||||
</li>
|
||||
<li class="active">
|
||||
<a href="nlp.html">
|
||||
<i class="material-icons left">format_textdirection_l_to_r</i>
|
||||
Linguistische Datenverarbeitung
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="ocr.html">
|
||||
<i class="material-icons left">find_in_page</i>
|
||||
Texterkennung
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a id="main-nav-account" class="dropdown-trigger" href="#!" data-target="main-nav-account-dropdown">
|
||||
<i class="material-icons left">account_circle</i>
|
||||
Benutzer
|
||||
<i class="material-icons right">arrow_drop_down</i>
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
<ul class="sidenav" id="main-nav-mobile">
|
||||
<li>
|
||||
<a href="job_manager.html">
|
||||
<i class="material-icons left">work</i>
|
||||
Auftragsübersicht
|
||||
</a>
|
||||
</li>
|
||||
<li class="active">
|
||||
<a href="nlp.html">
|
||||
<i class="material-icons left">format_textdirection_l_to_r</i>
|
||||
Linguistische Datenverarbeitung
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="ocr.html">
|
||||
<i class="material-icons left">find_in_page</i>
|
||||
Texterkennung
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="account.html">
|
||||
<i class="material-icons">account_circle</i>
|
||||
Benutzer
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="javascript:logout();">
|
||||
<i class="material-icons">account_circle</i>
|
||||
Abmelden
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</header>
|
||||
|
||||
<main>
|
||||
<div class="container">
|
||||
<div class="row">
|
||||
<div class="col s12 m8">
|
||||
<div class="card">
|
||||
<div class="card-content">
|
||||
<span class="card-title">Linguistische Datenverarbeitung</span>
|
||||
<p>
|
||||
Mit Hilfe computergestützter linguistischer
|
||||
Datenverarbeitungsmethoden können Textdateien mit weiteren
|
||||
Informationen angereichert werden. Auf dieser Plattform werden
|
||||
derzeit die im Folgenden aufgeführten
|
||||
Textverarbeitungsroutinen in automatisierter Form zur
|
||||
Verfügung gestellt.
|
||||
</p>
|
||||
<div class="row">
|
||||
<div class="col s12 m6">
|
||||
<div class="card z-depth-0">
|
||||
<div class="card-content">
|
||||
<span class="card-title">
|
||||
<i class="material-icons blue-grey-text text-darken-2 left">layers</i>
|
||||
Tokenisierung
|
||||
</span>
|
||||
<p>
|
||||
Aufteilung eines Textes in Sätze und Wörter. Dies
|
||||
ist zur weiteren Verarbeitung notwendig.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col s12 m6">
|
||||
<div class="card z-depth-0">
|
||||
<div class="card-content">
|
||||
<span class="card-title">
|
||||
<i class="material-icons blue-grey-text text-darken-2 left">layers</i>
|
||||
Lemmatisierung
|
||||
</span>
|
||||
<p>
|
||||
Reduktion der Flexionsformen eines Wortes auf dessen
|
||||
Grundform.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col s12 m6">
|
||||
<div class="card z-depth-0">
|
||||
<div class="card-content">
|
||||
<span class="card-title">
|
||||
<i class="material-icons blue-grey-text text-darken-2 left">layers</i>
|
||||
Part-of-speech-Tagging
|
||||
</span>
|
||||
<p>
|
||||
Kontext- und definitionsbezogene Zuordnung von Wörtern
|
||||
und Satzzeichen zu Wortarten.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col s12 m6">
|
||||
<div class="card z-depth-0">
|
||||
<div class="card-content">
|
||||
<span class="card-title">
|
||||
<i class="material-icons blue-grey-text text-darken-2 left">layers</i>
|
||||
Eigennamenerkennung
|
||||
</span>
|
||||
<p>
|
||||
Identifikation von Wörtern, die eine Entität
|
||||
beschreiben, wie Firmen- und Personennamen.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<p>
|
||||
Die Ausgabe erfolgt in Form einer <i>verticalized text</i>
|
||||
Datei (Dateiendung: .vrt). In diesem Format kann der Text
|
||||
einfach in Auswertungsprogramme, wie zum Beispiel die <a href="http://cwb.sourceforge.net/">IMS Open Corpus Workbench</a>,
|
||||
eingefügt werden.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col s12 m4">
|
||||
<div class="card">
|
||||
<div class="card-content">
|
||||
<span class="card-title">Auftrag erstellen</span>
|
||||
<p>
|
||||
<i class="material-icons medium blue-grey-text text-darken-2 left">playlist_add</i>
|
||||
Auftragsformular zur linguistischen Datenverarbeitung von
|
||||
Textdateien.
|
||||
</p>
|
||||
</div>
|
||||
<div class="card-action right-align">
|
||||
<a href="#create_job" class="waves-effect waves-light btn modal-trigger">Erstellen</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<div class="card-content">
|
||||
<span class="card-title">Auftragsübersicht</span>
|
||||
<p>
|
||||
<i class="material-icons medium blue-grey-text text-darken-2 left">work</i>
|
||||
Hier wird der aktuelle Status sämtlicher vom Benutzer
|
||||
erstellten Aufträge aufgelistet.
|
||||
</p>
|
||||
</div>
|
||||
<div class="card-action right-align">
|
||||
<a href="job_manager.html" class="waves-effect waves-light btn">Öffnen</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="create_job" class="modal overflow-visible">
|
||||
<div class="modal-content">
|
||||
<h4 style="line-height: 60px;"><i class="material-icons medium blue-grey-text text-darken-2 left">playlist_add</i>Linguistische Datenverarbeitung</h4>
|
||||
<p>
|
||||
Um den Auftrag abschicken zu können, müssen alle Formularfelder
|
||||
ausgefüllt werden. Anschließend erhalten Sie fortlaufende
|
||||
Benachrichtigungen über den aktuellen Bearbeitungsstand des
|
||||
Auftrags. Nach erfolgreicher Bearbeitung stehen die Ergebnisse in
|
||||
der <a href="job_manager.html">Auftragsverwaltung</a> zum Download
|
||||
zur Verfügung.
|
||||
</p>
|
||||
<form id="job_form" class="row">
|
||||
<div class="input-field col s12">
|
||||
<input placeholder="Mein Auftragsname" id="job_form_name" type="text" class="validate">
|
||||
<label for="job_form_name">Auftragsname</label>
|
||||
</div>
|
||||
<div class="file-field input-field col s12 m6">
|
||||
<div class="btn">
|
||||
<span>Datei</span>
|
||||
<input id="job_form_file" type="file" accept="text/plain">
|
||||
</div>
|
||||
<div class="file-path-wrapper">
|
||||
<input class="file-path validate" type="text">
|
||||
</div>
|
||||
</div>
|
||||
<div class="input-field col s12 m6">
|
||||
<select id="job_form_language">
|
||||
<option value="" disabled selected>Option auswählen</option>
|
||||
<option value="de">Deutsch</option>
|
||||
<option value="en">Englisch</option>
|
||||
<option value="fr">Französisch</option>
|
||||
<option value="pt">Portugiesisch</option>
|
||||
<option value="es">Spanisch</option>
|
||||
</select>
|
||||
<label>Sprache der Eingabedatei</label>
|
||||
</div>
|
||||
<div class="input-field col s12 m6 hide">
|
||||
<select id="job_form_service">
|
||||
<option value="nlp" selected></option>
|
||||
</select>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
<div class="modal-footer">
|
||||
<a href="#!" class="modal-close waves-effect waves-green btn-flat">Abbrechen</a>
|
||||
<a href="javascript:jobFormSubmitHandler();" id="job_form_submit" class="modal-close waves-effect waves-green btn disabled">Abschicken</a>
|
||||
</div>
|
||||
</div>
|
||||
</main>
|
||||
|
||||
<footer class="page-footer blue-grey darken-2">
|
||||
<div class="container">
|
||||
<div class="row">
|
||||
<div class="col s12">
|
||||
<p class="grey-text text-lighten-4 left">
|
||||
<a href="https://www.uni-bielefeld.de/">
|
||||
<img src="images/universitaet_bielefeld.png" alt="Logo: Universität Bielefeld" style="height: 72.5px">
|
||||
</a>
|
||||
</p>
|
||||
<p class="grey-text text-lighten-4 right">
|
||||
Gefördert durch die<br>
|
||||
<a href="https://www.dfg.de/">
|
||||
<img src="images/dfg.png" alt="Logo: Deutsche Forschungsgemeinschaft" style="height: 50px;">
|
||||
</a>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="footer-copyright">
|
||||
<div class="container">
|
||||
© 2019 Copyright
|
||||
<a class="grey-text text-lighten-4" href="https://www.uni-bielefeld.de/sfb1288">Universität Bielefeld | SFB 1288</a>
|
||||
<a class="grey-text text-lighten-4 right" href="impressum.html">Impressum</a>
|
||||
</div>
|
||||
</div>
|
||||
</footer>
|
||||
<script src="js/materialize.min.js"></script>
|
||||
<script>
|
||||
M.AutoInit();
|
||||
</script>
|
||||
<script src="js/job_form_handler.js"></script>
|
||||
</body>
|
||||
</html>
|
293
vre_www/website/ocr.html
Normal file
293
vre_www/website/ocr.html
Normal file
@ -0,0 +1,293 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="de">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Opaque | Texterkennung</title>
|
||||
<link href="/vre/images/sfb_1288.png" rel="icon" type="image/png">
|
||||
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
|
||||
<link rel="stylesheet" href="css/materialize.min.css">
|
||||
<link rel="stylesheet" href="css/style.css">
|
||||
<script src="js/vre_manager_rest.js"></script>
|
||||
<script src="js/vre.js"></script>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
</head>
|
||||
<body class="blue-grey lighten-5">
|
||||
<header>
|
||||
<ul id="main-nav-account-dropdown" class="dropdown-content">
|
||||
<li>
|
||||
<a href="account.html">Benutzerverwaltung</a>
|
||||
</li>
|
||||
<li class="divider"></li>
|
||||
<li>
|
||||
<a href="javascript:logout();">Abmelden</a>
|
||||
</li>
|
||||
</ul>
|
||||
<nav class="blue-grey darken-2">
|
||||
<div class="container">
|
||||
<div class="nav-wrapper">
|
||||
<a href="portal.html" class="brand-logo">
|
||||
<i class="material-icons hide-on-small-only">cloud</i>
|
||||
Opaque
|
||||
</a>
|
||||
<a href="#" data-target="main-nav-mobile" class="sidenav-trigger">
|
||||
<i class="material-icons">menu</i>
|
||||
</a>
|
||||
<ul id="main-nav" class="right hide-on-med-and-down">
|
||||
<li>
|
||||
<a href="job_manager.html">
|
||||
<i class="material-icons left">work</i>
|
||||
Auftragsübersicht
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="nlp.html">
|
||||
<i class="material-icons left">format_textdirection_l_to_r</i>
|
||||
Linguistische Datenverarbeitung
|
||||
</a>
|
||||
</li>
|
||||
<li class="active">
|
||||
<a href="ocr.html">
|
||||
<i class="material-icons left">find_in_page</i>
|
||||
Texterkennung
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a id="main-nav-account" class="dropdown-trigger" href="#!" data-target="main-nav-account-dropdown">
|
||||
<i class="material-icons left">account_circle</i>
|
||||
Benutzer
|
||||
<i class="material-icons right">arrow_drop_down</i>
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
<ul class="sidenav" id="main-nav-mobile">
|
||||
<li>
|
||||
<a href="job_manager.html">
|
||||
<i class="material-icons left">work</i>
|
||||
Auftragsübersicht
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="nlp.html">
|
||||
<i class="material-icons left">format_textdirection_l_to_r</i>
|
||||
Linguistische Datenverarbeitung
|
||||
</a>
|
||||
</li>
|
||||
<li class="active">
|
||||
<a href="ocr.html">
|
||||
<i class="material-icons left">find_in_page</i>
|
||||
Texterkennung
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="account.html">
|
||||
<i class="material-icons">account_circle</i>
|
||||
Benutzer
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="javascript:logout();">
|
||||
<i class="material-icons">account_circle</i>
|
||||
Abmelden
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</header>
|
||||
|
||||
<main>
|
||||
<div class="container">
|
||||
<div class="row">
|
||||
<div class="col s12 m8">
|
||||
<div class="card">
|
||||
<div class="card-content">
|
||||
<span class="card-title">Texterkennung</span>
|
||||
<p>
|
||||
Der hier zur Verfügung gestellte Dienst basiert auf Verfahren der
|
||||
optischen Zeichenerkennung. Dabei werden durch optische Analysemethoden
|
||||
und einen anschließenden Abgleich mit Wörterbüchern aus Bilddaten,
|
||||
wie Fotos oder Scans, Textdaten erzeugt. Erst dieser Vorverarbeitungsschritt
|
||||
ermöglicht eine computergestützte Verarbeitung von Dokumenten.
|
||||
</p>
|
||||
<div class="row">
|
||||
<div class="col s12 m6">
|
||||
<div class="card z-depth-0">
|
||||
<div class="card-content">
|
||||
<span class="card-title">
|
||||
<i class="material-icons blue-grey-text text-darken-2 left">layers</i>
|
||||
Eingabe von Bilddaten
|
||||
</span>
|
||||
<p>
|
||||
Über ein Auftragsformular können Bilddaten in Form von
|
||||
PDF-Dateien hochgeladen werden.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col s12 m6">
|
||||
<div class="card z-depth-0">
|
||||
<div class="card-content">
|
||||
<span class="card-title">
|
||||
<i class="material-icons blue-grey-text text-darken-2 left">layers</i>
|
||||
Optische Zeichenerkennung
|
||||
</span>
|
||||
<p>
|
||||
Die optische Zeichenerkennung erfolgt in der
|
||||
Recheninfrastruktur der Plattform.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col s12 m6">
|
||||
<div class="card z-depth-0">
|
||||
<div class="card-content">
|
||||
<span class="card-title">
|
||||
<i class="material-icons blue-grey-text text-darken-2 left">layers</i>
|
||||
Fehlerkorrektur
|
||||
</span>
|
||||
<p>
|
||||
Je nach Qualität der Eingabedaten kann es zu
|
||||
Fehlern kommen, die korrigiert werden sollten.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col s12 m6">
|
||||
<div class="card z-depth-0">
|
||||
<div class="card-content">
|
||||
<span class="card-title">
|
||||
<i class="material-icons blue-grey-text text-darken-2 left">layers</i>
|
||||
Weiterverarbeitung
|
||||
</span>
|
||||
<p>
|
||||
Die Textdaten können weiterverarbeitet<a class="tooltipped" data-position="top" data-tooltip="Zum Beispiel durch die hier angebotene linguistische Datenverarbeitung."><sup>[*]</sup></a>
|
||||
oder in dieser Form bereits genutzt<a class="tooltipped" data-position="top" data-tooltip="Zum Beispiel mit dem Programm &quot;AntConc&quot;."><sup>[*]</sup></a> werden.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<p>
|
||||
Die Ausgabe dieses Dienstes erfolgt in Form von Textdateien,
|
||||
PDF-Dateien mit hinterlegtem Text und <a href="https://tei-c.org/guidelines/p5/" target="_blank">TEI P5</a>
|
||||
konformen XML-Dateien.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col s12 m4">
|
||||
<div class="card">
|
||||
<div class="card-content">
|
||||
<span class="card-title">Auftrag erstellen</span>
|
||||
<p>
|
||||
<i class="material-icons medium blue-grey-text text-darken-2 left">playlist_add</i>
|
||||
Auftragsformular zur Texterkennung von PDF-Dateien.
|
||||
</p>
|
||||
</div>
|
||||
<div class="card-action right-align">
|
||||
<a href="#create_job" class="waves-effect waves-light btn modal-trigger">Erstellen</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<div class="card-content">
|
||||
<span class="card-title">Auftragsübersicht</span>
|
||||
<p>
|
||||
<i class="material-icons medium blue-grey-text text-darken-2 left">work</i>
|
||||
Hier wird der aktuelle Status sämtlicher vom Benutzer erstellten
|
||||
Aufträge aufgelistet.
|
||||
</p>
|
||||
</div>
|
||||
<div class="card-action right-align">
|
||||
<a href="job_manager.html" class="waves-effect waves-light btn">Öffnen</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="create_job" class="modal overflow-visible">
|
||||
<div class="modal-content">
|
||||
<h4 style="line-height: 60px;"><i class="material-icons medium blue-grey-text text-darken-2 left">playlist_add</i>Texterkennung</h4>
|
||||
<p>
|
||||
Um den Auftrag erstellen zu können, müssen alle Formularfelder
|
||||
ausgefüllt werden. Nachdem der Auftrag abgeschickt wurde, erhalten
|
||||
Sie fortlaufende Benachrichtigungen über den aktuellen Bearbeitungsstand.
|
||||
Nach erfolgreicher Bearbeitung stehen die Ergebnisse in
|
||||
der <a href="job_manager.html">Auftragsübersicht</a> zum Download
|
||||
zur Verfügung.
|
||||
</p>
|
||||
<form id="job_form" class="row">
|
||||
<div class="input-field col s12">
|
||||
<input placeholder="Mein Auftragsname" id="job_form_name" type="text" class="validate">
|
||||
<label for="job_form_name">Auftragsname</label>
|
||||
</div>
|
||||
<div class="file-field input-field col s12 m6">
|
||||
<div class="btn">
|
||||
<span>Datei</span>
|
||||
<input id="job_form_file" type="file" accept="application/pdf">
|
||||
</div>
|
||||
<div class="file-path-wrapper">
|
||||
<input class="file-path validate" type="text">
|
||||
</div>
|
||||
</div>
|
||||
<div class="input-field col s12 m6">
|
||||
<select id="job_form_language">
|
||||
<option value="" disabled selected>Option auswählen</option>
|
||||
<option value="deu">Deutsch</option>
|
||||
<option value="deu_frak">Deutsch (Fraktur)</option>
|
||||
<option value="eng">Englisch</option>
|
||||
<option value="fra">Französisch</option>
|
||||
<option value="por">Portugiesisch</option>
|
||||
<option value="spa">Spanisch</option>
|
||||
</select>
|
||||
<label>Sprache der Eingabedatei</label>
|
||||
</div>
|
||||
<div class="input-field col s12 m6 hide">
|
||||
<select id="job_form_service">
|
||||
<option value="ocr" selected></option>
|
||||
</select>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
<div class="modal-footer">
|
||||
<a href="#!" class="modal-close waves-effect waves-green btn-flat">Abbrechen</a>
|
||||
<a href="javascript:jobFormSubmitHandler();" id="job_form_submit" class="modal-close waves-effect waves-green btn disabled">Abschicken</a>
|
||||
</div>
|
||||
</div>
|
||||
</main>
|
||||
|
||||
<footer class="page-footer blue-grey darken-2">
|
||||
<div class="container">
|
||||
<div class="row">
|
||||
<div class="col s12">
|
||||
<p class="grey-text text-lighten-4 left">
|
||||
<a href="https://www.uni-bielefeld.de/">
|
||||
<img src="images/universitaet_bielefeld.png" alt="Logo: Universität Bielefeld" style="height: 72.5px">
|
||||
</a>
|
||||
</p>
|
||||
<p class="grey-text text-lighten-4 right">
|
||||
Gefördert durch die<br>
|
||||
<a href="https://www.dfg.de/">
|
||||
<img src="images/dfg.png" alt="Logo: Deutsche Forschungsgemeinschaft" style="height: 50px;">
|
||||
</a>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="footer-copyright">
|
||||
<div class="container">
|
||||
© 2019 Copyright
|
||||
<a class="grey-text text-lighten-4" href="https://www.uni-bielefeld.de/sfb1288">Universität Bielefeld | SFB 1288</a>
|
||||
<a class="grey-text text-lighten-4 right" href="impressum.html">Impressum</a>
|
||||
</div>
|
||||
</div>
|
||||
</footer>
|
||||
<script src="js/materialize.min.js"></script>
|
||||
<script>
|
||||
M.AutoInit();
|
||||
</script>
|
||||
<script src="js/job_form_handler.js"></script>
|
||||
</body>
|
||||
</html>
|
233
vre_www/website/portal.html
Normal file
233
vre_www/website/portal.html
Normal file
@ -0,0 +1,233 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="de">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Opaque | Portal</title>
|
||||
<link href="/vre/images/sfb_1288.png" rel="icon" type="image/png">
|
||||
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
|
||||
<link rel="stylesheet" href="css/materialize.min.css">
|
||||
<link rel="stylesheet" href="css/style.css">
|
||||
<script src="js/vre_manager_rest.js"></script>
|
||||
<script src="js/vre.js"></script>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
</head>
|
||||
<body class="blue-grey lighten-5">
|
||||
<header>
|
||||
<ul id="main-nav-account-dropdown" class="dropdown-content">
|
||||
<li>
|
||||
<a href="account.html">Benutzerverwaltung</a>
|
||||
</li>
|
||||
<li class="divider"></li>
|
||||
<li>
|
||||
<a href="javascript:logout();">Abmelden</a>
|
||||
</li>
|
||||
</ul>
|
||||
<nav class="blue-grey darken-2">
|
||||
<div class="container">
|
||||
<div class="nav-wrapper">
|
||||
<a href="portal.html" class="brand-logo">
|
||||
<i class="material-icons hide-on-small-only">cloud</i>
|
||||
Opaque
|
||||
</a>
|
||||
<a href="#" data-target="main-nav-mobile" class="sidenav-trigger">
|
||||
<i class="material-icons">menu</i>
|
||||
</a>
|
||||
<ul id="main-nav" class="right hide-on-med-and-down">
|
||||
<li>
|
||||
<a href="job_manager.html">
|
||||
<i class="material-icons left">work</i>
|
||||
Auftragsübersicht
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="nlp.html">
|
||||
<i class="material-icons left">format_textdirection_l_to_r</i>
|
||||
Linguistische Datenverarbeitung
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="ocr.html">
|
||||
<i class="material-icons left">find_in_page</i>
|
||||
Texterkennung
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a id="main-nav-account" class="dropdown-trigger" href="#!" data-target="main-nav-account-dropdown">
|
||||
<i class="material-icons left">account_circle</i>
|
||||
Benutzer
|
||||
<i class="material-icons right">arrow_drop_down</i>
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
<ul class="sidenav" id="main-nav-mobile">
|
||||
<li>
|
||||
<a href="job_manager.html">
|
||||
<i class="material-icons left">work</i>
|
||||
Auftragsübersicht
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="nlp.html">
|
||||
<i class="material-icons left">format_textdirection_l_to_r</i>
|
||||
Linguistische Datenverarbeitung
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="ocr.html">
|
||||
<i class="material-icons left">find_in_page</i>
|
||||
Texterkennung
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="account.html">
|
||||
<i class="material-icons">account_circle</i>
|
||||
Benutzer
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="javascript:logout();">
|
||||
<i class="material-icons">account_circle</i>
|
||||
Abmelden
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</header>
|
||||
|
||||
<main>
|
||||
<div class="container">
|
||||
<div class="row">
|
||||
<div class="col s12 m8">
|
||||
<div class="card">
|
||||
<div class="card-content">
|
||||
<span class="card-title">Portal</span>
|
||||
<p class="overflow-hidden">
|
||||
<i class="material-icons blue-grey-text text-darken-2 left" style="font-size: 135px;">cloud</i>
|
||||
Opaque ist eine freie Plattform zur Unterstützung
|
||||
textverarbeitender Prozesse. Sie soll es ermöglichen,
|
||||
komplexe Programme zur einfachen Nutzung bereitzustellen und
|
||||
bereits etablierte Textverarbeitungsroutinen ohne die Hürde langer
|
||||
Einarbeitungszeiten zugänglich zu machen. Alle Berechnungen
|
||||
werden dabei von den vom SFB 1288 zur Verfügung gestellten
|
||||
Servern ausgeführt. So können Aufträge bearbeitet werden, ohne
|
||||
dass der Computer des Benutzers eingeschaltet sein muss.
|
||||
</p>
|
||||
|
||||
<p> </p>
|
||||
<p> </p>
|
||||
<div class="divider"></div>
|
||||
<p> </p>
|
||||
<p> </p>
|
||||
|
||||
<p class="overflow-hidden">
|
||||
<i class="material-icons blue-grey-text text-darken-2 left" style="font-size: 135px;">lock_open</i>
|
||||
Alle hier zur Verfügung gestellten Dienste wurden durch den
|
||||
Einsatz freier Software realisiert. Um den Ansprüchen modernen
|
||||
Forschungsdatenmanagements zu entsprechen, muss die
|
||||
Nachnutzbarkeit von Ergebnissen sichergestellt werden. Aus diesem Grund
|
||||
wird bewusst auf proprietäre Dateiformate verzichtet. Stattdessen werden nur
|
||||
standardisierte und offene Formate genutzt, die eine
|
||||
einfache Verbreitung und Weiterverarbeitung ermöglichen.
|
||||
</p>
|
||||
|
||||
<p> </p>
|
||||
<p> </p>
|
||||
<div class="divider"></div>
|
||||
<p> </p>
|
||||
<p> </p>
|
||||
|
||||
<p class="overflow-hidden">
|
||||
<img src="images/sfb_1288.png" alt="Logo: SFB 1288" class="left" style="width: 135px; height: 135px; margin-right: 15px;">
|
||||
Diese Seite wurde vom Teilprojekt INF des Sonderforschungsbereichs 1288
|
||||
vor dem Hintergrund der gesammelten Erfahrungen entwickelt, die in einem
|
||||
Pilotprojekt zur digitalen Verarbeitung von Textdaten im SFB gemacht wurden.<br>
|
||||
Für Fragen und Anregungen steht das TP INF gerne via E-Mail zur Verfügung:
|
||||
<br><a href="mailto:inf_sfb1288@lists.uni-bielefeld.de">inf_sfb1288@lists.uni-bielefeld.de</a>
|
||||
</p>
|
||||
|
||||
<p> </p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="col s12 m4">
|
||||
<div class="card">
|
||||
<div class="card-content">
|
||||
<span class="card-title">Linguistische Datenverarbeitung</span>
|
||||
<p>
|
||||
<i class="material-icons medium blue-grey-text text-darken-2 left">format_textdirection_l_to_r</i>
|
||||
Mit Hilfe datenverarbeitender Methoden werden Textdaten mit
|
||||
weiteren Informationen angereichert.
|
||||
</p>
|
||||
</div>
|
||||
<div class="card-action right-align">
|
||||
<a href="nlp.html" class="waves-effect waves-light btn">Zum Dienst</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<div class="card-content">
|
||||
<span class="card-title">Texterkennung</span>
|
||||
<p>
|
||||
<i class="material-icons medium blue-grey-text text-darken-2 left">find_in_page</i>
|
||||
Textdaten werden aus Scans oder Fotos zur anschließenden
|
||||
Weiterverarbeitung extrahiert.
|
||||
</p>
|
||||
</div>
|
||||
<div class="card-action right-align">
|
||||
<a href="ocr.html" class="waves-effect waves-light btn">Zum Dienst</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<div class="card-content">
|
||||
<span class="card-title">Auftragsübersicht</span>
|
||||
<p>
|
||||
<i class="material-icons medium blue-grey-text text-darken-2 left">work</i>
|
||||
Hier wird der aktuelle Status sämtlicher vom Benutzer erstellten
|
||||
Aufträge aufgelistet.
|
||||
</p>
|
||||
</div>
|
||||
<div class="card-action right-align">
|
||||
<a href="job_manager.html" class="waves-effect waves-light btn">Öffnen</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</main>
|
||||
|
||||
<footer class="page-footer blue-grey darken-2">
|
||||
<div class="container">
|
||||
<div class="row">
|
||||
<div class="col s12">
|
||||
<p class="grey-text text-lighten-4 left">
|
||||
<a href="https://www.uni-bielefeld.de/">
|
||||
<img src="images/universitaet_bielefeld.png" alt="Logo: Universität Bielefeld" style="height: 72.5px">
|
||||
</a>
|
||||
</p>
|
||||
<p class="grey-text text-lighten-4 right">
|
||||
Gefördert durch die<br>
|
||||
<a href="https://www.dfg.de/">
|
||||
<img src="images/dfg.png" alt="Logo: Deutsche Forschungsgemeinschaft" style="height: 50px;">
|
||||
</a>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="footer-copyright">
|
||||
<div class="container">
|
||||
© 2019 Copyright
|
||||
<a class="grey-text text-lighten-4" href="https://www.uni-bielefeld.de/sfb1288">Universität Bielefeld | SFB 1288</a>
|
||||
<a class="grey-text text-lighten-4 right" href="impressum.html">Impressum</a>
|
||||
</div>
|
||||
</div>
|
||||
</footer>
|
||||
<script src="js/materialize.min.js"></script>
|
||||
<script>
|
||||
M.AutoInit();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
Loading…
Reference in New Issue
Block a user