Add prototype

This commit is contained in:
Patrick Jentsch 2019-06-03 14:57:09 +02:00
parent b8fa8f47ab
commit 86557443a2
43 changed files with 24638 additions and 53 deletions

View File

81
ask_for_jobs Executable file
View File

@ -0,0 +1,81 @@
#!/usr/bin/env python3
# coding=utf-8
import json
import os
import subprocess
from threading import Thread
import urllib.parse
import urllib.request
# URL-quoted error text that is sent as the "report" of a failed job.
ERRORRMESSAGE = urllib.parse.quote("Bei der Verarbeitung der Daten ist ein Fehler aufgetreten.")
# Base URL of the jobs endpoint of the manager REST API.
VRE_MANAGER = "http://localhost:5000/vre/jobs"
# Maps a service name to the Docker image used to look up its compute nodes.
IMAGES = {
    "nlp": "gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_nlp_node",
    "ocr": "gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_ocr_node"
}
def manage_jobs(service):
    """Assign queued jobs of one service to idle compute nodes.

    Fetches the queued jobs of ``service`` from the manager, lists the running
    containers started from the service's image, drops the containers that
    are currently busy and starts one thread per (job, container) pair.

    Args:
        service: Service name; must be a key of ``IMAGES``.
    """
    # Get queued jobs; the response is closed as soon as the JSON is parsed.
    query = urllib.parse.urlencode({"service": service, "status": "queued"})
    with urllib.request.urlopen(VRE_MANAGER + "?" + query) as response:
        queued = json.load(response)
    # Return if no jobs are available
    if not queued:
        return

    # Get a list of compute nodes
    compute_nodes = subprocess.run(
        ["docker", "ps", "--filter", "ancestor=" + IMAGES[service], "--format", "{{.Names}}"],
        encoding="utf-8",
        stdout=subprocess.PIPE
    ).stdout.split()

    def is_idle(compute_node):
        # `test -f` exits with a non-zero code when the marker file is absent,
        # which is treated as "no workflow running on this node".
        marker_check = subprocess.run(
            ["docker", "exec", compute_node, "test", "-f", "pyflow.data/active_pyflow_process.txt"]
        )
        return marker_check.returncode != 0

    # Filter occupied compute nodes out
    idle_nodes = [compute_node for compute_node in compute_nodes if is_idle(compute_node)]
    # Return if no compute nodes are available
    if not idle_nodes:
        return

    # Assign jobs to compute nodes; zip() stops at the shorter list, so
    # surplus jobs stay queued and surplus nodes stay idle.
    for job, compute_node in zip(queued, idle_nodes):
        # Prepare and start the assigned jobs
        Thread(target=start_job, args=(service, compute_node, job)).start()
def start_job(service, compute_node, job):
    """Run one job on a compute node and report its status to the manager.

    The job is set to "running", the service executable is started inside the
    container via ``docker exec`` and, depending on the exit code, the job is
    set to "finished" or to "failed" (with ``ERRORRMESSAGE`` as report).

    Args:
        service: Name of the executable to run inside the container.
        compute_node: Name of the Docker container to run the job in.
        job: Job dictionary; its "id" and "language" entries are read.
    """
    job_url = VRE_MANAGER + "/" + urllib.parse.quote(job["id"], safe="")

    def put(query):
        # Send a PUT request for this job and close the response right away.
        request = urllib.request.Request(url=job_url + "?" + query, method="PUT")
        with urllib.request.urlopen(request):
            pass

    # Update job status to "running"
    put("status=running")
    # Start the service; input and output share the job's directory.
    job_directory = "/root/vre_files/jobs/" + job["id"]
    try:
        returncode = subprocess.run(
            ["docker", "exec", compute_node, service, "-i", job_directory, "-o", job_directory, "-l", job["language"]],
            stderr=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL
        ).returncode
    except OSError:
        # docker itself could not be launched; report a failure instead of
        # leaving the job in the "running" state forever.
        returncode = 1
    if returncode != 0:
        put("status=failed&report=" + ERRORRMESSAGE)
        return
    # Update job status to "finished"
    put("status=finished")
    # TODO: Send E-Mail to user
# Run one assignment pass for each service, OCR first.
manage_jobs("ocr")
manage_jobs("nlp")

View File

@ -1,39 +0,0 @@
## Manager Service:
- Joblist
- Thread safe
- REST API (https://github.com/zalando/connexion)
- CreateNewJob
- DeleteJobs
- GetJobInfo
- AlterJob
- Scheduling
- Resource management
- manages also files on file server
## Web Server
- serves content.
- serves forms for user request and inputs
- has copy of Joblist to display those for the user according to requests etc.
- talks to the Manager service
- Users CANNOT talk directly with the manager
- has list of all currently running user sessions (maybe used for authentication)
## Authentication via LDAP (not sure if we need that)
- university internal authentication with LDAP
- LDAP and connexion can use (OAuth 2)
- https://ldapwiki.com/wiki/Best%20Practices%20for%20LDAP%20Security
- https://connexion.readthedocs.io/en/latest/security.html?highlight=authentication
- university external authentication with something else
## Compute pool: Docker Cluster
- How to handle job and resource management for user Jobs and processes.
- gets requests and tasks from the manager
## File Server
- stores user input and
- output files
- Maybe WebDAV
## Database
- document based data base like mongoDB preferred

View File

@ -1 +0,0 @@
<mxfile modified="2019-04-17T12:49:28.623Z" host="www.draw.io" agent="Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:66.0) Gecko/20100101 Firefox/66.0" etag="JmPgRII5KerP40fNuYKp" version="10.6.3" type="device"><diagram id="u0ifZDnm1cNV0CSORuqK" name="Page-1">3VpZc9o6FP41zKQPyXhhCY+U5ba905lOaSfNU0fYAruRJSrJYPLr75Et45XgsIUbXpCOtZzlO8efBC17GET/cLT0vjIXk5ZluFHLHrUsyzLsNnwpySaRmKbdSSQL7rtalgmm/jPWQkNLQ9/FojBQMkakvywKHUYpdmRBhjhn6+KwOSPFXZdogSuCqYNIVfrgu9JLpPdWL5N/wv7CS3c2u/3kSYDSwdoS4SGXrXMie9yyh5wxmbSCaIiJ8l7ql2TeZMfTrWIcU9lkwvSPYfwcPM+eJ/aPaCxR/9Ye3+pVVoiE2uCHhwetr9ykThBrPyCIQu+jHo+5xNFORcyteQAMzAIs+QaG6Alt7ZBNsbvO3LuVeTnX2qkjkQ7pYrtyZjU0tOGvcIJVcULL6hLY9qPrr6C5kLHhiWiWCiYEiadUCvvOKiN5WQKjalasEc0ZuDIfgu7fkKUPbkWcJQMYYFrLKHsYpwFhvDCjZdn9+JMXpbvZMOUPA80N4gsZz1+qSN1wjFz4ZpRsPuS0T/T6Hxi1wFLAF8dzjoWHlS0YQKtsExhqhbvHqANit0I88ebvxJdHg+B4/BzgOI7/hjiBAnI8/KIup0DD4bGdk/CQyDaMo/bD74IXrjt0AgvhMypO44Ca1a7Y9lBgpbRPJebIkaC4QkhI4+Y+jxygsVpaVU228OlNSFEAGBkqPZZIiDXj7ofXLgWo/sJm4mbuE7BBNJ9/d3f3tsVm++IbLJNkmVDwSpRbp/JqvIhae6eWWA4wGiX3ZADsYWRCU0jOnvBQw5GymAFBgEhJhIi/oNAleB4nELAjHyjkQIslW4J07fkST8FFars10GWQcRZSFytuYmw12sOtdtO4KuHSDMtuyLCs/rkYVqfCsL4iCtSZn5lqdqxr45rdJlwTOItgIQekFOnZ+ZMnv7OqqeIiibi/tAzV8S6Ky/oRVeX7ePoDDBt8+xwfBfnF3Vux699whjnFEovjDMuFLYgzK8D0FRz0/VTCzpVXwl4l/yc+UdG/aB3c9t+uEN43KYSxbzBfqRdFw1SYTj+pg6vK7TWeuWj1/jHfu3LM96uXTIw/vfzyFxJxqa8DgeGfJAtMw7g2OpBec9akwfYtMGLOUyEBZu8RxP0rB7FZrdyaTjgsCEIK3kqOnWVMe2ipmnOCo4G6kAYvYerq5sghcGD0nRTxVXEuluBAvvmlPR13HlXnrpN2R1H+4WiT9iJf/krXgHZuFvSySaqzKQQSu5Xb8VKyga0J8dh7vQrmwRl3HzWuIiAX8k5NxFMZxwQCsCqqW4cCvcM35seXDfWX0n2zhKPESj0pf8teWqdfXOe+W1on8UJlnRiSW6OPuMqu1pPkpBVfjgDEqFNHNk6I0QOwdiiuT4jRbkOMmkbrDUF6bxbRZZntA1Ha3bPQuWFa/cnle/KLA1IXuMaaw4tGndHqufE5SmqGtscC2F4uqRm0H3OgP3tJ7TSE6w5meBm0dkoga5drYVO03pdp26WLavX6qvYMvhvAjU4gZ6jCRqEK95qWYT2yaTKcENhpgbtuZHc7RUDavQOR3SsV9HaZvp4b2TWkNoHxkZW3rsSaBUzlMLa3xlr5Imtcqsj2GkLReksoXj1vhW72H5dkePZXIXv8Hw==</diagram></mxfile>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 53 KiB

View File

@ -1,13 +0,0 @@
##### books:
- Flask By Example
http://web.b.ebscohost.com/ehost/detail/detail?vid=3&sid=88df57d6-b0af-42aa-aeda-3f96f2d80c8e%40sessionmgr120&bdata=JnNpdGU9ZWhvc3QtbGl2ZQ%3d%3d#AN=1215157&db=nlebk
#### videos:
- Learning Flask
- https://www.lynda.com/Flask-tutorials/Learning-Flask/704154-2.html
- Building Web APIs with Flask
- https://www.lynda.com/Flask-tutorials/Web-API-Development-Flask/521200-2.html?
#### online:
- The Flask Mega-Tutorial
- https://blog.miguelgrinberg.com/post/the-flask-mega-tutorial-part-i-hello-world

84
start.sh Executable file
View File

@ -0,0 +1,84 @@
#!/bin/bash
# Build and start the prototype containers, then poll for jobs forever.
#
# Usage: start.sh [-clean]
#   -clean  Remove the prototype containers, images and volume, then exit.

# ANSI escape sequences used to colour the progress messages.
COLOR_BLUE="\033[0;34m"
COLOR_END="\033[0m"
COLOR_GREEN="\033[0;32m"

# URL of the prototype, built from the first address printed by `hostname -I`.
PROTOTYPE_URL="http://$(hostname -I | cut -d' ' -f1)/vre"

# Directory containing this script. The image directories and ask_for_jobs
# are resolved relative to it so the script works from any working directory.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

if [ "$1" == "-clean" ]; then
  echo -e "${COLOR_BLUE}Remove existing prototype related containers...${COLOR_END}"
  docker rm -f \
    vre_manager \
    vre_nlp_node \
    vre_ocr_node \
    vre_www
  echo -e "${COLOR_GREEN}Done!${COLOR_END}"

  echo -e "${COLOR_BLUE}Remove existing prototype related images...${COLOR_END}"
  docker rmi \
    gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_manager \
    gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_nlp_node \
    gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_ocr_node \
    gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_www
  echo -e "${COLOR_GREEN}Done!${COLOR_END}"

  echo -e "${COLOR_BLUE}Remove the existing prototype related volume...${COLOR_END}"
  docker volume rm vre_files
  echo -e "${COLOR_GREEN}Done!${COLOR_END}"
  exit 0;
fi

echo -e "${COLOR_BLUE}Build container images from corresponding directories...${COLOR_END}"
# Every directory next to this script is built into an image named after it.
for dir in "$SCRIPT_DIR"/*/; do
  if [ -d "$dir" ]; then
    docker build -t gitlab.ub.uni-bielefeld.de:4567/pjentsch/"$(basename "$dir")" "$dir"
  fi
done
echo -e "${COLOR_GREEN}Done!${COLOR_END}"

echo -e "${COLOR_BLUE}Create prototype files volume...${COLOR_END}"
docker volume create vre_files
echo -e "${COLOR_GREEN}Done!${COLOR_END}"

echo -e "${COLOR_BLUE}Start prototype manager container...${COLOR_END}"
docker run \
  --name vre_manager \
  -d \
  -p 5000:5000 \
  -v vre_files:/root/vre_files \
  gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_manager:latest
echo -e "${COLOR_GREEN}Done!${COLOR_END}"

echo -e "${COLOR_BLUE}Start prototype nlp node container...${COLOR_END}"
docker run \
  --name vre_nlp_node \
  -dit \
  -v vre_files:/root/vre_files \
  gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_nlp_node:latest
echo -e "${COLOR_GREEN}Done!${COLOR_END}"

echo -e "${COLOR_BLUE}Start prototype ocr node container...${COLOR_END}"
docker run \
  --name vre_ocr_node \
  -dit \
  -v vre_files:/root/vre_files \
  gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_ocr_node:latest
echo -e "${COLOR_GREEN}Done!${COLOR_END}"

echo -e "${COLOR_BLUE}Start prototype www container...${COLOR_END}"
docker run \
  --name vre_www \
  -d \
  -p 80:80 \
  -v vre_files:/usr/share/nginx/html/vre_files \
  gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_www:latest
echo -e "${COLOR_GREEN}Done!${COLOR_END}"

echo -e "${COLOR_GREEN}The prototype is now completly loaded and reachable under ${PROTOTYPE_URL}!${COLOR_END}"

# Poll for queued jobs every 30 seconds until the script is interrupted.
while true; do
  sleep 30s
  echo -e "${COLOR_BLUE}Ask for jobs...${COLOR_END}"
  "$SCRIPT_DIR"/ask_for_jobs
done

23
vre_manager/Dockerfile Normal file
View File

@ -0,0 +1,23 @@
# Image for the manager service (REST API served by vre_manager.py).
FROM python:3-slim
# NOTE(review): MAINTAINER is marked deprecated in the Dockerfile reference;
# consider `LABEL maintainer=...` instead.
MAINTAINER Patrick Jentsch <p.jentsch@uni-bielefeld.de>
# Do not write .pyc files inside the container.
ENV PYTHONDONTWRITEBYTECODE=1
# Port the API is served on (see app.run(port=5000) in vre_manager.py).
EXPOSE 5000
# Python dependencies of the manager.
RUN pip install \
    connexion[swagger-ui] \
    flask-cors
# Directory layout expected by the COPY instructions below.
RUN mkdir -p \
    /root/vre_manager/request_handlers \
    /root/vre_manager/swagger
WORKDIR /root/vre_manager
# Application entry point, API specification and request handlers.
COPY vre_manager.py /root/vre_manager
COPY swagger/vre_manager.yml /root/vre_manager/swagger
COPY request_handlers/jobs.py /root/vre_manager/request_handlers
ENTRYPOINT ["python", "/root/vre_manager/vre_manager.py"]

View File

@ -0,0 +1,114 @@
import flask
import os
# Directory below which every job gets its own sub-directory for its file.
UPLOAD_DIRECTORY = os.path.abspath("/root/vre_files/jobs")
# Number of jobs created so far; create_job derives the next job id from it.
job_counter = 0
# Module-level list holding all job dictionaries.
jobs = []
def create_job(file, language, name, service, user):
    """Create a queued job and store its uploaded file.

    Args:
        file: Uploaded file object; its ``content_type`` and ``filename`` are
            read and its ``save`` method is called.
        language: Language value stored in the job.
        name: Name stored in the job.
        service: "nlp" (requires a text/plain upload) or "ocr" (requires an
            application/pdf upload).
        user: User stored in the job.

    Returns:
        A 201 response whose body is the JSON representation of the new job.

    Aborts with 415 when the upload's content type does not match the service
    and with 400 when the upload has no usable file name.
    """
    global job_counter

    if service == "nlp" and file.content_type != "text/plain":
        flask.abort(415)
    if service == "ocr" and file.content_type != "application/pdf":
        flask.abort(415)

    # The file name is supplied by the client. Keep only its last path
    # component so that it cannot point outside of the job directory.
    filename = os.path.basename((file.filename or "").replace("\\", "/"))
    if filename in ("", ".", ".."):
        flask.abort(400)

    job = {}
    job["file"] = filename
    job["id"] = "job-" + str(job_counter + 1)
    job["language"] = language
    job["name"] = name
    job["report"] = ""
    job["service"] = service
    job["status"] = "queued"
    job["user"] = user

    job_directory = os.path.join(UPLOAD_DIRECTORY, job["id"])
    os.makedirs(job_directory, exist_ok=True)
    file.save(os.path.join(job_directory, filename))

    # Register the job only after its file has been stored successfully.
    jobs.append(job)
    job_counter += 1
    return flask.make_response(flask.jsonify(job), 201)
def delete_job(id):
    """Delete the file of a queued job and mark the job as deleted.

    Args:
        id: Identifier of the job.

    Returns:
        An empty 204 response.

    Aborts with 404 when no job has this id and with 423 when the job is not
    in the "queued" state.
    """
    target = next((job for job in jobs if job["id"] == id), None)
    if target is None:
        flask.abort(404)
    if target["status"] != "queued":
        flask.abort(423)

    directory = os.path.join(UPLOAD_DIRECTORY, id)
    os.remove(os.path.join(directory, target["file"]))
    os.rmdir(directory)

    # The entry stays in the list; only "id" and "user" keep their values.
    target.update(
        file=None,
        language=None,
        name=None,
        report=None,
        service=None,
        status="deleted",
    )
    return flask.make_response("", 204)
def get_job(id):
    """Return the job with the given id or abort with 404.

    Args:
        id: Identifier of the job.
    """
    match = next((job for job in jobs if job["id"] == id), None)
    if match is None:
        flask.abort(404)
    return match
def get_jobs(name=None, service=None, status=None, user=None):
    """Return all jobs that match every given filter.

    A filter whose value is falsy (e.g. ``None`` or an empty string) is
    ignored; the remaining filters must all equal the job's entry of the
    same name.
    """
    filters = (("name", name), ("service", service), ("status", status), ("user", user))
    return [
        job
        for job in jobs
        if all(not wanted or job[key] == wanted for key, wanted in filters)
    ]
def update_job(id, file=None, language=None, name=None, report=None, status=None):
    """Update a job.

    When ``status`` is given, only the status (and ``report``, if given) is
    updated and all other arguments are ignored. Otherwise the job must still
    be queued, and its file, language, report and name are updated from the
    arguments that are set.

    Args:
        id: Identifier of the job.
        file: Optional replacement upload; must match the job's service
            (text/plain for "nlp", application/pdf for "ocr").
        language: Optional new language.
        name: Optional new name.
        report: Optional new report.
        status: Optional new status.

    Returns:
        The updated job dictionary.

    Aborts with 404 (unknown id), 423 (job not queued), 415 (wrong content
    type) or 400 (replacement upload without a usable file name).
    """
    job_directory = os.path.join(UPLOAD_DIRECTORY, id)
    for job in jobs:
        if job["id"] != id:
            continue
        if status:
            job["status"] = status
            if report:
                job["report"] = report
            return job
        if job["status"] != "queued":
            flask.abort(423)
        if file:
            if job["service"] == "nlp" and file.content_type != "text/plain":
                flask.abort(415)
            if job["service"] == "ocr" and file.content_type != "application/pdf":
                flask.abort(415)
            # The file name is supplied by the client. Keep only its last
            # path component so it cannot point outside of the job directory.
            filename = os.path.basename((file.filename or "").replace("\\", "/"))
            if filename in ("", ".", ".."):
                flask.abort(400)
            os.remove(os.path.join(job_directory, job["file"]))
            file.save(os.path.join(job_directory, filename))
            job["file"] = filename
        if language:
            job["language"] = language
        if report:
            job["report"] = report
        if name:
            job["name"] = name
        return job
    flask.abort(404)

View File

@ -0,0 +1,225 @@
swagger: "2.0"
info:
contact:
email: "p.jentsch@uni-bielefeld.de"
name: "Patrick Jentsch"
title: "SFB 1288 | INF: Plattformname - ReST API"
version: "1.0.0"
consumes:
- application/json
produces:
- application/json
basePath: /vre
definitions:
# Enums
Service:
type: string
enum: &SERVICES
- ocr
- nlp
Status:
type: string
enum: &STATUSES
- deleted
- failed
- finished
- queued
- running
Job:
type: object
properties:
file:
type: string
id:
type: string
language:
type: string
name:
type: string
report:
type: string
service:
type: string
enum: *SERVICES
status:
type: string
enum: *STATUSES
user:
type: string
Jobs:
type: array
items:
$ref: '#/definitions/Job'
paths:
/jobs:
get:
operationId: request_handlers.jobs.get_jobs
tags:
- jobs
summary: Get a list of all jobs matching the filter(s)
description: Get a list of all jobs matching the filter(s)
parameters:
- name: name
in: query
type: string
description: The name to filter for
required: False
- name: service
in: query
type: string
enum: *SERVICES
description: The service to filter for
required: False
- name: status
in: query
type: string
description: The status to filter for
required: False
- name: user
in: query
type: string
description: The user to filter for
required: False
responses:
200:
description: OK
schema:
$ref: '#/definitions/Jobs'
post:
operationId: request_handlers.jobs.create_job
tags:
- jobs
summary: Create a new job
description: Create a new job
consumes:
- multipart/form-data
parameters:
- name: file
in: formData
type: file
description: File
required: True
- name: language
in: query
description: Language
type: string
required: True
- name: name
in: query
description: Name
type: string
required: True
- name: service
in: query
description: Service
type: string
enum: *SERVICES
required: True
- name: user
in: query
description: User
type: string
required: True
responses:
201:
description: Created
schema:
$ref: '#/definitions/Job'
415:
description: Unsupported Media Type
/jobs/{id}:
delete:
operationId: request_handlers.jobs.delete_job
tags:
- jobs
summary: Delete job (specified by {job_id})
description: Delete job (specified by {job_id})
parameters:
- name: id
in: path
description: Job identifier
type: string
required: True
responses:
204:
description: No Content
404:
description: Not Found
423:
description: Locked
get:
operationId: request_handlers.jobs.get_job
tags:
- jobs
summary: Get job (specified by {job_id})
description: Get job (specified by {job_id})
parameters:
- name: id
in: path
description: Job identifier
type: string
required: True
responses:
200:
description: OK
schema:
$ref: '#/definitions/Job'
404:
description: Not Found
put:
operationId: request_handlers.jobs.update_job
tags:
- jobs
summary: Update job (specified by {job_id})
description: Update job (specified by {job_id})
consumes:
- multipart/form-data
parameters:
- name: file
in: formData
type: file
description: File
required: False
- name: id
in: path
description: Job identifier
type: string
required: True
- name: language
in: query
description: Language
type: string
required: False
- name: name
in: query
description: Name
type: string
required: False
- name: report
in: query
description: Report
type: string
required: False
- name: status
in: query
description: Status
type: string
enum: *STATUSES
required: False
responses:
200:
description: OK
schema:
$ref: '#/definitions/Job'
404:
description: Not Found
415:
description: Unsupported Media Type
423:
description: Locked

View File

@ -0,0 +1,7 @@
import connexion
import flask_cors
# Create the connexion application; specifications are looked up in swagger/.
app = connexion.FlaskApp(__name__, specification_dir='swagger/')
# Register the REST API described by vre_manager.yml.
app.add_api('vre_manager.yml')
# Enable CORS on the wrapped Flask application.
flask_cors.CORS(app.app)
# Start serving on port 5000.
app.run(port=5000)

43
vre_nlp_node/Dockerfile Normal file
View File

@ -0,0 +1,43 @@
# Image for NLP compute nodes: pyFlow (Python 2.7) drives spaCy (Python 3).
FROM debian:stretch-slim
# NOTE(review): MAINTAINER is marked deprecated in the Dockerfile reference;
# consider `LABEL maintainer=...` instead.
MAINTAINER Patrick Jentsch <p.jentsch@uni-bielefeld.de>
ENV LANG=C.UTF-8
# System packages: build tools, both Python versions, wget and zip.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    python2.7 \
    python3 \
    python3-dev \
    python3-pip \
    python3-setuptools \
    wget \
    zip
WORKDIR /root
# Install pyFlow
ENV PYFLOW_VERSION 1.1.20
RUN wget -nv https://github.com/Illumina/pyflow/releases/download/v"$PYFLOW_VERSION"/pyflow-"$PYFLOW_VERSION".tar.gz && \
    tar -xzf pyflow-"$PYFLOW_VERSION".tar.gz && \
    rm pyflow-"$PYFLOW_VERSION".tar.gz && \
    cd pyflow-"$PYFLOW_VERSION" && \
    python2.7 setup.py build install && \
    cd ..
# Install spaCy and the models for the languages offered by spacy_nlp
RUN pip3 install wheel && pip3 install -U spacy && \
    python3 -m spacy download de && \
    python3 -m spacy download en && \
    python3 -m spacy download es && \
    python3 -m spacy download fr && \
    python3 -m spacy download pt
# Input and output directories below the working directory /root.
RUN mkdir files_for_nlp files_from_nlp
# Workflow entry point (nlp) and the per-file tagger it calls (spacy_nlp).
COPY nlp /usr/local/bin
COPY spacy_nlp /usr/local/bin
CMD ["/bin/bash"]

38
vre_nlp_node/README.md Normal file
View File

@ -0,0 +1,38 @@
# vre_nlp_node
Dieses Repository stellt ein Dockerfile zur Erstellung eines Dockerimages zur linguistischen Datenverarbeitung (NLP) zur Verfügung. Es werden Textdateien entgegengenommen und verticalized text-Dateien ausgegeben.
## Dockerimage erstellen
Die GitLab Registry stellt ein automatisch erstelltes Dockerimage zur Verfügung, das stets die neuesten Änderungen beinhaltet. Das Dockerimage kann aber auch lokal erstellt werden, dazu muss folgender Befehl ins Terminal eingegeben werden.
```bash
docker build -t gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_nlp_node .
```
## Nutzung
### Starten eines Dockercontainers
```bash
docker run \
--name <containername> \
-dit \
-v <datalocation>/files_for_nlp:/root/files_for_nlp \
-v <datalocation>/files_from_nlp:/root/files_from_nlp \
gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_nlp_node
```
### Daten zur linguistischen Datenverarbeitung in das Eingabeverzeichnis kopieren
```bash
cp <textfile1> <textfile2> ... <textfilen> <datalocation>/files_for_nlp
```
### Linguistische Datenverarbeitung starten
```bash
docker exec -it <containername> nlp -i /root/files_for_nlp -o /root/files_from_nlp -l <languagecode>
```
Valide Angaben für `<languagecode>` sind:
* de (Deutsch)
* en (Englisch)
* es (Spanisch)
* fr (Französisch)
* pt (Portugiesisch)
Nach Beendigung des Vorgangs stehen die aufbereiteten Daten im Verzeichnis `<datalocation>/files_from_nlp` zur Verfügung.

131
vre_nlp_node/nlp Executable file
View File

@ -0,0 +1,131 @@
#!/usr/bin/env python2.7
# coding=utf-8
"""
nlp
Usage: For usage instructions run with option --help
Author: Patrick Jentsch <p.jentsch@uni-bielefeld.de>
"""
import argparse
import multiprocessing
import os
import sys
from pyflow import WorkflowRunner
def parse_arguments():
    """Parse the command line of the nlp workflow.

    Returns:
        The argparse namespace with the attributes ``inputDir``, ``lang``,
        ``outputDir`` and ``nCores``.
    """
    # The text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog`` (the program name shown in the
    # usage line), not the description.
    parser = argparse.ArgumentParser(
        description="Performs NLP of documents utilizing spaCy. "
                    "Output is .vrt."
    )
    parser.add_argument("-i",
                        dest="inputDir",
                        help="Input directory.",
                        required=True)
    parser.add_argument("-l",
                        dest='lang',
                        help="Language for NLP",
                        required=True)
    parser.add_argument("-o",
                        dest="outputDir",
                        help="Output directory.",
                        required=True)
    parser.add_argument("--nCores",
                        default=multiprocessing.cpu_count(),
                        dest="nCores",
                        help="Total number of cores available.",
                        required=False,
                        type=int)
    return parser.parse_args()
class NLPWorkflow(WorkflowRunner):
    """pyFlow workflow that tags text files with spacy_nlp and zips the output.

    Args:
        jobs: List of dictionaries with the keys "path" (input file) and
            "output_dir" (directory the results are written to).
        lang: Language code passed to spacy_nlp.
        nCores: Number of cores given on the command line.
    """

    def __init__(self, jobs, lang, nCores):
        self.jobs = jobs
        self.lang = lang
        self.nCores = nCores

    def workflow(self):
        """Add the mkdir, NLP and zip tasks for all jobs."""
        ###
        # Task "mkdir_job": create output directories
        # Dependencies: None
        ###
        mkdir_jobs = []
        for number, job in enumerate(self.jobs, 1):
            cmd = 'mkdir -p "%s"' % (
                job["output_dir"]
            )
            mkdir_jobs.append(self.addTask(label="mkdir_job_-_%i" % (number), command=cmd))

        ###
        # Task "spacy_nlp_job": perform NLP
        # Dependencies: mkdir_jobs
        ###
        self.waitForTasks()
        nlp_jobs = []
        for number, job in enumerate(self.jobs, 1):
            cmd = 'spacy_nlp -i "%s" -o "%s" -l "%s"' % (
                job["path"],
                os.path.join(job["output_dir"], os.path.basename(job["path"]).rsplit(".", 1)[0] + ".vrt"),
                self.lang
            )
            nlp_jobs.append(self.addTask(label="nlp_job_-_%i" % (number), command=cmd, dependencies=mkdir_jobs))

        ###
        # Task "zip_job": compress output
        # Dependencies: nlp_jobs
        ###
        for number, job in enumerate(self.jobs, 1):
            # Paths are quoted like in the other commands so that names
            # containing spaces do not split into several arguments.
            cmd = 'zip -jqr "%s" "%s"' % (
                job["output_dir"] + "_-_nlp",
                job["output_dir"]
            )
            self.addTask(label="zip_job_-_%i" % (number), command=cmd, dependencies=nlp_jobs)
def analyze_jobs(inputDir, outputDir, level=1):
    """Collect one job per .txt file in inputDir and its direct sub-directories.

    Args:
        inputDir: Directory that is scanned.
        outputDir: Directory that mirrors inputDir for the results.
        level: Current depth; directories deeper than level 2 are ignored.

    Returns:
        A list of dictionaries with "path" (the input file) and "output_dir"
        (outputDir joined with the file name without its extension).
    """
    if level > 2:
        return []
    found = []
    for entry in os.listdir(inputDir):
        source = os.path.join(inputDir, entry)
        if os.path.isdir(source):
            found.extend(analyze_jobs(source, os.path.join(outputDir, entry), level + 1))
            continue
        if entry.endswith(".txt"):
            stem = entry.rsplit(".", 1)[0]
            found.append({"path": source, "output_dir": os.path.join(outputDir, stem)})
    return found
def main():
    """Parse the command line, run the NLP workflow and exit with its result."""
    options = parse_arguments()
    jobs = analyze_jobs(options.inputDir, options.outputDir)
    workflow = NLPWorkflow(jobs, options.lang, options.nCores)
    sys.exit(workflow.run(nCores=options.nCores))


if __name__ == "__main__":
    main()

59
vre_nlp_node/spacy_nlp Executable file
View File

@ -0,0 +1,59 @@
#!/usr/bin/env python3
# coding=utf-8
import argparse
import os
from xml.sax.saxutils import escape
import spacy

parser = argparse.ArgumentParser(description="Tag a .txt file with spaCy and "
                                             "save it in .vrt format")
parser.add_argument("-i",
                    dest="input",
                    help="Input file.",
                    required=True)
parser.add_argument("-l",
                    choices=["de", "en", "es", "fr", "pt"],
                    dest="lang",
                    help="Language for tagging",
                    required=True)
parser.add_argument("-o",
                    dest="output",
                    help="Output file.",
                    required=True)
args = parser.parse_args()

# Maps the language codes accepted by -l to the spaCy model that is loaded.
SPACY_MODELS = {"de": "de_core_news_sm", "en": "en_core_web_sm",
                "es": "es_core_news_sm", "fr": "fr_core_news_sm",
                "pt": "pt_core_news_sm"}

# Set the language model for spacy
nlp = spacy.load(SPACY_MODELS[args.lang])

# Read text from the input file
with open(args.input, encoding="utf-8") as input_file:
    text = input_file.read()

# Run spacy nlp over the text
doc = nlp(text)

# The file name (without extension) becomes the id of the <text> element;
# it is escaped because it ends up inside a double-quoted XML attribute.
text_id = escape(os.path.basename(args.input).rsplit(".", 1)[0], {'"': "&quot;"})

# Create the output file; it is written as UTF-8 to match the XML header and
# closed even if writing fails.
with open(args.output, "w", encoding="utf-8") as output_file:
    output_file.write('<?xml version="1.0" encoding="UTF-8"?>\n<corpus>\n<text id="' + text_id + '">\n')
    for sent in doc.sents:
        output_file.write('<s>\n')
        for token in sent:
            # Skip whitespace tokens like "\n" or "\t"
            if token.text.isspace():
                continue
            # Write all information in .vrt style to the output file
            # text, lemma, simple_pos, pos, ner
            # Values are XML-escaped so that "<" or "&" inside a token cannot
            # be mistaken for markup.
            fields = (token.text, token.lemma_, token.pos_, token.tag_,
                      token.ent_type_ if token.ent_type_ != "" else "NULL")
            output_file.write("\t".join(escape(field) for field in fields) + "\n")
        output_file.write('</s>\n')
    output_file.write('</text>\n</corpus>')

49
vre_ocr_node/Dockerfile Normal file
View File

@ -0,0 +1,49 @@
# Image for OCR compute nodes: pyFlow (Python 2.7) drives Tesseract OCR.
FROM debian:stretch-slim
# NOTE(review): MAINTAINER is marked deprecated in the Dockerfile reference;
# consider `LABEL maintainer=...` instead.
MAINTAINER Patrick Jentsch <p.jentsch@uni-bielefeld.de>
ENV LANG=C.UTF-8
# System packages: PDF tools, both Python versions, wget, zip and what is
# needed to add the additional HTTPS apt repository below.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    apt-transport-https \
    ca-certificates \
    gnupg2 \
    pdftk \
    poppler-utils \
    python2.7 \
    python3 \
    wget \
    zip
WORKDIR /root
# Install pyFlow
ENV PYFLOW_VERSION 1.1.20
RUN wget -nv https://github.com/Illumina/pyflow/releases/download/v"$PYFLOW_VERSION"/pyflow-"$PYFLOW_VERSION".tar.gz && \
    tar -xzf pyflow-"$PYFLOW_VERSION".tar.gz && \
    rm pyflow-"$PYFLOW_VERSION".tar.gz && \
    cd pyflow-"$PYFLOW_VERSION" && \
    python2.7 setup.py build install && \
    cd ..
# Install Tesseract OCR and Data Files
# Tesseract comes from the notesalexp.org repository; the trained data files
# are downloaded from the tessdata_best repository, except deu_frak, which is
# taken from the tessdata repository.
RUN echo "deb https://notesalexp.org/tesseract-ocr/stretch/ stretch main" >> /etc/apt/sources.list && \
    wget -O - https://notesalexp.org/debian/alexp_key.asc | apt-key add - && \
    apt-get update && \
    apt-get install -y --no-install-recommends tesseract-ocr && \
    wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/deu.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
    wget -nv https://github.com/tesseract-ocr/tessdata/raw/master/deu_frak.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
    wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/eng.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
    wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/enm.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
    wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/fra.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
    wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/frm.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
    wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/por.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata && \
    wget -nv https://github.com/tesseract-ocr/tessdata_best/raw/master/spa.traineddata -P /usr/share/tesseract-ocr/4.00/tessdata
# Input and output directories below the working directory /root.
RUN mkdir files_for_ocr files_from_ocr
# Workflow entry point (ocr) and the hOCR-to-TEI converter it calls.
COPY hocrtotei /usr/local/bin
COPY ocr /usr/local/bin
CMD ["/bin/bash"]

47
vre_ocr_node/README.md Normal file
View File

@ -0,0 +1,47 @@
# vre_ocr_node
Dieses Repository stellt ein Dockerfile zur Erstellung eines Dockerimages zur automatischen Zeichenerkennung zur Verfügung. Es werden PDF-Dateien entgegengenommen und PDF-Dateien, TEI konforme XML-Dateien und Textdateien ausgegeben.
## Funktionsweise
Eingabedateien durchlaufen eine Verarbeitungsroutine, die in Form einer Pipeline realisiert wurde. Diese Pipeline besteht aus drei Schritten:
1. Jede PDF-Datei aus dem Eingabeverzeichnis wird in einzelne Seiten aufgespalten.
2. Die resultierenden Dateien aus Schritt 1 werden durch eine automatische Texterkennung (OCR) weiterverarbeitet.
3. Die verarbeiteten Einzelseiten werden wieder zusammengeführt.
## Dockerimage erstellen
Die GitLab Registry stellt ein automatisch erstelltes Dockerimage zur Verfügung, das stets die neuesten Änderungen beinhaltet. Das Dockerimage kann aber auch lokal erstellt werden, dazu muss folgender Befehl ins Terminal eingegeben werden.
```bash
docker build -t gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_ocr_node .
```
## Nutzung
### Starten eines Dockercontainers
```bash
docker run \
--name <containername> \
-dit \
-v <datalocation>/files_for_ocr:/root/files_for_ocr \
-v <datalocation>/files_from_ocr:/root/files_from_ocr \
gitlab.ub.uni-bielefeld.de:4567/pjentsch/vre_ocr_node
```
### Daten zur Texterkennung in das Eingabeverzeichnis kopieren
```bash
cp <pdffile1> <pdffile2> ... <pdffilen> <datalocation>/files_for_ocr
```
### Texterkennung starten
```bash
docker exec -it <containername> ocr -i /root/files_for_ocr -o /root/files_from_ocr -l <languagecode>
```
Valide Angaben für `<languagecode>` sind:
* deu (Deutsch)
* deu_frak (Deutsch Fraktur)
* eng (Englisch)
* enm (Mittelenglisch)
* fra (Französisch)
* frm (Mittelfranzösisch)
* por (Portugiesisch)
* spa (Spanisch)
Nach Beendigung des Vorgangs stehen die aufbereiteten Daten im Verzeichnis `<datalocation>/files_from_ocr` zur Verfügung.

44
vre_ocr_node/hocrtotei Executable file
View File

@ -0,0 +1,44 @@
#!/usr/bin/env python3
# coding=utf-8
import xml.etree.ElementTree as ET
from xml.sax.saxutils import escape
import os
import sys

# Usage: hocrtotei <directory with .hocr files> <output file>
# The .hocr files are processed in sorted order, one <pb/> element per file.
input_files = [name for name in sorted(os.listdir(sys.argv[1])) if name.endswith(".hocr")]

# The file is written as UTF-8 to match the XML header and is closed even if
# one of the input files cannot be parsed.
with open(sys.argv[2], "w", encoding="utf-8") as output_file:
    output_file.write('<?xml version="1.0" encoding="UTF-8"?>\n' +
                      '<TEI xmlns="http://www.tei-c.org/ns/1.0" xml:id="dtabf">\n' +
                      ' <teiHeader>\n' +
                      ' <fileDesc>\n' +
                      ' <titleStmt/>\n' +
                      ' <publicationStmt/>\n' +
                      ' <sourceDesc/>\n' +
                      ' </fileDesc>\n' +
                      ' <encodingDesc/>\n' +
                      ' <profileDesc/>\n' +
                      ' </teiHeader>\n' +
                      ' <text>\n' +
                      ' <body>\n')
    for input_file in input_files:
        tree = ET.parse(os.path.join(sys.argv[1], input_file))
        # The page number is the part between the first "-" and the first "."
        # of the file name (e.g. "page-01.hocr" -> "01").
        output_file.write(' <pb n="%s"/>\n' % (input_file.split(".")[0].split("-")[1]))
        for para in tree.findall(".//*[@class='ocr_par']"):
            output_file.write(' <p>\n')
            for line in para.findall(".//*[@class='ocr_line']"):
                first_word_in_line = True
                for word in line.findall(".//*[@class='ocrx_word']"):
                    # itertext() also collects text nested in child elements
                    # of the word, which word.text alone would miss.
                    word_text = "".join(word.itertext()).strip()
                    if word_text:
                        output_file.write((" " if first_word_in_line else " ") + escape(word_text))
                        first_word_in_line = False
                if not first_word_in_line:
                    output_file.write('<lb/>\n')
            output_file.write(' </p>\n')
    output_file.write(' </body>\n' +
                      ' </text>\n' +
                      '</TEI>')

214
vre_ocr_node/ocr Executable file
View File

@ -0,0 +1,214 @@
#!/usr/bin/env python2
# coding=utf-8
"""
ocr
Usage: For usage instructions run with option --help
Author: Patrick Jentsch <p.jentsch@uni-bielefeld.de>
"""
import argparse
import multiprocessing
import os
import sys
from pyflow import WorkflowRunner
def parse_arguments():
    """Parse the command line of the ocr workflow.

    Returns:
        The argparse namespace with the attributes ``inputDir``, ``lang``,
        ``outputDir``, ``keepIntermediates`` and ``nCores``.
    """
    # The text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog`` (the program name shown in the
    # usage line), not the description.
    parser = argparse.ArgumentParser(
        description="Performs OCR of documents utilizing Tesseract OCR. "
                    "Outputs are .pdf and .txt."
    )
    parser.add_argument("-i",
                        dest="inputDir",
                        help="Input directory.",
                        required=True)
    parser.add_argument("-l",
                        dest='lang',
                        help="Language for OCR",
                        required=True)
    parser.add_argument("-o",
                        dest="outputDir",
                        help="Output directory.",
                        required=True)
    parser.add_argument("--keep-intermediates",
                        action='store_true',
                        default=False,
                        dest="keepIntermediates",
                        help="Keep intermediate files.",
                        required=False)
    parser.add_argument("--nCores",
                        default=multiprocessing.cpu_count(),
                        dest="nCores",
                        help="Total number of cores available.",
                        required=False,
                        type=int)
    return parser.parse_args()
class OCRWorkflow(WorkflowRunner):
    """pyFlow workflow that runs Tesseract OCR on PDF files.

    Every input file is split into one .tif per page, each page is processed
    by tesseract, and the per-page results are merged into one .xml (TEI),
    one .pdf and one .txt file, which are finally zipped.

    Args:
        jobs: List of dictionaries with the keys "path" (input file) and
            "output_dir" (directory the results are written to).
        keepIntermediates: If true, the "tmp" directories are not removed.
        lang: Language code passed to tesseract.
        nCores: Number of cores given on the command line.
    """

    def __init__(self, jobs, keepIntermediates, lang, nCores):
        self.jobs = jobs
        self.keepIntermediates = keepIntermediates
        self.lang = lang
        self.nCores = nCores

    def workflow(self):
        """Add all tasks of the OCR pipeline for all jobs."""
        ###
        # Task "mkdir_job": create output directories
        # Dependencies: None
        ###
        mkdir_jobs = []
        for number, job in enumerate(self.jobs, 1):
            cmd = 'mkdir -p "%s" "%s" "%s"' % (
                job["output_dir"],
                os.path.join(job["output_dir"], "tmp", "tesseract"),
                os.path.join(job["output_dir"], "tmp", "tiff_files")
            )
            mkdir_jobs.append(self.addTask(label="mkdir_job_-_%i" % (number), command=cmd))

        ###
        # Task "split_job": split input file into one .tif file per page
        # Dependencies: mkdir_jobs
        ###
        split_jobs = []
        for number, job in enumerate(self.jobs, 1):
            cmd = 'pdftoppm "%s" "%s" -tiff -r 300 -tiffcompression lzw -cropbox' % (
                job["path"],
                os.path.join(job["output_dir"], "tmp", "tiff_files", "page")
            )
            # Each split task gets its own number in the label (the mkdir
            # counter used before gave every split task the same label) and
            # waits for the directories it writes into.
            split_jobs.append(self.addTask(label="split_job_-_%i" % (number), command=cmd, dependencies=mkdir_jobs))

        ###
        # Task "tesseract_job": perform OCR
        # Dependencies: split_jobs
        ###
        self.waitForTasks()
        tesseract_jobs = []
        tesseract_job_number = 0
        for job in self.jobs:
            # This list is empty if you don't wait for split_jobs to complete
            for file in os.listdir(os.path.join(job["output_dir"], "tmp", "tiff_files")):
                tesseract_job_number += 1
                cmd = 'tesseract "%s" "%s" -l "%s" hocr pdf txt' % (
                    os.path.join(job["output_dir"], "tmp", "tiff_files", file),
                    os.path.join(job["output_dir"], "tmp", "tesseract", file.rsplit(".", 1)[0]),
                    self.lang
                )
                tesseract_jobs.append(self.addTask(label="tesseract_job_-_%i" % (tesseract_job_number), command=cmd, dependencies=split_jobs, nCores=min(4, self.nCores)))

        ###
        # Task "hocr_to_teip5_job": create TEI P5 file from hocr files
        # Dependencies: tesseract_jobs
        ###
        hocr_to_tei_jobs = []
        for number, job in enumerate(self.jobs, 1):
            cmd = 'hocrtotei "%s" "%s"' % (
                os.path.join(job["output_dir"], "tmp", "tesseract"),
                os.path.join(job["output_dir"], os.path.basename(job["path"]).rsplit(".", 1)[0] + ".xml")
            )
            hocr_to_tei_jobs.append(self.addTask(label="hocr_to_tei_job_-_%i" % (number), command=cmd, dependencies=tesseract_jobs))

        ###
        # Task "pdf_merge_job": Merge .pdf files
        # Dependencies: tesseract_jobs
        ###
        pdf_merge_jobs = []
        for number, job in enumerate(self.jobs, 1):
            cmd = 'pdftk "%s"/*.pdf cat output "%s"' % (
                os.path.join(job["output_dir"], "tmp", "tesseract"),
                os.path.join(job["output_dir"], os.path.basename(job["path"]).rsplit(".", 1)[0] + ".pdf")
            )
            pdf_merge_jobs.append(self.addTask(label="pdf_merge_job_-_%i" % (number), command=cmd, dependencies=tesseract_jobs))

        ###
        # Task "txt_merge_job": Merge .txt files
        # Dependencies: tesseract_jobs
        ###
        txt_merge_jobs = []
        for number, job in enumerate(self.jobs, 1):
            cmd = 'cat "%s"/*.txt > "%s"' % (
                os.path.join(job["output_dir"], "tmp", "tesseract"),
                os.path.join(job["output_dir"], os.path.basename(job["path"]).rsplit(".", 1)[0] + ".txt")
            )
            txt_merge_jobs.append(self.addTask(label="txt_merge_job_-_%i" % (number), command=cmd, dependencies=tesseract_jobs))

        merge_jobs = hocr_to_tei_jobs + pdf_merge_jobs + txt_merge_jobs

        ###
        # Task "cleanup_job": remove temporary files
        # Dependencies: hocr_to_tei_jobs + pdf_merge_jobs + txt_merge_jobs
        ###
        cleanup_jobs = []
        if not self.keepIntermediates:
            for number, job in enumerate(self.jobs, 1):
                cmd = 'rm -r "%s"' % (
                    os.path.join(job["output_dir"], "tmp")
                )
                cleanup_jobs.append(self.addTask(label="cleanup_job_-_%i" % (number), command=cmd, dependencies=merge_jobs))

        ###
        # Task "zip_job": compress output
        # Dependencies: cleanup_jobs (merge jobs if nothing is cleaned up)
        ###
        # Without cleanup tasks the zip tasks would have no dependencies at
        # all and could start before the results exist, so they wait for the
        # merge tasks instead.
        zip_dependencies = cleanup_jobs if cleanup_jobs else merge_jobs
        for number, job in enumerate(self.jobs, 1):
            # Paths are quoted like in the other commands so that names
            # containing spaces do not split into several arguments.
            cmd = 'zip -jqr "%s" "%s"' % (
                job["output_dir"] + "_-_ocr",
                job["output_dir"]
            )
            self.addTask(label="zip_job_-_%i" % (number), command=cmd, dependencies=zip_dependencies)
def analyze_jobs(inputDir, outputDir, level=1):
    """Collect one OCR job description per PDF file found below ``inputDir``.

    The input directory itself (level 1) and its direct subdirectories
    (level 2) are scanned; anything nested deeper is ignored. The directory
    layout of the input is mirrored below ``outputDir``.

    Args:
        inputDir: Directory to scan for PDF files.
        outputDir: Directory below which the per-file output directories
            are placed.
        level: Current recursion depth; callers normally keep the default.

    Returns:
        A list of dicts with the keys ``"path"`` (path of the PDF file) and
        ``"output_dir"`` (``outputDir`` joined with the file name without its
        extension), ordered by directory entry name.
    """
    jobs = []
    if level > 2:
        return jobs
    # os.listdir() returns entries in arbitrary order; sorting keeps the job
    # order (and everything numbered after it) reproducible between runs.
    for entry in sorted(os.listdir(inputDir)):
        entry_path = os.path.join(inputDir, entry)
        if os.path.isdir(entry_path):
            jobs += analyze_jobs(
                entry_path,
                os.path.join(outputDir, entry),
                level + 1
            )
        # Compare the extension case-insensitively so that "SCAN.PDF" is not
        # silently skipped.
        elif entry.lower().endswith(".pdf"):
            jobs.append({
                "path": entry_path,
                "output_dir": os.path.join(outputDir, entry.rsplit(".", 1)[0])
            })
    return jobs
def main():
    """Parse the command line, build the OCR workflow and run it.

    The process exits with the value returned by the workflow's ``run``.
    """
    cli = parse_arguments()
    job_list = analyze_jobs(cli.inputDir, cli.outputDir)
    workflow = OCRWorkflow(job_list, cli.keepIntermediates, cli.lang, cli.nCores)
    exit_code = workflow.run(nCores=cli.nCores)
    sys.exit(exit_code)


# Only run the workflow when executed as a script, not on import.
if __name__ == "__main__":
    main()

5
vre_www/Dockerfile Normal file
View File

@ -0,0 +1,5 @@
# Image that serves the static VRE web front end with nginx.
FROM nginx:1.14.2
# COPY creates the destination directory (including missing parents) itself,
# so no separate "RUN mkdir -p" layer is needed. The trailing slashes make it
# explicit that the contents of website/ end up inside the vre/ directory.
COPY website/ /usr/share/nginx/html/vre/

View File

@ -0,0 +1,222 @@
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>Opaque | Benutzerverwaltung</title>
<link href="/vre/images/sfb_1288.png" rel="icon" type="image/png">
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
<link rel="stylesheet" href="css/materialize.min.css">
<link rel="stylesheet" href="css/style.css">
<script src="js/vre_manager_rest.js"></script>
<script src="js/vre.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body class="blue-grey lighten-5">
<header>
<ul id="main-nav-account-dropdown" class="dropdown-content">
<li class="active">
<a href="account.html">Benutzerverwaltung</a>
</li>
<li class="divider"></li>
<li>
<a href="javascript:logout();">Abmelden</a>
</li>
</ul>
<nav class="blue-grey darken-2">
<div class="container">
<div class="nav-wrapper">
<a href="portal.html" class="brand-logo">
<i class="material-icons hide-on-small-only">cloud</i>
Opaque
</a>
<a href="#" data-target="main-nav-mobile" class="sidenav-trigger">
<i class="material-icons">menu</i>
</a>
<ul id="main-nav" class="right hide-on-med-and-down">
<li>
<a href="job_manager.html">
<i class="material-icons left">work</i>
Auftragsübersicht
</a>
</li>
<li>
<a href="nlp.html">
<i class="material-icons left">format_textdirection_l_to_r</i>
Linguistische Datenverarbeitung
</a>
</li>
<li>
<a href="ocr.html">
<i class="material-icons left">find_in_page</i>
Texterkennung
</a>
</li>
<li class="active">
<a id="main-nav-account" class="dropdown-trigger" href="#!" data-target="main-nav-account-dropdown">
<i class="material-icons left">account_circle</i>
Benutzer
<i class="material-icons right">arrow_drop_down</i>
</a>
</li>
</ul>
</div>
</div>
</nav>
<ul class="sidenav" id="main-nav-mobile">
<li>
<a href="job_manager.html">
<i class="material-icons left">work</i>
Auftragsübersicht
</a>
</li>
<li>
<a href="nlp.html">
<i class="material-icons left">format_textdirection_l_to_r</i>
Linguistische Datenverarbeitung
</a>
</li>
<li>
<a href="ocr.html">
<i class="material-icons left">find_in_page</i>
Texterkennung
</a>
</li>
<li class="active">
<a href="account.html">
<i class="material-icons">account_circle</i>
Benutzer
</a>
</li>
<li>
<a href="javascript:logout();">
<i class="material-icons">account_circle</i>
Abmelden
</a>
</li>
</ul>
</header>
<main>
<div class="container">
<div class="row">
<div class="col s12 m8">
<div class="card">
<div class="card-content">
<span class="card-title">Benutzerverwaltung</span>
<p>&nbsp;</p>
<!-- User data form. Each label's "for" attribute must equal the id of its
     input, otherwise the label is not associated with the field (no focus on
     click, no accessible name). -->
<form id="user_form" class="row">
  <div class="input-field col s12 m6">
    <i class="material-icons prefix">account_circle</i>
    <input id="user-form-first-name" type="text">
    <label for="user-form-first-name">Vorname</label>
  </div>
  <div class="input-field col s12 m6">
    <input id="user-form-last-name" type="text">
    <label for="user-form-last-name">Nachname</label>
  </div>
  <div class="input-field col s12 m6">
    <i class="material-icons prefix">email</i>
    <input id="user-form-e-mail" type="text">
    <label for="user-form-e-mail">E-Mail</label>
  </div>
  <div class="input-field col s12 m6">
    <p style="height: 64px;">
      <label>
        <input id="user-form-notifications" type="checkbox" checked="checked">
        <span>Benachrichtigungen per E-Mail erhalten</span>
      </label>
    </p>
  </div>
  <div class="input-field col s12 m6">
    <i class="material-icons prefix">phone</i>
    <input id="user-form-phone" type="text">
    <label for="user-form-phone">Telefon</label>
  </div>
  <!-- Both buttons start disabled; account.js enables them once a field
       differs from the stored user data. -->
  <div class="input-field col s12 right-align">
    <a id="user-form-reset" class="waves-effect waves-light btn disabled">Verwerfen</a>
    <a id="user-form-submit" class="waves-effect waves-light btn disabled">Bestätigen</a>
  </div>
</form>
</div>
</div>
</div>
<div class="col s12 m4">
<div class="card">
<div class="card-content">
<span class="card-title">Texterkennung</span>
<p>
<i class="material-icons medium blue-grey-text text-darken-2 left">find_in_page</i>
Textdaten werden aus Scans oder Fotos zur anschließenden
Weiterverarbeitung extrahiert.
</p>
</div>
<div class="card-action right-align">
<a href="ocr.html" class="waves-effect waves-light btn">Zum Dienst</a>
</div>
</div>
<div class="card">
<div class="card-content">
<span class="card-title">Linguistische Datenverarbeitung</span>
<p>
<i class="material-icons medium blue-grey-text text-darken-2 left">format_textdirection_l_to_r</i>
Mit Hilfe datenverarbeitender Methoden werden Textdaten mit
weiteren Informationen angereichert.
</p>
</div>
<div class="card-action right-align">
<a href="nlp.html" class="waves-effect waves-light btn">Zum Dienst</a>
</div>
</div>
<div class="card">
<div class="card-content">
<span class="card-title">Auftragsübersicht</span>
<p>
<i class="material-icons medium blue-grey-text text-darken-2 left">work</i>
Hier wird der aktuelle Status sämtlicher vom Benutzer erstellten
Aufträge aufgelistet.
</p>
</div>
<div class="card-action right-align">
<a href="job_manager.html" class="waves-effect waves-light btn">Öffnen</a>
</div>
</div>
</div>
</div>
</div>
</main>
<footer class="page-footer blue-grey darken-2">
<div class="container">
<div class="row">
<div class="col s12">
<p class="grey-text text-lighten-4 left">
<a href="https://www.uni-bielefeld.de/">
<img src="images/universitaet_bielefeld.png" alt="Logo: Universität Bielefeld" style="height: 72.5px">
</a>
</p>
<p class="grey-text text-lighten-4 right">
Gefördert durch die<br>
<a href="https://www.dfg.de/">
<img src="images/dfg.png" alt="Logo: Deutsche Forschungsgemeinschaft" style="height: 50px;">
</a>
</p>
</div>
</div>
</div>
<div class="footer-copyright">
<div class="container">
© 2019 Copyright
<a class="grey-text text-lighten-4" href="https://www.uni-bielefeld.de/sfb1288">Universität Bielefeld | SFB 1288</a>
<a class="grey-text text-lighten-4 right" href="impressum.html">Impressum</a>
</div>
</div>
</footer>
<script src="js/materialize.min.js"></script>
<script>
M.AutoInit();
</script>
<script src="js/account.js"></script>
</body>
</html>

9067
vre_www/website/css/materialize.css vendored Normal file

File diff suppressed because it is too large Load Diff

13
vre_www/website/css/materialize.min.css vendored Normal file

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,43 @@
/*
 * Page layout: the body is a column flex container that is at least as tall
 * as the viewport, and <main> takes up all remaining space. On short pages
 * this pushes whatever follows <main> (the footer) to the bottom.
 */
body {
display: flex;
min-height: 100vh;
flex-direction: column;
}
main {
flex: 1 0 auto;
}
/*
 * Tab bar overrides (used for the job status filter): dark text on every
 * tab, a light background on hover and a white background on the active tab.
 * NOTE(review): these presumably override the defaults of the vendored
 * materialize.css - confirm when upgrading that file.
 */
.tabs .tab {
cursor: pointer;
}
.tabs .tab a {
color: rgba(0,0,0,0.87);
}
.tabs .tab a:hover {
background-color: #eceff1;
color: rgba(0,0,0,0.87);
}
.tabs .tab a.active, .tabs .tab a:focus.active {
background-color: #fff;
color: rgba(0,0,0,0.87);
}
/* Underline that marks the active tab. */
.tabs .indicator {
background-color: #455a64;
}
/* Small utility classes. */
.no-padding {
padding: 0;
}
.overflow-hidden {
overflow: hidden;
}
.overflow-visible {
overflow: visible;
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,54 @@
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>Opaque | Anmeldung</title>
<link href="/vre/images/sfb_1288.png" rel="icon" type="image/png">
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
<link rel="stylesheet" href="css/materialize.min.css">
<link rel="stylesheet" href="css/style.css">
<script src="js/vre_manager_rest.js"></script>
<script src="js/vre.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body class="blue-grey lighten-5">
<main class="valign-wrapper">
<div class="container">
<div class="row">
<div class="col card s12 m6 offset-m3">
<div class="card-content">
<span class="card-title"><i class="material-icons left">cloud</i> Opaque</span>
</div>
</div>
<div class="col card s12 m6 offset-m3">
<div class="card-content">
<span class="card-title">Anmeldung</span>
<form id="login_form" class="row" action="javascript:LoginFormSubmitHandler();">
<div class="input-field col s12">
<input id="login-form-user" type="text" class="validate" autofocus="autofocus" onfocus="this.select()">
<label for="login-form-user">Benutzername</label>
</div>
<div class="input-field col s12">
<input id="login-form-password" type="password" class="validate">
<label for="login-form-password">Passwort</label>
<span class="helper-text" data-error="Benutzername oder Passwort falsch"></span>
</div>
<div class="input-field col s12 hide">
<input id="login_form_submit" type="submit">
</div>
</form>
</div>
<div class="card-action right-align">
<a href="javascript:LoginFormSubmitHandler();" class="waves-effect waves-light btn">Anmelden</a>
</div>
</div>
</div>
</div>
</main>
<script src="js/materialize.min.js"></script>
<script>
M.AutoInit();
</script>
<script src="js/index.js"></script>
</body>
</html>

View File

@ -0,0 +1,197 @@
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>Opaque | Auftragsverwaltung</title>
<link href="/vre/images/sfb_1288.png" rel="icon" type="image/png">
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
<link rel="stylesheet" href="css/materialize.min.css">
<link rel="stylesheet" href="css/style.css">
<script src="js/vre_manager_rest.js"></script>
<script src="js/vre.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body class="blue-grey lighten-5">
<header>
<ul id="main-nav-account-dropdown" class="dropdown-content">
<li>
<a href="account.html">Benutzerverwaltung</a>
</li>
<li class="divider"></li>
<li>
<a href="javascript:logout();">Abmelden</a>
</li>
</ul>
<nav class="blue-grey darken-2">
<div class="container">
<div class="nav-wrapper">
<a href="portal.html" class="brand-logo">
<i class="material-icons hide-on-small-only">cloud</i>
Opaque
</a>
<a href="#" data-target="main-nav-mobile" class="sidenav-trigger">
<i class="material-icons">menu</i>
</a>
<ul id="main-nav" class="right hide-on-med-and-down">
<li class="active">
<a href="job_manager.html">
<i class="material-icons left">work</i>
Auftragsübersicht
</a>
</li>
<li>
<a href="nlp.html">
<i class="material-icons left">format_textdirection_l_to_r</i>
Linguistische Datenverarbeitung
</a>
</li>
<li>
<a href="ocr.html">
<i class="material-icons left">find_in_page</i>
Texterkennung
</a>
</li>
<li>
<a id="main-nav-account" class="dropdown-trigger" href="#!" data-target="main-nav-account-dropdown">
<i class="material-icons left">account_circle</i>
Benutzer
<i class="material-icons right">arrow_drop_down</i>
</a>
</li>
</ul>
</div>
</div>
</nav>
<ul class="sidenav" id="main-nav-mobile">
<li class="active">
<a href="job_manager.html">
<i class="material-icons left">work</i>
Auftragsübersicht
</a>
</li>
<li>
<a href="nlp.html">
<i class="material-icons left">format_textdirection_l_to_r</i>
Linguistische Datenverarbeitung
</a>
</li>
<li>
<a href="ocr.html">
<i class="material-icons left">find_in_page</i>
Texterkennung
</a>
</li>
<li>
<a href="account.html">
<i class="material-icons">account_circle</i>
Benutzer
</a>
</li>
<li>
<a href="javascript:logout();">
<i class="material-icons">account_circle</i>
Abmelden
</a>
</li>
</ul>
</header>
<main>
<div class="container">
<div class="row">
<div class="col s12 m8">
<div class="card">
<div class="card-content">
<span class="card-title">Auftragsübersicht</span>
<p>
Die Auftragsübersicht bietet eine Übersicht über Ihre
Aufträge. Nach erfolgreichem Abschluss können die Ergebnisse
hier heruntergeladen werden.
</p>
<p>&nbsp;</p>
<div class="card-panel no-padding z-depth-0">
<ul class="tabs">
<li class="tab col s3">
<a data-job-filter="" class="active">Alle</a>
</li>
<li class="tab col s3">
<a data-job-filter="finished">Abgeschlossen</a>
</li>
<li class="tab col s3">
<a data-job-filter="running">Laufend</a>
</li>
<li class="tab col s3">
<a data-job-filter="queued">Wartend</a>
</li>
</ul>
</div>
<ul id="jobs-collapsible" class="collapsible expandable z-depth-0"></ul>
</div>
</div>
</div>
<div class="col s12 m4">
<div class="card">
<div class="card-content">
<span class="card-title">Linguistische Datenverarbeitung</span>
<p>
<i class="material-icons medium blue-grey-text text-darken-2 left">format_textdirection_l_to_r</i>
Mit Hilfe datenverarbeitender Methoden werden Textdaten mit
weiteren Informationen angereichert.
</p>
</div>
<div class="card-action right-align">
<a href="nlp.html" class="waves-effect waves-light btn">Zum Dienst</a>
</div>
</div>
<div class="card">
<div class="card-content">
<span class="card-title">Texterkennung</span>
<p>
<i class="material-icons medium blue-grey-text text-darken-2 left">find_in_page</i>
Textdaten werden aus Scans oder Fotos zur anschließenden
Weiterverarbeitung extrahiert.
</p>
</div>
<div class="card-action right-align">
<a href="ocr.html" class="waves-effect waves-light btn">Zum Dienst</a>
</div>
</div>
</div>
</div>
</div>
</main>
<footer class="page-footer blue-grey darken-2">
<div class="container">
<div class="row">
<div class="col s12">
<p class="grey-text text-lighten-4 left">
<a href="https://www.uni-bielefeld.de/">
<img src="images/universitaet_bielefeld.png" alt="Logo: Universität Bielefeld" style="height: 72.5px">
</a>
</p>
<p class="grey-text text-lighten-4 right">
Gefördert durch die<br>
<a href="https://www.dfg.de/">
<img src="images/dfg.png" alt="Logo: Deutsche Forschungsgemeinschaft" style="height: 50px;">
</a>
</p>
</div>
</div>
</div>
<div class="footer-copyright">
<div class="container">
© 2019 Copyright
<a class="grey-text text-lighten-4" href="https://www.uni-bielefeld.de/sfb1288">Universität Bielefeld | SFB 1288</a>
<a class="grey-text text-lighten-4 right" href="impressum.html">Impressum</a>
</div>
</div>
</footer>
<script src="js/materialize.min.js"></script>
<script>
M.AutoInit();
</script>
<script src="js/job_manager.js"></script>
</body>
</html>

View File

@ -0,0 +1,110 @@
/**
 * Fill the user form with the data of the current user and register the
 * event handlers that track, discard and save changes.
 *
 * vre.js calls this function (if it exists) once the user has been loaded
 * from the local storage.
 */
function setUserCallback() {
  /**
   * Copy the values of the global user object into the form controls.
   */
  function fillForm() {
    userFormEMailElement.value = user["e-mail"];
    userFormFirstNameElement.value = user["firstName"];
    userFormLastNameElement.value = user["lastName"];
    userFormNotificationsElement.checked = user["notifications"];
    userFormPhoneElement.value = user["phone"];
    // The values were set from script, so let Materialize re-evaluate the
    // floating labels; otherwise a label can overlap a restored value.
    M.updateTextFields();
  }

  fillForm();

  // Track per field whether the form value differs from the stored value.
  userFormEMailElement.addEventListener("input", event => {
    eMailChanged = user["e-mail"] != userFormEMailElement.value;
    evaluateUserForm();
  });
  userFormFirstNameElement.addEventListener("input", event => {
    firstNameChanged = user["firstName"] != userFormFirstNameElement.value;
    evaluateUserForm();
  });
  userFormLastNameElement.addEventListener("input", event => {
    lastNameChanged = user["lastName"] != userFormLastNameElement.value;
    evaluateUserForm();
  });
  userFormNotificationsElement.addEventListener("change", event => {
    notificationsChanged = user["notifications"] != userFormNotificationsElement.checked;
    evaluateUserForm();
  });
  userFormPhoneElement.addEventListener("input", event => {
    phoneChanged = user["phone"] != userFormPhoneElement.value;
    evaluateUserForm();
  });

  // Reset button: drop all pending changes and restore the stored values.
  userFormResetElement.addEventListener("click", event => {
    eMailChanged = false;
    firstNameChanged = false;
    lastNameChanged = false;
    notificationsChanged = false;
    phoneChanged = false;
    fillForm();
    evaluateUserForm();
  });

  // Submit button: copy every changed field into the user object and
  // persist the object in the local storage.
  userFormSubmitElement.addEventListener("click", event => {
    if (eMailChanged) {
      user["e-mail"] = userFormEMailElement.value;
      eMailChanged = false;
    }
    if (firstNameChanged) {
      user["firstName"] = userFormFirstNameElement.value;
      firstNameChanged = false;
    }
    if (lastNameChanged) {
      user["lastName"] = userFormLastNameElement.value;
      lastNameChanged = false;
    }
    if (notificationsChanged) {
      user["notifications"] = userFormNotificationsElement.checked;
      notificationsChanged = false;
    }
    if (phoneChanged) {
      user["phone"] = userFormPhoneElement.value;
      phoneChanged = false;
    }
    localStorage.setItem("user", JSON.stringify(user));
    M.toast({html: "Benutzerdaten wurden aktualisiert."});
    evaluateUserForm();
  });
}
/**
 * Enable the reset and submit buttons while at least one form field differs
 * from the stored user data, and disable them otherwise.
 */
function evaluateUserForm() {
  var changeFlags = [
    eMailChanged,
    firstNameChanged,
    lastNameChanged,
    notificationsChanged,
    phoneChanged
  ];
  var nothingChanged = !changeFlags.some(Boolean);
  [userFormResetElement, userFormSubmitElement].forEach(button => {
    // The second argument forces the class on (true) or off (false).
    button.classList.toggle("disabled", nothingChanged);
  });
}
// Per-field flags: true while the form value differs from the stored user
// data. Nothing has been edited when the script is loaded.
var eMailChanged = false;
var firstNameChanged = false;
var lastNameChanged = false;
var notificationsChanged = false;
var phoneChanged = false;

// Form controls of the user form, looked up once when the script is loaded.
var userFormEMailElement = document.getElementById("user-form-e-mail");
var userFormFirstNameElement = document.getElementById("user-form-first-name");
var userFormLastNameElement = document.getElementById("user-form-last-name");
var userFormNotificationsElement = document.getElementById("user-form-notifications");
var userFormPhoneElement = document.getElementById("user-form-phone");
var userFormResetElement = document.getElementById("user-form-reset");
var userFormSubmitElement = document.getElementById("user-form-submit");

View File

@ -0,0 +1,17 @@
/**
 * Handle a submit of the login form: try to log in with the entered
 * credentials and mark both inputs as invalid if login() throws.
 */
function LoginFormSubmitHandler() {
  var passwordInput = document.getElementById("login-form-password");
  var userInput = document.getElementById("login-form-user");
  try {
    // login() expects the password first, then the user name.
    login(passwordInput.value, userInput.value);
  } catch (error) {
    [passwordInput, userInput].forEach(input => {
      input.classList.add("invalid");
    });
  }
}

View File

@ -0,0 +1,43 @@
/**
 * Enable the submit button of the job form only while a file is selected
 * and both the language and the name field are filled in.
 */
function jobFormChangeHandler() {
  var isComplete = jobFormFileElement.files.length !== 0
    && jobFormLanguageElement.value !== ""
    && jobFormNameElement.value !== "";
  if (isComplete) {
    jobFormSubmitElement.classList.remove("disabled");
  } else {
    jobFormSubmitElement.classList.add("disabled");
  }
}
/**
 * Create a job for the current user from the values entered in the job form.
 */
function jobFormSubmitHandler() {
  var selectedFile = jobFormFileElement.files[0];
  var language = jobFormLanguageElement.value;
  var jobName = jobFormNameElement.value;
  var service = jobFormServiceElement.value;
  createJob(selectedFile, language, jobName, service, user["id"]);
}
// Elements of the job form, looked up once when the script is loaded.
var jobFormElement = document.getElementById("job_form");
var jobFormFileElement = document.getElementById("job_form_file");
var jobFormLanguageElement = document.getElementById("job_form_language");
var jobFormNameElement = document.getElementById("job_form_name");
var jobFormServiceElement = document.getElementById("job_form_service");
var jobFormSubmitElement = document.getElementById("job_form_submit");

// Re-evaluate the state of the submit button whenever one of the required
// inputs changes.
[
  [jobFormFileElement, "change"],
  [jobFormLanguageElement, "change"],
  [jobFormNameElement, "input"]
].forEach(binding => {
  binding[0].addEventListener(binding[1], jobFormChangeHandler);
});

// Clear the form once the "create job" modal has finished closing.
M.Modal.init(document.getElementById("create_job"), {
  "onCloseEnd": function(modal) {
    jobFormElement.reset();
    M.updateTextFields();
  }
});

View File

@ -0,0 +1,172 @@
// Display names of the supported languages, per service and language code.
var JOBLANGUAGES = {
  "nlp": {
    "de": "Deutsch",
    "en": "Englisch",
    "fr": "Französisch",
    "pt": "Portugiesisch",
    "es": "Spanisch"
  },
  "ocr": {
    "deu": "Deutsch",
    "deu_frak": "Deutsch (Fraktur)",
    "eng": "Englisch",
    "fra": "Französisch",
    "por": "Portugiesisch",
    "spa": "Spanisch"
  }
};

// Material icon name shown for each service.
var JOBSERVICEICONS = {
  "nlp": "format_textdirection_l_to_r",
  "ocr": "find_in_page"
};

// Badge color class for each job status.
var JOBSTATUSCOLORS = {
  "failed": "pink",
  "finished": "teal",
  "queued": "amber",
  "running": "indigo"
};

// Short label shown in the status badge.
var JOBSTATUSNAMES = {
  "failed": 'Fehlgeschlagen',
  "finished": 'Abgeschlossen',
  "queued": 'Wartend',
  "running": 'Laufend'
};

// Explanatory text shown in the expanded job entry. The strings are built
// by concatenation instead of backslash line continuations, because a
// continuation embeds the indentation of the following source line.
var JOBSTATUSTEXTS = {
  'failed': 'Es ist ein Fehler bei der Bearbeitung des Auftrages aufgetreten. '
          + 'Für weitere Informationen wenden Sie sich mit dem '
          + 'Auftragskennzeichen an den Seitenbetreiber.',
  'finished': 'Der Auftrag ist abgeschlossen, die Ergebnisse können nun '
            + 'heruntergeladen werden. Die Quelldatei bleibt verfügbar und '
            + 'kann jederzeit abgerufen werden.',
  'queued': 'Der Auftrag befindet sich in der Warteschlange und wird vom '
          + 'System bearbeitet, sobald Rechenkapazität verfügbar ist.',
  'running': 'Dieser Auftrag wird gerade bearbeitet, sobald der Vorgang '
           + 'abgeschlossen wurde, können die Ergebnisse hier heruntergeladen '
           + 'werden.'
};
// The list element that holds one entry per job
var jobsCollapsibleElement = document.getElementById("jobs-collapsible");
// The job status to show; the empty string shows all jobs
var jobStatusFilter = '';

// Let several job entries be expanded at the same time.
M.Collapsible.init(jobsCollapsibleElement, {accordion: false});

// Clicking a filter tab stores its status as the active filter and
// re-renders the job list.
document.querySelectorAll('[data-job-filter]').forEach(filterTab => {
  filterTab.addEventListener("click", clickEvent => {
    jobStatusFilter = clickEvent.currentTarget.dataset.jobFilter;
    jobListManager();
  });
});
/**
 * Escape a value so that it can be placed into HTML text or into a
 * double-quoted attribute value without being interpreted as markup.
 * @param {*} value - The value to escape; it is converted to a string.
 * @returns {string} The escaped string.
 */
function _escapeHtml(value) {
  return String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}

/**
 * Build the inner markup (header and body) of a job list entry.
 * @param {Object} job - The job to render.
 * @returns {string} The HTML for the entry.
 */
function _buildJobMarkup(job) {
  var status = job["status"];
  // Values that originate from the user or the server are escaped; the
  // lookups into the constant tables of this file are trusted.
  var id = _escapeHtml(job["id"]);
  var file = _escapeHtml(job["file"]);
  // Result archive: file name without its last extension + "_-_<service>.zip"
  var resultFile = _escapeHtml(
    job["file"].split(".").slice(0, -1).join(".") + '_-_' + job["service"] + '.zip'
  );
  return [
    '<div class="collapsible-header">',
    '<i class="material-icons dropdown-indicator">arrow_drop_down</i>',
    '<i class="material-icons">',
    JOBSERVICEICONS[job["service"]],
    '</i>',
    _escapeHtml(job["name"]),
    '<span class="job-status new badge ' + JOBSTATUSCOLORS[status] + '" data-badge-caption="">',
    JOBSTATUSNAMES[status],
    '</span>',
    '</div>',
    '<div class="collapsible-body">',
    '<p class="job-status-text">' + JOBSTATUSTEXTS[status] + '</p>',
    '<p>&nbsp;</p>',
    '<p class="overflow-hidden">',
    '<i class="material-icons blue-grey-text text-darken-2 left">bookmark</i>',
    'Auftragskennzeichen: ',
    id,
    '</p>',
    '<p class="overflow-hidden">',
    '<i class="material-icons blue-grey-text text-darken-2 left">insert_drive_file</i>',
    'Datei: ',
    '<a href="../vre_files/jobs/' + id + '/' + file + '">',
    file,
    '</a>',
    '</p>',
    '<p class="overflow-hidden">',
    '<i class="material-icons blue-grey-text text-darken-2 left">language</i>',
    'Sprache: ',
    JOBLANGUAGES[job["service"]][job["language"]],
    '</p>',
    '<p class="job-result' + (status != 'finished' ? ' hide' : '') + ' overflow-hidden">',
    '<i class="material-icons blue-grey-text text-darken-2 left">archive</i>',
    'Ergebnisse: ',
    '<a href="../vre_files/jobs/' + id + '/' + resultFile + '">',
    'Download',
    '</a>',
    '</p>',
    '</div>'
  ].join('');
}

/**
 * Create the list entry for a job that is not part of the page yet.
 * @param {Object} job - The job to render.
 * @returns {HTMLLIElement} The new, not yet attached list element.
 */
function _createJobElement(job) {
  var jobElement = document.createElement("li");
  jobElement.setAttribute("id", job["id"]);
  jobElement.innerHTML = _buildJobMarkup(job);
  // Flip the arrow icon in the header whenever the entry is toggled
  jobElement.querySelector('.collapsible-header').addEventListener("click", event => {
    var dropdownIndicatorElement = event.currentTarget.querySelector('.dropdown-indicator');
    if (dropdownIndicatorElement.innerHTML === "arrow_drop_down") {
      dropdownIndicatorElement.innerHTML = "arrow_drop_up";
    } else {
      dropdownIndicatorElement.innerHTML = "arrow_drop_down";
    }
  });
  return jobElement;
}

/**
 * Bring the status badge, status text and result link of an existing list
 * entry in line with the current job status.
 * @param {Element} jobElement - The list entry of the job.
 * @param {Object} job - The job with its current status.
 */
function _refreshJobElement(jobElement, job) {
  var status = job["status"];
  var jobStatusElement = jobElement.querySelector('.job-status');
  // Nothing to do if the badge already shows the current status
  if (jobStatusElement.innerHTML == JOBSTATUSNAMES[status]) return;
  // Update the job status element (label and color)
  jobStatusElement.innerHTML = JOBSTATUSNAMES[status];
  jobStatusElement.classList.remove("pink", "teal", "amber", "indigo");
  jobStatusElement.classList.add(JOBSTATUSCOLORS[status]);
  // Update the job status text element
  jobElement.querySelector('.job-status-text').innerHTML = JOBSTATUSTEXTS[status];
  // A finished job gets its result download link unhidden
  if (status === "finished") {
    jobElement.querySelector('.job-result').classList.remove("hide");
  }
}

/**
 * Synchronise the job list on the page with the global jobs array: create
 * entries for jobs that are not shown yet, refresh the entries whose status
 * changed and hide the entries that do not match the active status filter.
 */
function jobListManager() {
  jobs.forEach(job => {
    var jobElement = document.getElementById(job["id"]);
    if (jobElement) {
      _refreshJobElement(jobElement, job);
    } else {
      jobElement = _createJobElement(job);
      jobsCollapsibleElement.appendChild(jobElement);
    }
    // Hide the entry if a filter is active and the job status doesn't match
    if (jobStatusFilter != "" && job["status"] != jobStatusFilter) {
      jobElement.classList.add('hide');
    } else {
      jobElement.classList.remove('hide');
    }
  });
}

// vre.js calls setJobsCallback (if it is a function) whenever the job list
// has been loaded or has changed.
setJobsCallback = jobListManager;

12374
vre_www/website/js/materialize.js vendored Normal file

File diff suppressed because it is too large Load Diff

6
vre_www/website/js/materialize.min.js vendored Normal file

File diff suppressed because one or more lines are too long

154
vre_www/website/js/vre.js Normal file
View File

@ -0,0 +1,154 @@
// Delay between two polls of the job list (passed to setInterval)
var JOBSPOLLINTERVALL = 1000;

// Hard-coded accounts of the prototype, keyed by user name.
// NOTE(review): the password is stored in clear text in client-side code;
// acceptable for a prototype only.
var USERS = {
  testbenutzer: {
    "e-mail": "t.benutzer@uni-bielefeld.de",
    firstName: "Test",
    id: "testbenutzer",
    lastName: "Benutzer",
    notifications: true,
    password: "passwort",
    phone: "+49 521 106-XXXXX"
  }
};

// jobs: the job list of the current user
// user: the current user
var jobs, user;
/**
 * Log a user in and redirect the visitor to the portal page.
 * @param {string} password - The password to use for login.
 * @param {string} user - The user to use for login.
 * @throws {string} If the user doesn't exist or the password is wrong.
 */
function login(password, user) {
  var account = USERS[user];
  // Reject unknown users and wrong passwords with the same exception
  if (!account || account["password"] !== password) {
    throw "User doesn't exist or password was wrong!";
  }
  // Remember the user in the local storage; this marks the visitor as
  // logged in for all pages
  localStorage.setItem("user", JSON.stringify(account));
  // Continue on the portal page
  window.location = "/vre/portal.html";
}
/**
 * Log the current user out: wipe the local storage and send the visitor
 * back to the login page.
 */
function logout() {
  var loginPage = "/vre/";
  // The stored user (and job list) is what marks a visitor as logged in
  localStorage.clear();
  window.location = loginPage;
}
/**
 * The main function, which is executed on each page after it is completely
 * loaded. It enforces the login state of the page, loads the current user
 * and the job list, and starts the continuous poll of the job list.
 */
function main() {
  M.Dropdown.init(
    document.getElementById("main-nav-account"),
    {"constrainWidth": false, "coverTrigger": false}
  );

  var path = window.location.pathname;
  // Indicates whether the current page is the login page
  var isLoginPage = path === "/vre/" || path === "/vre/index.html";

  // A visitor counts as logged in if the local storage contains a user
  var storedUser = localStorage.getItem("user");
  if (!storedUser) {
    // Redirect to the login page if the current page isn't the login page
    if (!isLoginPage) window.location = "index.html";
    return;
  }
  // Redirect to the portal page if the current page is the login page
  if (isLoginPage) window.location = "portal.html";
  // Load user from the local storage
  user = JSON.parse(storedUser);
  // If a function named setUserCallback exists, call it
  if (typeof(setUserCallback) === "function") setUserCallback();

  // Prefer the copy of the job list in the local storage, if there is one
  var storedJobs = localStorage.getItem("jobs");
  if (storedJobs) {
    jobs = JSON.parse(storedJobs);
    // If a function named setJobsCallback exists, call it
    if (typeof(setJobsCallback) === "function") setJobsCallback();
  } else {
    // Load the job list from the vre server
    getJobs({"user": user["id"]}, function(newJobs) {
      jobs = newJobs;
      // If a function named setJobsCallback exists, call it
      if (typeof(setJobsCallback) === "function") setJobsCallback();
    });
  }

  // Set a continuous poll for the job list
  setInterval(getJobs, JOBSPOLLINTERVALL, {"user": user["id"]}, updateJobs);
}
/**
 * Updates the job list in the local storage and the contents of the global
 * job list variable. It also informs the visitor about job status changes.
 *
 * Old and new jobs are matched by their id, so jobs that were added,
 * removed or reordered since the last poll are handled correctly.
 * @param {Object[]} newJobs - The new job list.
 */
function updateJobs(newJobs) {
  // The new job list as JSON string
  var newJobsAsJSONString = JSON.stringify(newJobs);
  // Nothing to do if the new list equals the stored copy
  if (localStorage.getItem("jobs") == newJobsAsJSONString) return;

  // Job names end up in toast HTML, so neutralise markup characters
  function escapeHtml(value) {
    return String(value)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");
  }

  // Status of every previously known job, keyed by job id. The global job
  // list may still be unset if the initial request has not finished yet.
  var previousStatusById = Object.create(null);
  (jobs || []).forEach(function(oldJob) {
    previousStatusById[oldJob["id"]] = oldJob["status"];
  });

  newJobs.forEach(function(newJob) {
    var id = newJob["id"];
    // A job that was unknown so far has no status change to report
    if (!(id in previousStatusById)) return;
    if (previousStatusById[id] == newJob["status"]) return;
    // Inform the visitor with a toast message about the new status
    var name = escapeHtml(newJob["name"]);
    switch (newJob["status"]) {
      case "failed":
        M.toast({html: "Auftrag '" + name + "' ist fehlgeschlagen."});
        break;
      case "finished":
        M.toast({html: "Auftrag '" + name + "' wurde abgeschlossen."});
        break;
      case "running":
        M.toast({html: "Auftrag '" + name + "' wird bearbeitet."});
        break;
      default:
    }
  });

  // Save the new job list to the local storage
  localStorage.setItem("jobs", newJobsAsJSONString);
  // Replace the global job list variable with the new job list
  jobs = newJobs;
  // If a function named setJobsCallback exists, call it
  if (typeof(setJobsCallback) === "function") setJobsCallback();
}
// Run main() as the handler of the window's load event.
window.onload = main;

View File

@ -0,0 +1,180 @@
// Base URL of the vre manager REST service: the host the page was loaded
// from, always addressed via plain HTTP on port 5000.
var VREMANAGER = "http://" + window.location.hostname + ":5000";
/**
 * Sends an asynchronous HTTP request to the vre manager in order to create a
 * new job.
 * @param {File} file - The file to process with the job.
 * @param {string} language - The language of the file contents.
 * @param {string} name - A name for the job, this doesn't have to be unique.
 * @param {string} service - The service this job belongs to.
 * @param {string} user - The user this job belongs to.
 * @param {function} [callback] - A function, that is called with the parsed
 *                                response after the job has been created.
 */
function createJob(file, language, name, service, user, callback = console.log) {
  var formData = new FormData();
  formData.append("file", file);

  // Every value is percent-encoded: the name is free text, and characters
  // such as "&", "#" or "+" would otherwise change the meaning of the query.
  var query = "?language=" + encodeURIComponent(language)
            + "&name=" + encodeURIComponent(name)
            + "&service=" + encodeURIComponent(service)
            + "&user=" + encodeURIComponent(user);

  var oReq = new XMLHttpRequest();
  oReq.addEventListener("loadend", function() {
    if (this.status === 201) {
      M.toast({html: "Auftrag '" + name + "' wurde erstellt."});
      callback(JSON.parse(this.responseText));
    } else if (this.status === 415) {
      M.toast({html: "Auftrag '" + name + "' konnte nicht erstellt werden.<br>(Unerwarteter Dateityp)"});
    } else {
      M.toast({html: "Auftrag '" + name + "' konnte nicht erstellt werden.<br>(Fehlercode: " + this.status + ")"});
    }
  });
  oReq.open("POST", VREMANAGER + "/vre/jobs" + query);
  oReq.send(formData);
}
/**
 * Ask the vre manager, via an asynchronous HTTP request, to delete an
 * existing job. The visitor is informed about the outcome with a toast.
 * @param {string} id - The id of the job to be deleted.
 * @param {function} [callback] - A function, that is called without
 *                                arguments after the job has been deleted.
 */
function deleteJob(id, callback = console.log) {
  var request = new XMLHttpRequest();
  request.addEventListener("loadend", () => {
    // Any status other than 204 is reported as a failure
    if (request.status !== 204) {
      M.toast({html: "Auftrag '" + id + "' konnte nicht gelöscht werden.<br>(Fehlercode: " + request.status + ")"});
      return;
    }
    M.toast({html: "Auftrag '" + id + "' wurde gelöscht."});
    callback();
  });
  request.open("DELETE", VREMANAGER + "/vre/jobs/" + id);
  request.send();
}
/**
 * Sends an asynchronous HTTP GET request to the vre manager in order to get
 * all information about a specific and existing job.
 * @param {string} id - The id of the job to get the information from.
 * @param {function} [callback] - A function that is called with the parsed
 * JSON response body, but only if the request succeeded (HTTP status 200).
 * Any other status is ignored silently.
 */
function getJob(id, callback = console.log) {
  var oReq;

  oReq = new XMLHttpRequest();
  oReq.addEventListener("loadend", function(oEvent) {
    if (this.status === 200) {
      callback(JSON.parse(this.responseText));
    }
  });
  // Encode the id so that characters like '/', '?' or '#' cannot change the
  // requested path.
  oReq.open("GET", VREMANAGER + "/vre/jobs/" + encodeURIComponent(id));
  oReq.send();
}
/**
 * Sends an asynchronous HTTP GET request to the vre manager in order to get a
 * (filtered) list of all jobs.
 * @param {object} [filters=null] - An object containing filters. E.g.
 * {'service': 'nlp', 'status': 'finished'}
 * Only the keys "name", "service", "status" and "user" are evaluated; keys
 * with a falsy value are skipped.
 * @param {function} [callback] - A function that is called with the parsed
 * JSON response body, but only if the request succeeded (HTTP status 200).
 * Any other status is ignored silently.
 */
function getJobs(filters = null, callback = console.log) {
  var oReq;
  var params;
  var query;

  oReq = new XMLHttpRequest();
  oReq.addEventListener("loadend", function(oEvent) {
    if (this.status === 200) {
      callback(JSON.parse(this.responseText));
    }
  });
  // Collect "key=value" pairs in a fixed order. The values are URL-encoded so
  // that special characters cannot break or alter the query string.
  params = [];
  if (filters) {
    ["name", "service", "status", "user"].forEach(function(key) {
      if (filters[key]) {
        params.push(key + "=" + encodeURIComponent(filters[key]));
      }
    });
  }
  query = (params.length > 0) ? "?" + params.join("&") : "";
  oReq.open("GET", VREMANAGER + "/vre/jobs" + query);
  oReq.send();
}
/**
 * Sends an asynchronous HTTP PUT request to the vre manager in order to update
 * the values of a specific and existing job. A toast reports the outcome to
 * the user.
 * @param {string} id - The id of the job to update.
 * @param {object} [newValues=null] - An object containing the new values. E.g.
 * {'status': 'finished'}
 * Only the keys "name", "language", "report" and "status" are evaluated; keys
 * with a falsy value are skipped.
 * @param {function} [callback] - A function that is called with the parsed
 * JSON response body, but only if the job was updated (HTTP status 200).
 */
function updateJob(id, newValues = null, callback = console.log) {
  var oReq;
  var params;
  var query;

  oReq = new XMLHttpRequest();
  oReq.addEventListener("loadend", function(oEvent) {
    if (this.status === 200) {
      M.toast({html: "Auftrag '" + id + "' wurde aktualisiert."});
      callback(JSON.parse(this.responseText));
    } else {
      M.toast({html: "Auftrag '" + id + "' konnte nicht aktualisiert werden.<br>(Fehlercode: " + this.status + ")"});
    }
  });
  // Collect "key=value" pairs in a fixed order. The values are URL-encoded so
  // that free text (e.g. a report) cannot break or alter the query string.
  params = [];
  if (newValues) {
    ["name", "language", "report", "status"].forEach(function(key) {
      if (newValues[key]) {
        params.push(key + "=" + encodeURIComponent(newValues[key]));
      }
    });
  }
  query = (params.length > 0) ? "?" + params.join("&") : "";
  // The id is encoded as well so that it cannot change the requested path.
  oReq.open("PUT", VREMANAGER + "/vre/jobs/" + encodeURIComponent(id) + query);
  oReq.send();
}

295
vre_www/website/nlp.html Normal file
View File

@ -0,0 +1,295 @@
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>Opaque | Linguistische Datenverarbeitung</title>
<link href="/vre/images/sfb_1288.png" rel="icon" type="image/png">
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
<link rel="stylesheet" href="css/materialize.min.css">
<link rel="stylesheet" href="css/style.css">
<script src="js/vre_manager_rest.js"></script>
<script src="js/vre.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body class="blue-grey lighten-5">
<header>
<ul id="main-nav-account-dropdown" class="dropdown-content">
<li>
<a href="account.html">Benutzerverwaltung</a>
</li>
<li class="divider"></li>
<li>
<a href="javascript:logout();">Abmelden</a>
</li>
</ul>
<nav class="blue-grey darken-2">
<div class="container">
<div class="nav-wrapper">
<a href="portal.html" class="brand-logo">
<i class="material-icons hide-on-small-only">cloud</i>
Opaque
</a>
<a href="#" data-target="main-nav-mobile" class="sidenav-trigger">
<i class="material-icons">menu</i>
</a>
<ul id="main-nav" class="right hide-on-med-and-down">
<li>
<a href="job_manager.html">
<i class="material-icons left">work</i>
Auftragsübersicht
</a>
</li>
<li class="active">
<a href="nlp.html">
<i class="material-icons left">format_textdirection_l_to_r</i>
Linguistische Datenverarbeitung
</a>
</li>
<li>
<a href="ocr.html">
<i class="material-icons left">find_in_page</i>
Texterkennung
</a>
</li>
<li>
<a id="main-nav-account" class="dropdown-trigger" href="#!" data-target="main-nav-account-dropdown">
<i class="material-icons left">account_circle</i>
Benutzer
<i class="material-icons right">arrow_drop_down</i>
</a>
</li>
</ul>
</div>
</div>
</nav>
<ul class="sidenav" id="main-nav-mobile">
<li>
<a href="job_manager.html">
<i class="material-icons left">work</i>
Auftragsübersicht
</a>
</li>
<li class="active">
<a href="nlp.html">
<i class="material-icons left">format_textdirection_l_to_r</i>
Linguistische Datenverarbeitung
</a>
</li>
<li>
<a href="ocr.html">
<i class="material-icons left">find_in_page</i>
Texterkennung
</a>
</li>
<li>
<a href="account.html">
<i class="material-icons">account_circle</i>
Benutzer
</a>
</li>
<li>
<a href="javascript:logout();">
<i class="material-icons">account_circle</i>
Abmelden
</a>
</li>
</ul>
</header>
<main>
<div class="container">
<div class="row">
<div class="col s12 m8">
<div class="card">
<div class="card-content">
<span class="card-title">Linguistische Datenverarbeitung</span>
<p>
Mit Hilfe computergestützter linguistischer
Datenverarbeitungsmethoden können Textdateien mit weiteren
Informationen angereichert werden. Auf dieser Plattform werden
derzeit die im Folgenden aufgeführten
Textverarbeitungsroutinen in automatisierter Form zur
Verfügung gestellt.
</p>
<div class="row">
<div class="col s12 m6">
<div class="card z-depth-0">
<div class="card-content">
<span class="card-title">
<i class="material-icons blue-grey-text text-darken-2 left">layers</i>
Tokenisierung
</span>
<p>
Aufteilung eines Textes in Sätze und Wörter. Dies
ist zur weiteren Verarbeitung notwendig.
</p>
</div>
</div>
</div>
<div class="col s12 m6">
<div class="card z-depth-0">
<div class="card-content">
<span class="card-title">
<i class="material-icons blue-grey-text text-darken-2 left">layers</i>
Lemmatisierung
</span>
<p>
Reduktion der Flexionsformen eines Wortes auf dessen
Grundform.
</p>
</div>
</div>
</div>
<div class="col s12 m6">
<div class="card z-depth-0">
<div class="card-content">
<span class="card-title">
<i class="material-icons blue-grey-text text-darken-2 left">layers</i>
Part-of-speech-Tagging
</span>
<p>
Kontext- und definitionsbezogene Zuordnung von Wörtern
und Satzzeichen zu Wortarten.
</p>
</div>
</div>
</div>
<div class="col s12 m6">
<div class="card z-depth-0">
<div class="card-content">
<span class="card-title">
<i class="material-icons blue-grey-text text-darken-2 left">layers</i>
Eigennamenerkennung
</span>
<p>
Identifikation von Wörtern, die eine Entität
beschreiben, wie Firmen- und Personennamen.
</p>
</div>
</div>
</div>
</div>
<p>
Die Ausgabe erfolgt in Form einer <i>verticalized text</i>
Datei (Dateiendung: .vrt). In diesem Format kann der Text
einfach in Auswertungsprogramme, wie zum Beispiel der <a href="http://cwb.sourceforge.net/">IMS Open Corpus Workbench</a>,
eingefügt werden.
</p>
</div>
</div>
</div>
<div class="col s12 m4">
<div class="card">
<div class="card-content">
<span class="card-title">Auftrag erstellen</span>
<p>
<i class="material-icons medium blue-grey-text text-darken-2 left">playlist_add</i>
Auftragsformular zur linguistischen Datenverarbeitung von
Textdateien.
</p>
</div>
<div class="card-action right-align">
<a href="#create_job" class="waves-effect waves-light btn modal-trigger">Erstellen</a>
</div>
</div>
<div class="card">
<div class="card-content">
<span class="card-title">Auftragsübersicht</span>
<p>
<i class="material-icons medium blue-grey-text text-darken-2 left">work</i>
Hier wird der aktuelle Status sämtlicher vom Benutzer
erstellten Aufträge aufgelistet.
</p>
</div>
<div class="card-action right-align">
<a href="job_manager.html" class="waves-effect waves-light btn">Öffnen</a>
</div>
</div>
</div>
</div>
</div>
<div id="create_job" class="modal overflow-visible">
<div class="modal-content">
<h4 style="line-height: 60px;"><i class="material-icons medium blue-grey-text text-darken-2 left">playlist_add</i>Linguistische Datenverarbeitung</h4>
<p>
Um den Auftrag abschicken zu können, müssen alle Formularfelder
ausgefüllt werden. Anschließend erhalten Sie fortlaufende
Benachrichtigungen über den aktuellen Bearbeitungsstand des
Auftrags. Nach erfolgreicher Bearbeitung stehen die Ergebnisse in
der <a href="job_manager.html">Auftragsübersicht</a> zum Download
zur Verfügung.
</p>
<form id="job_form" class="row">
<div class="input-field col s12">
<input placeholder="Mein Auftragsname" id="job_form_name" type="text" class="validate">
<label for="job_form_name">Auftragsname</label>
</div>
<div class="file-field input-field col s12 m6">
<div class="btn">
<span>Datei</span>
<input id="job_form_file" type="file" accept="text/plain">
</div>
<div class="file-path-wrapper">
<input class="file-path validate" type="text">
</div>
</div>
<div class="input-field col s12 m6">
<select id="job_form_language">
<option value="" disabled selected>Option auswählen</option>
<option value="de">Deutsch</option>
<option value="en">Englisch</option>
<option value="fr">Französisch</option>
<option value="pt">Portugiesisch</option>
<option value="es">Spanisch</option>
</select>
<label>Sprache der Eingabedatei</label>
</div>
<div class="input-field col s12 m6 hide">
<select id="job_form_service">
<option value="nlp" selected></option>
</select>
</div>
</form>
</div>
<div class="modal-footer">
<a href="#!" class="modal-close waves-effect waves-green btn-flat">Abbrechen</a>
<a href="javascript:jobFormSubmitHandler();" id="job_form_submit" class="modal-close waves-effect waves-green btn disabled">Abschicken</a>
</div>
</div>
</main>
<footer class="page-footer blue-grey darken-2">
<div class="container">
<div class="row">
<div class="col s12">
<p class="grey-text text-lighten-4 left">
<a href="https://www.uni-bielefeld.de/">
<img src="images/universitaet_bielefeld.png" alt="Logo: Universität Bielefeld" style="height: 72.5px">
</a>
</p>
<p class="grey-text text-lighten-4 right">
Gefördert durch die<br>
<a href="https://www.dfg.de/">
<img src="images/dfg.png" alt="Logo: Deutsche Forschungsgemeinschaft" style="height: 50px;">
</a>
</p>
</div>
</div>
</div>
<div class="footer-copyright">
<div class="container">
© 2019 Copyright
<a class="grey-text text-lighten-4" href="https://www.uni-bielefeld.de/sfb1288">Universität Bielefeld | SFB 1288</a>
<a class="grey-text text-lighten-4 right" href="impressum.html">Impressum</a>
</div>
</div>
</footer>
<script src="js/materialize.min.js"></script>
<script>
M.AutoInit();
</script>
<script src="js/job_form_handler.js"></script>
</body>
</html>

293
vre_www/website/ocr.html Normal file
View File

@ -0,0 +1,293 @@
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>Opaque | Texterkennung</title>
<link href="/vre/images/sfb_1288.png" rel="icon" type="image/png">
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
<link rel="stylesheet" href="css/materialize.min.css">
<link rel="stylesheet" href="css/style.css">
<script src="js/vre_manager_rest.js"></script>
<script src="js/vre.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body class="blue-grey lighten-5">
<header>
<ul id="main-nav-account-dropdown" class="dropdown-content">
<li>
<a href="account.html">Benutzerverwaltung</a>
</li>
<li class="divider"></li>
<li>
<a href="javascript:logout();">Abmelden</a>
</li>
</ul>
<nav class="blue-grey darken-2">
<div class="container">
<div class="nav-wrapper">
<a href="portal.html" class="brand-logo">
<i class="material-icons hide-on-small-only">cloud</i>
Opaque
</a>
<a href="#" data-target="main-nav-mobile" class="sidenav-trigger">
<i class="material-icons">menu</i>
</a>
<ul id="main-nav" class="right hide-on-med-and-down">
<li>
<a href="job_manager.html">
<i class="material-icons left">work</i>
Auftragsübersicht
</a>
</li>
<li>
<a href="nlp.html">
<i class="material-icons left">format_textdirection_l_to_r</i>
Linguistische Datenverarbeitung
</a>
</li>
<li class="active">
<a href="ocr.html">
<i class="material-icons left">find_in_page</i>
Texterkennung
</a>
</li>
<li>
<a id="main-nav-account" class="dropdown-trigger" href="#!" data-target="main-nav-account-dropdown">
<i class="material-icons left">account_circle</i>
Benutzer
<i class="material-icons right">arrow_drop_down</i>
</a>
</li>
</ul>
</div>
</div>
</nav>
<ul class="sidenav" id="main-nav-mobile">
<li>
<a href="job_manager.html">
<i class="material-icons left">work</i>
Auftragsübersicht
</a>
</li>
<li>
<a href="nlp.html">
<i class="material-icons left">format_textdirection_l_to_r</i>
Linguistische Datenverarbeitung
</a>
</li>
<li class="active">
<a href="ocr.html">
<i class="material-icons left">find_in_page</i>
Texterkennung
</a>
</li>
<li>
<a href="account.html">
<i class="material-icons">account_circle</i>
Benutzer
</a>
</li>
<li>
<a href="javascript:logout();">
<i class="material-icons">account_circle</i>
Abmelden
</a>
</li>
</ul>
</header>
<main>
<div class="container">
<div class="row">
<div class="col s12 m8">
<div class="card">
<div class="card-content">
<span class="card-title">Texterkennung</span>
<p>
Der hier zur Verfügung gestellte Dienst basiert auf Verfahren der
optischen Zeichenerkennung. Dabei werden durch optische Analysemethoden
und einen anschließenden Abgleich mit Wörterbüchern aus Bilddaten,
wie Fotos oder Scans, Textdaten erzeugt. Erst dieser Vorverarbeitungsschritt
ermöglicht eine computergestützte Verarbeitung von Dokumenten.
</p>
<div class="row">
<div class="col s12 m6">
<div class="card z-depth-0">
<div class="card-content">
<span class="card-title">
<i class="material-icons blue-grey-text text-darken-2 left">layers</i>
Eingabe von Bilddaten
</span>
<p>
Über ein Auftragsformular können Bilddaten in Form von
PDF-Dateien hochgeladen werden.
</p>
</div>
</div>
</div>
<div class="col s12 m6">
<div class="card z-depth-0">
<div class="card-content">
<span class="card-title">
<i class="material-icons blue-grey-text text-darken-2 left">layers</i>
Optische Zeichenerkennung
</span>
<p>
Die optische Zeichenerkennung erfolgt in der
Recheninfrastruktur der Plattform.
</p>
</div>
</div>
</div>
<div class="col s12 m6">
<div class="card z-depth-0">
<div class="card-content">
<span class="card-title">
<i class="material-icons blue-grey-text text-darken-2 left">layers</i>
Fehlerkorrektur
</span>
<p>
Je nach Qualität der Eingabedaten kann es zu
Fehlern kommen, die korrigiert werden sollten.
</p>
</div>
</div>
</div>
<div class="col s12 m6">
<div class="card z-depth-0">
<div class="card-content">
<span class="card-title">
<i class="material-icons blue-grey-text text-darken-2 left">layers</i>
Weiterverarbeitung
</span>
<p>
Die Textdaten können weiterverarbeitet<a class="tooltipped" data-position="top" data-tooltip="Zum Beispiel durch die hier angebotene linguistische Datenverarbeitung."><sup>[*]</sup></a>
oder in dieser Form bereits genutzt<a class="tooltipped" data-position="top" data-tooltip="Zum Beispiel mit dem Programm &quot;AntConc&quot;."><sup>[*]</sup></a> werden.
</p>
</div>
</div>
</div>
</div>
<p>
Die Ausgabe dieses Dienstes erfolgt in Form von Textdateien,
PDF Dateien mit hinterlegtem Text und <a href="https://tei-c.org/guidelines/p5/" target="_blank">TEI P5</a>
konformen XML Dateien.
</p>
</div>
</div>
</div>
<div class="col s12 m4">
<div class="card">
<div class="card-content">
<span class="card-title">Auftrag erstellen</span>
<p>
<i class="material-icons medium blue-grey-text text-darken-2 left">playlist_add</i>
Auftragsformular zur Texterkennung von PDF-Dateien.
</p>
</div>
<div class="card-action right-align">
<a href="#create_job" class="waves-effect waves-light btn modal-trigger">Erstellen</a>
</div>
</div>
<div class="card">
<div class="card-content">
<span class="card-title">Auftragsübersicht</span>
<p>
<i class="material-icons medium blue-grey-text text-darken-2 left">work</i>
Hier wird der aktuelle Status sämtlicher vom Benutzer erstellten
Aufträge aufgelistet.
</p>
</div>
<div class="card-action right-align">
<a href="job_manager.html" class="waves-effect waves-light btn">Öffnen</a>
</div>
</div>
</div>
</div>
</div>
<div id="create_job" class="modal overflow-visible">
<div class="modal-content">
<h4 style="line-height: 60px;"><i class="material-icons medium blue-grey-text text-darken-2 left">playlist_add</i>Texterkennung</h4>
<p>
Um den Auftrag erstellen zu können, müssen alle Formularfelder
ausgefüllt werden. Nachdem der Auftrag abgeschickt wurde, erhalten
Sie fortlaufende Benachrichtigungen über den aktuellen Bearbeitungsstand.
Nach erfolgreicher Bearbeitung stehen die Ergebnisse in
der <a href="job_manager.html">Auftragsübersicht</a> zum Download
zur Verfügung.
</p>
<form id="job_form" class="row">
<div class="input-field col s12">
<input placeholder="Mein Auftragsname" id="job_form_name" type="text" class="validate">
<label for="job_form_name">Auftragsname</label>
</div>
<div class="file-field input-field col s12 m6">
<div class="btn">
<span>Datei</span>
<input id="job_form_file" type="file" accept="application/pdf">
</div>
<div class="file-path-wrapper">
<input class="file-path validate" type="text">
</div>
</div>
<div class="input-field col s12 m6">
<select id="job_form_language">
<option value="" disabled selected>Option auswählen</option>
<option value="deu">Deutsch</option>
<option value="deu_frak">Deutsch (Fraktur)</option>
<option value="eng">Englisch</option>
<option value="fra">Französisch</option>
<option value="por">Portugiesisch</option>
<option value="spa">Spanisch</option>
</select>
<label>Sprache der Eingabedatei</label>
</div>
<div class="input-field col s12 m6 hide">
<select id="job_form_service">
<option value="ocr" selected></option>
</select>
</div>
</form>
</div>
<div class="modal-footer">
<a href="#!" class="modal-close waves-effect waves-green btn-flat">Abbrechen</a>
<a href="javascript:jobFormSubmitHandler();" id="job_form_submit" class="modal-close waves-effect waves-green btn disabled">Abschicken</a>
</div>
</div>
</main>
<footer class="page-footer blue-grey darken-2">
<div class="container">
<div class="row">
<div class="col s12">
<p class="grey-text text-lighten-4 left">
<a href="https://www.uni-bielefeld.de/">
<img src="images/universitaet_bielefeld.png" alt="Logo: Universität Bielefeld" style="height: 72.5px">
</a>
</p>
<p class="grey-text text-lighten-4 right">
Gefördert durch die<br>
<a href="https://www.dfg.de/">
<img src="images/dfg.png" alt="Logo: Deutsche Forschungsgemeinschaft" style="height: 50px;">
</a>
</p>
</div>
</div>
</div>
<div class="footer-copyright">
<div class="container">
© 2019 Copyright
<a class="grey-text text-lighten-4" href="https://www.uni-bielefeld.de/sfb1288">Universität Bielefeld | SFB 1288</a>
<a class="grey-text text-lighten-4 right" href="impressum.html">Impressum</a>
</div>
</div>
</footer>
<script src="js/materialize.min.js"></script>
<script>
M.AutoInit();
</script>
<script src="js/job_form_handler.js"></script>
</body>
</html>

233
vre_www/website/portal.html Normal file
View File

@ -0,0 +1,233 @@
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<title>Opaque | Portal</title>
<link href="/vre/images/sfb_1288.png" rel="icon" type="image/png">
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
<link rel="stylesheet" href="css/materialize.min.css">
<link rel="stylesheet" href="css/style.css">
<script src="js/vre_manager_rest.js"></script>
<script src="js/vre.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body class="blue-grey lighten-5">
<header>
<ul id="main-nav-account-dropdown" class="dropdown-content">
<li>
<a href="account.html">Benutzerverwaltung</a>
</li>
<li class="divider"></li>
<li>
<a href="javascript:logout();">Abmelden</a>
</li>
</ul>
<nav class="blue-grey darken-2">
<div class="container">
<div class="nav-wrapper">
<a href="portal.html" class="brand-logo">
<i class="material-icons hide-on-small-only">cloud</i>
Opaque
</a>
<a href="#" data-target="main-nav-mobile" class="sidenav-trigger">
<i class="material-icons">menu</i>
</a>
<ul id="main-nav" class="right hide-on-med-and-down">
<li>
<a href="job_manager.html">
<i class="material-icons left">work</i>
Auftragsübersicht
</a>
</li>
<li>
<a href="nlp.html">
<i class="material-icons left">format_textdirection_l_to_r</i>
Linguistische Datenverarbeitung
</a>
</li>
<li>
<a href="ocr.html">
<i class="material-icons left">find_in_page</i>
Texterkennung
</a>
</li>
<li>
<a id="main-nav-account" class="dropdown-trigger" href="#!" data-target="main-nav-account-dropdown">
<i class="material-icons left">account_circle</i>
Benutzer
<i class="material-icons right">arrow_drop_down</i>
</a>
</li>
</ul>
</div>
</div>
</nav>
<ul class="sidenav" id="main-nav-mobile">
<li>
<a href="job_manager.html">
<i class="material-icons left">work</i>
Auftragsübersicht
</a>
</li>
<li>
<a href="nlp.html">
<i class="material-icons left">format_textdirection_l_to_r</i>
Linguistische Datenverarbeitung
</a>
</li>
<li>
<a href="ocr.html">
<i class="material-icons left">find_in_page</i>
Texterkennung
</a>
</li>
<li>
<a href="account.html">
<i class="material-icons">account_circle</i>
Benutzer
</a>
</li>
<li>
<a href="javascript:logout();">
<i class="material-icons">account_circle</i>
Abmelden
</a>
</li>
</ul>
</header>
<main>
<div class="container">
<div class="row">
<div class="col s12 m8">
<div class="card">
<div class="card-content">
<span class="card-title">Portal</span>
<p class="overflow-hidden">
<i class="material-icons blue-grey-text text-darken-2 left" style="font-size: 135px;">cloud</i>
Opaque ist eine freie Plattform zur Unterstützung
textverarbeitender Prozesse. Sie soll es ermöglichen,
komplexe Programme zur einfachen Nutzung bereitzustellen und
bereits etablierte Textverarbeitungsroutinen ohne die Hürde langer
Einarbeitungszeiten zugänglich zu machen. Alle Berechnungen
werden dabei von den vom SFB 1288 zur Verfügung gestellten
Servern ausgeführt. So können Aufträge bearbeitet werden, ohne
dass der Computer des Benutzers eingeschaltet sein muss.
</p>
<p>&nbsp;</p>
<p>&nbsp;</p>
<div class="divider"></div>
<p>&nbsp;</p>
<p>&nbsp;</p>
<p class="overflow-hidden">
<i class="material-icons blue-grey-text text-darken-2 left" style="font-size: 135px;">lock_open</i>
Alle hier zur Verfügung gestellten Dienste wurden durch den
Einsatz freier Software realisiert. Um den Ansprüchen modernen
Forschungsdatenmanagements zu entsprechen, muss die
Nachnutzbarkeit von Ergebnissen sichergestellt werden. Aus diesem Grund
wird bewusst auf proprietäre Dateiformate verzichtet. Stattdessen werden nur
standardisierte und offene Formate genutzt, die eine
einfache Verbreitung und Weiterverarbeitung ermöglichen.
</p>
<p>&nbsp;</p>
<p>&nbsp;</p>
<div class="divider"></div>
<p>&nbsp;</p>
<p>&nbsp;</p>
<p class="overflow-hidden">
<img src="images/sfb_1288.png" alt="Logo: SFB 1288" class="left" style="width: 135px; height: 135px; margin-right: 15px;">
Diese Seite wurde vom Teilprojekt INF des Sonderforschungsbereichs 1288
vor dem Hintergrund der gesammelten Erfahrungen entwickelt, die in einem
Pilotprojekt zur digitalen Verarbeitung von Textdaten im SFB gemacht wurden.<br>
Für Fragen und Anregungen steht das TP INF gerne via E-Mail zur Verfügung:
<br><a href="mailto:inf_sfb1288@lists.uni-bielefeld.de">inf_sfb1288@lists.uni-bielefeld.de</a>
</p>
<p>&nbsp;</p>
</div>
</div>
</div>
<div class="col s12 m4">
<div class="card">
<div class="card-content">
<span class="card-title">Linguistische Datenverarbeitung</span>
<p>
<i class="material-icons medium blue-grey-text text-darken-2 left">format_textdirection_l_to_r</i>
Mit Hilfe datenverarbeitender Methoden werden Textdaten mit
weiteren Informationen angereichert.
</p>
</div>
<div class="card-action right-align">
<a href="nlp.html" class="waves-effect waves-light btn">Zum Dienst</a>
</div>
</div>
<div class="card">
<div class="card-content">
<span class="card-title">Texterkennung</span>
<p>
<i class="material-icons medium blue-grey-text text-darken-2 left">find_in_page</i>
Textdaten werden aus Scans oder Fotos zur anschließenden
Weiterverarbeitung extrahiert.
</p>
</div>
<div class="card-action right-align">
<a href="ocr.html" class="waves-effect waves-light btn">Zum Dienst</a>
</div>
</div>
<div class="card">
<div class="card-content">
<span class="card-title">Auftragsübersicht</span>
<p>
<i class="material-icons medium blue-grey-text text-darken-2 left">work</i>
Hier wird der aktuelle Status sämtlicher vom Benutzer erstellten
Aufträge aufgelistet.
</p>
</div>
<div class="card-action right-align">
<a href="job_manager.html" class="waves-effect waves-light btn">Öffnen</a>
</div>
</div>
</div>
</div>
</div>
</main>
<footer class="page-footer blue-grey darken-2">
<div class="container">
<div class="row">
<div class="col s12">
<p class="grey-text text-lighten-4 left">
<a href="https://www.uni-bielefeld.de/">
<img src="images/universitaet_bielefeld.png" alt="Logo: Universität Bielefeld" style="height: 72.5px">
</a>
</p>
<p class="grey-text text-lighten-4 right">
Gefördert durch die<br>
<a href="https://www.dfg.de/">
<img src="images/dfg.png" alt="Logo: Deutsche Forschungsgemeinschaft" style="height: 50px;">
</a>
</p>
</div>
</div>
</div>
<div class="footer-copyright">
<div class="container">
© 2019 Copyright
<a class="grey-text text-lighten-4" href="https://www.uni-bielefeld.de/sfb1288">Universität Bielefeld | SFB 1288</a>
<a class="grey-text text-lighten-4 right" href="impressum.html">Impressum</a>
</div>
</div>
</footer>
<script src="js/materialize.min.js"></script>
<script>
M.AutoInit();
</script>
</body>
</html>