2020-10-08 23:17:58 +02:00
|
|
|
# Base image: the slim variant of Debian "buster".
# NOTE(review): buster is an old Debian release - confirm its apt
# repositories are still reachable before rebuilding this image.
FROM debian:buster-slim
|
2019-02-06 16:58:17 +01:00
|
|
|
|
2019-09-11 13:20:07 +02:00
|
|
|
|
2021-02-25 11:26:11 +01:00
|
|
|
# Image metadata: names and e-mail addresses of the image authors.
LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>, Stephan Porada <porada@posteo.de>"
|
2019-02-06 16:58:17 +01:00
|
|
|
|
2019-09-11 13:20:07 +02:00
|
|
|
|
2019-09-12 11:42:42 +02:00
|
|
|
# Select the C.UTF-8 locale; as an ENV it applies to the following build
# steps and to containers started from the image.
ENV LANG=C.UTF-8
|
|
|
|
|
|
|
|
|
2021-03-26 09:46:17 +01:00
|
|
|
# Fetch the apt package index and install wget.
# The later `apt-get install` steps in this file do not run
# `apt-get update` themselves and reuse the index downloaded here.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      wget
|
2021-02-25 11:26:11 +01:00
|
|
|
|
2021-03-26 09:46:17 +01:00
|
|
|
# Install the NLP pipeline and its dependencies #
|
2020-09-23 15:26:53 +02:00
|
|
|
## Install pyFlow ##
|
2021-03-26 09:46:17 +01:00
|
|
|
# pyFlow release to install; the value is interpolated into the download URL
# and into the archive and directory names used below.
ENV PYFLOW_VERSION=1.1.20
# Download the pyFlow release tarball, build and install it with Python 2.7,
# then delete the unpacked sources and the tarball in the same layer.
# ca-certificates is installed first so that wget can verify the TLS
# certificate of the download server instead of skipping verification with
# --no-check-certificate.
# NOTE: this step reuses the apt package index fetched by an earlier
# `apt-get update`.
RUN apt-get install --no-install-recommends --yes \
      ca-certificates \
      python2.7 \
    && wget --quiet \
      "https://github.com/Illumina/pyflow/releases/download/v${PYFLOW_VERSION}/pyflow-${PYFLOW_VERSION}.tar.gz" \
    && tar -xzf "pyflow-${PYFLOW_VERSION}.tar.gz" \
    && cd "pyflow-${PYFLOW_VERSION}" \
    && python2.7 setup.py build install \
    && cd .. \
    && rm -r "pyflow-${PYFLOW_VERSION}" "pyflow-${PYFLOW_VERSION}.tar.gz"
|
2020-04-06 09:21:38 +02:00
|
|
|
|
|
|
|
|
2020-10-08 23:08:49 +02:00
|
|
|
## Install spaCy ##
|
2021-03-26 09:46:17 +01:00
|
|
|
# spaCy release to install; pinned so rebuilds get the same version.
ENV SPACY_VERSION=3.0.5
# Install Python 3.7 and pip from apt, then the Python build helpers, a newer
# pip, and finally the pinned spaCy release.
# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
# NOTE: this step reuses the apt package index fetched by an earlier
# `apt-get update`.
RUN apt-get install --no-install-recommends --yes \
      python3.7 \
      python3-pip \
    && pip3 install --no-cache-dir \
      chardet \
      setuptools \
      wheel \
    && pip3 install --no-cache-dir --upgrade pip \
    && pip3 install --no-cache-dir "spacy==${SPACY_VERSION}"
|
|
|
|
|
2021-03-26 09:46:17 +01:00
|
|
|
|
2021-07-22 16:59:29 +02:00
|
|
|
# Only models that include the following components are compatible:
# lemmatizer, ner, parser, senter, tagger
ENV SPACY_MODELS="de_core_news_md,en_core_web_md,it_core_news_md,nl_core_news_md,pl_core_news_md,zh_core_web_md"
# Version suffix appended to every model name when downloading.
ENV SPACY_MODELS_VERSION=3.0.0
# Download each model from the comma-separated SPACY_MODELS list as
# "<model>-<SPACY_MODELS_VERSION>".
# `|| exit 1` aborts the build as soon as one download fails; without it the
# loop's exit status is that of the last iteration only, so an earlier failed
# download would go unnoticed.
RUN for spacy_model in $(echo "${SPACY_MODELS}" | tr "," "\n"); do \
      python3 -m spacy download "${spacy_model}-${SPACY_MODELS_VERSION}" --direct \
        || exit 1; \
    done
|
|
|
|
|
|
|
|
|
|
|
|
## Further dependencies ##
|
|
|
|
# Install the procps and zip packages from apt.
# This step reuses the package index fetched by an earlier `apt-get update`.
RUN apt-get install -y --no-install-recommends \
      procps \
      zip
|
2019-02-06 16:58:17 +01:00
|
|
|
|
2019-09-11 13:34:01 +02:00
|
|
|
|
2021-07-22 16:59:29 +02:00
|
|
|
# Copy the contents of the build context's `packages` directory into the
# current working directory of the image (presumably this provides the
# stand-off-data-py directory used below - verify against the repository).
COPY packages .
# Build and install stand-off-data-py with Python 3, then switch back to the
# previous directory.
RUN cd stand-off-data-py \
 && python3 setup.py build install \
 && cd -
|
|
|
|
|
|
|
|
|
2020-10-08 23:08:49 +02:00
|
|
|
## Install Pipeline ##
|
2021-03-26 09:46:17 +01:00
|
|
|
# Copy the three pipeline entries nlp, spacy-nlp and vrt-creator from the
# build context into /usr/local/bin/.
COPY nlp spacy-nlp vrt-creator /usr/local/bin/
|
2020-09-23 15:26:53 +02:00
|
|
|
|
|
|
|
|
2021-02-25 11:26:11 +01:00
|
|
|
# Delete the apt package index that `apt-get update` downloaded.
# NOTE(review): this runs in its own layer, so the files are still stored in
# the earlier layers and the image does not get smaller. To actually save
# space, each apt step would have to update, install and clean up within a
# single RUN.
RUN rm -r /var/lib/apt/lists/*
|
|
|
|
|
|
|
|
|
2019-05-20 11:28:51 +02:00
|
|
|
# Run `nlp` as the container's main process (exec form, no shell wrapper).
ENTRYPOINT ["nlp"]
|
2019-09-11 13:20:07 +02:00
|
|
|
# Default argument passed to the entrypoint when the container is started
# without arguments; arguments given to `docker run` replace it.
CMD ["--help"]
|