nlp/Dockerfile

63 lines
1.7 KiB
Docker
Raw Permalink Normal View History

2020-10-08 21:17:58 +00:00
# Base image: the slim variant of Debian "buster".
FROM debian:buster-slim

# Image metadata: the maintainers of this pipeline image.
LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>, Stephan Porada <porada@posteo.de>"

# Use a UTF-8 locale for every process started in the image.
ENV LANG=C.UTF-8

# Refresh the package index and install wget, which a later step uses to
# download the pyFlow release tarball.
RUN apt-get update \
    && apt-get install -y --no-install-recommends wget
2021-02-25 10:26:11 +00:00
# Install the NLP pipeline and its dependencies #
2020-09-23 13:26:53 +00:00
## Install pyFlow ##
# Version of the pyFlow release that is downloaded from GitHub below.
ENV PYFLOW_VERSION=1.1.20
# Download, build and install pyFlow with python2.7, then remove the sources.
# - `apt-get update` runs in this layer so the install does not depend on
#   package lists left behind by an earlier (possibly cached) layer.
# - ca-certificates lets wget verify GitHub's TLS certificate, so the tarball
#   is no longer fetched with certificate checking disabled.
RUN apt-get update \
    && apt-get install --no-install-recommends --yes \
         ca-certificates \
         python2.7 \
    && wget --quiet \
         "https://github.com/Illumina/pyflow/releases/download/v${PYFLOW_VERSION}/pyflow-${PYFLOW_VERSION}.tar.gz" \
    && tar -xzf "pyflow-${PYFLOW_VERSION}.tar.gz" \
    && cd "pyflow-${PYFLOW_VERSION}" \
    && python2.7 setup.py build install \
    && cd .. \
    && rm -r "pyflow-${PYFLOW_VERSION}" "pyflow-${PYFLOW_VERSION}.tar.gz"
2020-04-06 07:21:38 +00:00
2020-10-08 21:08:49 +00:00
## Install spaCy ##
# Version of spaCy that is installed from PyPI below.
ENV SPACY_VERSION=3.0.5
# Install Python 3 and pip from the distribution, then spaCy via pip.
# - `apt-get update` runs in this layer so the install does not depend on
#   package lists left behind by an earlier (possibly cached) layer.
# - The build helpers (chardet, setuptools, wheel) are installed first, pip is
#   then upgraded, and only afterwards is the pinned spaCy release installed.
# - --no-cache-dir keeps pip's download cache out of the image layer.
RUN apt-get update \
    && apt-get install --no-install-recommends --yes \
         python3.7 \
         python3-pip \
    && pip3 install --no-cache-dir \
         chardet \
         setuptools \
         wheel \
    && pip3 install --no-cache-dir --upgrade pip \
    && pip3 install --no-cache-dir "spacy==${SPACY_VERSION}"
# Only models that include the following components are compatible:
# lemmatizer, ner, parser, senter, tagger
# SPACY_MODELS is a comma-separated list of model package names.
ENV SPACY_MODELS="de_core_news_md,en_core_web_md,it_core_news_md,nl_core_news_md,pl_core_news_md,zh_core_web_md"
# Version suffix appended to every model name for the direct download.
ENV SPACY_MODELS_VERSION=3.0.0
# Download every listed model as "<name>-<version>".
# - `set -e` aborts the build as soon as one download fails; without it only
#   the exit status of the last loop iteration would decide whether this step
#   succeeds, so a missing model could go unnoticed.
# - --no-cache-dir is forwarded to pip by `spacy download` and keeps the
#   downloaded model archives out of pip's cache in this layer.
RUN set -e; \
    for spacy_model in $(echo "${SPACY_MODELS}" | tr "," "\n"); do \
        python3 -m spacy download "${spacy_model}-${SPACY_MODELS_VERSION}" --direct --no-cache-dir; \
    done
## Further dependencies ##
# Install the remaining command line tools (procps, zip).
# `apt-get update` runs in this layer so the install does not depend on
# package lists left behind by an earlier (possibly cached) layer.
RUN apt-get update \
    && apt-get install --no-install-recommends --yes \
         procps \
         zip
2019-02-06 15:58:17 +00:00
2019-09-11 11:34:01 +00:00
2020-10-08 21:08:49 +00:00
## Install Pipeline ##
# Drop the apt package lists. -f keeps this step from failing when the
# directory is already empty (an unmatched glob would otherwise be passed to
# rm literally). The step runs before COPY so that changing the pipeline
# scripts does not invalidate this layer.
# NOTE(review): removing files in a separate layer does not shrink the layers
# that created them; it only hides the lists in the final filesystem.
RUN rm -rf /var/lib/apt/lists/*

# Put the pipeline executables on the PATH.
COPY nlp spacy-nlp vrt-creator /usr/local/bin/

# Run the `nlp` executable by default; with no arguments it prints its help.
ENTRYPOINT ["nlp"]
CMD ["--help"]