# Base image: the slim variant of Debian buster.
FROM debian:buster-slim


# Image metadata: the authors and their contact addresses.
LABEL authors="Patrick Jentsch <p.jentsch@uni-bielefeld.de>, Stephan Porada <porada@posteo.de>"


# Set the LANG locale variable to C.UTF-8 for every later build step and for
# containers started from this image.
ENV LANG=C.UTF-8


# Install the OS-level tooling (procps, Python 3.7, pip, wget) and the basic
# Python packaging helpers (chardet, setuptools, wheel).
# The apt package lists are left in place here because later RUN instructions
# in this file still call `apt-get install`.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN apt-get update \
 && apt-get install --no-install-recommends --yes \
      procps \
      python3.7 \
      python3-pip \
      wget \
 && python3 -m pip install --no-cache-dir \
      chardet \
      setuptools \
      wheel

# Install the NLP pipeline and its dependencies #
## Install pyFlow ##
# Download the pinned pyFlow release tarball from GitHub, build and install it
# with Python 2.7, then remove the unpacked sources and the tarball.
# ca-certificates is installed so that wget can verify the TLS certificate of
# the download host instead of skipping the check.
# apt-get update runs in this same layer so the install does not depend on a
# package index cached by an earlier layer.
ENV PYFLOW_VERSION=1.1.20
RUN apt-get update \
 && apt-get install --no-install-recommends --yes \
      ca-certificates \
      python2.7 \
 && wget --quiet \
      "https://github.com/Illumina/pyflow/releases/download/v${PYFLOW_VERSION}/pyflow-${PYFLOW_VERSION}.tar.gz" \
 && tar -xzf "pyflow-${PYFLOW_VERSION}.tar.gz" \
 && cd "pyflow-${PYFLOW_VERSION}" \
 && python2.7 setup.py build install \
 && cd - > /dev/null \
 && rm -r "pyflow-${PYFLOW_VERSION}" "pyflow-${PYFLOW_VERSION}.tar.gz"


## Install spaCy ##
# Python 3.7, pip, chardet, setuptools and wheel are already installed by the
# first RUN instruction of this file, so they are not installed again here.
# pip is upgraded first, then the pinned spaCy release is installed.
# `python3 -m pip` matches the other pip invocations in this file, and
# --no-cache-dir keeps pip's download cache out of the image layer.
ENV SPACY_VERSION=3.2.1
RUN python3 -m pip install --no-cache-dir --upgrade pip \
 && python3 -m pip install --no-cache-dir "spacy==${SPACY_VERSION}"


# Download every spaCy model named in the comma-separated SPACY_MODELS list,
# each as "<name>-<SPACY_MODELS_VERSION>" via `spacy download --direct`.
# `|| exit 1` aborts the build on the first failed download: a plain `for`
# loop only reports the exit status of its last iteration, so an earlier
# failure would otherwise go unnoticed and produce an image missing a model.
ENV SPACY_MODELS="de_core_news_md,en_core_web_md,it_core_news_md,pl_core_news_md,zh_core_web_md"
ENV SPACY_MODELS_VERSION=3.2.0
RUN for spacy_model in $(echo "${SPACY_MODELS}" | tr "," "\n"); do \
      python3 -m spacy download "${spacy_model}-${SPACY_MODELS_VERSION}" --direct \
      || exit 1; \
    done


# Copy the contents of the local `packages` directory into the image's current
# working directory and install the stand-off-data-py package found there.
# The package is installed by path, so no `cd` into (and back out of) its
# directory is needed; --no-cache-dir keeps pip's cache out of the layer.
COPY packages .
RUN python3 -m pip install --no-cache-dir ./stand-off-data-py


## Install Pipeline ##
# Copy the three pipeline files into /usr/local/bin/; `nlp` is the command
# this image uses as its ENTRYPOINT.
COPY nlp \
     spacy-nlp \
     vrt-creator \
     /usr/local/bin/


# Remove the apt package lists from the final filesystem.
# -f keeps this step from failing the build when the directory is already
# empty (the shell then passes the unmatched glob to rm literally).
# NOTE(review): the lists were written by earlier layers, so deleting them
# here hides them from the final filesystem but does not reduce image size.
RUN rm -rf /var/lib/apt/lists/*


# Containers started from this image run the `nlp` executable.
# CMD supplies `--help` as the default argument; arguments given to
# `docker run` replace it.
ENTRYPOINT ["nlp"]
CMD ["--help"]