Skip to content

Distributed training integrated and sedona registration updated #66

Distributed training integrated and sedona registration updated

Distributed training integrated and sedona registration updated #66

Workflow file for this run

# Continuous Integration workflow.
#
# For every entry in the build matrix this workflow:
#   1. sets up the requested Python interpreter,
#   2. downloads and unpacks a Spark/Hadoop binary distribution,
#   3. copies the Apache Sedona and GeoTools wrapper jars into Spark's jars/ directory,
#   4. installs the Python dependencies and the package itself (with the "tests" extra),
#   5. lints with flake8 and runs the pytest suite against the unpacked Spark.
#
# For more information see:
# https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
name: Continuous Integration

on:
  push:
    branches: ['main']
    paths-ignore:
      - 'README.md'
      - 'CONTRIBUTING.md'
  pull_request:
    branches: ['main']

jobs:
  build:
    runs-on: ubuntu-20.04
    strategy:
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
      matrix:
        # Every value is quoted so YAML keeps it a string ('3.10' would otherwise load as 3.1).
        include:
          - spark: '3.4.1'
            hadoop: '3'
            scala: '2.12.8'
            python: '3.10'
            spark_compat: '3.4'
          - spark: '3.4.1'
            hadoop: '3'
            scala: '2.12.8'
            python: '3.7'
            spark_compat: '3.4'
    steps:
      - uses: actions/checkout@v3

      # The matrix key is `python`; referencing an undefined key such as
      # `matrix.python-version` evaluates to an empty string, which makes
      # setup-python ignore the matrix and use the runner's default interpreter.
      - name: Set up Python ${{ matrix.python }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python }}

      - name: Download spark-hadoop
        env:
          SPARK_VERSION: ${{ matrix.spark }}
          HADOOP_VERSION: ${{ matrix.hadoop }}
        run: wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz

      - name: Install spark-hadoop
        env:
          SPARK_VERSION: ${{ matrix.spark }}
          HADOOP_VERSION: ${{ matrix.hadoop }}
        run: tar -xzf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz

      - name: Install system packages
        # Refresh the package index first; a stale index on the runner image can
        # make `apt-get install` fail with 404s.
        run: |
          sudo apt-get update
          sudo apt-get install -y python3-pip python3-dev libgeos-dev gdal-bin libgdal-dev

      - name: Upgrade Python packaging tools
        run: |
          python3 -m pip install --upgrade pip
          python3 -m pip install -U setuptools
          python3 -m pip install -U wheel
          python3 -m pip install -U virtualenvwrapper
          python3 -m pip install pipenv

      - name: Download Sedona jar
        env:
          SPARK_COMPAT: ${{ matrix.spark_compat }}
        run: wget https://repo1.maven.org/maven2/org/apache/sedona/sedona-spark-shaded-${SPARK_COMPAT}_2.12/1.4.1/sedona-spark-shaded-${SPARK_COMPAT}_2.12-1.4.1.jar

      - name: Put Sedona jar in place
        env:
          SPARK_VERSION: ${{ matrix.spark }}
          HADOOP_VERSION: ${{ matrix.hadoop }}
        # The pattern is quoted so that find, not the shell, expands the wildcard.
        run: find . -name 'sedona-spark-shaded-*.jar' -exec cp {} spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/jars/ \;

      - name: Download Geotools jar
        run: wget https://repo1.maven.org/maven2/org/datasyslab/geotools-wrapper/1.4.0-28.2/geotools-wrapper-1.4.0-28.2.jar

      - name: Put Geotools jar in place
        env:
          SPARK_VERSION: ${{ matrix.spark }}
          HADOOP_VERSION: ${{ matrix.hadoop }}
        # The pattern is quoted so that find, not the shell, expands the wildcard.
        run: find . -name 'geotools-wrapper-*.jar' -exec cp {} spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/jars/ \;

      - name: Install dependencies
        env:
          SPARK_VERSION: ${{ matrix.spark }}
          HADOOP_VERSION: ${{ matrix.hadoop }}
        run: |
          python3 -m pip install flake8 pytest
          python3 -m pip install pandas
          python3 -m pip install numpy
          python3 -m pip install xarray
          python3 -m pip install torch torchvision torchaudio
          # Quoted: an unquoted `>=0.19.0` is a shell redirect into a file named
          # `=0.19.0`, which silently drops the version constraint.
          python3 -m pip install -U 'scikit-image>=0.19.0'
          python3 -m pip install cdsapi
          python3 -m pip install rasterio==1.1.8
          python3 -m pip install petastorm
          python3 -m pip install matplotlib
          python3 -m pip install pydeck
          python3 -m pip install geojson
          # Keep the PySpark client at the same version as the unpacked Spark distribution.
          python3 -m pip install pyspark==${SPARK_VERSION}
          python3 -m pip install apache-sedona
          if [ -f requirements.txt ]; then python3 -m pip install -r requirements.txt; fi

      - name: Install main package
        # Quoted so the shell cannot treat `[tests]` as a glob character class.
        run: |
          python3 -m pip install -e '.[tests]'

      - name: Lint with flake8
        run: |
          # stop the build if there are Python syntax errors or undefined names
          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics

      - name: Test with pytest
        env:
          SPARK_VERSION: ${{ matrix.spark }}
          HADOOP_VERSION: ${{ matrix.hadoop }}
        run: |
          # Point PySpark at the Spark distribution unpacked earlier in this job.
          export SPARK_HOME="$PWD/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}"
          # Spark workers must use the same interpreter as the driver (the one
          # setup-python put on PATH and the dependencies were installed into),
          # not a hard-coded system Python.
          export PYSPARK_PYTHON="$(command -v python3)"
          python3 -m pytest tests