Add a basic requirements file

Also add a simple script for running a Spark notebook
pull/1/head
Bradlee Speice 2016-10-21 18:00:06 -04:00
джерело 71627e160e
коміт df0132d466
3 змінених файлів з 13 додано та 1 видалено

4
.gitignore сторонній

@ -47,4 +47,6 @@ fabric.properties
# Don't include the full snapshot ZIP since it's massive.
kiva_ds_json.zip
*.json
*.json
spark-*/
*.swp

1
requirements.txt Normal file

@ -0,0 +1 @@
py4j>=0.10.4

9
start_pyspark.sh Executable file

@ -0,0 +1,9 @@
#!/usr/bin/env bash
# Start a Jupyter notebook whose driver is PySpark, fetching the Spark
# distribution on first use.
#
# Usage: ./start_pyspark.sh
#
# Run from the directory that should hold the Spark distribution: it is
# unpacked into ./$SPARK_DIR when that directory does not exist yet, and
# reused on later runs.
set -euo pipefail

SPARK_DIR="spark-2.0.1-bin-hadoop2.7"
SPARK_URL="http://apache.claz.org/spark/spark-2.0.1/spark-2.0.1-bin-hadoop2.7.tgz"

if [[ ! -d "$SPARK_DIR" ]]; then
  echo "Downloading Spark distribution..."

  # Unpack into a scratch directory and only move it into place once the
  # whole download + extraction succeeded. Extracting straight into the
  # working directory would leave a partial $SPARK_DIR behind on a failed
  # or interrupted download, and the -d check above would then skip the
  # download forever.
  staging_dir="$(mktemp -d "./spark-download.XXXXXX")"
  trap 'rm -rf -- "$staging_dir"' EXIT

  # pipefail (set above) makes a wget failure abort the script even though
  # tar is the last stage of the pipeline.
  wget "$SPARK_URL" -O - | tar xzf - -C "$staging_dir"

  mv -- "$staging_dir/$SPARK_DIR" "$SPARK_DIR"
  rm -rf -- "$staging_dir"
  trap - EXIT
fi

# The two environment variables tell the pyspark launcher to run
# `jupyter notebook` as the driver front end.
PYSPARK_DRIVER_PYTHON="jupyter" PYSPARK_DRIVER_PYTHON_OPTS="notebook" "./$SPARK_DIR/bin/pyspark"