From df0132d4662c64e3a771f68da2e417196b497f5e Mon Sep 17 00:00:00 2001
From: Bradlee Speice
Date: Fri, 21 Oct 2016 18:00:06 -0400
Subject: [PATCH] Add a basic requirements file

And also easy script for running a Spark notebook
---
 .gitignore       |  4 +++-
 requirements.txt |  1 +
 start_pyspark.sh | 22 ++++++++++++++++++++++
 3 files changed, 26 insertions(+), 1 deletion(-)
 create mode 100644 requirements.txt
 create mode 100755 start_pyspark.sh

diff --git a/.gitignore b/.gitignore
index 04b7ce2..df1aa4b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -47,4 +47,6 @@ fabric.properties

 # Don't include the full snapshot ZIP since it's massive.
 kiva_ds_json.zip
-*.json
\ No newline at end of file
+*.json
+spark-*/
+*.swp
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..8c10745
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+py4j>=0.10.4
diff --git a/start_pyspark.sh b/start_pyspark.sh
new file mode 100755
index 0000000..dd8e2d8
--- /dev/null
+++ b/start_pyspark.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+# Download the Spark distribution (if not already present) and launch
+# pyspark with a Jupyter notebook as the driver frontend.
+set -euo pipefail
+
+SPARK_DIR="spark-2.0.1-bin-hadoop2.7"
+# archive.apache.org keeps every release permanently; rotating mirrors
+# (e.g. apache.claz.org) drop old versions, so they rot for pinned releases.
+SPARK_URL="https://archive.apache.org/dist/spark/spark-2.0.1/${SPARK_DIR}.tgz"
+
+if [ ! -d "$SPARK_DIR" ]; then
+  echo "Downloading Spark distribution..." >&2
+  # Download to a temp file first: streaming straight into tar leaves a
+  # partially-extracted $SPARK_DIR if the transfer fails, which the -d
+  # check above would then mistake for a complete install.
+  tmp=$(mktemp)
+  trap 'rm -f -- "$tmp"' EXIT
+  wget -O "$tmp" -- "$SPARK_URL"
+  tar xzf "$tmp"
+fi
+
+PYSPARK_DRIVER_PYTHON="jupyter" PYSPARK_DRIVER_PYTHON_OPTS="notebook" \
+  "./$SPARK_DIR/bin/pyspark"