
Add a basic requirements file

Also add an easy script for running a Spark notebook
Bradlee Speice committed 4 years ago
commit df0132d466 (pull/1/head)
3 changed files with 13 additions and 1 deletion:
  1. .gitignore (+3, -1)
  2. requirements.txt (+1, -0)
  3. start_pyspark.sh (+9, -0)

.gitignore (+3, -1)

@@ -47,4 +47,6 @@ fabric.properties
 
 # Don't include the full snapshot ZIP since it's massive.
 kiva_ds_json.zip
-*.json
+*.json
+spark-*/
+*.swp

requirements.txt (+1, -0)

@@ -0,0 +1 @@
+py4j>=0.10.4
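
py4j is the library PySpark uses to communicate between Python and the JVM, which is why it is the lone pinned dependency. A minimal install sketch, assuming pip and that requirements.txt sits at the repository root:

    pip install -r requirements.txt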

start_pyspark.sh (+9, -0)

@@ -0,0 +1,9 @@
+SPARK_DIR="spark-2.0.1-bin-hadoop2.7"
+SPARK_URL="http://apache.claz.org/spark/spark-2.0.1/spark-2.0.1-bin-hadoop2.7.tgz"
+
+if [ ! -d "$SPARK_DIR" ]; then
+    echo "Downloading Spark distribution..."
+    wget "$SPARK_URL" -O - | tar xzf -
+fi
+
+PYSPARK_DRIVER_PYTHON="jupyter" PYSPARK_DRIVER_PYTHON_OPTS="notebook" "./$SPARK_DIR/bin/pyspark"
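
A usage sketch, assuming the script lives at the repository root and the requirement above is already installed; the comments describe the expected behavior rather than exact output:

    chmod +x start_pyspark.sh
    ./start_pyspark.sh
    # Downloads and unpacks spark-2.0.1-bin-hadoop2.7 into the working directory
    # if it is not already present, then launches pyspark with Jupyter Notebook as
    # the driver, so notebooks opened from that server run against the bundled PySpark.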
