forked from recommenders-team/recommenders
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprepare_databricks_for_o16n.sh
91 lines (77 loc) · 3.63 KB
/
prepare_databricks_for_o16n.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# ---------------------------------------------------------
# This script installs appropriate external libraries onto
# a databricks cluster for operationalization.
# Abort early if the databricks CLI is not installed / not on PATH.
# 'command -v' is the portable replacement for 'which' (which is not
# guaranteed to exist or to set a useful exit status on all systems).
if ! command -v databricks >/dev/null 2>&1; then
echo "No databricks-cli found!! Please see the SETUP.md file for installation prerequisites." >&2
exit 1
fi
# Target cluster id is the single required positional argument.
CLUSTER_ID=$1
# Quote the expansion: an unquoted empty $CLUSTER_ID would make this
# '[ -z ]' (always true by accident) and spaces would break the test.
if [ -z "$CLUSTER_ID" ]; then
# NOTE: fixed script name in the usage line ('o16n', not '016n').
echo "Please provide the target cluster id: 'prepare_databricks_for_o16n.sh <CLUSTER_ID>'." >&2
echo "Cluster id can be found by running 'databricks clusters list'" >&2
echo "which returns a list of <CLUSTER_ID> <CLUSTER_NAME> <STATUS>." >&2
exit 1
fi
## Spark-CosmosDB connector uber-jar; this artifact targets spark >=2.3.0 / scala 2.11.
COSMOSDB_CONNECTOR_URL="https://search.maven.org/remotecontent?filepath=com/microsoft/azure/azure-cosmosdb-spark_2.3.0_2.11/1.2.2/azure-cosmosdb-spark_2.3.0_2.11-1.2.2-uber.jar"
# Quote the URL: it contains no spaces today, but unquoted expansion is
# subject to word-splitting and globbing (SC2086).
COSMOSDB_CONNECTOR_BASENAME=$(basename "$COSMOSDB_CONNECTOR_URL")
# Set to true by the scan loop below when the target cluster id is seen.
CLUSTER_EXIST=false
# Pinned pypi packages to install on the cluster.
PYPI_LIBRARIES=( "azure-cli==2.0.56" "azureml-sdk[databricks]==1.0.8" "pydocumentdb==2.3.3" )
# Scan 'databricks clusters list' output (<CLUSTER_ID> <CLUSTER_NAME> <STATUS>)
# for the requested cluster. Process substitution (not a pipe) is used so
# CLUSTER_EXIST set inside the loop survives after it.
while IFS=' ' read -ra ARR; do
if [ "${ARR[0]}" = "$CLUSTER_ID" ]; then
CLUSTER_EXIST=true
STATUS=${ARR[2]}
# Strip non-letters so decorations around the status don't break the compare.
STATUS=${STATUS//[^a-zA-Z]/}
if [ "$STATUS" = "RUNNING" ]; then
## install each of the pypi libraries
for lib in "${PYPI_LIBRARIES[@]}"
do
echo
echo "Adding $lib"
echo
databricks libraries install --cluster-id "$CLUSTER_ID" --pypi-package "$lib"
done
## get spark-cosmosdb connector:
echo
echo "downloading cosmosdb connector jar file"
echo
# -f makes curl fail on HTTP errors instead of saving an error page as the jar.
if ! curl -f -O "$COSMOSDB_CONNECTOR_URL"; then
echo "Failed to download $COSMOSDB_CONNECTOR_URL" >&2
exit 1
fi
## upload the jar to dbfs
echo
echo "Uploading to dbfs"
echo
dbfs cp --overwrite "${COSMOSDB_CONNECTOR_BASENAME}" "dbfs:/FileStore/jars/${COSMOSDB_CONNECTOR_BASENAME}"
# install from dbfs
echo
echo "Adding ${COSMOSDB_CONNECTOR_BASENAME} as library"
echo
databricks libraries install --cluster-id "$CLUSTER_ID" --jar "dbfs:/FileStore/jars/${COSMOSDB_CONNECTOR_BASENAME}"
## Check installation status
echo
echo "Done! Installation status checking..."
databricks libraries cluster-status --cluster-id "$CLUSTER_ID"
echo
echo "Restarting the cluster to activate the library..."
databricks clusters restart --cluster-id "$CLUSTER_ID"
echo "This will take few seconds. Please check the result from Databricks workspace."
echo "Alternatively, run 'databricks clusters list' to check the restart status and"
echo "run 'databricks libraries cluster-status --cluster-id $CLUSTER_ID' to check the installation status."
exit 0
else
echo "Cluster $CLUSTER_ID found, but it is not running. Status=${STATUS}"
echo "You can start the cluster with 'databricks clusters start --cluster-id $CLUSTER_ID'."
echo "Then, check the cluster status by using 'databricks clusters list' and"
echo "re-try installation once the status turns into RUNNING."
exit 1
fi
fi
done < <(databricks clusters list)
# Reached only if the scan loop never matched (it exits on a match).
# Quoted comparison: the original unquoted '[ $CLUSTER_EXIST = true ]'
# becomes a [ syntax error if the variable is ever empty/unset.
if [ "$CLUSTER_EXIST" != "true" ]; then
echo "Cannot find the target cluster $CLUSTER_ID. Please check if you entered the valid id." >&2
echo "Cluster id can be found by running 'databricks clusters list'" >&2
echo "which returns a list of <CLUSTER_ID> <CLUSTER_NAME> <STATUS>." >&2
exit 1
fi