Skip to content
This repository has been archived by the owner on Mar 20, 2023. It is now read-only.

Commit

Permalink
Node prep script improvements
Browse files: browse the repository at this point in the history
- Blacklist nouveau universally on GPU VMs
- Change URL retrieval to requests
- Update requirements to latest
  • Loading branch information
alfpark committed May 5, 2017
1 parent 2d53e41 commit 983a7ee
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 23 deletions.
35 changes: 24 additions & 11 deletions convoy/fleet.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,8 @@
import pathlib2 as pathlib
except ImportError:
import pathlib
import requests
import time
try:
import urllib.request as urllibreq
except ImportError:
import urllib as urllibreq
import uuid
# non-stdlib imports
import azure.batch.models as batchmodels
Expand All @@ -63,6 +60,7 @@
logger = logging.getLogger(__name__)
util.setup_logger(logger)
# global defines
_REQUEST_CHUNK_SIZE = 4194304
_ROOT_PATH = pathlib.Path(__file__).resolve().parent.parent
_AZUREFILE_DVD_BIN = {
'url': (
Expand All @@ -72,6 +70,7 @@
'sha256': (
'288f809a1290ea8daf89d222507bda9b3709a9665cec8b70354a50252395e127'
),
'target': 'resources/azurefile-dockervolumedriver'
}
_NVIDIA_DOCKER = {
'ubuntuserver': {
Expand Down Expand Up @@ -275,12 +274,18 @@ def _setup_nvidia_driver_package(blob_client, config, vm_size):
raise RuntimeError(
'Cannot proceed with deployment due to non-agreement with '
'license for NVIDIA driver')
else:
logger.info('NVIDIA Software License accepted')
# download driver
logger.debug('downloading NVIDIA driver to {}'.format(
_NVIDIA_DRIVER['target']))
response = urllibreq.urlopen(_NVIDIA_DRIVER[gpu_type]['url'])
response = requests.get(_NVIDIA_DRIVER[gpu_type]['url'], stream=True)
with pkg.open('wb') as f:
f.write(response.read())
for chunk in response.iter_content(chunk_size=_REQUEST_CHUNK_SIZE):
if chunk:
f.write(chunk)
logger.debug('wrote {} bytes to {}'.format(
pkg.stat().st_size, _NVIDIA_DRIVER['target']))
# check sha256
if (util.compute_sha256_for_file(pkg, False) !=
_NVIDIA_DRIVER[gpu_type]['sha256']):
Expand Down Expand Up @@ -308,9 +313,13 @@ def _setup_nvidia_docker_package(blob_client, config):
# download package
logger.debug('downloading NVIDIA docker to {}'.format(
_NVIDIA_DOCKER[offer]['target']))
response = urllibreq.urlopen(_NVIDIA_DOCKER[offer]['url'])
response = requests.get(_NVIDIA_DOCKER[offer]['url'], stream=True)
with pkg.open('wb') as f:
f.write(response.read())
for chunk in response.iter_content(chunk_size=_REQUEST_CHUNK_SIZE):
if chunk:
f.write(chunk)
logger.debug('wrote {} bytes to {}'.format(
pkg.stat().st_size, _NVIDIA_DOCKER[offer]['target']))
# check sha256
if (util.compute_sha256_for_file(pkg, False) !=
_NVIDIA_DOCKER[offer]['sha256']):
Expand All @@ -331,15 +340,19 @@ def _setup_azurefile_volume_driver(blob_client, config):
offer = settings.pool_offer(config, lower=True)
sku = settings.pool_sku(config, lower=True)
# check to see if binary is downloaded
bin = pathlib.Path(_ROOT_PATH, 'resources/azurefile-dockervolumedriver')
bin = pathlib.Path(_ROOT_PATH, _AZUREFILE_DVD_BIN['target'])
if (not bin.exists() or
util.compute_sha256_for_file(bin, False) !=
_AZUREFILE_DVD_BIN['sha256']):
# download package
logger.debug('downloading Azure File Docker Volume Driver')
response = urllibreq.urlopen(_AZUREFILE_DVD_BIN['url'])
response = requests.get(_AZUREFILE_DVD_BIN['url'], stream=True)
with bin.open('wb') as f:
f.write(response.read())
for chunk in response.iter_content(chunk_size=_REQUEST_CHUNK_SIZE):
if chunk:
f.write(chunk)
logger.debug('wrote {} bytes to {}'.format(
bin.stat().st_size, _AZUREFILE_DVD_BIN['target']))
# check sha256
if (util.compute_sha256_for_file(bin, False) !=
_AZUREFILE_DVD_BIN['sha256']):
Expand Down
3 changes: 2 additions & 1 deletion docs/02-batch-shipyard-quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ to your local machine has been completed.
2. Create a directory to hold your configuration files. For this quickstart
guide, create a directory named `config`.
3. Copy the sample configuration files from the Deep Learning framework recipe
of your choice to the `config` directory:
of your choice to the `config` directory (please note that some Docker images,
such as CNTK, are very large, which will lead to longer pool allocation times):
* [CNTK-CPU-OpenMPI](../recipes/CNTK-CPU-OpenMPI/config/singlenode/)
* [Caffe-CPU](../recipes/Caffe-CPU/config/)
* [Chainer-CPU](../recipes/Chainer-CPU/config/)
Expand Down
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@ azure-batch==2.0.1
azure-keyvault==0.2.0
azure-mgmt-batch==3.0.1
azure-mgmt-compute==1.0.0rc1
azure-mgmt-network==1.0.0rc2
azure-mgmt-resource==1.0.0rc1
azure-mgmt-network==1.0.0rc3
azure-mgmt-resource==1.0.0rc2
azure-storage==0.34.0
blobxfer==0.12.1
click==6.7
future==0.16.0
msrest==0.4.7
msrestazure==0.4.7
pathlib2==2.2.1; python_version < '3.5'
requests==2.13.0
scandir==1.5; python_version < '3.5'
17 changes: 8 additions & 9 deletions scripts/shipyard_nodeprep.sh
Original file line number Diff line number Diff line change
Expand Up @@ -132,13 +132,15 @@ check_for_buggy_ntfs_mount() {

check_for_nvidia_card() {
set +e
lspci
lspci | grep -i nvidia > /dev/null
out=$(lspci)
echo "$out" | grep -i nvidia > /dev/null
if [ $? -ne 0 ]; then
echo $out
echo "ERROR: No Nvidia card(s) detected!"
exit 1
fi
set -e
echo $out
}

install_azurefile_docker_volume_driver() {
Expand Down Expand Up @@ -376,20 +378,17 @@ if [ $offer == "ubuntuserver" ] || [ $offer == "debian" ]; then
check_for_nvidia_card
# split arg into two
IFS=':' read -ra GPUARGS <<< "$gpu"
# take special actions if we're on NV-series VMs
if [ ${GPUARGS[0]} == "True" ]; then
# remove nouveau
apt-get --purge remove xserver-xorg-video-nouveau
rmmod nouveau
# blacklist nouveau from being loaded if rebooted
# remove nouveau
apt-get --purge remove xserver-xorg-video-nouveau
rmmod nouveau
# blacklist nouveau from being loaded if rebooted
cat > /etc/modprobe.d/blacklist-nouveau.conf << EOF
blacklist nouveau
blacklist lbm-nouveau
options nouveau modeset=0
alias nouveau off
alias lbm-nouveau off
EOF
fi
nvdriver=${GPUARGS[1]}
nvdocker=${GPUARGS[2]}
# get development essentials for nvidia driver
Expand Down

0 comments on commit 983a7ee

Please sign in to comment.