forked from shanlior/Async-EASGD
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAsyncEASGD.sh
executable file
·51 lines (34 loc) · 1.59 KB
/
AsyncEASGD.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env bash
# Launches a local Async-EASGD run.
# Usage: AsyncEASGD.sh [port]
#   port - base TCP port handed to every process via --port (default: 8080)

# run 2 nodes
numNodes=2
#################################################
# Try to close the ports if they are already used

# Base port: first positional argument, or 8080 when none is given.
port=${1:-8080}

# Reject anything that is not a short run of decimal digits; a non-numeric
# value would otherwise be evaluated as a variable name by $(( )) below.
if [[ ! "$port" =~ ^[0-9]{1,5}$ ]]; then
  printf 'error: port must be a decimal number, got "%s"\n' "$port" >&2
  exit 2
fi

# Force base 10 so that a leading zero (e.g. 08080) is not parsed as octal.
port=$((10#$port))

# Ports port .. port + numNodes + 1 are cleaned, i.e. numNodes + 2 ports.
lastPort=$((port + numNodes + 1))
if ((port < 1 || lastPort > 65535)); then
  printf 'error: ports %d-%d are outside the range 1-65535\n' \
    "$port" "$lastPort" >&2
  exit 2
fi

if command -v fuser >/dev/null 2>&1; then
  for ((currPort = port; currPort <= lastPort; currPort++)); do
    # fuser -k kills the processes using the port; it returns non-zero when
    # the port is not in use, which is the normal case and is ignored here.
    fuser -k "${currPort}/tcp"
    # echo Kill port $currPort
  done
else
  printf 'warning: fuser not found, skipping port cleanup\n' >&2
fi
# OPTIONAL: Uncomment the line below if you want to kill all luajit processes running on the GPUs
# kill $(nvidia-smi -g 0 | awk '$2=="Processes:" {p=1} p && $3 > 0 && $5~/luajit/ {print $3}')
#################################################
# Determine the address passed to every process through --host.
# Legacy net-tools ifconfig prints "inet addr:A.B.C.D" while newer releases
# print "inet A.B.C.D"; both layouts are accepted and the first match is used.
serverip=$(ifconfig 2>/dev/null | awk '
  /inet addr/ { print substr($2, 6); exit }
  $1 == "inet" && $2 ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/ { print $2; exit }
')

# Fallback for hosts without a usable ifconfig: first IPv4 address that
# "hostname -I" reports.
if [[ -z "$serverip" ]] && command -v hostname >/dev/null 2>&1; then
  serverip=$(hostname -I 2>/dev/null | awk '{
    for (i = 1; i <= NF; i++) {
      if ($i ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/) { print $i; exit }
    }
  }')
fi

# Without an address every --host option below would lose its argument.
if [[ -z "$serverip" ]]; then
  printf 'error: could not determine the server IP address\n' >&2
  exit 1
fi
echo "Current server is located at ip: $serverip"

# Start the server (node index 0), the tester and one client per node as
# background jobs; all of them receive the same port and host.
th server.lua --server --numNodes "$numNodes" --numEpochs 50 --nodeIndex 0 \
  --batchSize 128 --port "$port" --host "$serverip" &
th tester.lua --tester --cuda --gpu 1 --numNodes "$numNodes" --numEpochs 50 \
  --batchSize 128 --port "$port" --save testNet --host "$serverip" &
th client.lua --cuda --gpu 1 --numNodes "$numNodes" --nodeIndex 1 \
  --batchSize 128 --port "$port" --host "$serverip" &
th client.lua --cuda --gpu 2 --numNodes "$numNodes" --nodeIndex 2 \
  --batchSize 128 --port "$port" --host "$serverip" &
# run client on a remote client
# ssh -n -f [user]@[host] "sh -c 'cd [Async-EASGD examples dir] ; nohup /home/lior/torch/install/bin/th client.lua --cuda --gpu 1 --numNodes $numNodes --nodeIndex 3 --batchSize 128 --port $port --host $serverip > /dev/null 2>&1 &'"
# run script on a remote client
# ssh -n -f [user]@[host] "sh -c 'cd [script dir] ; nohup ./[script] $port > /dev/null 2>&1 &'"
# Block until every background job started above has exited.
wait