mirror of
https://github.com/deepspeedai/DeepSpeed.git
synced 2025-10-20 15:33:51 +08:00
* Update scripts to handle cases where you have other VMs in your subscription
* Support subscriptions with other VMs and fix for PDSH permission error
* Minor fix to support subscriptions with other VMs
51 lines
1.7 KiB
Bash
Executable File
51 lines
1.7 KiB
Bash
Executable File
#!/bin/bash
#
# Pull the latest DeepSpeed docker image, refresh the DeepSpeed checkout and
# (re)start the container on each VM of the DeepSpeed Azure resource group.
#
# Configuration is read from ./azure_config.json:
#   location         region name; the resource group is deepspeed_rg_<location>
#   ssh_private_key  private key handed to ssh via -i
#   num_vms          number of VMs to update
#
# Uses az, jq and ssh. When pdsh is available all nodes are updated in
# parallel; otherwise they are updated one at a time over plain ssh.

# Print an error message to stderr and abort the script.
die() {
  echo "$*" >&2
  exit 1
}

# Print the value of one top-level key of the config file.
# jq prints the literal string "null" when the key is absent.
config_value() {
  jq -r ".$1" "${azure_config}"
}

azure_config=azure_config.json
if [[ ! -f "${azure_config}" ]]; then
  die "Cannot find $azure_config"
fi

# Fail early with a clear message instead of a confusing "missing ..." error
# caused by an empty command substitution.
command -v jq >/dev/null || die "jq is required to read ${azure_config}"
command -v az >/dev/null || die "The Azure CLI (az) is required"

location=$(config_value location)
if [[ -z "${location}" || "${location}" == "null" ]]; then
  die 'missing location in config'
fi
rg="deepspeed_rg_${location}"

parallel=true
if ! command -v pdsh >/dev/null; then
  echo "Installing pdsh will allow for the docker pull to be done in parallel across the cluster. See: 'apt-get install pdsh'"
  parallel=false
fi

ssh_key=$(config_value ssh_private_key)
if [[ -z "${ssh_key}" || "${ssh_key}" == "null" ]]; then
  die 'missing ssh_private_key in config'
fi

num_vms=$(config_value num_vms)
if [[ -z "${num_vms}" || "${num_vms}" == "null" ]]; then
  die 'missing num_vms in config'
fi
if [[ ! "${num_vms}" =~ ^[1-9][0-9]*$ ]]; then
  die "num_vms must be a positive integer, got '${num_vms}'"
fi

# Kept as an array so each option reaches ssh as exactly one argument.
ssh_args=(
  -i "${ssh_key}"
  -o StrictHostKeyChecking=no
  -o UserKnownHostsFile=/dev/null
)
username=deepspeed

# Commands executed on every node, joined into a single "a; b; c; " line so
# the remote shell receives the same sequence whether sent via ssh or pdsh.
update_cmds=(
  "docker pull deepspeed/deepspeed:latest"
  # -f so that re-running this script replaces the link instead of failing.
  "ln -sf workdir/DeepSpeed/azure/attach.sh attach.sh"
  # Stop here if the checkout is missing rather than running git elsewhere.
  "cd workdir/DeepSpeed || exit 1"
  "git pull"
  "git submodule update --init --recursive"
  "bash azure/start_container.sh"
)
update_script=$(printf '%s; ' "${update_cmds[@]}")

# Query Azure once (not once per node) and collect the public IP address of
# the first num_vms VMs in the resource group.
vm_ips_json=$(az vm list-ip-addresses -g "${rg}") \
  || die "Failed to list IP addresses for resource group ${rg}"

ip_addrs=()
node_id=0
while IFS= read -r ip_addr && (( node_id < num_vms )); do
  if [[ -z "${ip_addr}" || "${ip_addr}" == "null" ]]; then
    echo "Skipping VM ${node_id}: no public IP address" >&2
  else
    ip_addrs+=("${ip_addr}")
  fi
  node_id=$((node_id + 1))
done < <(jq -r '.[].virtualMachine.network.publicIpAddresses[0].ipAddress' \
  <<<"${vm_ips_json}")

if (( ${#ip_addrs[@]} == 0 )); then
  die "No VMs with a public IP address found in resource group ${rg}"
fi
if (( ${#ip_addrs[@]} < num_vms )); then
  echo "Warning: expected ${num_vms} VMs but only ${#ip_addrs[@]} are reachable" >&2
fi

status=0
if [[ "${parallel}" == true ]]; then
  echo "parallel docker pull"
  # Comma-separated host list for pdsh -w, without a trailing comma.
  hosts=$(IFS=,; echo "${ip_addrs[*]}")
  # pdsh takes the extra ssh options as one space-separated string.
  PDSH_RCMD_TYPE=ssh PDSH_SSH_ARGS_APPEND="${ssh_args[*]}" \
    pdsh -w "${hosts}" -l "${username}" "${update_script}"
  status=$?
else
  echo "sequential docker pull"
  for ip_addr in "${ip_addrs[@]}"; do
    # Keep going after a failed node, but remember the failure.
    ssh "${ssh_args[@]}" "${username}@${ip_addr}" "${update_script}" || status=1
  done
fi

exit "${status}"