%%bash
# Install the AWS CLI from the bundled installer into /usr/local/aws (with a
# launcher at /usr/local/bin/aws), then delete the downloaded artifacts.
#
# Stop at the first failing step so a broken download is never handed to the
# root-level installer.
set -euo pipefail

# Download the AWS CLI from Amazon's website.
# -f: fail on an HTTP error instead of saving the error page as the zip.
# -L: follow redirects.
curl -fL "https://s3.amazonaws.com/aws-cli/awscli-bundle.zip" -o "awscli-bundle.zip"

# -o: overwrite leftovers from an earlier attempt instead of prompting; the
# cell has no interactive stdin to answer a prompt.
unzip -o awscli-bundle.zip

sudo ./awscli-bundle/install -i /usr/local/aws -b /usr/local/bin/aws

# Clean up the archive and the unpacked installer.
rm -- ./awscli-bundle.zip
rm -r -- ./awscli-bundle
%%bash
# Recreate ~/.aws from scratch and write a [default] profile into
# ~/.aws/config. Anything previously stored under ~/.aws is removed.

# If there exists a directory called ~/.aws, delete it first.
if [[ -d ~/.aws ]]; then
  rm -r ~/.aws
fi

mkdir ~/.aws
touch ~/.aws/config

# The region below MUST MATCH WITH cluster-config.yaml.
cat >> ~/.aws/config <<'EOF'
[default]
region=us-west-2
output=json
EOF
%%bash
# Refresh the apt package index, then install rsync. -y makes apt-get assume
# "yes" so neither step waits for confirmation.
# NOTE(review): rsync is presumably needed by the cluster launcher to sync
# files to the nodes -- confirm.
sudo apt-get -y update
sudo apt-get -y install rsync
!ray up ./cluster-config.yaml -y # launch the cluster with -y to automatically accept
# This cleans up the old head node instance info in case this is not a new
# machine, so a leftover file from an earlier run is not reused.
import os

_stale_head_info = 'headinstance.json'
if os.path.exists(_stale_head_info):
    os.remove(_stale_head_info)
%%bash
# Save the public IP and security-group IDs of every instance tagged
# Name=ray-example_cluster-head to headinstance.json.
#
# The --query projection emits a list of reservations, each a list of
# {"Ip": ..., "SgGroupId": [[...], ...]} records (one inner list of group IDs
# per network interface).
#
# The file is overwritten (>) rather than appended to (>>): appending a second
# result to an existing file leaves two concatenated JSON documents, which
# json.load cannot parse.
aws ec2 describe-instances \
  --filters "Name=tag:Name,Values=ray-example_cluster-head" \
  --query "Reservations[*].Instances[*].{Ip:PublicIpAddress, SgGroupId:NetworkInterfaces[*].Groups[*].GroupId}" \
  > headinstance.json
# Read the required info: the head node's public IP (conn_addr) and one of its
# security-group IDs (sggroupid) from headinstance.json.
import json

with open('headinstance.json') as f:
    instance_info = json.load(f)

# instance_info is a list of reservations; each reservation is a list of
# {'Ip': ..., 'SgGroupId': [[group-id, ...], ...]} records.
conn_addr = sggroupid = None
for reservation in instance_info:
    # Look at every instance in the reservation, not just the first, and
    # tolerate empty reservations.
    for instance in reservation:
        # Records whose Ip is null carry no usable address; skip them.
        if instance['Ip'] is not None:
            # Take the first group of the first network interface. If several
            # records qualify, the last one wins.
            conn_addr, sggroupid = instance['Ip'], instance['SgGroupId'][0][0]

# Fail here with a clear message instead of a NameError further down.
if conn_addr is None:
    raise RuntimeError(
        'No instance with a public IP address found in headinstance.json; '
        'is the cluster head node running?'
    )

print(f'Connect to {conn_addr} with sggroupid {sggroupid}')
!aws ec2 authorize-security-group-ingress --group-id {sggroupid} --protocol tcp --port 10001 --cidr $(curl ipinfo.io/ip)/24
!aws ec2 authorize-security-group-ingress --group-id {sggroupid} --protocol udp --port 10001 --cidr $(curl ipinfo.io/ip)/24
!aws ec2 authorize-security-group-egress --group-id {sggroupid} --protocol tcp --port 10001 --cidr $(curl ipinfo.io/ip)/24
!aws ec2 authorize-security-group-egress --group-id {sggroupid} --protocol udp --port 10001 --cidr $(curl ipinfo.io/ip)/24
import time
import ray

# Let things settle down on the head node before connecting.
time.sleep(30)

# Connect to the cluster at the head node's address on port 10001 using the
# ray:// address scheme.
ray_client_address = f'ray://{conn_addr}:10001'
ray.init(address=ray_client_address)
import socket
from collections import Counter


@ray.remote
def check_hosts():
    """Sleep for five seconds, then return the hostname of the executing machine.

    NOTE(review): the sleep presumably keeps each task busy long enough for
    the batch to spread across several nodes -- confirm.
    """
    time.sleep(5)
    hostname = socket.gethostname()
    return hostname
# Five sampling rounds: each launches ten check_hosts tasks and prints how
# many of them ran on each hostname.
for run in range(5):
    if run > 0:
        # Every round after the first waits a minute beforehand.
        time.sleep(60)  # Let boxes make progress initialising
    remote_promises = [check_hosts.remote() for _ in range(10)]
    ids = ray.get(remote_promises)  # blocks until all ten tasks have returned
    print(Counter(ids))
!aws ec2 revoke-security-group-ingress --group-id {sggroupid} --protocol tcp --port 10001 --cidr $(curl ipinfo.io/ip)/24
!aws ec2 revoke-security-group-ingress --group-id {sggroupid} --protocol udp --port 10001 --cidr $(curl ipinfo.io/ip)/24
!aws ec2 revoke-security-group-egress --group-id {sggroupid} --protocol tcp --port 10001 --cidr $(curl ipinfo.io/ip)/24
!aws ec2 revoke-security-group-egress --group-id {sggroupid} --protocol udp --port 10001 --cidr $(curl ipinfo.io/ip)/24
!ray down ./cluster-config.yaml -y