%%bash
curl "https://s3.amazonaws.com/aws-cli/awscli-bundle.zip" -o "awscli-bundle.zip" # Download the AWS CLI v1 bundled installer from Amazon
unzip awscli-bundle.zip
sudo ./awscli-bundle/install -i /usr/local/aws -b /usr/local/bin/aws
rm ./awscli-bundle.zip
rm -r ./awscli-bundle
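Once the installer finishes, it can help to verify that `aws` actually landed on the PATH before moving on. Below is a minimal Python check; the `cli_version` helper is our own sketch, not part of the AWS CLI:

```python
import shutil
import subprocess
from typing import Optional

def cli_version(name: str) -> Optional[str]:
    """Return the tool's --version output, or None if it is not on PATH."""
    path = shutil.which(name)
    if path is None:
        return None
    out = subprocess.run([path, "--version"], capture_output=True, text=True)
    # The AWS CLI v1 historically printed its version to stderr, so check both streams.
    return (out.stdout or out.stderr).strip() or None

print(cli_version("aws"))
```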
%%bash
if [[ -d ~/.aws ]] # If a ~/.aws directory already exists, remove it
then
rm -r ~/.aws
fi
mkdir ~/.aws
touch ~/.aws/config
echo "[default]" >> ~/.aws/config
echo "region=us-west-2" >> ~/.aws/config # THIS MUST MATCH the region in cluster-config.yaml.
echo "output=json" >> ~/.aws/config
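The same profile can be written and read back from Python with the standard `configparser` module. This sketch uses a temporary file so it does not touch the real `~/.aws/config`:

```python
import configparser
import os
import tempfile

# Build the same [default] profile the bash cell above creates.
config = configparser.ConfigParser()
config["default"] = {"region": "us-west-2", "output": "json"}

with tempfile.TemporaryDirectory() as d:
    path = os.path.join(d, "config")
    with open(path, "w") as f:
        config.write(f)
    # Read it back to confirm the values round-trip.
    check = configparser.ConfigParser()
    check.read(path)
    print(check["default"]["region"])  # us-west-2
```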
%%bash
sudo apt-get update -y
sudo apt-get install rsync -y
!ray up ./cluster-config.yaml -y # launch the cluster; -y auto-accepts the confirmation prompt
Cluster: example_cluster
Checking AWS environment settings
AWS config
IAM Profile: ray-autoscaler-v1 [default]
EC2 Key pair (all available node types): ray-autoscaler_18_us-west-2 [default]
VPC Subnets (all available node types): subnet-b29806ca, subnet-74259d3e [default]
EC2 Security groups (all available node types): sg-05ac775661dd43007 [default]
EC2 AMI (all available node types): ami-0a2363a9cff180a64 [dlami]
No head node found. Launching a new cluster. Confirm [y/N]: y [automatic, due to --yes]
Acquiring an up-to-date head node
Launched 1 nodes [subnet_id=subnet-74259d3e]
Launched instance i-0784be4c1d83f9a84 [state=pending, info=pending]
Launched a new head node
Fetching the new head node
<1/1> Setting up head node
Prepared bootstrap config
New status: waiting-for-ssh
[1/7] Waiting for SSH to become available
Running `uptime` as a test.
Waiting for IP
Not yet available, retrying in 5 seconds
Not yet available, retrying in 5 seconds
Not yet available, retrying in 5 seconds
Not yet available, retrying in 5 seconds
Not yet available, retrying in 5 seconds
Not yet available, retrying in 5 seconds
Received: 34.214.186.57
SSH still not available (SSH command failed.), retrying in 5 seconds.
SSH still not available (SSH command failed.), retrying in 5 seconds.
SSH still not available (SSH command failed.), retrying in 5 seconds.
SSH still not available (SSH command failed.), retrying in 5 seconds.
02:56:42 up 0 min, 1 user, load average: 3.28, 0.81, 0.27
Success.
Updating cluster configuration. [hash=9712f8be1aa1ec7cddf165cb0530e95ab3d2576c]
New status: syncing-files
[2/7] Processing file mounts
[3/7] No worker file mounts to sync
New status: setting-up
[4/7] No initialization commands to run.
[5/7] Initalizing command runner
latest-cpu: Pulling from rayproject/ray
Digest: sha256:c3b15b82825d978fd068a1619e486020c7211545c80666804b08a95ef7665371
Status: Downloaded newer image for rayproject/ray:latest-cpu
docker.io/rayproject/ray:latest-cpu
NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running.
2021-08-17 02:59:04,177 WARNING command_runner.py:904 -- Nvidia Container Runtime is present, but no GPUs found.
e60e3bb452763ad36ed2640a5cb51b647fd86115eadf520f7d4de4b7871ab1e4
[6/7] Running setup commands
(0/2) pip install joblib
Collecting joblib
Downloading joblib-1.0.1-py3-none-any.whl (303 kB)
|████████████████████████████████| 303 kB 6.3 MB/s
Installing collected packages: joblib
Successfully installed joblib-1.0.1
(1/2) pip install scikit-learn
Collecting scikit-learn
Downloading scikit_learn-0.24.2-cp37-cp37m-manylinux2010_x86_64.whl (22.3 MB)
|████████████████████████████████| 22.3 MB 26.6 MB/s
Requirement already satisfied: scipy>=0.19.1 in ./anaconda3/lib/python3.7/site-packages (from scikit-learn) (1.7.1)
Requirement already satisfied: numpy>=1.13.3 in ./anaconda3/lib/python3.7/site-packages (from scikit-learn) (1.21.1)
Requirement already satisfied: joblib>=0.11 in ./anaconda3/lib/python3.7/site-packages (from scikit-learn) (1.0.1)
Collecting threadpoolctl>=2.0.0
Downloading threadpoolctl-2.2.0-py3-none-any.whl (12 kB)
Installing collected packages: threadpoolctl, scikit-learn
Successfully installed scikit-learn-0.24.2 threadpoolctl-2.2.0
[7/7] Starting the Ray runtime
Did not find any active Ray processes.
Local node IP: 172.31.47.32
2021-08-16 19:59:53,639 INFO services.py:1247 -- View the Ray dashboard at http://127.0.0.1:8265
2021-08-16 19:59:53,642 WARNING services.py:1716 -- WARNING: The object store is using /tmp instead of /dev/shm because /dev/shm has only 196902912 bytes available. This will harm performance! You may be able to free up space by deleting files in /dev/shm. If you are inside a Docker container, you can increase /dev/shm size by passing '--shm-size=0.25gb' to 'docker run' (or add it to the run_options list in a Ray cluster config). Make sure to set this to more than 30% of available RAM.
--------------------
Ray runtime started.
--------------------
Next steps
To connect to this Ray runtime from another node, run
ray start --address='172.31.47.32:6379' --redis-password='5241590000000000'
Alternatively, use the following Python code:
import ray
ray.init(address='auto', _redis_password='5241590000000000')
If connection fails, check your firewall settings and network configuration.
To terminate the Ray runtime, run
ray stop
New status: up-to-date
Useful commands
Monitor autoscaling with
ray exec /work/cluster-config.yaml 'tail -n 100 -f /tmp/ray/session_latest/logs/monitor*'
Connect to a terminal on the cluster head:
ray attach /work/cluster-config.yaml
Get a remote shell to the cluster manually:
ssh -tt -o IdentitiesOnly=yes -i /root/.ssh/ray-autoscaler_18_us-west-2.pem ubuntu@34.214.186.57 docker exec -it ray_container /bin/bash
# Remove any stale head-node instance info left over from a previous run
import os
if os.path.exists('headinstance.json'):
    os.remove('headinstance.json')
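Equivalently, `pathlib` (Python 3.8+) folds the existence check into `unlink`:

```python
from pathlib import Path

# missing_ok=True makes unlink a no-op when the file is absent (Python 3.8+)
Path('headinstance.json').unlink(missing_ok=True)
```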
%%bash
aws ec2 describe-instances --filters "Name=tag:Name,Values=ray-example_cluster-head" \
    --query "Reservations[*].Instances[*].{Ip:PublicIpAddress, SgGroupId:NetworkInterfaces[*].Groups[*].GroupId}" \
    > headinstance.json
# Read the required info
import json
with open('headinstance.json') as f:
    instance_info = json.load(f)
for instance in instance_info:
    if instance[0]['Ip'] is not None:
        conn_addr, sggroupid = instance[0]['Ip'], instance[0]['SgGroupId'][0][0]
print(f'Connect to {conn_addr} with sggroupid {sggroupid}')
Connect to 34.214.186.57 with sggroupid sg-05ac775661dd43007
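The nested indexing above follows the shape of the `--query` result: a list of reservations, each holding a list of `{Ip, SgGroupId}` objects. A self-contained sketch of that parsing, using the values from this run as sample data:

```python
import json

# One reservation, containing one instance with its public IP and the
# (doubly nested) security group IDs of its network interfaces.
sample = json.loads("""
[[{"Ip": "34.214.186.57", "SgGroupId": [["sg-05ac775661dd43007"]]}]]
""")

for reservation in sample:
    inst = reservation[0]
    if inst["Ip"] is not None:
        conn_addr, sggroupid = inst["Ip"], inst["SgGroupId"][0][0]
print(conn_addr, sggroupid)
```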
!aws ec2 authorize-security-group-ingress --group-id {sggroupid} --protocol tcp --port 10001 --cidr $(curl ipinfo.io/ip)/24
!aws ec2 authorize-security-group-ingress --group-id {sggroupid} --protocol udp --port 10001 --cidr $(curl ipinfo.io/ip)/24
!aws ec2 authorize-security-group-egress --group-id {sggroupid} --protocol tcp --port 10001 --cidr $(curl ipinfo.io/ip)/24
!aws ec2 authorize-security-group-egress --group-id {sggroupid} --protocol udp --port 10001 --cidr $(curl ipinfo.io/ip)/24
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 12 100 12 0 0 82 0 --:--:-- --:--:-- --:--:-- 82
{
"Return": true,
"SecurityGroupRules": [
{
"SecurityGroupRuleId": "sgr-03ee1306066d8c04c",
"GroupId": "sg-05ac775661dd43007",
"GroupOwnerId": "680074127864",
"IsEgress": false,
"IpProtocol": "tcp",
"FromPort": 10001,
"ToPort": 10001,
"CidrIpv4": "54.152.6.0/24"
}
]
}
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 12 100 12 0 0 363 0 --:--:-- --:--:-- --:--:-- 363
{
"Return": true,
"SecurityGroupRules": [
{
"SecurityGroupRuleId": "sgr-0d845b570e1315431",
"GroupId": "sg-05ac775661dd43007",
"GroupOwnerId": "680074127864",
"IsEgress": false,
"IpProtocol": "udp",
"FromPort": 10001,
"ToPort": 10001,
"CidrIpv4": "54.152.6.0/24"
}
]
}
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 12 100 12 0 0 363 0 --:--:-- --:--:-- --:--:-- 363
{
"Return": true,
"SecurityGroupRules": [
{
"SecurityGroupRuleId": "sgr-041471d1efced867b",
"GroupId": "sg-05ac775661dd43007",
"GroupOwnerId": "680074127864",
"IsEgress": true,
"IpProtocol": "tcp",
"FromPort": 10001,
"ToPort": 10001,
"CidrIpv4": "54.152.6.0/24"
}
]
}
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 12 100 12 0 0 210 0 --:--:-- --:--:-- --:--:-- 210
{
"Return": true,
"SecurityGroupRules": [
{
"SecurityGroupRuleId": "sgr-0e7a89b76dbd3d919",
"GroupId": "sg-05ac775661dd43007",
"GroupOwnerId": "680074127864",
"IsEgress": true,
"IpProtocol": "udp",
"FromPort": 10001,
"ToPort": 10001,
"CidrIpv4": "54.152.6.0/24"
}
]
}
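Each rule's `CidrIpv4` above is the /24 network around our public IP, which is exactly what `$(curl ipinfo.io/ip)/24` expands to. The same block can be computed with the standard `ipaddress` module (the IP below is illustrative):

```python
import ipaddress

def cidr_for(ip: str, prefix: int = 24) -> str:
    """Return the CIDR block of the given prefix length that contains ip."""
    # strict=False accepts a host address and masks it down to the network address
    return str(ipaddress.ip_network(f"{ip}/{prefix}", strict=False))

print(cidr_for("54.152.6.91"))  # 54.152.6.0/24
```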
import time
time.sleep(30) # Let things settle down on the head node
import ray
ray.init(address=f'ray://{conn_addr}:10001')
import socket
from collections import Counter
@ray.remote
def check_hosts():
    time.sleep(5)
    return socket.gethostname()

for run in range(5):
    if run != 0:
        time.sleep(60)  # give newly launched worker nodes time to initialize
    remote_promises = [check_hosts.remote() for _ in range(10)]
    ids = ray.get(remote_promises)
    print(Counter(ids))
(autoscaler +19s) Tip: use `ray status` to view detailed autoscaling status. To disable autoscaler event messages, you can set AUTOSCALER_EVENTS=0.
(autoscaler +19s) Adding 3 nodes of type ray.worker.default.
Counter({'ip-172-31-47-32': 10})
(autoscaler +1m16s) Resized to 2 CPUs.
(autoscaler +1m22s) Resized to 3 CPUs.
(autoscaler +2m1s) Adding 1 nodes of type ray.worker.default.
Counter({'ip-172-31-47-32': 4, 'ip-172-31-35-132': 3, 'ip-172-31-45-161': 3})
Counter({'ip-172-31-47-32': 4, 'ip-172-31-35-132': 3, 'ip-172-31-45-161': 3})
(autoscaler +3m43s) Resized to 5 CPUs.
(autoscaler +3m49s) Resized to 6 CPUs.
Counter({'ip-172-31-47-32': 2, 'ip-172-31-35-132': 2, 'ip-172-31-22-184': 2, 'ip-172-31-45-161': 2, 'ip-172-31-31-234': 1, 'ip-172-31-19-139': 1})
(autoscaler +5m31s) Resized to 7 CPUs.
Counter({'ip-172-31-47-32': 2, 'ip-172-31-35-132': 2, 'ip-172-31-22-184': 2, 'ip-172-31-43-4': 1, 'ip-172-31-45-161': 1, 'ip-172-31-31-234': 1, 'ip-172-31-19-139': 1})
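The fan-out-and-count pattern in `check_hosts` can be reproduced locally, without a cluster, using the standard library's thread pool; with no remote workers, the Counter collapses to a single hostname:

```python
import socket
import time
from collections import Counter
from concurrent.futures import ThreadPoolExecutor

def check_host() -> str:
    time.sleep(0.1)  # stand-in for real work
    return socket.gethostname()

# Submit ten tasks and gather results, mirroring the remote() / ray.get() pair.
with ThreadPoolExecutor(max_workers=10) as pool:
    futures = [pool.submit(check_host) for _ in range(10)]
    ids = [f.result() for f in futures]

print(Counter(ids))  # a single key: every task ran on this machine
```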
!aws ec2 revoke-security-group-ingress --group-id {sggroupid} --protocol tcp --port 10001 --cidr $(curl ipinfo.io/ip)/24
!aws ec2 revoke-security-group-ingress --group-id {sggroupid} --protocol udp --port 10001 --cidr $(curl ipinfo.io/ip)/24
!aws ec2 revoke-security-group-egress --group-id {sggroupid} --protocol tcp --port 10001 --cidr $(curl ipinfo.io/ip)/24
!aws ec2 revoke-security-group-egress --group-id {sggroupid} --protocol udp --port 10001 --cidr $(curl ipinfo.io/ip)/24
!ray down ./cluster-config.yaml -y
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 12 100 12 0 0 179 0 --:--:-- --:--:-- --:--:-- 179
{
"Return": true
}
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 12 100 12 0 0 324 0 --:--:-- --:--:-- --:--:-- 324
{
"Return": true
}
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 12 100 12 0 0 342 0 --:--:-- --:--:-- --:--:-- 342
{
"Return": true
}
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 12 100 12 0 0 363 0 --:--:-- --:--:-- --:--:-- 375
{
"Return": true
}
Checking AWS environment settings
Destroying cluster. Confirm [y/N]: y [automatic, due to --yes]
Loaded cached provider configuration
If you experience issues with the cloud provider, try re-running the command with --no-config-cache.
Fetched IP: 34.214.186.57
Stopped all 15 Ray processes.
Fetched IP: 34.214.186.57
Fetched IP: 54.244.171.137
Fetched IP: 54.201.158.156
Fetched IP: 54.203.18.219
Fetched IP: 35.164.127.32
Fetched IP: 54.245.31.197
Fetched IP: 54.191.169.84
Requested 7 nodes to shut down. [interval=1s]
0 nodes remaining after 5 second(s).
No nodes remaining.