# Stratified Sampling
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
def display_distribution(dataset):
    """
    Print the number and percentage of dogs and cats in the supplied dataset.

    Labels are encoded as integers: ``0`` is a dog and ``1`` is a cat.
    Percentages are computed relative to ``len(dataset)``, so any other
    label values are not reported but still count toward the total.

    Args:
        dataset: A sized, array-like collection of labels (for example a
            1-D NumPy array). May be empty.

    Returns:
        None. The summary is written to standard output.
    """
    values, counts = np.unique(dataset, return_counts=True)
    counter = dict(zip(values, counts))
    total = len(dataset)

    dogs = counter.get(0, 0)
    cats = counter.get(1, 0)

    # An empty dataset has no meaningful ratio; report 0% instead of
    # raising ZeroDivisionError.
    percentage_of_dogs = dogs / total * 100 if total else 0.0
    percentage_of_cats = cats / total * 100 if total else 0.0

    print(
        "Dataset distribution: "
        f"{dogs} dogs ({percentage_of_dogs:.2f}%). "
        f"{cats} cats ({percentage_of_cats:.2f}%). "
    )
# Class sizes for the toy dataset: deliberately imbalanced (80% / 20%).
DOGS = 80
CATS = 20

# Label 0 marks a dog and label 1 marks a cat; all dogs come first.
dataset = np.repeat([0, 1], [DOGS, CATS])
display_distribution(dataset)

# Split twice with the same test size: first as a plain random split
# (stratify=None is train_test_split's default), then stratified on the
# labels themselves, printing the train/test class balance each time so
# the two strategies can be compared.
for stratify_labels in (None, dataset):
    train, test = train_test_split(
        dataset,
        test_size=0.2,
        stratify=stratify_labels,
    )
    display_distribution(train)
    display_distribution(test)