import numpy as np
import pandas as pd
import rdkit
from rdkit.Chem import Draw, Lipinski, Crippen, Descriptors
from rdkit.Chem.Draw import rdMolDraw2D
from IPython.display import SVG
mol = rdkit.Chem.MolFromSmiles('Cn1cnc2n(C)c(=O)n(C)c(=O)c12')

# Render the molecule as a 350x300 SVG. The drawing utility is incidental;
# what matters is that `mol` is now a Mol object describing the molecule.
d2d = rdMolDraw2D.MolDraw2DSVG(350, 300)
d2d.DrawMolecule(mol)
d2d.FinishDrawing()
SVG(d2d.GetDrawingText())


def _print_atom_table(molecule):
    """Print atomic number, hybridization and formal charge for every atom.

    Atoms are visited in the order returned by ``molecule.GetAtoms()``, which
    depends on the SMILES string the molecule was built from.
    """
    for atom in molecule.GetAtoms():
        print(atom.GetAtomicNum(), atom.GetHybridization(), atom.GetFormalCharge())


# Atom-level properties of the molecule as parsed (hydrogens are not
# included by default).
_print_atom_table(mol)

# Hydrogens have to be added explicitly before they show up as atoms.
mol_with_Hs = rdkit.Chem.AddHs(mol)
print("With added hydrogens:")
_print_atom_table(mol_with_Hs)

# Molecule-level properties: atom count, bond count, heavy-atom count.
print(mol_with_Hs.GetNumAtoms())
print(mol_with_Hs.GetNumBonds())
print(mol_with_Hs.GetNumHeavyAtoms())
6 SP3 0
7 SP2 0
6 SP2 0
7 SP2 0
6 SP2 0
7 SP2 0
6 SP3 0
6 SP2 0
8 SP2 0
7 SP2 0
6 SP3 0
6 SP2 0
8 SP2 0
6 SP2 0
With added hydrogens:
6 SP3 0
7 SP2 0
6 SP2 0
7 SP2 0
6 SP2 0
7 SP2 0
6 SP3 0
6 SP2 0
8 SP2 0
7 SP2 0
6 SP3 0
6 SP2 0
8 SP2 0
6 SP2 0
1 UNSPECIFIED 0
1 UNSPECIFIED 0
1 UNSPECIFIED 0
1 UNSPECIFIED 0
1 UNSPECIFIED 0
1 UNSPECIFIED 0
1 UNSPECIFIED 0
1 UNSPECIFIED 0
1 UNSPECIFIED 0
1 UNSPECIFIED 0
24
25
14
# Implement your encode_molecule() function here:
def encode_molecule(SMILES_string):
    """Encode a molecule as a list of RDKit Lipinski descriptors.

    Arguments:
        SMILES_string: A string holding the SMILES representation of the molecule.
    Returns:
        mol_encoding: A list with one descriptor value per function listed
            below, in that order (18 values).
    Raises:
        ValueError: If RDKit cannot parse ``SMILES_string`` into a molecule.
    """
    mol = rdkit.Chem.MolFromSmiles(SMILES_string)  # load molecule into rdkit
    if mol is None:
        # MolFromSmiles signals a parse failure by returning None; without
        # this check the first descriptor call fails with an opaque error.
        raise ValueError(f"Could not parse SMILES string: {SMILES_string!r}")

    lipinski = rdkit.Chem.Lipinski
    # Descriptor functions, in the order their values appear in the encoding.
    functions = [
        lipinski.FractionCSP3,
        lipinski.HeavyAtomCount,
        lipinski.NHOHCount,
        lipinski.NOCount,
        lipinski.NumAliphaticCarbocycles,
        lipinski.NumAliphaticHeterocycles,
        lipinski.NumAliphaticRings,
        lipinski.NumAromaticCarbocycles,
        lipinski.NumAromaticHeterocycles,
        lipinski.NumAromaticRings,
        lipinski.NumHAcceptors,
        lipinski.NumHDonors,
        lipinski.NumHeteroatoms,
        lipinski.NumRotatableBonds,
        lipinski.NumSaturatedCarbocycles,
        lipinski.NumSaturatedHeterocycles,
        lipinski.NumSaturatedRings,
        lipinski.RingCount,
    ]

    mol_encoding = [descriptor(mol) for descriptor in functions]
    return mol_encoding
# Print the encodings of two example SMILES strings as a quick check.
for example_smiles in ('Cn1cnc2n(C)c(=O)n(C)c(=O)c12', "O"):
    print(encode_molecule(example_smiles))
[0.375, 14, 0, 6, 0, 0, 0, 0, 2, 2, 6, 0, 6, 0, 0, 0, 0, 2]
[0.0, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]
# Read the table from 'delaney-processed.csv' (comma is the default separator).
delaney_processed = pd.read_csv('delaney-processed.csv')
delaney_processed  # bare expression: shows the frame when run as a notebook cell

# Features: one encoded row per SMILES string. Targets: the measured log solubility.
smile_strings = delaney_processed["smiles"].tolist()
molecule_features = np.array(list(map(encode_molecule, smile_strings)))
esol = delaney_processed['measured log solubility in mols per litre'].to_numpy()

# Some sanity checks on the array shapes.
print(molecule_features.shape)
print(esol.shape)

# Number of data points; a useful variable to have around.
num_data_points = esol.shape[0]
(1128, 18)
(1128,)
class Node():
    """A single node of a random regression tree.

    Constructing a Node on a set of data and targets grows the entire subtree
    below it, so building the root node builds the whole tree.

    Attributes:
        min_size: The minimum size of a split data set that will spawn a child
            node. Recommend 6 (i.e. splits of size < 6 return a concrete value).
        data: The feature rows that were passed to this node.
        feature_index: Index of the feature column this node splits on.
        threshold: The threshold value used for the split.
        left_output: Output for points whose feature value is at or below the
            threshold: either a mean target value or a child Node.
        right_output: Output for points whose feature value is above the
            threshold: either a mean target value or a child Node.
    """

    def __init__(self, min_size, data, targets):
        self.min_size = min_size
        self.data = data
        (self.feature_index,
         self.threshold,
         self.left_output,
         self.right_output) = self.grow_tree_from_data(data, targets)

    def SDR(self, left_targets, right_targets):
        """Calculate the standard deviation reduction achieved by a split.

        This is std(all targets) - sum(p(split) * std(split)), where "all
        targets" is the union of the two splits and p(split) is the fraction
        of targets falling in that split.

        Args:
            left_targets, right_targets: The target values on each side of the split.
        Returns:
            The standard deviation reduction, or 0 if either side is empty.
        """
        if len(left_targets) == 0 or len(right_targets) == 0:
            return 0
        # The baseline must come from the targets being split, not from the
        # feature matrix stored on the node.
        all_targets = np.concatenate([left_targets, right_targets])
        n_total = len(all_targets)
        weighted_std = sum(
            (len(split) / n_total) * np.std(split)
            for split in (left_targets, right_targets)
        )
        return np.std(all_targets) - weighted_std

    def _resolve_branch(self, branch_data, branch_targets, sibling_targets, best_SDR):
        """Return a leaf value or a child Node for one side of the chosen split."""
        if len(branch_targets) == 0:
            # No split has occurred: everything landed on the other side.
            return np.mean(sibling_targets)
        if len(branch_targets) < self.min_size or best_SDR == 0:
            # Too small to split further, or splitting gains nothing.
            return np.mean(branch_targets)
        return Node(self.min_size, branch_data, branch_targets)

    def grow_tree_from_data(self, data, targets):
        """Choose this node's split and grow child nodes where necessary.

        Args:
            data, targets: The attributes (2-D array, one row per point) and
                targets of the data passed to the node.
        Returns:
            A tuple (feature_index, threshold, left_output, right_output).
        """
        # Randomly choose n/3 feature indices to be visible to this node, but
        # always at least one so that narrow data sets can still be split.
        n_features = np.shape(data)[1]
        n_visible = max(1, n_features // 3)
        visible_indices = np.random.choice(
            np.arange(n_features), size=n_visible, replace=False
        )

        # Track the best split seen so far (as measured by SDR).
        best_SDR = None
        best_index = None
        best_threshold = None

        # Try every distinct value of every visible feature as a threshold.
        # Duplicate values produce identical splits, so each is tried once.
        for index in visible_indices:
            column = data[:, index]
            for value in np.unique(column):
                left_targets = targets[column <= value]
                right_targets = targets[column > value]
                trial_SDR = self.SDR(left_targets, right_targets)
                if best_SDR is None or best_SDR < trial_SDR:
                    best_SDR = trial_SDR
                    best_index = index
                    best_threshold = value

        # Partition the data according to the best split found.
        best_column = data[:, best_index]
        goes_left = best_column <= best_threshold
        goes_right = best_column > best_threshold
        best_left_data, best_left_targets = data[goes_left], targets[goes_left]
        best_right_data, best_right_targets = data[goes_right], targets[goes_right]

        # Each side becomes a mean target value (leaf) or a new child Node.
        # The left side is resolved first, so its subtree is grown first.
        left_output = self._resolve_branch(
            best_left_data, best_left_targets, best_right_targets, best_SDR
        )
        right_output = self._resolve_branch(
            best_right_data, best_right_targets, best_left_targets, best_SDR
        )
        return best_index, best_threshold, left_output, right_output

    def predict(self, data_point):
        """Predict the target value of a data point passed to this node.

        Args:
            data_point: The data point (indexable by feature index).
        Returns:
            The predicted target value, either from this node or from an
            eventual terminal child node.
        """
        if data_point[self.feature_index] <= self.threshold:
            branch = self.left_output
        else:
            branch = self.right_output
        # Anything that is not a child Node is a terminal (leaf) value.
        if isinstance(branch, Node):
            return branch.predict(data_point)
        return branch
## Write a function to take a bootstrap sample
def take_bootstrap_sample(data, targets):
    """Draw a bootstrap sample: len(data) points chosen with replacement.

    Args:
        data: The input data points (indexable by an integer index array)
        targets: The target values of the input data (indexed the same way)
    Returns:
        A 3-tuple of
        - the array of indices selected for the bootstrapped sample,
        - the bootstrapped data sample,
        - the bootstrapped target values.
    """
    n_points = len(data)
    # One uniformly random index in [0, n_points) per slot in the sample.
    picks = np.random.randint(0, n_points, size=n_points)
    sampled_data = data[picks]
    sampled_targets = targets[picks]
    return picks, sampled_data, sampled_targets
# Implement Random Tree
class RandomTree():
    """A random tree grown on a bootstrapped sample of the data passed in.

    The constructor receives the full data set, draws a bootstrap sample from
    it, and grows a tree of Node objects on that sample.

    Attributes:
        selected_indices: Indices of the full data set chosen for the bootstrap sample
        data: The bootstrapped data sample the tree was grown on
        targets: The bootstrapped target values the tree was grown on
        root_node: The root Node of the grown tree
    """

    def __init__(self, min_size, data, targets):
        # Draw the bootstrap sample first, then grow the tree on it.
        bootstrap = take_bootstrap_sample(data, targets)
        self.selected_indices, self.data, self.targets = bootstrap
        self.root_node = Node(min_size, self.data, self.targets)

    def predict(self, data_point):
        """Predict a value for a given data point.

        Args:
            data_point: The data point to be predicted
        Returns:
            The value predicted by the tree's root node for the data point
        """
        return self.root_node.predict(data_point)
#Check out the results on the first 20 ESOL samples
sample_tree = RandomTree(6, molecule_features, esol)
# Print predicted vs. measured value for each of the first 20 rows.
for row in range(20):
    predicted = sample_tree.predict(molecule_features[row])
    print(predicted, esol[row])
-0.756 -0.77
-2.90425 -3.3
-1.2911764705882351 -2.06
-8.392 -7.87
-1.33 -1.33
-1.5 -1.5
-8.402857142857144 -7.32
-5.0314 -5.03
-5.4225 -6.29
-4.966666666666667 -4.42
1.07 1.07
-4.121666666666666 -4.14
-3.09 -2.68
-2.486 -2.64
-8.106666666666667 -7.96
-1.25 -1.41
-0.24000000000000005 -0.47
-1.345 -1.0
-3.7942 -3.64
-3.0745 -2.94
## Plant a random forest here:
class RandomForest():
    """A forest built out of RandomTree objects.

    Attributes:
        n_trees: The number of trees in the forest
        data: The dataset upon which the forest is grown
        targets: The target values of the data
        tree_list: A list holding each tree object in the forest
    """

    def __init__(self, n_trees, min_size, data, targets):
        self.n_trees = n_trees
        self.data = data
        self.targets = targets
        self.tree_list = []
        print("Planting Trees...")
        # Grow a tree n_trees times and store it in tree_list, printing the
        # tree number as a progress indicator.
        for i in range(self.n_trees):
            print(i)
            self.tree_list.append(RandomTree(min_size, data, targets))

    def predict_point(self, data_point):
        """Predict a target value by averaging the prediction of every tree.

        Args:
            data_point: The data point whose target value is to be predicted
        Returns:
            mean_prediction: The mean prediction by all the trees
        """
        predictions = [tree.predict(data_point) for tree in self.tree_list]
        mean_prediction = np.mean(predictions)
        return mean_prediction

    def calculate_out_of_bag_error(self, data, targets):
        """Calculate the out-of-bag error for the random forest.

        This is the mean squared error over the data points, where each point
        is predicted only by the trees that did not see it in their
        bootstrapped training set. Points that every tree saw have no
        out-of-bag prediction and are left out of the average.

        Args:
            data, targets: The data and targets upon which the forest was trained
        Returns:
            The out-of-bag error, or NaN if no point has an out-of-bag prediction.
        """
        targets = np.asarray(targets, dtype=float)
        # Build each tree's set of seen indices once so the membership test
        # below is O(1) instead of a scan over the index array.
        seen_by_tree = [
            set(np.asarray(tree.selected_indices).tolist())
            for tree in self.tree_list
        ]

        squared_errors = []
        for i, data_point in enumerate(data):
            predictions = [
                tree.predict(data_point)
                for tree, seen in zip(self.tree_list, seen_by_tree)
                if i not in seen  # only trees that never trained on this point
            ]
            if not predictions:
                # Every tree saw this point; averaging nothing would give NaN
                # and poison the whole error, so skip it.
                continue
            squared_errors.append((np.mean(predictions) - targets[i]) ** 2)

        if not squared_errors:
            return float("nan")
        return np.mean(squared_errors)
# Grow a 200-tree forest (minimum split size 6) on the full data set, then
# evaluate its out-of-bag error on that same data.
my_forest = RandomForest(n_trees=200, min_size=6, data=molecule_features, targets=esol)
my_forest.calculate_out_of_bag_error(data=molecule_features, targets=esol)
Planting Trees...
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
[-1.3384589430894307, -2.6478690476190474, -2.2852258461235735, -8.55949756351105, -0.9093509391179496, -1.4496168674698795, -7.567586674291884, -4.381589711934157, -5.021543333333335, -5.120070918367347, 0.1551097883597884, -4.34041840307381, -2.637613512557186, -2.6116502252252256, -5.779163428093742, -1.6979107212368159, -0.3306997602832897, -1.2706759035742696, -4.00451505376344, -3.51602408547795, -7.591158519299196, -3.2844400966183582, -3.7110659396159393, -2.731775345373677, -4.038910052910053, -1.3569342867187324, -2.2632945175438595, -2.429368976770829, -7.148315271144605, -2.6014786370877805, -3.8645580766262793, -3.941606944444444, -4.310067352703793, -1.5892301477393447, -5.368359090762742, -0.5739512626989945, -3.3413569934451512, -3.4997768115942023, -4.079719191919192, -4.179582998013471, -4.64525614876908, -0.4768046350672871, -2.242641133004926, -1.8802272015655577, -0.2967340940901937, -1.444699194784484, -2.1685913335334126, -2.4728866666666667, -4.066379605263158, -2.1077121212121215, -4.647388915595587, -2.48218120978121, -7.339173049678865, -2.650305434782609, -7.837028017194253, -0.9444226590260391, -4.377719937438885, -4.5415698144149745, -7.02076445817453, -0.44458951436918837, -8.812965991360626, -1.2784519825347773, -4.044129354354354, -0.08247424242424241, -3.4633040983606556, -5.0899938888888885, -3.4988693576388896, -0.7947946078431373, -1.1557738095238093, -5.807991016718335, -2.661550245098039, -4.442844482151835, -2.2284512345679017, -2.5251051587301587, -1.7966941765878883, -1.6794194934812023, -6.028461737089202, -6.253471323682673, -1.7362896622299624, -4.300855223880598, -1.8698419913419917, -0.6209127192982455, 0.6276578966218966, -2.9699778620881387, -3.9031705207649012, -3.0673965117321558, -7.05382027027027, -0.333435778505209, -2.020283597883598, -3.7275427631578957, -3.1021355641896626, -2.7622829478854474, -6.946039513505986, -1.0824924943974668, -4.494267134581104, -4.353627777777778, -4.244758578431372, 
-7.013680832484761, -1.9001550560764846, -3.314234217004688, -4.327474603174603, -3.572582916666667, -0.7268461187214612, -1.4931015695227319, -4.497734894634966, 0.14896543905145693, -1.5002067907444665, -1.4941091666666666, -2.021161646130366, -1.8472622807017542, -2.8762158934407096, -4.758973026315789, -2.8389625807102306, -0.7088543694040716, -4.567856346153846, -1.6451573333333331, -1.7169534883720932, -4.488330593607306, -4.81209037037037, -4.88919484034767, -2.9594229042154954, -4.313971056624906, -4.920970801589563, -1.643047550170627, -5.722304180695818, -2.4043429320003864, -2.898285542168674, -7.720255691056912, 0.026176421957672, -4.298731495367412, 0.36109681372549013, -3.7189078571428564, -3.1744001424501427, -1.1460988455988455, -4.039131999885205, -3.666012731749062, -2.1871951282051283, -2.093266699282453, -3.2131066841375366, -4.255890787928477, -2.450811886646767, 0.3058398545898545, -1.9632400037913667, -2.5263622772707524, -2.9655994064619344, -2.2249606821106824, 0.6389487607920127, -2.50373206212647, 0.116079516770758, -1.474613841158841, -1.7063493975903616, -0.2948824107142857, -1.4122496418167716, -2.945781392175345, -1.8194456899640201, -1.1474472001497864, -2.200388830532213, 0.6138075854856755, -5.329560340136054, -3.006917682543094, -2.6327293248945147, -6.096760337552741, -1.3471075312446095, -2.5474538617886178, -3.835704871794872, -2.09369155442776, -5.853928689162702, -5.978138414634147, -0.08912097288676238, -5.375306965174129, -5.126122810705657, -1.4160769230769232, -4.452311919191919, -1.2713756703970793, -2.7198433760683756, -6.006736772486773, -1.7672545391806262, -2.1744265620079584, -2.6023642082674825, -2.779022573839663, -3.372678307591639, -6.381470886075949, -4.309967873910129, 0.13341125925925926, -4.880507692307693, -4.183810013860014, 0.6099353741496599, -5.2635463421434725, -4.6468559386973185, -0.1423919820655535, -4.058020612340476, -3.510281343283582, -2.4347656862745093, -2.0184930715969904, -2.405892658730159, 
-4.116651912568306, -3.088797441780401, -4.988234564256257, -2.8900038820748577, -0.9599534736025028, -3.91486081081081, 0.748449105627449, -1.540450967510167, -3.878829761904762, -1.5227481711525186, 0.26625184045447203, -2.0905394871794876, -2.456148367346938, -2.718289359387548, -1.7416441083974885, 0.035857296296296325, -3.7566339408579297, -4.236544207044622, -2.150121960784313, -5.421701755816482, -5.538732455114341, 0.38380958747135213, -1.8713527397260277, -4.508172698412699, -4.3096634013605435, -8.130262023292746, -2.7039236723495983, -4.309784770882139, -2.60712119047619, -1.1198018384569017, -3.9035086854460093, -3.7458756060606064, -2.3269900821118465, -2.348448870477952, -4.3004167631917625, -0.8242025917805139, -2.0479218826835273, -4.583395207570208, 0.46592602739726036, -4.510719315386215, -3.660198858447488, -1.4812658834011776, -1.4508736414363013, -0.5316071591650359, -1.0584062227007316, -0.6563444940476191, -1.4011129906188948, -0.24855062886515975, -4.128387254901959, -2.9719850964708288, -3.7148472222222217, -2.0662260281385283, -4.454309173293833, -3.989477053140097, -0.19755687229437227, -3.8557071111111108, -1.0682009454639891, -3.086090400689739, 0.06481844691336423, -4.6503584745762705, -0.9558301643823817, -4.1180448314374365, -1.3753016113756371, -3.876918468468468, -7.16951856794566, -5.638508807588075, -4.874767142857143, -4.937750968992248, -5.889896913580246, -3.0125212602212605, -4.742177631578947, -4.525866170634921, -2.3250733332904923, -4.676161929240327, -0.8882287268334851, -3.0057326576576573, -2.265952729778591, -7.218769993476845, -2.4265044479586706, -4.05372077922078, -4.2703229760868116, 0.012789473684210517, -1.0484420882992456, -2.027715369130872, -3.2880670781892998, -1.409143240886098, -2.363060370302624, -2.135027992179778, -3.296296906024102, -4.5435653951759285, -2.0548448224852076, -5.3985870485123675, -2.5341670405093777, -4.307857276995305, -6.174476722299832, -3.6404003456221203, -3.666224928129146, 
-3.1580875939849626, -4.346302788220551, -4.711356504065041, -1.9907508551307853, -3.737378387359309, -8.824011228575634, -1.7813565217391305, -7.753363868760063, -4.269846296296296, -1.6180732340807162, -2.4285124369747897, -4.528554059829061, -4.252945918139385, -4.607101282051282, -4.310262169369235, -2.393122747771267, -3.1995754971070745, -1.5666721713471712, -4.466164107142858, -2.1558319248826288, -4.110440336013757, -3.4882192488262906, -0.13490329771091336, -2.436436569441488, -6.909836231884058, -0.878766149068323, -2.180435964912281, -7.5874779195032565, -3.890041365461847, -8.056322854260326, -3.6944968055555556, -2.356304166666667, -6.178646751848218, -3.0071642156862746, -3.951956444444445, -0.6659314042519722, -1.9388630420876412, -3.6167627694859035, -4.95676225490196, -3.883874075756015, -0.5667030945809407, -3.869400703651092, -1.3314876582526105, -3.9855319203812476, -3.5844422705314005, -3.2801430379746837, -3.8862895582329307, -3.7802916666666673, -3.850660110893673, 0.13620404040404044, -0.8880692246416229, -3.9412307142857146, -4.0681739726027395, -2.7957196816076997, -3.1529310897435896, -4.269437500227799, -2.5112285336356766, -0.7289956859541027, 0.47943563587206456, -4.08921272823113, -1.6147832461287044, -4.323278626163109, -2.688941666666667, -3.0440392801105305, 0.14777427360834228, -3.3258786031746026, -6.40195621206903, -1.8930640179460598, -4.458820361247948, -2.6146605748605745, -3.561095504385965, -2.687282051282051, -3.4451753424657534, 0.5514313725490197, -2.549171952662722, 0.04006247617844004, -2.6926995627894104, -3.6357155405405415, -2.934089552238806, -1.8864728112728113, -2.387829678020265, -3.725705476190476, -2.6638837448559674, -4.356413615023476, -1.491270439209913, -0.7448992835873953, -2.937673372243634, -2.5688745495495495, -1.677893918748336, -1.4086223039215688, -4.652926444444444, -3.3688668231611887, -4.3962317956306345, -1.2775226930000467, 0.2957815315315315, -2.1357490476190475, -0.9746418582346371, 
-4.874510076473234, -5.930826858146595, -3.329239124210522, -2.403600714285714, -2.5955179257796313, -4.7707335858585855, -1.040520245242313, -3.087313853303559, -5.585292606674142, -4.473962735042735, -2.322369909856947, -3.0692787138787145, -2.9347983796296297, -3.3431509059811693, -1.504489972137962, -2.1777089548491575, -4.747864788732394, -1.7655507356532354, -3.2385113123993556, -3.5875866123925824, -0.8089446468953931, -3.075579040404041, -2.8004951169834733, -2.105921641791045, -1.721574564822642, -6.326489837398373, -0.23324170274170275, -1.8177883523362397, -7.070181245107062, -4.3063287500000005, -2.564421164021164, -4.753273437500001, -3.448043961352657, -3.8228059523809526, -4.183715244079927, -3.8577581140350876, -2.596213978138978, -2.2901858954248366, -0.9715128205128206, -3.6449006493506495, -6.115514705882354, -4.208963450834879, -3.333676020645, -7.075876261549397, -2.792147847287063, -4.5965835585585575, -2.384499853344451, -1.622193670585114, -1.7111755875376191, -2.309553424280806, -1.6299981068463492, -1.749468781884782, -0.42527769230769236, -3.8422712998712996, -2.643856188842564, -4.9458592071531875, -3.1650436303871246, -0.847485188364796, -4.240711624949125, -1.1738692640692638, -4.307001904761905, -6.859232025622062, -4.37991425120773, -4.339713394002593, -3.937341428571429, -0.9886380539202178, -4.332115954945877, -2.4813970085470087, -1.4915759924548386, -7.540257142857142, -1.7030473251028806, -0.5186554962158958, -2.5495523341660626, -3.3042572482386703, 0.1819181410883024, -4.023522105278573, -2.3650042194092826, -3.2740677570352963, -1.7526516534391534, -2.7419289682539683, -1.3037308389874682, 0.016095242970242958, -3.4081589438339446, -3.7603829315579316, -4.003733333333332, -2.022758386811178, -1.3975150235434197, -2.974576086956521, -3.4663261040092634, -3.3400573394884203, -5.5187993055555555, -3.867973971193415, -1.8246358333333332, -5.882406148282097, -0.755493927233815, -1.6483167597402626, -1.1231445286195287, 
-4.9085068075117375, -1.131773880975482, -4.609190096618358, -0.9081503681122606, -1.6790311655018055, -3.859037671885848, -3.3374842293906806, -3.9991689615605273, -1.710137280795292, -2.0960781690140844, -4.129714150047484, -7.990512491306081, -4.730289552238806, -4.491394742063492, -5.46938566066066, -3.1712930695049923, -3.037815104166667, -3.2932863453815266, -2.9263122023809522, 0.014880718794986734, -2.0778784971083835, -1.0506963959556426, -1.9341486879877663, -3.299134772475806, -4.041733815958815, -3.5643758454106282, -2.447511788479218, -1.1069983882783885, -2.4239596774193544, -2.0140068170959475, 0.3221634911308824, -5.05870193236715, -3.5628151111111115, -2.3936206943250737, -0.26503658601672303, -3.1449055779119157, -4.263764806907873, 0.47576278282759765, -0.3017223377865715, -1.9483486482221368, -1.778193840234329, -1.411158412158573, -2.533025210053548, -5.931784537037037, -3.578429385964913, -4.545720566893423, -3.706610118814819, -2.2047047222222225, -1.9089621693121694, -0.06922794117647058, -4.092616666666666, -2.407837745098039, -1.7229627441978597, -3.4726429587688723, 0.3644216142348576, -4.349085533453887, -0.3445416153486577, -3.500684072978304, -1.337483479569312, -2.607167826997964, -2.8410400900900896, -7.24435256902496, -3.6589451161240634, -3.551434217171717, -0.9661342281219112, -0.715435621521336, -4.593673004694836, -2.1963469320034625, -4.29658334822822, -3.247673983739837, -4.418213797173622, -5.763207373271889, -4.410693319752142, -7.463255895161894, -7.934089437154755, -4.628794761904762, -3.706604738721195, -2.3631493911719934, -3.3437676535087717, -1.6829938178181993, -0.9869862660813365, -4.980336070366042, -3.28339973574045, -3.845291319050363, -2.743034156378601, -2.505623319357619, -1.2437238753025717, -2.27106268663723, -3.6256314453799035, -1.1956171532213844, -3.6747926829268303, -1.6535417532216845, -4.737042708333334, -6.868280587749016, -1.525348668270727, -1.3677670073121482, -5.0607702991452985, 
-4.16258952991453, -4.415380389309764, -1.1380173941798941, -7.460696918316293, -3.4697833302025605, -2.446713596491228, -3.263186693636528, -1.4456902853274176, -6.404564322916666, -3.5467060185185186, -4.343287015503876, -3.1077484242470375, -0.9848562711282527, -2.689399326563959, -3.0335347619047623, -2.408156499493177, 0.40193413145539914, -1.6832416817365408, -3.275770241002685, -1.711581083598941, -8.593990261625914, -2.613895726495726, 0.5894213627963628, -4.1613414255883, -4.902169568822554, -0.39056839400828025, -1.1586944190224961, -2.8458371693121696, -5.965314198243781, 0.35884896999552174, -2.9860176772470157, -1.458224266283902, -4.196337731481481, -4.377526637251636, -3.456996542904631, -2.670907207207208, -1.7849347724867721, -2.4178606115318595, -3.553473774509804, -5.071757175925926, -4.5623443037974685, -4.951292766726945, -3.693192888888889, 0.19863607503607505, -3.9106612499999995, -0.33551328412944126, -2.471383422030561, -1.1828758594679676, -0.8678000900516593, -4.400850245842345, -2.8993894736842107, -4.700582247425998, -2.2866257660783593, -3.064075004899079, -8.204139613756613, -1.4406189198240196, -2.888682563874251, -3.830901388888889, -2.4270612618977174, -2.3768336645677555, -3.777663224510178, -4.416638802083333, -3.8816747957671556, -5.532912180814355, -1.442666980820106, -2.3280685314685323, -2.153764432434097, -7.118019945548148, -1.257234408739409, -3.222662777777778, -0.9879046233520496, -3.923848189689099, -4.447909555555556, -4.736054016064258, -1.8145876068376066, -2.0734490523968785, -4.048296502057613, -2.48423908994709, -8.352324134353987, -1.3841917874396135, -2.2292130027117754, -3.310984552978299, -3.239433284152309, -3.0059051279931293, -2.109201792084624, -7.958169125394403, -0.9836175906183371, -3.1604056240277862, 0.24255478071975917, -7.076724135487529, -2.2552035682916123, -4.205485686949866, -3.6845792857142863, -8.654198088647401, -3.347657098765432, -2.0048259523809526, -0.20005078107959717, 
-1.5141693121693123, -3.7084028169014096, -6.206080328187895, -2.577107142857143, -0.6744709013524803, -0.5088680708180708, -2.912736922015183, -0.0022521727184312596, -2.515402199074074, -1.1679447267823415, -0.5069243444286098, -2.004117302259887, -2.4843233603351247, -1.4427791801263194, -4.819496968694884, -3.69014236874237, -4.467224430199431, -3.3529030252969276, -2.013758464290656, -3.0655979382869796, -3.870020375849223, -2.2876515974765974, -4.818757077625571, -1.9176113690476189, -1.1404019173198279, -1.0109308936090087, 0.45086773152266296, -2.6294583333333335, -4.252798803418804, -4.103715050500276, -4.372802799227799, -4.731292690058479, -0.26709712761864185, -0.5269060178785067, -2.6332563989809015, -2.210572055137845, -5.187311342592592, -4.723766666666667, -8.62565419488118, -2.148744341372913, -1.3820035980205618, -4.388987037037038, -2.0683855628760965, -4.519910585585586, -2.2719423687423683, -4.322993835616439, -2.031214945381416, -1.282830364873222, -4.399267874396135, -7.184352935260884, -2.52422960199005, -3.0596669354838713, -2.5798076324642176, -1.3704479636591478, -4.880842484436214, -0.9685465756465756, -7.0706488054428815, -1.9972662570404507, -2.636904078297986, -2.55111180445151, -3.91542027027027, -2.542763586827766, -3.2656756693923366, -4.4010558675387506, -7.081305083655084, -3.767815384615385, -4.177800208333333, -0.7750762503091994, -4.650313173400674, -2.2022566176470586, -4.20087116583813, -4.098742331729648, -2.498249193548387, -4.15828392506429, -2.8801475225225217, -7.6953478952166465, -4.963361236511237, -1.3685958800558167, -0.8473390993265992, -5.945230674087817, -1.518086721611722, -3.051439230769231, -2.3762817695430325, -3.33861271166453, -2.582025809433622, -4.0753007142857145, -1.1061488025528217, -4.5490310803398675, -5.480335557791191, -0.45762117346938763, 0.19972873403652622, -4.209773450130466, -2.0388239472934475, -1.742277654364911, -1.0186238598256205, -4.691753381642512, -3.329416025998836, 
-2.0820095354236803, -3.126045177817963, -2.4359933600272545, -8.107235461485722, -0.8883422825322825, -1.4265884559509676, -5.656579418130888, -2.4873810252100843, 0.24984952625152626, -2.5623587818187548, -3.230599703731618, -6.247916997354498, -1.920114393293549, -3.610959583333333, -1.40760639589169, -4.966849431818182, -1.1639901289301067, -2.1443068139085497, -4.219598888888889, -1.9708282552083336, -2.3144289351851857, -1.1679517195767195, -6.141792974861397, -0.12890916469893743, -3.9859553613181204, -0.49424580887155506, -2.533014562430323, -1.4684653256704983, -0.5585260273972603, -4.031420785303403, -4.21454761904762, -3.4867360730593613, -4.230269949494949, 0.01980666666666668, -3.7764620160156572, -3.342312557077626, -7.804157527619714, -2.457664133898509, -2.114948655910272, -4.55044658119658, -4.524584723328203, -2.173946932655158, -7.1894664648942515, -2.6869012367375666, -3.5397232937693253, -1.8538911056376568, -4.585964429302423, -8.011466775233647, -2.2898987033079083, -2.097763289144519, -2.9718483989445152, -2.5735690777576856, -2.5606475016767267, -2.97231277487184, -0.3448153546459424, -3.8086944444444453, -5.680321507936507, -1.956383063654755, -2.4921344285747145, -3.187502685452176, -3.0184194557823125, -3.2594658148505307, -2.959933665649052, -0.4031708155515759, -2.9663399483730037, 0.08818996817225984, 0.06200838744588746, -4.060325821596243, -6.658853418803419, -0.9026105633802818, -2.88640193236715, -2.0082794617021995, -5.108085074626866, -7.199614314286062, -4.697959589041096, -2.5095232931726907, -3.2587975422427036, -3.261558296674066, -7.181707278572596, -1.1149193294963222, -2.1373893483913506, -4.117724647887323, -3.303787433505374, -1.0924870662670663, -4.1510579812206565, -7.940501304325007, -3.584202444444445, -3.08450026844071, -7.781196629400355, -3.880079838709677, -5.782684089781746, 0.20471205044574906, -2.228173888888889, -4.422073414953102, -7.802083526189028, -2.5638424242424245, -1.0207208880985632, 
-2.674944246031746, 0.12595874316939892, -1.2781413598085236, -3.260283255086072, -2.179304465744631, -8.030966863994008, -2.9022885574715285, -0.7674489286144216, -2.5628618843745783, -4.215530423280424, -2.8334858649789028, -4.13730507246377, -1.0355081408880387, -3.259099470471438, -2.761774019607843, -1.7207201587301588, -2.03237552742616, -4.652015618626561, -2.0521626128578765, -6.440342764429717, -3.5449219954648536, -0.9131317694646877, -1.066962729615961, -3.0017396724914738, -2.239359986687803, -6.166559684684684, -2.370491828336273, -5.4688956445993036, -4.32775967556784, -1.131235625387376, -3.151581999559083, -3.6000504385964915, -4.427202574783241, -6.184199298222092, -8.035422194182594, -0.325135762796874, -1.0252411011904763, -5.793165119386529, -2.0058907273457907, -2.259663280736358, -2.8379844924812025, -1.6768569145466405, -1.400512105178772, -4.403490398126465, -2.845321568627451, 0.5075870801702324, -4.327450143430866, -0.8808552621320886, -4.511612698412698, -2.48337478326924, -4.221223619494746, -3.7985708333333332, -0.7823446039890769, -1.2223231481481482, -4.57556045145331, -1.9265125540859482, -4.258655436735297, -2.2333334094368342, -1.08704289819376, -4.219002083333334, -1.7679409727326514, -1.5474343642952324, -3.5157829670329668, -0.8995928380746238, -2.7939045454545455, -5.51146712962963, -0.05571620349894343, -4.432742205638474, -2.844791340361687, -2.437891736627549, -4.478515640584316, -2.9750632978866314, -4.055068859649123, -4.2367212302742265, -2.5678625788194065, -4.092496347031964, -2.9532995614035085, -3.494315990990991, -3.376467676767677, -1.2495821413318318, 0.3542999616981099, -2.4900434032800525, -2.7353026960784312, -1.5754653725318284, -1.9286133554729292, -1.874120787709023, -4.0899223127997635, -5.011194897959185, -6.198385374938922, -4.805838288288288, -4.305786912393163, -6.073661184210527, -4.124207071783938, -3.2657690316925203, -6.419740709325398, -6.803887596006143, -6.917542909017258, -1.387702879818594, 
-1.5361498547271846, -2.7826954802259887, -0.1318230225120401, -7.3579276781697835, -2.159750241545893, -2.3277782894736836, -2.0905679445389267, -3.2784009601225486, -3.317569070949141, -3.3699061904761907, -5.558537162162162, 0.49781366184756404, -2.6872020940906487, -2.715646270928463, -6.598017982456141, -2.840265370370371, -6.925793571428572, -4.729163145629307, -3.7384967871485957, -3.284867489711934, -0.39761009640083356, -2.3208303885239596, -0.945866890848212, -4.1495275000000005, -4.580983001658375, -2.7245038812785385, -0.08211778018278018, -0.8486944662372652, -0.2271500339213026, -2.1769819857561794, -4.378567397531792, -1.1542038934830188, -8.521793673652894, -2.4608794505085623, -0.42672405095283333, 0.24730039138943252, -3.8622567632850244, -1.0620355342921557, -3.1654043010752693, -4.53933592940509, -1.6155039730795369, -2.8519633522727275, -2.28244448051948, -7.369933185463659, -2.3423401419255843, -5.4993739348370925, -3.9222277083333323, -6.826707950832081, 0.1590791105359287, -4.700355314009663, -3.4112802816901406, -3.8689631174929016, -1.3633027209427693, -0.8374188725490195, -0.20380730593607305, -2.792527611940299, -2.4173566239316244, -1.140781694140239, -5.173309936417749, -3.227055522638534, -2.502282530345472, -4.360309389671363, -4.094830281690141, -1.0889030096107635, -2.3707098042328045, -2.8009309895833336, -2.1356055989583336, -5.100883564814815, -1.298849929249089, -3.6197956600361665, -7.893704196184132, -1.1368795224977042, -1.4926747984441022, -2.746542219986587, -6.3925296375266525, -1.253936457287102, -1.3948239962651725, -3.2236282405656733, -2.5134108108108104, -1.4544482762157755, -3.401304705882353, -1.4702121232162897, -6.167793802493803, -2.0362469333813955, -4.038618511250654, -2.768257660071226, -2.3446649859943975, -3.3484165122735066, -1.7454200980392156, -4.5854184108527125, -3.0678811440462734, -4.041649991297709, -2.795315060240964, -3.271789638447972, -0.2808577219817219, -4.4266000000000005, 
-2.3067197154515022, -1.4779813258636787, -4.110411788617886, -2.766806687188946, -3.1478709956709956, 0.18149220196610247, -3.776257577396784, -1.2467768081614115, -1.4746871031163047, -1.903025550482281, -3.1702051662948927, -3.337058858786036, -3.432887939809179, 0.17343030762508982, -0.896648737928738, -3.76310568551141, -5.011355637254902, -3.924784004884004, -3.355335259115005, -2.4052129251700674, -1.5007572642587015, -3.021414705882353, -3.992320129870131, -2.513683147809255, -1.5912729957805907, -2.7165311594202906, -3.5466310077519387, -3.5915529900332235, -3.904251623376624, -7.642679457775097, -3.1928770022770023, -0.9104844140421873, 0.4008514852727181, -1.714139558959869, -3.2506043770804944, -0.7354239277781083, -3.8476056962025322, 0.17933344969164372, -3.8905183098591554, -2.5625016121031745, -1.2387446590730486, -3.7183002136752132, -4.384865068493149, -4.116488682322244, -2.0036719502992355, -3.297035497835498, -3.948848190323191, -1.2080553734061934, -6.306529524886878, -3.6468003553660266, -1.0893476106908835, -3.5467236762403553, -8.377295852067444, -2.0367359389366704, -2.311926523993436, -2.9120378081120597, -1.3348810515873015, -3.110583915669442, -2.5220705261079153, -4.573019358178052]
# Check out your results here:
# compare the forest's prediction with the reference value for the first 20 molecules,
# then report the forest's out-of-bag error over the whole data set.
for idx in range(20):
    predicted = my_forest.predict_point(molecule_features[idx])
    print(predicted, esol[idx])
oob_error = my_forest.calculate_out_of_bag_error(molecule_features, esol)
print(oob_error)
-0.9676373333333333 -0.77
-2.9856421507936513 -3.3
-2.198780203418839 -2.06
-8.35257210857302 -7.87
-1.0894211979378463 -1.33
-1.456886333333333 -1.5
-7.5738305025111705 -7.32
-4.63614525 -5.03
-5.665752583333334 -6.29
-4.640219404761905 -4.42
0.6658920238095238 1.07
-4.314321590278532 -4.14
-2.6388855901105206 -2.68
-2.6217448333333335 -2.64
-7.097919093658081 -7.96
-1.652831218101669 -1.41
-0.35846417350105136 -0.47
-1.1894524392246435 -1.0
-3.7998334924242427 -3.64
-3.3171821277048275 -2.94
[-1.3384589430894307, -2.6478690476190474, -2.2852258461235735, -8.55949756351105, -0.9093509391179496, -1.4496168674698795, -7.567586674291884, -4.381589711934157, -5.021543333333335, -5.120070918367347, 0.1551097883597884, -4.34041840307381, -2.637613512557186, -2.6116502252252256, -5.779163428093742, -1.6979107212368159, -0.3306997602832897, -1.2706759035742696, -4.00451505376344, -3.51602408547795, -7.591158519299196, -3.2844400966183582, -3.7110659396159393, -2.731775345373677, -4.038910052910053, -1.3569342867187324, -2.2632945175438595, -2.429368976770829, -7.148315271144605, -2.6014786370877805, -3.8645580766262793, -3.941606944444444, -4.310067352703793, -1.5892301477393447, -5.368359090762742, -0.5739512626989945, -3.3413569934451512, -3.4997768115942023, -4.079719191919192, -4.179582998013471, -4.64525614876908, -0.4768046350672871, -2.242641133004926, -1.8802272015655577, -0.2967340940901937, -1.444699194784484, -2.1685913335334126, -2.4728866666666667, -4.066379605263158, -2.1077121212121215, -4.647388915595587, -2.48218120978121, -7.339173049678865, -2.650305434782609, -7.837028017194253, -0.9444226590260391, -4.377719937438885, -4.5415698144149745, -7.02076445817453, -0.44458951436918837, -8.812965991360626, -1.2784519825347773, -4.044129354354354, -0.08247424242424241, -3.4633040983606556, -5.0899938888888885, -3.4988693576388896, -0.7947946078431373, -1.1557738095238093, -5.807991016718335, -2.661550245098039, -4.442844482151835, -2.2284512345679017, -2.5251051587301587, -1.7966941765878883, -1.6794194934812023, -6.028461737089202, -6.253471323682673, -1.7362896622299624, -4.300855223880598, -1.8698419913419917, -0.6209127192982455, 0.6276578966218966, -2.9699778620881387, -3.9031705207649012, -3.0673965117321558, -7.05382027027027, -0.333435778505209, -2.020283597883598, -3.7275427631578957, -3.1021355641896626, -2.7622829478854474, -6.946039513505986, -1.0824924943974668, -4.494267134581104, -4.353627777777778, -4.244758578431372, 
-7.013680832484761, -1.9001550560764846, -3.314234217004688, -4.327474603174603, -3.572582916666667, -0.7268461187214612, -1.4931015695227319, -4.497734894634966, 0.14896543905145693, -1.5002067907444665, -1.4941091666666666, -2.021161646130366, -1.8472622807017542, -2.8762158934407096, -4.758973026315789, -2.8389625807102306, -0.7088543694040716, -4.567856346153846, -1.6451573333333331, -1.7169534883720932, -4.488330593607306, -4.81209037037037, -4.88919484034767, -2.9594229042154954, -4.313971056624906, -4.920970801589563, -1.643047550170627, -5.722304180695818, -2.4043429320003864, -2.898285542168674, -7.720255691056912, 0.026176421957672, -4.298731495367412, 0.36109681372549013, -3.7189078571428564, -3.1744001424501427, -1.1460988455988455, -4.039131999885205, -3.666012731749062, -2.1871951282051283, -2.093266699282453, -3.2131066841375366, -4.255890787928477, -2.450811886646767, 0.3058398545898545, -1.9632400037913667, -2.5263622772707524, -2.9655994064619344, -2.2249606821106824, 0.6389487607920127, -2.50373206212647, 0.116079516770758, -1.474613841158841, -1.7063493975903616, -0.2948824107142857, -1.4122496418167716, -2.945781392175345, -1.8194456899640201, -1.1474472001497864, -2.200388830532213, 0.6138075854856755, -5.329560340136054, -3.006917682543094, -2.6327293248945147, -6.096760337552741, -1.3471075312446095, -2.5474538617886178, -3.835704871794872, -2.09369155442776, -5.853928689162702, -5.978138414634147, -0.08912097288676238, -5.375306965174129, -5.126122810705657, -1.4160769230769232, -4.452311919191919, -1.2713756703970793, -2.7198433760683756, -6.006736772486773, -1.7672545391806262, -2.1744265620079584, -2.6023642082674825, -2.779022573839663, -3.372678307591639, -6.381470886075949, -4.309967873910129, 0.13341125925925926, -4.880507692307693, -4.183810013860014, 0.6099353741496599, -5.2635463421434725, -4.6468559386973185, -0.1423919820655535, -4.058020612340476, -3.510281343283582, -2.4347656862745093, -2.0184930715969904, -2.405892658730159, 
-4.116651912568306, -3.088797441780401, -4.988234564256257, -2.8900038820748577, -0.9599534736025028, -3.91486081081081, 0.748449105627449, -1.540450967510167, -3.878829761904762, -1.5227481711525186, 0.26625184045447203, -2.0905394871794876, -2.456148367346938, -2.718289359387548, -1.7416441083974885, 0.035857296296296325, -3.7566339408579297, -4.236544207044622, -2.150121960784313, -5.421701755816482, -5.538732455114341, 0.38380958747135213, -1.8713527397260277, -4.508172698412699, -4.3096634013605435, -8.130262023292746, -2.7039236723495983, -4.309784770882139, -2.60712119047619, -1.1198018384569017, -3.9035086854460093, -3.7458756060606064, -2.3269900821118465, -2.348448870477952, -4.3004167631917625, -0.8242025917805139, -2.0479218826835273, -4.583395207570208, 0.46592602739726036, -4.510719315386215, -3.660198858447488, -1.4812658834011776, -1.4508736414363013, -0.5316071591650359, -1.0584062227007316, -0.6563444940476191, -1.4011129906188948, -0.24855062886515975, -4.128387254901959, -2.9719850964708288, -3.7148472222222217, -2.0662260281385283, -4.454309173293833, -3.989477053140097, -0.19755687229437227, -3.8557071111111108, -1.0682009454639891, -3.086090400689739, 0.06481844691336423, -4.6503584745762705, -0.9558301643823817, -4.1180448314374365, -1.3753016113756371, -3.876918468468468, -7.16951856794566, -5.638508807588075, -4.874767142857143, -4.937750968992248, -5.889896913580246, -3.0125212602212605, -4.742177631578947, -4.525866170634921, -2.3250733332904923, -4.676161929240327, -0.8882287268334851, -3.0057326576576573, -2.265952729778591, -7.218769993476845, -2.4265044479586706, -4.05372077922078, -4.2703229760868116, 0.012789473684210517, -1.0484420882992456, -2.027715369130872, -3.2880670781892998, -1.409143240886098, -2.363060370302624, -2.135027992179778, -3.296296906024102, -4.5435653951759285, -2.0548448224852076, -5.3985870485123675, -2.5341670405093777, -4.307857276995305, -6.174476722299832, -3.6404003456221203, -3.666224928129146, 
-3.1580875939849626, -4.346302788220551, -4.711356504065041, -1.9907508551307853, -3.737378387359309, -8.824011228575634, -1.7813565217391305, -7.753363868760063, -4.269846296296296, -1.6180732340807162, -2.4285124369747897, -4.528554059829061, -4.252945918139385, -4.607101282051282, -4.310262169369235, -2.393122747771267, -3.1995754971070745, -1.5666721713471712, -4.466164107142858, -2.1558319248826288, -4.110440336013757, -3.4882192488262906, -0.13490329771091336, -2.436436569441488, -6.909836231884058, -0.878766149068323, -2.180435964912281, -7.5874779195032565, -3.890041365461847, -8.056322854260326, -3.6944968055555556, -2.356304166666667, -6.178646751848218, -3.0071642156862746, -3.951956444444445, -0.6659314042519722, -1.9388630420876412, -3.6167627694859035, -4.95676225490196, -3.883874075756015, -0.5667030945809407, -3.869400703651092, -1.3314876582526105, -3.9855319203812476, -3.5844422705314005, -3.2801430379746837, -3.8862895582329307, -3.7802916666666673, -3.850660110893673, 0.13620404040404044, -0.8880692246416229, -3.9412307142857146, -4.0681739726027395, -2.7957196816076997, -3.1529310897435896, -4.269437500227799, -2.5112285336356766, -0.7289956859541027, 0.47943563587206456, -4.08921272823113, -1.6147832461287044, -4.323278626163109, -2.688941666666667, -3.0440392801105305, 0.14777427360834228, -3.3258786031746026, -6.40195621206903, -1.8930640179460598, -4.458820361247948, -2.6146605748605745, -3.561095504385965, -2.687282051282051, -3.4451753424657534, 0.5514313725490197, -2.549171952662722, 0.04006247617844004, -2.6926995627894104, -3.6357155405405415, -2.934089552238806, -1.8864728112728113, -2.387829678020265, -3.725705476190476, -2.6638837448559674, -4.356413615023476, -1.491270439209913, -0.7448992835873953, -2.937673372243634, -2.5688745495495495, -1.677893918748336, -1.4086223039215688, -4.652926444444444, -3.3688668231611887, -4.3962317956306345, -1.2775226930000467, 0.2957815315315315, -2.1357490476190475, -0.9746418582346371, 
-4.874510076473234, -5.930826858146595, -3.329239124210522, -2.403600714285714, -2.5955179257796313, -4.7707335858585855, -1.040520245242313, -3.087313853303559, -5.585292606674142, -4.473962735042735, -2.322369909856947, -3.0692787138787145, -2.9347983796296297, -3.3431509059811693, -1.504489972137962, -2.1777089548491575, -4.747864788732394, -1.7655507356532354, -3.2385113123993556, -3.5875866123925824, -0.8089446468953931, -3.075579040404041, -2.8004951169834733, -2.105921641791045, -1.721574564822642, -6.326489837398373, -0.23324170274170275, -1.8177883523362397, -7.070181245107062, -4.3063287500000005, -2.564421164021164, -4.753273437500001, -3.448043961352657, -3.8228059523809526, -4.183715244079927, -3.8577581140350876, -2.596213978138978, -2.2901858954248366, -0.9715128205128206, -3.6449006493506495, -6.115514705882354, -4.208963450834879, -3.333676020645, -7.075876261549397, -2.792147847287063, -4.5965835585585575, -2.384499853344451, -1.622193670585114, -1.7111755875376191, -2.309553424280806, -1.6299981068463492, -1.749468781884782, -0.42527769230769236, -3.8422712998712996, -2.643856188842564, -4.9458592071531875, -3.1650436303871246, -0.847485188364796, -4.240711624949125, -1.1738692640692638, -4.307001904761905, -6.859232025622062, -4.37991425120773, -4.339713394002593, -3.937341428571429, -0.9886380539202178, -4.332115954945877, -2.4813970085470087, -1.4915759924548386, -7.540257142857142, -1.7030473251028806, -0.5186554962158958, -2.5495523341660626, -3.3042572482386703, 0.1819181410883024, -4.023522105278573, -2.3650042194092826, -3.2740677570352963, -1.7526516534391534, -2.7419289682539683, -1.3037308389874682, 0.016095242970242958, -3.4081589438339446, -3.7603829315579316, -4.003733333333332, -2.022758386811178, -1.3975150235434197, -2.974576086956521, -3.4663261040092634, -3.3400573394884203, -5.5187993055555555, -3.867973971193415, -1.8246358333333332, -5.882406148282097, -0.755493927233815, -1.6483167597402626, -1.1231445286195287, 
-4.9085068075117375, -1.131773880975482, -4.609190096618358, -0.9081503681122606, -1.6790311655018055, -3.859037671885848, -3.3374842293906806, -3.9991689615605273, -1.710137280795292, -2.0960781690140844, -4.129714150047484, -7.990512491306081, -4.730289552238806, -4.491394742063492, -5.46938566066066, -3.1712930695049923, -3.037815104166667, -3.2932863453815266, -2.9263122023809522, 0.014880718794986734, -2.0778784971083835, -1.0506963959556426, -1.9341486879877663, -3.299134772475806, -4.041733815958815, -3.5643758454106282, -2.447511788479218, -1.1069983882783885, -2.4239596774193544, -2.0140068170959475, 0.3221634911308824, -5.05870193236715, -3.5628151111111115, -2.3936206943250737, -0.26503658601672303, -3.1449055779119157, -4.263764806907873, 0.47576278282759765, -0.3017223377865715, -1.9483486482221368, -1.778193840234329, -1.411158412158573, -2.533025210053548, -5.931784537037037, -3.578429385964913, -4.545720566893423, -3.706610118814819, -2.2047047222222225, -1.9089621693121694, -0.06922794117647058, -4.092616666666666, -2.407837745098039, -1.7229627441978597, -3.4726429587688723, 0.3644216142348576, -4.349085533453887, -0.3445416153486577, -3.500684072978304, -1.337483479569312, -2.607167826997964, -2.8410400900900896, -7.24435256902496, -3.6589451161240634, -3.551434217171717, -0.9661342281219112, -0.715435621521336, -4.593673004694836, -2.1963469320034625, -4.29658334822822, -3.247673983739837, -4.418213797173622, -5.763207373271889, -4.410693319752142, -7.463255895161894, -7.934089437154755, -4.628794761904762, -3.706604738721195, -2.3631493911719934, -3.3437676535087717, -1.6829938178181993, -0.9869862660813365, -4.980336070366042, -3.28339973574045, -3.845291319050363, -2.743034156378601, -2.505623319357619, -1.2437238753025717, -2.27106268663723, -3.6256314453799035, -1.1956171532213844, -3.6747926829268303, -1.6535417532216845, -4.737042708333334, -6.868280587749016, -1.525348668270727, -1.3677670073121482, -5.0607702991452985, 
-4.16258952991453, -4.415380389309764, -1.1380173941798941, -7.460696918316293, -3.4697833302025605, -2.446713596491228, -3.263186693636528, -1.4456902853274176, -6.404564322916666, -3.5467060185185186, -4.343287015503876, -3.1077484242470375, -0.9848562711282527, -2.689399326563959, -3.0335347619047623, -2.408156499493177, 0.40193413145539914, -1.6832416817365408, -3.275770241002685, -1.711581083598941, -8.593990261625914, -2.613895726495726, 0.5894213627963628, -4.1613414255883, -4.902169568822554, -0.39056839400828025, -1.1586944190224961, -2.8458371693121696, -5.965314198243781, 0.35884896999552174, -2.9860176772470157, -1.458224266283902, -4.196337731481481, -4.377526637251636, -3.456996542904631, -2.670907207207208, -1.7849347724867721, -2.4178606115318595, -3.553473774509804, -5.071757175925926, -4.5623443037974685, -4.951292766726945, -3.693192888888889, 0.19863607503607505, -3.9106612499999995, -0.33551328412944126, -2.471383422030561, -1.1828758594679676, -0.8678000900516593, -4.400850245842345, -2.8993894736842107, -4.700582247425998, -2.2866257660783593, -3.064075004899079, -8.204139613756613, -1.4406189198240196, -2.888682563874251, -3.830901388888889, -2.4270612618977174, -2.3768336645677555, -3.777663224510178, -4.416638802083333, -3.8816747957671556, -5.532912180814355, -1.442666980820106, -2.3280685314685323, -2.153764432434097, -7.118019945548148, -1.257234408739409, -3.222662777777778, -0.9879046233520496, -3.923848189689099, -4.447909555555556, -4.736054016064258, -1.8145876068376066, -2.0734490523968785, -4.048296502057613, -2.48423908994709, -8.352324134353987, -1.3841917874396135, -2.2292130027117754, -3.310984552978299, -3.239433284152309, -3.0059051279931293, -2.109201792084624, -7.958169125394403, -0.9836175906183371, -3.1604056240277862, 0.24255478071975917, -7.076724135487529, -2.2552035682916123, -4.205485686949866, -3.6845792857142863, -8.654198088647401, -3.347657098765432, -2.0048259523809526, -0.20005078107959717, 
-1.5141693121693123, -3.7084028169014096, -6.206080328187895, -2.577107142857143, -0.6744709013524803, -0.5088680708180708, -2.912736922015183, -0.0022521727184312596, -2.515402199074074, -1.1679447267823415, -0.5069243444286098, -2.004117302259887, -2.4843233603351247, -1.4427791801263194, -4.819496968694884, -3.69014236874237, -4.467224430199431, -3.3529030252969276, -2.013758464290656, -3.0655979382869796, -3.870020375849223, -2.2876515974765974, -4.818757077625571, -1.9176113690476189, -1.1404019173198279, -1.0109308936090087, 0.45086773152266296, -2.6294583333333335, -4.252798803418804, -4.103715050500276, -4.372802799227799, -4.731292690058479, -0.26709712761864185, -0.5269060178785067, -2.6332563989809015, -2.210572055137845, -5.187311342592592, -4.723766666666667, -8.62565419488118, -2.148744341372913, -1.3820035980205618, -4.388987037037038, -2.0683855628760965, -4.519910585585586, -2.2719423687423683, -4.322993835616439, -2.031214945381416, -1.282830364873222, -4.399267874396135, -7.184352935260884, -2.52422960199005, -3.0596669354838713, -2.5798076324642176, -1.3704479636591478, -4.880842484436214, -0.9685465756465756, -7.0706488054428815, -1.9972662570404507, -2.636904078297986, -2.55111180445151, -3.91542027027027, -2.542763586827766, -3.2656756693923366, -4.4010558675387506, -7.081305083655084, -3.767815384615385, -4.177800208333333, -0.7750762503091994, -4.650313173400674, -2.2022566176470586, -4.20087116583813, -4.098742331729648, -2.498249193548387, -4.15828392506429, -2.8801475225225217, -7.6953478952166465, -4.963361236511237, -1.3685958800558167, -0.8473390993265992, -5.945230674087817, -1.518086721611722, -3.051439230769231, -2.3762817695430325, -3.33861271166453, -2.582025809433622, -4.0753007142857145, -1.1061488025528217, -4.5490310803398675, -5.480335557791191, -0.45762117346938763, 0.19972873403652622, -4.209773450130466, -2.0388239472934475, -1.742277654364911, -1.0186238598256205, -4.691753381642512, -3.329416025998836, 
-2.0820095354236803, -3.126045177817963, -2.4359933600272545, -8.107235461485722, -0.8883422825322825, -1.4265884559509676, -5.656579418130888, -2.4873810252100843, 0.24984952625152626, -2.5623587818187548, -3.230599703731618, -6.247916997354498, -1.920114393293549, -3.610959583333333, -1.40760639589169, -4.966849431818182, -1.1639901289301067, -2.1443068139085497, -4.219598888888889, -1.9708282552083336, -2.3144289351851857, -1.1679517195767195, -6.141792974861397, -0.12890916469893743, -3.9859553613181204, -0.49424580887155506, -2.533014562430323, -1.4684653256704983, -0.5585260273972603, -4.031420785303403, -4.21454761904762, -3.4867360730593613, -4.230269949494949, 0.01980666666666668, -3.7764620160156572, -3.342312557077626, -7.804157527619714, -2.457664133898509, -2.114948655910272, -4.55044658119658, -4.524584723328203, -2.173946932655158, -7.1894664648942515, -2.6869012367375666, -3.5397232937693253, -1.8538911056376568, -4.585964429302423, -8.011466775233647, -2.2898987033079083, -2.097763289144519, -2.9718483989445152, -2.5735690777576856, -2.5606475016767267, -2.97231277487184, -0.3448153546459424, -3.8086944444444453, -5.680321507936507, -1.956383063654755, -2.4921344285747145, -3.187502685452176, -3.0184194557823125, -3.2594658148505307, -2.959933665649052, -0.4031708155515759, -2.9663399483730037, 0.08818996817225984, 0.06200838744588746, -4.060325821596243, -6.658853418803419, -0.9026105633802818, -2.88640193236715, -2.0082794617021995, -5.108085074626866, -7.199614314286062, -4.697959589041096, -2.5095232931726907, -3.2587975422427036, -3.261558296674066, -7.181707278572596, -1.1149193294963222, -2.1373893483913506, -4.117724647887323, -3.303787433505374, -1.0924870662670663, -4.1510579812206565, -7.940501304325007, -3.584202444444445, -3.08450026844071, -7.781196629400355, -3.880079838709677, -5.782684089781746, 0.20471205044574906, -2.228173888888889, -4.422073414953102, -7.802083526189028, -2.5638424242424245, -1.0207208880985632, 
-2.674944246031746, 0.12595874316939892, -1.2781413598085236, -3.260283255086072, -2.179304465744631, -8.030966863994008, -2.9022885574715285, -0.7674489286144216, -2.5628618843745783, -4.215530423280424, -2.8334858649789028, -4.13730507246377, -1.0355081408880387, -3.259099470471438, -2.761774019607843, -1.7207201587301588, -2.03237552742616, -4.652015618626561, -2.0521626128578765, -6.440342764429717, -3.5449219954648536, -0.9131317694646877, -1.066962729615961, -3.0017396724914738, -2.239359986687803, -6.166559684684684, -2.370491828336273, -5.4688956445993036, -4.32775967556784, -1.131235625387376, -3.151581999559083, -3.6000504385964915, -4.427202574783241, -6.184199298222092, -8.035422194182594, -0.325135762796874, -1.0252411011904763, -5.793165119386529, -2.0058907273457907, -2.259663280736358, -2.8379844924812025, -1.6768569145466405, -1.400512105178772, -4.403490398126465, -2.845321568627451, 0.5075870801702324, -4.327450143430866, -0.8808552621320886, -4.511612698412698, -2.48337478326924, -4.221223619494746, -3.7985708333333332, -0.7823446039890769, -1.2223231481481482, -4.57556045145331, -1.9265125540859482, -4.258655436735297, -2.2333334094368342, -1.08704289819376, -4.219002083333334, -1.7679409727326514, -1.5474343642952324, -3.5157829670329668, -0.8995928380746238, -2.7939045454545455, -5.51146712962963, -0.05571620349894343, -4.432742205638474, -2.844791340361687, -2.437891736627549, -4.478515640584316, -2.9750632978866314, -4.055068859649123, -4.2367212302742265, -2.5678625788194065, -4.092496347031964, -2.9532995614035085, -3.494315990990991, -3.376467676767677, -1.2495821413318318, 0.3542999616981099, -2.4900434032800525, -2.7353026960784312, -1.5754653725318284, -1.9286133554729292, -1.874120787709023, -4.0899223127997635, -5.011194897959185, -6.198385374938922, -4.805838288288288, -4.305786912393163, -6.073661184210527, -4.124207071783938, -3.2657690316925203, -6.419740709325398, -6.803887596006143, -6.917542909017258, -1.387702879818594, 
-1.5361498547271846, -2.7826954802259887, -0.1318230225120401, -7.3579276781697835, -2.159750241545893, -2.3277782894736836, -2.0905679445389267, -3.2784009601225486, -3.317569070949141, -3.3699061904761907, -5.558537162162162, 0.49781366184756404, -2.6872020940906487, -2.715646270928463, -6.598017982456141, -2.840265370370371, -6.925793571428572, -4.729163145629307, -3.7384967871485957, -3.284867489711934, -0.39761009640083356, -2.3208303885239596, -0.945866890848212, -4.1495275000000005, -4.580983001658375, -2.7245038812785385, -0.08211778018278018, -0.8486944662372652, -0.2271500339213026, -2.1769819857561794, -4.378567397531792, -1.1542038934830188, -8.521793673652894, -2.4608794505085623, -0.42672405095283333, 0.24730039138943252, -3.8622567632850244, -1.0620355342921557, -3.1654043010752693, -4.53933592940509, -1.6155039730795369, -2.8519633522727275, -2.28244448051948, -7.369933185463659, -2.3423401419255843, -5.4993739348370925, -3.9222277083333323, -6.826707950832081, 0.1590791105359287, -4.700355314009663, -3.4112802816901406, -3.8689631174929016, -1.3633027209427693, -0.8374188725490195, -0.20380730593607305, -2.792527611940299, -2.4173566239316244, -1.140781694140239, -5.173309936417749, -3.227055522638534, -2.502282530345472, -4.360309389671363, -4.094830281690141, -1.0889030096107635, -2.3707098042328045, -2.8009309895833336, -2.1356055989583336, -5.100883564814815, -1.298849929249089, -3.6197956600361665, -7.893704196184132, -1.1368795224977042, -1.4926747984441022, -2.746542219986587, -6.3925296375266525, -1.253936457287102, -1.3948239962651725, -3.2236282405656733, -2.5134108108108104, -1.4544482762157755, -3.401304705882353, -1.4702121232162897, -6.167793802493803, -2.0362469333813955, -4.038618511250654, -2.768257660071226, -2.3446649859943975, -3.3484165122735066, -1.7454200980392156, -4.5854184108527125, -3.0678811440462734, -4.041649991297709, -2.795315060240964, -3.271789638447972, -0.2808577219817219, -4.4266000000000005, 
-2.3067197154515022, -1.4779813258636787, -4.110411788617886, -2.766806687188946, -3.1478709956709956, 0.18149220196610247, -3.776257577396784, -1.2467768081614115, -1.4746871031163047, -1.903025550482281, -3.1702051662948927, -3.337058858786036, -3.432887939809179, 0.17343030762508982, -0.896648737928738, -3.76310568551141, -5.011355637254902, -3.924784004884004, -3.355335259115005, -2.4052129251700674, -1.5007572642587015, -3.021414705882353, -3.992320129870131, -2.513683147809255, -1.5912729957805907, -2.7165311594202906, -3.5466310077519387, -3.5915529900332235, -3.904251623376624, -7.642679457775097, -3.1928770022770023, -0.9104844140421873, 0.4008514852727181, -1.714139558959869, -3.2506043770804944, -0.7354239277781083, -3.8476056962025322, 0.17933344969164372, -3.8905183098591554, -2.5625016121031745, -1.2387446590730486, -3.7183002136752132, -4.384865068493149, -4.116488682322244, -2.0036719502992355, -3.297035497835498, -3.948848190323191, -1.2080553734061934, -6.306529524886878, -3.6468003553660266, -1.0893476106908835, -3.5467236762403553, -8.377295852067444, -2.0367359389366704, -2.311926523993436, -2.9120378081120597, -1.3348810515873015, -3.110583915669442, -2.5220705261079153, -4.573019358178052]
0.5402670085747613
def num_aromatic_atoms(molecule):
    """Count the atoms of a molecule that are flagged as aromatic.

    Arguments:
        molecule: An object exposing GetAtoms(), whose items expose GetIsAromatic()
            (e.g. an RDKit Mol object)
    Returns:
        The number of atoms for which GetIsAromatic() is truthy
    """
    return sum(1 for atom in molecule.GetAtoms() if atom.GetIsAromatic())
def empirical_encoding(SMILES_string):
    """Given a SMILES string return an estimate of log(Solubility) based on the empirical model in the Delaney paper

    The estimate is the linear model
        0.16 - 0.63*LogP - 0.0062*MolWt + 0.066*RotatableBonds - 0.74*AromaticProportion
    where AromaticProportion is the number of aromatic atoms divided by the number of heavy atoms.

    Arguments:
        SMILES_string: A string representing the SMILES string of the molecule
    Returns:
        esol: The estimated log(Solubility) based on Delaney Paper
    Raises:
        ValueError: If the SMILES string cannot be parsed into a molecule
    """
    mol = rdkit.Chem.MolFromSmiles(SMILES_string)
    # MolFromSmiles signals a parse failure by returning None rather than raising;
    # fail here with a clear message instead of deep inside a descriptor call.
    if mol is None:
        raise ValueError("Could not parse SMILES string: {!r}".format(SMILES_string))

    # A molecule without heavy atoms has no aromatic atoms either, so its aromatic
    # proportion is taken as 0 instead of dividing by zero.
    num_heavy_atoms = mol.GetNumHeavyAtoms()
    if num_heavy_atoms:
        aromatic_proportion = num_aromatic_atoms(mol) / num_heavy_atoms
    else:
        aromatic_proportion = 0.0

    # NOTE(review): the monoisotopic mass (ExactMolWt) is used for the weight term, as in
    # the original code - confirm whether the average weight (MolWt) was intended.
    esol = (
        0.16
        - 0.63 * rdkit.Chem.Crippen.MolLogP(mol)
        - 0.0062 * rdkit.Chem.Descriptors.ExactMolWt(mol)
        + 0.066 * rdkit.Chem.Descriptors.NumRotatableBonds(mol)
        - 0.74 * aromatic_proportion
    )
    return esol
# Score the empirical Delaney model: predict every molecule in the data set and
# print the mean squared error against the reference values in esol.
smile_strings = delaney_processed["smiles"].to_list()
esol_predictions = np.array(list(map(empirical_encoding, smile_strings)))
delaney_squared_errors = np.square(esol_predictions - esol)
print(np.mean(delaney_squared_errors))
1.2104303956907128
# Bare expressions: presumably left over from notebook cells that displayed the
# empirical predictions and the reference values; they have no effect when run as a script.
esol_predictions
esol