from matplotlib import pyplot as plt
from sklearn.decomposition import PCA
import orchest
import seaborn as sns
import pandas as pd
import numpy as np
Matplotlib is building the font cache; this may take a moment.
/opt/conda/lib/python3.7/site-packages/requests/__init__.py:91: RequestsDependencyWarning: urllib3 (1.26.6) or chardet (3.0.4) doesn't match a supported version!
RequestsDependencyWarning)
data = orchest.get_inputs()
bcell, covid, sars, bcell_sars = data["data"]
bcell_sars.head()
bcell_sars.target.value_counts()
idx_train = bcell_sars["target"].astype("bool").values
fig, axes = plt.subplots(2, 2, figsize=(16, 8))
axes = [x for a in axes for x in a]
for i, name in enumerate(["chou_fasman", "emini", "kolaskar_tongaonkar", "parker"]):
value = bcell_sars[name]
sns.distplot(value[~idx_train], ax=axes[i])
sns.distplot(value[idx_train], ax=axes[i])
axes[i].set_xlabel(name, fontsize=12)
fig.legend(labels=["target 0", "target 1"], loc="right", fontsize=12)
/opt/conda/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
/opt/conda/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
/opt/conda/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
/opt/conda/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
/opt/conda/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
/opt/conda/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
/opt/conda/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
/opt/conda/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
clf = PCA(n_components=2)
z = clf.fit_transform(
bcell_sars[["chou_fasman", "emini", "kolaskar_tongaonkar", "parker"]]
)
plt.figure(figsize=(8, 6))
plt.scatter(*z[idx_train].T, s=3)
plt.scatter(*z[~idx_train].T, s=3)
plt.legend(labels=["target_1", "target_0"], fontsize=12)
plt.show()
idx_train = bcell_sars["target"].astype("bool").values
fig, axes = plt.subplots(2, 2, figsize=(16, 8))
axes = [x for a in axes for x in a]
for i, name in enumerate(
["isoelectric_point", "aromaticity", "hydrophobicity", "stability"]
):
value = bcell_sars[name]
sns.distplot(value[~idx_train], ax=axes[i])
sns.distplot(value[idx_train], ax=axes[i])
axes[i].set_xlabel(name, fontsize=12)
fig.legend(labels=["target 0", "target 1"], loc="right", fontsize=12)
/opt/conda/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
/opt/conda/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
/opt/conda/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
/opt/conda/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
/opt/conda/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
/opt/conda/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
/opt/conda/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
/opt/conda/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
clf = PCA(n_components=2)
z = clf.fit_transform(
bcell_sars[["isoelectric_point", "aromaticity", "hydrophobicity", "stability"]]
)
plt.figure(figsize=(8, 6))
plt.scatter(*z[idx_train].T, s=3)
plt.scatter(*z[~idx_train].T, s=3)
plt.legend(labels=["target_1", "target_0"], fontsize=12)
plt.show()
# create length columns
for df in [bcell, sars, covid, bcell_sars]:
df["length"] = df["end_position"] - df["start_position"] + 1
fig, ax = plt.subplots(figsize=(12, 6))
sns.countplot(bcell_sars["length"], ax=ax, color="lightblue")
sns.countplot(bcell_sars.query("target == 1")["length"], ax=ax, color="coral")
plt.legend(labels=["target 0", "target 1"], fontsize=12)
plt.show()
/opt/conda/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
FutureWarning
/opt/conda/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
FutureWarning
# Corelation Matrix
corr_matrix = bcell_sars[
[
"parent_protein_id",
"protein_seq",
"start_position",
"end_position",
"peptide_seq",
"chou_fasman",
"emini",
"kolaskar_tongaonkar",
"parker",
"isoelectric_point",
"aromaticity",
"hydrophobicity",
"stability",
"target",
]
].corr()
mask = np.zeros_like(corr_matrix, dtype=np.bool)
mask[np.triu_indices_from(mask)] = True
#corr heatmap
sns.set_style('whitegrid')
f, ax = plt.subplots(figsize=(11, 15))
heatmap = sns.heatmap(corr_matrix,
mask = mask,
square = True,
linewidths = .5,
cmap = 'coolwarm',
cbar_kws = {'shrink': .4,
'ticks' : [-1, -.5, 0, 0.5, 1]},
vmin = -1,
vmax = 1,
annot = False,
annot_kws = {'size': 12})
#add the column names as labels
ax.set_yticklabels(corr_matrix.columns, rotation = 0)
ax.set_xticklabels(corr_matrix.columns)
sns.set_style({'xtick.bottom': True}, {'ytick.left': True})
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:21: DeprecationWarning: `np.bool` is a deprecated alias for the builtin `bool`. To silence this warning, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here.
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations