%matplotlib inline
import os
import gc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
from prettytable import PrettyTable
from IPython.display import Image
from sklearn.preprocessing import LabelEncoder
from keras.models import Model
from keras.regularizers import l2
from keras.constraints import max_norm
from keras.utils import to_categorical
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.callbacks import EarlyStopping
from keras.layers import Input, Dense, Dropout, Flatten, Activation
from keras.layers import Conv1D, Add, MaxPooling1D, BatchNormalization
from keras.layers import Embedding, Bidirectional, CuDNNLSTM, GlobalMaxPooling1D
Using TensorFlow backend.
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
from google.colab import drive
drive.mount('/content/drive')
data_path = 'drive/My Drive/Case_Study/pfam/random_split/'
print('Available data', os.listdir(data_path))
Available data ['dev', 'test', 'train']
# https://www.kaggle.com/drewbryant/starter-pfam-seed-random-split
# The data is randomly split into three folders: train (80%), dev (10%), test (10%).
# Read and concatenate the CSV shards in each folder.
def read_data(partition):
    data = []
    for fn in os.listdir(os.path.join(data_path, partition)):
        with open(os.path.join(data_path, partition, fn)) as f:
            data.append(pd.read_csv(f, index_col=None))
    return pd.concat(data)
# Read all data partitions.
df_train = read_data('train')
df_val = read_data('dev')
df_test = read_data('test')
df_train.head()
df_train.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1086741 entries, 0 to 13514
Data columns (total 5 columns):
family_id 1086741 non-null object
sequence_name 1086741 non-null object
family_accession 1086741 non-null object
aligned_sequence 1086741 non-null object
sequence 1086741 non-null object
dtypes: object(5)
memory usage: 49.7+ MB
# Example: an unaligned sequence.
# Each character represents one amino-acid code (20 standard plus a handful of
# uncommon codes; the code-frequency counts below find 25 unique codes in train).
df_train.head(1)['sequence'].values[0]
# Size of each data split
print('Train size: ', len(df_train))
print('Val size: ', len(df_val))
print('Test size: ', len(df_test))
Train size: 1086741
Val size: 126171
Test size: 126171
def calc_unique_cls(train, test, val):
    """Prints the number of unique classes in each data set."""
    train_unq = np.unique(train['family_accession'].values)
    val_unq = np.unique(val['family_accession'].values)
    test_unq = np.unique(test['family_accession'].values)
    print('Number of unique classes in Train: ', len(train_unq))
    print('Number of unique classes in Val: ', len(val_unq))
    print('Number of unique classes in Test: ', len(test_unq))
# Unique classes in each split: df_train, df_val, df_test
calc_unique_cls(df_train, df_test, df_val)
Number of unique classes in Train: 17929
Number of unique classes in Val: 13071
Number of unique classes in Test: 13071
# Sequence length (character count) for each split.
df_train['seq_char_count'] = df_train['sequence'].apply(lambda x: len(x))
df_val['seq_char_count'] = df_val['sequence'].apply(lambda x: len(x))
df_test['seq_char_count'] = df_test['sequence'].apply(lambda x: len(x))
def plot_seq_count(df, data_name):
    sns.distplot(df['seq_char_count'].values)
    plt.title(f'Sequence char count: {data_name}')
    plt.grid(True)
plt.subplot(1, 3, 1)
plot_seq_count(df_train, 'Train')
plt.subplot(1, 3, 2)
plot_seq_count(df_val, 'Val')
plt.subplot(1, 3, 3)
plot_seq_count(df_test, 'Test')
plt.subplots_adjust(right=3.0)
plt.show()
def get_code_freq(df, data_name):
    df = df.apply(lambda x: " ".join(x))
    codes = []
    for i in df:  # concatenate the codes of all sequences
        codes.extend(i)
    codes_dict = Counter(codes)
    codes_dict.pop(' ')  # remove the whitespace separator
    print(f'Codes: {data_name}')
    print(f'Total unique codes: {len(codes_dict.keys())}')
    df = pd.DataFrame({'Code': list(codes_dict.keys()), 'Freq': list(codes_dict.values())})
    return df.sort_values('Freq', ascending=False).reset_index()[['Code', 'Freq']]
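To make the counting logic concrete, here is a quick check on a toy series (hypothetical values, not from the dataset):
# Illustrative only: code frequencies in a tiny hypothetical series.
toy = pd.Series(['AAB', 'AC'])
get_code_freq(toy, 'Toy')  # expected counts: A -> 3, B -> 1, C -> 1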
# train code sequence
train_code_freq = get_code_freq(df_train['sequence'], 'Train')
train_code_freq
Codes: Train
Total unique codes: 25
# val code sequence
val_code_freq = get_code_freq(df_val['sequence'], 'Val')
val_code_freq
Codes: Val
Total unique codes: 22
# test code sequence
test_code_freq = get_code_freq(df_test['sequence'], 'Test')
test_code_freq
Codes: Test
Total unique codes: 24
def plot_code_freq(df, data_name):
    plt.title(f'Code frequency: {data_name}')
    sns.barplot(x='Code', y='Freq', data=df)
plt.subplot(1, 3, 1)
plot_code_freq(train_code_freq, 'Train')
plt.subplot(1, 3, 2)
plot_code_freq(val_code_freq, 'Val')
plt.subplot(1, 3, 3)
plot_code_freq(test_code_freq, 'Test')
plt.subplots_adjust(right=3.0)
plt.show()
df_train.groupby('family_id').size().sort_values(ascending=False).head(20)
df_val.groupby('family_id').size().sort_values(ascending=False).head(20)
df_test.groupby('family_id').size().sort_values(ascending=False).head(20)
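As an aside, the same top-20 view can be produced with value_counts(), which is equivalent to grouping by the column and sizing:
# Equivalent to groupby('family_id').size().sort_values(ascending=False).head(20)
df_train['family_id'].value_counts().head(20)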
# Keep only the top 1000 classes (by observation count) because of limited computational power.
classes = df_train['family_accession'].value_counts()[:1000].index.tolist()
len(classes)
# Filter each split down to the selected 1000 classes.
train_sm = df_train.loc[df_train['family_accession'].isin(classes)].reset_index()
val_sm = df_val.loc[df_val['family_accession'].isin(classes)].reset_index()
test_sm = df_test.loc[df_test['family_accession'].isin(classes)].reset_index()
print('Data size after considering 1000 classes for each data split:')
print('Train size :', len(train_sm))
print('Val size :', len(val_sm))
print('Test size :', len(test_sm))
Data size after considering 1000 classes for each data split:
Train size : 439493
Val size : 54378
Test size : 54378
# No. of unique classes after reducing the data size.
calc_unique_cls(train_sm, test_sm, val_sm)
Number of unique classes in Train: 1000
Number of unique classes in Val: 1000
Number of unique classes in Test: 1000
# https://dmnfarrell.github.io/bioinformatics/mhclearning
# http://www.cryst.bbk.ac.uk/education/AminoAcid/the_twenty.html
# One-letter codes for the 20 standard amino acids
codes = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']
def create_dict(codes):
    char_dict = {}
    for index, val in enumerate(codes):
        char_dict[val] = index + 1  # reserve 0 for padding/unknown codes
    return char_dict
char_dict = create_dict(codes)
print(char_dict)
print("Dict Length:", len(char_dict))
{'A': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7, 'I': 8, 'K': 9, 'L': 10, 'M': 11, 'N': 12, 'P': 13, 'Q': 14, 'R': 15, 'S': 16, 'T': 17, 'V': 18, 'W': 19, 'Y': 20}
Dict Length: 20
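The same mapping can be built with a dict comprehension; this is just an equivalent alternative shown for clarity, not a change to the pipeline:
# Equivalent one-liner; index 0 stays reserved for padding/unknown codes.
char_dict_alt = {code: i + 1 for i, code in enumerate(codes)}
assert char_dict_alt == char_dict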
def integer_encoding(data):
    """
    Encodes a code sequence to integer values.
    The 20 standard amino acids map to 1-20;
    all other (uncommon) codes map to 0.
    """
    encode_list = []
    for row in data['sequence'].values:
        row_encode = []
        for code in row:
            row_encode.append(char_dict.get(code, 0))
        encode_list.append(np.array(row_encode))
    return encode_list
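A minimal sketch of the encoding on a hypothetical two-row frame ('X' is not in char_dict, so it maps to 0):
# Illustrative only: 'A' -> 1, 'C' -> 2, 'D' -> 3, unknown 'X' -> 0.
toy_df = pd.DataFrame({'sequence': ['ACD', 'AX']})
print(integer_encoding(toy_df))  # [array([1, 2, 3]), array([1, 0])]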
train_encode = integer_encoding(train_sm)
val_encode = integer_encoding(val_sm)
test_encode = integer_encoding(test_sm)
from keras.preprocessing.sequence import pad_sequences
# Pad (and truncate) every sequence at the end to a fixed length of 100.
max_length = 100
train_pad = pad_sequences(train_encode, maxlen=max_length, padding='post', truncating='post')
val_pad = pad_sequences(val_encode, maxlen=max_length, padding='post', truncating='post')
test_pad = pad_sequences(test_encode, maxlen=max_length, padding='post', truncating='post')
train_pad.shape, val_pad.shape, test_pad.shape
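A quick sanity check of the padding/truncation settings on hypothetical values:
# 'post' pads with zeros at the end and truncates from the end.
print(pad_sequences([[1, 2, 3]], maxlen=5, padding='post', truncating='post'))            # [[1 2 3 0 0]]
print(pad_sequences([[1, 2, 3, 4, 5, 6]], maxlen=5, padding='post', truncating='post'))   # [[1 2 3 4 5]]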
from keras.utils import to_categorical
# One-hot encode the padded sequences (adds a final axis of size 21).
train_ohe = to_categorical(train_pad)
val_ohe = to_categorical(val_pad)
test_ohe = to_categorical(test_pad)
train_ohe.shape, val_ohe.shape, test_ohe.shape
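to_categorical turns each integer into a one-hot vector, appending an axis of size num_classes; a small illustrative example:
# With 21 classes (0 = padding/unknown, 1-20 = amino acids), a (N, 100)
# integer matrix becomes a (N, 100, 21) one-hot tensor.
print(to_categorical([[1, 2, 0]], num_classes=3).shape)  # (1, 3, 3)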
# del train_pad, val_pad, test_pad
# del train_encode, val_encode, test_encode
# gc.collect()
# Label/integer-encode the output variable (y).
le = LabelEncoder()
y_train_le = le.fit_transform(train_sm['family_accession'])
y_val_le = le.transform(val_sm['family_accession'])
y_test_le = le.transform(test_sm['family_accession'])
y_train_le.shape, y_val_le.shape, y_test_le.shape
print('Total classes: ', len(le.classes_))
# le.classes_
Total classes: 1000
# One hot encoding of outputs
y_train = to_categorical(y_train_le)
y_val = to_categorical(y_val_le)
y_test = to_categorical(y_test_le)
y_train.shape, y_val.shape, y_test.shape
# Utility function: plot model's accuracy and loss
# https://realpython.com/python-keras-text-classification/
plt.style.use('ggplot')
def plot_history(history):
    acc = history.history['acc']
    val_acc = history.history['val_acc']
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    x = range(1, len(acc) + 1)

    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(x, acc, 'b', label='Training acc')
    plt.plot(x, val_acc, 'r', label='Validation acc')
    plt.title('Training and validation accuracy')
    plt.legend()
    plt.subplot(1, 2, 2)
    plt.plot(x, loss, 'b', label='Training loss')
    plt.plot(x, val_loss, 'r', label='Validation loss')
    plt.title('Training and validation loss')
    plt.legend()
# Utility function: display the model's score (loss & accuracy) on all three sets.
def display_model_score(model, train, val, test, batch_size):
    train_score = model.evaluate(train[0], train[1], batch_size=batch_size, verbose=1)
    print('Train loss: ', train_score[0])
    print('Train accuracy: ', train_score[1])
    print('-' * 70)
    val_score = model.evaluate(val[0], val[1], batch_size=batch_size, verbose=1)
    print('Val loss: ', val_score[0])
    print('Val accuracy: ', val_score[1])
    print('-' * 70)
    test_score = model.evaluate(test[0], test[1], batch_size=batch_size, verbose=1)
    print('Test loss: ', test_score[0])
    print('Test accuracy: ', test_score[1])
# Model 1: bidirectional LSTM over the integer-encoded (padded) sequences.
x_input = Input(shape=(100,))
emb = Embedding(21, 128, input_length=max_length)(x_input)  # 21 = 20 amino acids + 1 padding/unknown
bi_rnn = Bidirectional(CuDNNLSTM(64, kernel_regularizer=l2(0.01), recurrent_regularizer=l2(0.01), bias_regularizer=l2(0.01)))(emb)
x = Dropout(0.3)(bi_rnn)
# softmax classifier
x_output = Dense(1000, activation='softmax')(x)
model1 = Model(inputs=x_input, outputs=x_output)
model1.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model1.summary()
Model: "model_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_3 (InputLayer) (None, 100) 0
_________________________________________________________________
embedding_3 (Embedding) (None, 100, 128) 2688
_________________________________________________________________
bidirectional_2 (Bidirection (None, 128) 99328
_________________________________________________________________
dropout_2 (Dropout) (None, 128) 0
_________________________________________________________________
dense_2 (Dense) (None, 1000) 129000
=================================================================
Total params: 231,016
Trainable params: 231,016
Non-trainable params: 0
_________________________________________________________________
# Early Stopping
es = EarlyStopping(monitor='val_loss', patience=3, verbose=1)
history1 = model1.fit(
train_pad, y_train,
epochs=50, batch_size=256,
validation_data=(val_pad, y_val),
callbacks=[es]
)
Train on 439493 samples, validate on 54378 samples
Epoch 1/50
439493/439493 [==============================] - 78s 178us/step - loss: 5.5381 - acc: 0.1120 - val_loss: 3.5944 - val_acc: 0.3442
Epoch 2/50
439493/439493 [==============================] - 77s 176us/step - loss: 2.8564 - acc: 0.4733 - val_loss: 2.0159 - val_acc: 0.6706
Epoch 3/50
439493/439493 [==============================] - 77s 175us/step - loss: 1.9031 - acc: 0.6607 - val_loss: 1.3857 - val_acc: 0.7869
Epoch 4/50
439493/439493 [==============================] - 77s 176us/step - loss: 1.4633 - acc: 0.7459 - val_loss: 1.0834 - val_acc: 0.8410
Epoch 5/50
439493/439493 [==============================] - 77s 176us/step - loss: 1.2147 - acc: 0.7920 - val_loss: 0.8749 - val_acc: 0.8773
Epoch 6/50
439493/439493 [==============================] - 77s 175us/step - loss: 1.0633 - acc: 0.8202 - val_loss: 0.8003 - val_acc: 0.8885
Epoch 7/50
439493/439493 [==============================] - 77s 175us/step - loss: 0.9566 - acc: 0.8389 - val_loss: 0.6798 - val_acc: 0.9117
Epoch 8/50
439493/439493 [==============================] - 77s 175us/step - loss: 0.8871 - acc: 0.8519 - val_loss: 0.6555 - val_acc: 0.9140
Epoch 9/50
439493/439493 [==============================] - 77s 176us/step - loss: 0.8342 - acc: 0.8612 - val_loss: 0.6059 - val_acc: 0.9209
Epoch 10/50
439493/439493 [==============================] - 77s 175us/step - loss: 0.7958 - acc: 0.8671 - val_loss: 0.5734 - val_acc: 0.9268
Epoch 11/50
439493/439493 [==============================] - 77s 175us/step - loss: 0.7650 - acc: 0.8735 - val_loss: 0.5454 - val_acc: 0.9326
Epoch 12/50
439493/439493 [==============================] - 77s 175us/step - loss: 0.7413 - acc: 0.8780 - val_loss: 0.5222 - val_acc: 0.9375
Epoch 13/50
439493/439493 [==============================] - 77s 175us/step - loss: 0.7192 - acc: 0.8821 - val_loss: 0.5129 - val_acc: 0.9375
Epoch 14/50
439493/439493 [==============================] - 77s 175us/step - loss: 0.6990 - acc: 0.8856 - val_loss: 0.5042 - val_acc: 0.9390
Epoch 15/50
439493/439493 [==============================] - 77s 175us/step - loss: 0.6847 - acc: 0.8882 - val_loss: 0.4822 - val_acc: 0.9442
Epoch 16/50
439493/439493 [==============================] - 77s 175us/step - loss: 0.6703 - acc: 0.8914 - val_loss: 0.5056 - val_acc: 0.9357
Epoch 17/50
439493/439493 [==============================] - 77s 175us/step - loss: 0.6580 - acc: 0.8935 - val_loss: 0.4658 - val_acc: 0.9478
Epoch 18/50
439493/439493 [==============================] - 77s 175us/step - loss: 0.6470 - acc: 0.8962 - val_loss: 0.4405 - val_acc: 0.9522
Epoch 19/50
439493/439493 [==============================] - 77s 175us/step - loss: 0.6367 - acc: 0.8976 - val_loss: 0.4493 - val_acc: 0.9489
Epoch 20/50
439493/439493 [==============================] - 77s 176us/step - loss: 0.6283 - acc: 0.8991 - val_loss: 0.4332 - val_acc: 0.9523
Epoch 21/50
439493/439493 [==============================] - 77s 176us/step - loss: 0.6163 - acc: 0.9015 - val_loss: 0.4241 - val_acc: 0.9548
Epoch 22/50
439493/439493 [==============================] - 77s 175us/step - loss: 0.6093 - acc: 0.9026 - val_loss: 0.4167 - val_acc: 0.9549
Epoch 23/50
439493/439493 [==============================] - 77s 176us/step - loss: 0.6004 - acc: 0.9043 - val_loss: 0.4188 - val_acc: 0.9547
Epoch 24/50
439493/439493 [==============================] - 77s 175us/step - loss: 0.5955 - acc: 0.9054 - val_loss: 0.4193 - val_acc: 0.9547
Epoch 25/50
439493/439493 [==============================] - 77s 175us/step - loss: 0.5892 - acc: 0.9063 - val_loss: 0.4024 - val_acc: 0.9585
Epoch 26/50
439493/439493 [==============================] - 77s 175us/step - loss: 0.5794 - acc: 0.9081 - val_loss: 0.4065 - val_acc: 0.9560
Epoch 27/50
439493/439493 [==============================] - 77s 175us/step - loss: 0.5737 - acc: 0.9090 - val_loss: 0.3896 - val_acc: 0.9599
Epoch 28/50
439493/439493 [==============================] - 77s 175us/step - loss: 0.5653 - acc: 0.9106 - val_loss: 0.3972 - val_acc: 0.9584
Epoch 29/50
439493/439493 [==============================] - 77s 175us/step - loss: 0.5636 - acc: 0.9113 - val_loss: 0.3953 - val_acc: 0.9582
Epoch 30/50
439493/439493 [==============================] - 77s 175us/step - loss: 0.5598 - acc: 0.9121 - val_loss: 0.3765 - val_acc: 0.9623
Epoch 31/50
439493/439493 [==============================] - 77s 174us/step - loss: 0.5529 - acc: 0.9135 - val_loss: 0.3907 - val_acc: 0.9575
Epoch 32/50
439493/439493 [==============================] - 77s 175us/step - loss: 0.5441 - acc: 0.9150 - val_loss: 0.3771 - val_acc: 0.9606
Epoch 33/50
439493/439493 [==============================] - 77s 176us/step - loss: 0.5418 - acc: 0.9150 - val_loss: 0.3870 - val_acc: 0.9577
Epoch 00033: early stopping
# saving model weights.
model1.save_weights('drive/My Drive/Case_Study/pfam/model1.h5')
plot_history(history1)
display_model_score(model1,
[train_pad, y_train],
[val_pad, y_val],
[test_pad, y_test],
256)
439493/439493 [==============================] - 28s 65us/step
Train loss: 0.36330516427409587
Train accuracy: 0.9645910173696531
----------------------------------------------------------------------
54378/54378 [==============================] - 3s 63us/step
Val loss: 0.3869630661736021
Val accuracy: 0.9577034830108782
----------------------------------------------------------------------
54378/54378 [==============================] - 3s 64us/step
Test loss: 0.3869193921893196
Test accuracy: 0.9587149214887501
def residual_block(data, filters, d_rate):
    """
    data: input tensor
    filters: number of convolution filters
    d_rate: dilation rate
    """
    shortcut = data

    bn1 = BatchNormalization()(data)
    act1 = Activation('relu')(bn1)
    conv1 = Conv1D(filters, 1, dilation_rate=d_rate, padding='same', kernel_regularizer=l2(0.001))(act1)

    # bottleneck convolution
    bn2 = BatchNormalization()(conv1)
    act2 = Activation('relu')(bn2)
    conv2 = Conv1D(filters, 3, padding='same', kernel_regularizer=l2(0.001))(act2)

    # skip connection
    x = Add()([conv2, shortcut])
    return x
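One design constraint worth noting: Add() requires conv2 and the shortcut to have identical shapes, so `filters` must equal the channel count of the incoming tensor. A minimal shape check (illustrative only):
# The block preserves (length, channels), which is what makes it stackable.
from keras import backend as K
t = Input(shape=(100, 128))
print(K.int_shape(residual_block(t, 128, 2)))  # (None, 100, 128)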
# Model 2: ProtCNN-style residual network over the one-hot encoded sequences.
x_input = Input(shape=(100, 21))

# initial convolution
conv = Conv1D(128, 1, padding='same')(x_input)

# per-residue representation
res1 = residual_block(conv, 128, 2)
res2 = residual_block(res1, 128, 3)
x = MaxPooling1D(3)(res2)
x = Dropout(0.5)(x)

# softmax classifier
x = Flatten()(x)
x_output = Dense(1000, activation='softmax', kernel_regularizer=l2(0.0001))(x)
model2 = Model(inputs=x_input, outputs=x_output)
model2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model2.summary()
Model: "model_3"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_5 (InputLayer) (None, 100, 21) 0
__________________________________________________________________________________________________
conv1d_6 (Conv1D) (None, 100, 128) 2816 input_5[0][0]
__________________________________________________________________________________________________
batch_normalization_5 (BatchNor (None, 100, 128) 512 conv1d_6[0][0]
__________________________________________________________________________________________________
activation_5 (Activation) (None, 100, 128) 0 batch_normalization_5[0][0]
__________________________________________________________________________________________________
conv1d_7 (Conv1D) (None, 100, 128) 16512 activation_5[0][0]
__________________________________________________________________________________________________
batch_normalization_6 (BatchNor (None, 100, 128) 512 conv1d_7[0][0]
__________________________________________________________________________________________________
activation_6 (Activation) (None, 100, 128) 0 batch_normalization_6[0][0]
__________________________________________________________________________________________________
conv1d_8 (Conv1D) (None, 100, 128) 49280 activation_6[0][0]
__________________________________________________________________________________________________
add_3 (Add) (None, 100, 128) 0 conv1d_8[0][0]
conv1d_6[0][0]
__________________________________________________________________________________________________
batch_normalization_7 (BatchNor (None, 100, 128) 512 add_3[0][0]
__________________________________________________________________________________________________
activation_7 (Activation) (None, 100, 128) 0 batch_normalization_7[0][0]
__________________________________________________________________________________________________
conv1d_9 (Conv1D) (None, 100, 128) 16512 activation_7[0][0]
__________________________________________________________________________________________________
batch_normalization_8 (BatchNor (None, 100, 128) 512 conv1d_9[0][0]
__________________________________________________________________________________________________
activation_8 (Activation) (None, 100, 128) 0 batch_normalization_8[0][0]
__________________________________________________________________________________________________
conv1d_10 (Conv1D) (None, 100, 128) 49280 activation_8[0][0]
__________________________________________________________________________________________________
add_4 (Add) (None, 100, 128) 0 conv1d_10[0][0]
add_3[0][0]
__________________________________________________________________________________________________
max_pooling1d_1 (MaxPooling1D) (None, 33, 128) 0 add_4[0][0]
__________________________________________________________________________________________________
dropout_3 (Dropout) (None, 33, 128) 0 max_pooling1d_1[0][0]
__________________________________________________________________________________________________
flatten_1 (Flatten) (None, 4224) 0 dropout_3[0][0]
__________________________________________________________________________________________________
dense_3 (Dense) (None, 1000) 4225000 flatten_1[0][0]
==================================================================================================
Total params: 4,361,448
Trainable params: 4,360,424
Non-trainable params: 1,024
__________________________________________________________________________________________________
# Early Stopping
es = EarlyStopping(monitor='val_loss', patience=3, verbose=1)
history2 = model2.fit(
train_ohe, y_train,
epochs=10, batch_size=256,
validation_data=(val_ohe, y_val),
callbacks=[es]
)
Train on 439493 samples, validate on 54378 samples
Epoch 1/10
439493/439493 [==============================] - 120s 272us/step - loss: 0.9157 - acc: 0.9294 - val_loss: 0.4761 - val_acc: 0.9838
Epoch 2/10
439493/439493 [==============================] - 114s 260us/step - loss: 0.4438 - acc: 0.9788 - val_loss: 0.4545 - val_acc: 0.9831
Epoch 3/10
439493/439493 [==============================] - 114s 260us/step - loss: 0.4331 - acc: 0.9814 - val_loss: 0.4443 - val_acc: 0.9848
Epoch 4/10
439493/439493 [==============================] - 114s 260us/step - loss: 0.4198 - acc: 0.9825 - val_loss: 0.4279 - val_acc: 0.9863
Epoch 5/10
439493/439493 [==============================] - 114s 260us/step - loss: 0.4098 - acc: 0.9830 - val_loss: 0.4314 - val_acc: 0.9859
Epoch 6/10
439493/439493 [==============================] - 114s 260us/step - loss: 0.4033 - acc: 0.9834 - val_loss: 0.4181 - val_acc: 0.9867
Epoch 7/10
439493/439493 [==============================] - 114s 260us/step - loss: 0.3943 - acc: 0.9840 - val_loss: 0.4180 - val_acc: 0.9862
Epoch 8/10
439493/439493 [==============================] - 114s 259us/step - loss: 0.3906 - acc: 0.9842 - val_loss: 0.4086 - val_acc: 0.9858
Epoch 9/10
439493/439493 [==============================] - 114s 260us/step - loss: 0.3829 - acc: 0.9845 - val_loss: 0.4015 - val_acc: 0.9866
Epoch 10/10
439493/439493 [==============================] - 114s 260us/step - loss: 0.3841 - acc: 0.9844 - val_loss: 0.3962 - val_acc: 0.9887
# saving model weights.
model2.save_weights('drive/My Drive/Case_Study/pfam/model2.h5')
plot_history(history2)
display_model_score(
model2,
[train_ohe, y_train],
[val_ohe, y_val],
[test_ohe, y_test],
256)
439493/439493 [==============================] - 38s 85us/step
Train loss: 0.3558084576734698
Train accuracy: 0.9969123512774948
----------------------------------------------------------------------
54378/54378 [==============================] - 5s 85us/step
Val loss: 0.39615299251274316
Val accuracy: 0.9886718893955224
----------------------------------------------------------------------
54378/54378 [==============================] - 5s 85us/step
Test loss: 0.3949931418234982
Test accuracy: 0.9882489242257847
x = PrettyTable()
x.field_names = ['Sr.no', 'Model', 'Train Acc', 'Val Acc','Test Acc']
x.add_row(['1.', 'Bidirectional LSTM', '0.964', '0.957', '0.958'])
x.add_row(['2.', 'ProtCNN', '0.996', '0.988', '0.988'])
print(x)
+-------+--------------------+-----------+---------+----------+
| Sr.no | Model | Train Acc | Val Acc | Test Acc |
+-------+--------------------+-----------+---------+----------+
| 1. | Bidirectional LSTM | 0.964 | 0.957 | 0.958 |
| 2. | ProtCNN | 0.996 | 0.988 | 0.988 |
+-------+--------------------+-----------+---------+----------+