Prediction
I used the manually inserted file to train my model. Every time 'new_articles.csv' is stored, it is scored with the saved model, which I trained with LogisticRegression and which reached an accuracy of 83%.
import os
import pandas as pd
import numpy as np
import pickle
import joblib
from sklearn.feature_extraction.text import CountVectorizer
import sklearn
import pandas as pd
import numpy as np
from sklearn import preprocessing
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn import *
from sklearn.feature_extraction.text import CountVectorizer
import pickle
import joblib
import os
# Score the freshly scraped articles with the previously saved vectorizer and
# model, then export the ten most confident positive predictions.
#
# Reads : new_articles.csv, vectorizer.pickle, saved_model.pkl
# Writes: send.csv (rows predicted as class 1, at most 10, highest 'prob' first)
NEW_ARTICLES_CSV = '/datasets/webscrape/new_articles.csv'
VECTORIZER_PATH = '/datasets/webscrape/vectorizer.pickle'
MODEL_PATH = '/datasets/webscrape/saved_model.pkl'
SEND_CSV = '/datasets/webscrape/send.csv'

df = pd.read_csv(NEW_ARTICLES_CSV)
# Keep a handle on the raw frame: 'Link' is dropped from `df` below and is
# copied back from `result` once the predictions are attached.
result = df

# Build the single text field fed to the vectorizer. Missing titles or
# descriptions are replaced with an empty string; otherwise the concatenation
# yields NaN for that row and the vectorizer rejects the whole batch.
df["Text"] = df['Title'].fillna('') + ': ' + df["Description"].fillna('')
df = df.drop(columns=['Date', 'Link', 'Category', 'Title', 'Description'])
df = df.rename(columns={'Text': 'X'})

# NOTE: pickle executes arbitrary code on load; only load artifacts that were
# produced by this project. Context managers make sure the files are closed.
with open(VECTORIZER_PATH, 'rb') as fh:
    loaded_vectorizer = pickle.load(fh)
with open(MODEL_PATH, 'rb') as fh:
    loaded_model = pickle.load(fh)

# Vectorize once and reuse the matrix for both the labels and the probabilities.
features = loaded_vectorizer.transform(df['X'])
pred = loaded_model.predict(features)
pred_proba = loaded_model.predict_proba(features)

df['y'] = pred
# Assumes column 1 of predict_proba corresponds to label 1, i.e. that label 1
# is the second entry of loaded_model.classes_ -- TODO confirm.
df['prob'] = pred_proba[:, 1]
# Index-aligned assignment: `df` still carries the row index of `result`.
df['Link'] = result['Link']

# Keep only articles predicted as class 1, then the ten most confident ones.
send = df.loc[df['y'] == 1]
send = send.nlargest(n=10, columns=['prob'])
send.to_csv(SEND_CSV)