import pandas as pd
# Read the fertility dataset into a DataFrame.
data1 = pd.read_csv(filepath_or_buffer='/home/jovyan/work/fertility.csv')

# Quick look at the column labels and the first ten rows. These bare
# expressions only display something in an interactive session/notebook.
data1.columns
data1.head(n=10)
from sklearn.preprocessing import OrdinalEncoder
# Columns that are replaced by ordinal codes before modelling.
# NOTE(review): assumes each of these holds categorical labels in the CSV --
# confirm against the data.
categorical_columns = [
    'Smoking habit',
    'Diagnosis',
    'Frequency of alcohol consumption',
    'Season',
    'Childish diseases',
    'Accident or serious trauma',
    'High fevers in the last year',
    'Surgical intervention',
]

# Count missing values per column before encoding (displayed only in an
# interactive session/notebook).
data1.isnull().sum()

# Fit ONE encoder on all categorical columns at once. Each column is still
# encoded independently, so the resulting codes match a per-column fit, but
# the learned categories for every column stay available afterwards in
# ord_enc.categories_ (same order as categorical_columns) instead of being
# overwritten by the next refit.
ord_enc = OrdinalEncoder()
encoded_values = ord_enc.fit_transform(data1[categorical_columns])

# Write the codes back column by column so each column is fully replaced by
# its numeric version.
for position, column in enumerate(categorical_columns):
    data1[column] = encoded_values[:, position]

# Spot-check one encoded column (displayed only interactively).
data1['Smoking habit'].head(10)
# Predictor columns, each listed exactly once. The previous list contained
# 'Smoking habit' twice, which fed the model a duplicated (perfectly
# collinear) feature.
# NOTE(review): 'Surgical intervention' and 'High fevers in the last year'
# are encoded earlier in this script but were never selected; they are
# included here on the assumption that this was the intent -- confirm.
feature_columns = [
    'Season',
    'Age',
    'Childish diseases',
    'Accident or serious trauma',
    'Surgical intervention',
    'High fevers in the last year',
    'Frequency of alcohol consumption',
    'Smoking habit',
    'Number of hours spent sitting per day',
]
x = data1[feature_columns]

# Target: the encoded diagnosis column.
y = data1['Diagnosis']
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Hold out 25% of the rows for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=0
)

# Fit a logistic-regression classifier on the training portion.
classifier = LogisticRegression(random_state=42)
classifier.fit(X_train, y_train)

# Predict the target for the held-out rows.
y_pred = classifier.predict(X_test)

# accuracy_score is documented as (y_true, y_pred). Accuracy itself is
# symmetric, but keeping the documented order avoids silently wrong results
# if this line is later changed to an asymmetric metric (precision, recall...).
accuracy = accuracy_score(y_test, y_pred)
accuracy