import Pkg; Pkg.add(["CSV","CategoricalArrays",
"Chain", "DataFrames", "GLM", "Plots", "Random", "StatsPlots",
"Statistics","Interact", "Blink"])
using CSV
using CategoricalArrays
using Chain
using DataFrames
using GLM
using Plots
using Random
using StatsPlots
using Statistics
# Display limits for printed tables: show at most 20 rows, and allow very wide
# output so that columns are not truncated.
# (Each key used to be assigned twice; only the last assignment had any effect,
# so the dead, contradictory `COLUMNS = 20` setting has been removed.)
ENV["LINES"] = "20"
ENV["COLUMNS"] = "1000"
# Read the heart-disease CSV into a DataFrame, then look at its dimensions
# (rows, columns) and the per-column summary table.
heart_csv = "/work/Data/Heart Disease Dataset.csv"
df_raw = CSV.read(heart_csv, DataFrame)
(nrow(df_raw), ncol(df_raw))
describe(df_raw)
# Working copy of the raw table in which :sex, :fbs, :exang and :target are
# converted to categorical columns; every other selected column is passed
# through unchanged. `renamecols=false` keeps each converted column under its
# original name instead of appending the function name.
df = select(
    df_raw,
    :age,
    :sex => categorical,
    Between(:cp, :chol),
    :fbs => categorical,
    :restecg,
    :thalach,
    :exang => categorical,
    Between(:oldpeak, :thal),
    :target => categorical;
    renamecols=false
)
# Mean of a handful of columns within each :target group, computed on the rows
# that remain after dropping any row containing a missing value.
combine(
    groupby(dropmissing(df_raw), :target),
    [:age, :sex, :chol, :restecg, :slope] .=> mean,
)

# Mean of every column that is `Real`-typed in `df`, again per :target group of
# the raw table. The column list is taken from `df` (not `df_raw`), so columns
# converted to categorical there are presumably left out -- TODO confirm.
combine(groupby(df_raw, :target), names(df, Real) .=> mean)
# Number of rows for every (:target, :sex) combination, in long form.
combine(groupby(df_raw, [:target, :sex]), nrow)

# The same counts reshaped to wide form: one row per :target value, one column
# per :sex value, cells filled from the :nrow count column.
unstack(combine(groupby(df_raw, [:target, :sex]), nrow), :target, :sex, :nrow)

# Keep the per-:target grouping around and look at its first group.
gd = groupby(df_raw, :target)
gd[1]
# --- Exploratory plots ---------------------------------------------------

# Pairwise correlation plot of the first six columns of the raw table.
@df df_raw corrplot(cols(1:6), grid = false)

# Distribution of :chol for each :target value, drawn as three layers on the
# same axes: violin first, then box plot and dot plot added on top (`!` forms).
# The legend label was misspelled "voilin"; corrected to "violin".
@df df_raw violin(string.(:target), :chol, linewidth = 0, label = "violin")
@df df_raw boxplot!(string.(:target), :chol, fillalpha = 0.75, linewidth = 2,
                    label = "boxplot")
@df df_raw dotplot!(string.(:target), :chol, marker = (:black, stroke(0)),
                    label = "dotplot")

# Histogram of :chol with one set of side-by-side bars per :target value.
@df df_raw groupedhist(:chol, group = :target, bar_position = :dodge)

# Andrews plot of the first four columns, grouped by :target.
@df df_raw andrewsplot(:target, cols(1:4), legend = :topleft)

# Density of :chol per :target value. Note this one reads from `df`, where
# :target is categorical, rather than from `df_raw`.
@df df density(:chol, group = :target)
# --- Probit model for :target --------------------------------------------
#
# The original code used `test` and `df_pred` without defining them anywhere,
# so this section failed with UndefVarError. Both are now constructed here:
#   * `test`    -- a reproducible random hold-out sample of `df_raw`
#   * `df_pred` -- a grid over one regressor with the others held at their means
# NOTE(review): the 80/20 split, the seed, and the choice of `age` as the grid
# variable are assumptions made to get a runnable script; adjust as needed.

# Reproducible train/test split (explicit RNG rather than global seed state).
rng = MersenneTwister(42)
shuffled_rows = randperm(rng, nrow(df_raw))
n_test = round(Int, 0.2 * nrow(df_raw))
test = df_raw[shuffled_rows[1:n_test], :]
train = df_raw[shuffled_rows[(n_test + 1):end], :]

# Fit on the training rows only, so predictions on `test` are out-of-sample.
probit = glm(@formula(target ~ trestbps + age + chol + thalach + oldpeak + slope + ca),
             train, Binomial(), ProbitLink())

# Predicted probability (with confidence bounds) for every hold-out row; the
# point prediction is stored next to the observed data.
test_pred = predict(probit, test, interval=:confidence)
test.predict = test_pred.prediction;

# Prediction grid: vary `xvar` over its observed range, hold every other
# regressor at its training mean. The original plot used `cp` on the x-axis,
# but `cp` is not part of the formula, so predictions cannot vary with it.
predictors = [:trestbps, :age, :chol, :thalach, :oldpeak, :slope, :ca]
xvar = :age
x_lo, x_hi = extrema(skipmissing(train[!, xvar]))
x_grid = collect(range(x_lo; stop=x_hi, length=100))
df_pred = DataFrame([
    p => (p == xvar ? x_grid : fill(mean(skipmissing(train[!, p])), length(x_grid)))
    for p in predictors
])

prob_pred = predict(probit, df_pred, interval=:confidence)

# Fitted probability curve with its confidence band. The y-label previously
# read "Pr(lfp=1)", a leftover from another data set; the response is `target`.
plot(df_pred[!, xvar], Matrix(prob_pred), labels=["Predicted" "Lower" "Upper"],
     xlabel=string(xvar), ylabel="Pr(target=1)")