安装pycaret

! pip install pycaret[full]

导入数据集

from pycaret.datasets import get_data

# Fetch the sample dataset named 'diabetes'; the returned object is reused
# by the later steps of this walkthrough.
diabetes = get_data(dataset="diabetes")

setup环境,自动解析数据集并做预处理

# Import exactly the PyCaret classification functions this walkthrough calls,
# instead of a wildcard import that hides where each name comes from.
from pycaret.classification import (
    compare_models,
    create_model,
    deploy_model,
    ensemble_model,
    evaluate_model,
    interpret_model,
    load_model,
    plot_model,
    predict_model,
    save_model,
    setup,
    tune_model,
)

# Initialise the experiment on the diabetes data; 'Class variable' is the
# name of the column to predict.
exp1 = setup(diabetes, target='Class variable')

比较模型

# Compare the candidate models for the experiment configured by setup().
# The return value is not stored here; in a notebook the result is shown
# as cell output.
compare_models()
      Accuracy  AUC     Recall  Prec.   F1      Kappa   MCC
0     0.6481    0.6898  0.3333  0.4615  0.3871  0.1493  0.1531
1     0.7407    0.8079  0.5556  0.6250  0.5882  0.4000  0.4015
2     0.6852    0.7562  0.5000  0.5294  0.5143  0.2817  0.2819
3     0.8704    0.8997  0.8333  0.7895  0.8108  0.7123  0.7129
4     0.7593    0.7338  0.6667  0.6316  0.6486  0.4658  0.4661
5     0.7593    0.8316  0.5789  0.6875  0.6286  0.4524  0.4561
6     0.7407    0.8165  0.5263  0.6667  0.5882  0.4028  0.4088
7     0.7170    0.7492  0.5000  0.6000  0.5455  0.3424  0.3454
8     0.6604    0.7063  0.3889  0.5000  0.4375  0.1997  0.2029
9     0.6415    0.7556  0.6111  0.4783  0.5366  0.2513  0.2563
Mean  0.7223    0.7747  0.5494  0.5969  0.5685  0.3658  0.3685
SD    0.0653    0.0605  0.1330  0.0992  0.1114  0.1534  0.1526

创建模型

# Train a single model by its PyCaret model ID; 'ada' selects the AdaBoost
# classifier. The trained model is kept in `adaboost` for the next steps.
adaboost = create_model('ada')

fine tuning

# Tune the hyperparameters of the AdaBoost model created above. The result is
# stored under a new name, so `adaboost` still refers to the untuned model.
tuned_adaboost = tune_model(adaboost)

创建ensemble模型(默认是bagging)

# Fit a decision tree to serve as the base learner.
dt = create_model("dt")

# Build an ensemble from the fitted tree. 'Bagging' is ensemble_model's
# default method; it is spelled out here only for readability.
dt_bagged = ensemble_model(dt, method="Bagging")

可视化模型

# Train the AdaBoost classifier that the plots below visualise.
adaboost = create_model("ada")

# Draw, in this order: the AUC plot, the decision boundary and the
# precision-recall curve.
for plot_kind in ("auc", "boundary", "pr"):
    plot_model(adaboost, plot=plot_kind)

# The validation curve is left disabled because it takes a long time to run.
# plot_model(adaboost, plot = 'vc')

Evaluation

# Show PyCaret's evaluation view for the AdaBoost model.
# NOTE(review): this is intended for interactive notebook use - confirm it
# behaves as expected when run as a plain script.
evaluate_model(adaboost)

构建模型并绘制解释图(summary 与 correlation)

# Train the XGBoost classifier that will be interpreted.
xgboost = create_model("xgboost")

# Summary plot ('summary' is interpret_model's default plot type, written
# out explicitly here).
interpret_model(xgboost, plot="summary")

# Correlation plot for the same model.
interpret_model(xgboost, plot="correlation")

解释模型,默认是shap

# Reason plot explaining a single prediction of the XGBoost model.
# observation = 0 picks the row to explain - presumably index 0 of the
# hold-out set; confirm against the PyCaret documentation.
interpret_model(xgboost, plot = 'reason', observation = 0)

在test上的performance

# Train a random forest classifier.
rf = create_model("rf")

# No `data` argument is given, so predict_model scores the test / hold-out
# split rather than a user-supplied DataFrame.
rf_holdout_pred = predict_model(estimator=rf)

执行预测

# Run the random forest on an explicit DataFrame. Note that `diabetes` is the
# same data that was passed to setup(), so this is not unseen data.
predictions = predict_model(rf, data = diabetes)

模型保存到s3上

# Upload the trained random forest to AWS under the name 'rf_aws'; the
# authentication dict names the target bucket.
# NOTE(review): AWS credentials are presumably configured in the environment
# beforehand - confirm before running.
deploy_model(
    model=rf,
    model_name="rf_aws",
    platform="aws",
    authentication={"bucket": "mlliuz-files"},
)

加载模型

# Fetch the model stored as 'rf_aws' from the same AWS bucket it was
# deployed to.
loaded_model = load_model(
    model_name="rf_aws",
    platform="aws",
    authentication={"bucket": "mlliuz-files"},
)

做推理

# Run inference with the model loaded from AWS, again on the `diabetes`
# DataFrame. This rebinds `predictions`, replacing any earlier result.
predictions = predict_model(loaded_model, data = diabetes)

本地保存

# Train the AdaBoost classifier to be persisted.
adaboost = create_model("ada")

# Write the trained model to local storage under the name
# 'ada_for_deployment'.
save_model(model=adaboost, model_name="ada_for_deployment")
最后修改:2021 年 09 月 03 日 12 : 14 AM
如果觉得我的文章对你有用,请随意赞赏