安装pycaret
! pip install pycaret[full]
导入数据集
from pycaret.datasets import get_data
# Fetch the 'diabetes' sample dataset through pycaret.datasets.get_data.
diabetes = get_data("diabetes")
setup环境,自动解析数据集并做预处理
from pycaret.classification import *
# Initialise the classification experiment on the diabetes data, using
# 'Class variable' as the label column. A fixed session_id seeds the
# experiment so the train/hold-out split and the cross-validation scores are
# reproducible from run to run.
exp1 = setup(diabetes, target="Class variable", session_id=123)
比较模型
# Train and cross-validate the available classifiers and display them ranked
# in a score grid.
# NOTE(review): the return value (the best model) is not captured here.
compare_models()
Fold | Accuracy | AUC | Recall | Prec. | F1 | Kappa | MCC |
---|---|---|---|---|---|---|---|
0 | 0.6481 | 0.6898 | 0.3333 | 0.4615 | 0.3871 | 0.1493 | 0.1531 |
1 | 0.7407 | 0.8079 | 0.5556 | 0.6250 | 0.5882 | 0.4000 | 0.4015 |
2 | 0.6852 | 0.7562 | 0.5000 | 0.5294 | 0.5143 | 0.2817 | 0.2819 |
3 | 0.8704 | 0.8997 | 0.8333 | 0.7895 | 0.8108 | 0.7123 | 0.7129 |
4 | 0.7593 | 0.7338 | 0.6667 | 0.6316 | 0.6486 | 0.4658 | 0.4661 |
5 | 0.7593 | 0.8316 | 0.5789 | 0.6875 | 0.6286 | 0.4524 | 0.4561 |
6 | 0.7407 | 0.8165 | 0.5263 | 0.6667 | 0.5882 | 0.4028 | 0.4088 |
7 | 0.7170 | 0.7492 | 0.5000 | 0.6000 | 0.5455 | 0.3424 | 0.3454 |
8 | 0.6604 | 0.7063 | 0.3889 | 0.5000 | 0.4375 | 0.1997 | 0.2029 |
9 | 0.6415 | 0.7556 | 0.6111 | 0.4783 | 0.5366 | 0.2513 | 0.2563 |
Mean | 0.7223 | 0.7747 | 0.5494 | 0.5969 | 0.5685 | 0.3658 | 0.3685 |
SD | 0.0653 | 0.0605 | 0.1330 | 0.0992 | 0.1114 | 0.1534 | 0.1526 |
创建模型
# Fit the model registered under the 'ada' id (AdaBoost) with the experiment's
# cross-validation settings.
adaboost = create_model("ada")
fine tuning
# Tune the hyperparameters of the AdaBoost model. The result is stored under a
# separate name, so the untuned `adaboost` stays available.
tuned_adaboost = tune_model(adaboost)
创建ensemble模型(默认是bagging)
# Train a single decision tree to serve as the base learner.
dt = create_model("dt")
# Build an ensemble from the trained tree. No method is passed, so the
# library default applies (presumably bagging, as the variable name
# suggests; confirm against the PyCaret version in use).
dt_bagged = ensemble_model(dt)
可视化模型
# Train the AdaBoost model whose diagnostics are plotted below.
adaboost = create_model("ada")

# Draw the AUC plot, the decision boundary and the precision-recall curve,
# in that order.
for plot_kind in ("auc", "boundary", "pr"):
    plot_model(adaboost, plot=plot_kind)

# The validation curve is left disabled because it is very time-consuming:
# plot_model(adaboost, plot="vc")
Evaluation
# Show PyCaret's evaluation view for the trained AdaBoost model.
# NOTE(review): presumably an interactive widget that needs a notebook
# front end; confirm for the environment in use.
evaluate_model(adaboost)
构建模型
# Fit the model registered under the 'xgboost' id; it is explained below.
xgboost = create_model("xgboost")
# No plot type is given, so the library's default interpretation plot is
# drawn (the summary plot, per the original comment).
interpret_model(xgboost)
# Correlation plot for the same model.
interpret_model(xgboost, plot="correlation")
解释模型,默认是shap
# Reason plot explaining the model's prediction for observation 0.
interpret_model(xgboost, plot="reason", observation=0)
在test上的performance
# Fit the model registered under the 'rf' id (random forest).
rf = create_model("rf")
# No data argument is passed, so predictions are made on the test / hold-out
# set kept aside by the experiment.
rf_holdout_pred = predict_model(rf)
执行预测
# Score the whole diabetes frame with the random forest.
# NOTE(review): this is the same frame that was passed to setup, so it
# contains rows the model was trained on; these are not out-of-sample
# predictions.
predictions = predict_model(rf, data=diabetes)
模型部署到 AWS S3 上
# Deploy the random forest to the 'mlliuz-files' bucket on AWS under the name
# 'rf_aws'.
# NOTE(review): presumably requires AWS credentials to be configured in the
# environment; confirm before running.
deploy_model(
    model=rf,
    model_name="rf_aws",
    platform="aws",
    authentication={"bucket": "mlliuz-files"},
)
加载模型
# Load the model stored as 'rf_aws' back from the 'mlliuz-files' bucket on AWS.
# NOTE(review): presumably requires AWS credentials to be configured in the
# environment; confirm before running.
loaded_model = load_model(
    model_name="rf_aws",
    platform="aws",
    authentication={"bucket": "mlliuz-files"},
)
做推理
# Run inference on the diabetes frame with the model loaded from AWS.
# This rebinds `predictions`, replacing any earlier value under that name.
predictions = predict_model(loaded_model, data=diabetes)
本地保存
# Train a fresh AdaBoost model to be persisted.
adaboost = create_model("ada")
# Save the model locally under the name 'ada_for_deployment'.
save_model(adaboost, model_name="ada_for_deployment")