Custom method example

Example comparison with a custom benchmark

Comparing the feature importance generated by lightgbm and xgboost

In the example below, we implement a simple method (GBImportancePlot) that saves the feature importance plots of the lightgbm and xgboost models to a local directory named importance_output.

"""
mlgauge example to compare feature importance from different methods.

In this example, we plot the feature importance from lightgbm and xgboost for a few datasets and export the plots to a local output directory. The example shows how mlgauge can be used for qualitative comparisons.

"""
from mlgauge import Analysis, Method
from xgboost import XGBClassifier, plot_importance as xgbplot
from lightgbm import LGBMClassifier, plot_importance as lgbplot
import matplotlib.pyplot as plt
import pandas as pd
import os


# Fixed seed, passed to Analysis as random_state further down in this script.
SEED = 42


class GBImportancePlot(Method):
    """Method that fits a gradient boosting classifier and saves its importance plot.

    The wrapped estimator must be an ``XGBClassifier`` or an ``LGBMClassifier``;
    the matching library plotting function is selected at construction time.
    """

    def __init__(self, gbmestimator):
        """Store the estimator and pick the plotting function for its library.

        Args:
            gbmestimator: an ``XGBClassifier`` or ``LGBMClassifier`` instance.

        Raises:
            TypeError: if ``gbmestimator`` is neither of the supported types.
        """
        super().__init__()

        self.gbmestimator = gbmestimator

        # Checked in order: xgboost first, then lightgbm.
        supported = (
            (XGBClassifier, xgbplot),
            (LGBMClassifier, lgbplot),
        )
        for estimator_type, plotter in supported:
            if isinstance(gbmestimator, estimator_type):
                self.plot_imp = plotter
                break
        else:
            raise TypeError("gbmestimator must be an XGBClassifier or LGBMClassifier")

    def train(self, X_train, y_train, feature_names, category_indicator=None):
        """Fit the estimator and write ``importance.png`` into ``self.output_dir``.

        Args:
            X_train: training features.
            y_train: training targets.
            feature_names: column names used to label the features.
            category_indicator: accepted but not used by this method.
        """
        # Fitting on a named DataFrame lets the xgboost/lightgbm plotting
        # function add the feature names as tick labels automatically.
        frame = pd.DataFrame(X_train, columns=feature_names)
        self.gbmestimator.fit(frame, y_train)

        figure, axes = plt.subplots()
        plot_options = {
            "ax": axes,
            "xlabel": "Feature importance",
            "importance_type": "gain",
        }
        self.plot_imp(self.gbmestimator, **plot_options)

        # self.output_dir is made available through the Analysis class
        destination = os.path.join(self.output_dir, "importance.png")
        figure.savefig(destination, bbox_inches="tight")
        plt.close(figure)


# Each entry pairs a short label with a method instance; the labels show up as
# the per-method sub-directory names in the output directory.
methods = [
    ("xgb", GBImportancePlot(XGBClassifier(n_jobs=-1, verbosity=0))),
    # `verbose=-1` silences LightGBM's logging. The older `silent` argument is
    # deprecated and no longer accepted as a constructor argument by recent
    # LightGBM releases, whereas `verbose` is understood by old and new ones.
    ("lgb", GBImportancePlot(LGBMClassifier(n_jobs=-1, verbose=-1))),
]

# Run both methods on three classification datasets and collect the plots
# under the "importance_output" directory.
an = Analysis(
    methods=methods,
    datasets="classification",
    n_datasets=3,
    random_state=SEED,
    use_test_set=False,  # our method does not implement testing
    output_dir="importance_output",
)

an.run()

Output:

The code will save the figures generated by the different models in the output directory:

importance_output
└── Analysis_1
    ├── analcatdata_aids
    │   ├── lgb
    │   │   └── importance.png
    │   └── xgb
    │       └── importance.png
    ├── labor
    │   ├── lgb
    │   │   └── importance.png
    │   └── xgb
    │       └── importance.png
    └── mfeat_morphological
        ├── lgb
        │   └── importance.png
        └── xgb
            └── importance.png