Source code for auto_prep.preprocessing.variance_filtering

import pandas as pd

from ..utils.abstract import Numerical, RequiredStep
from ..utils.logging_config import setup_logger

# Module-level logger, created by the project's setup_logger helper and keyed
# by this module's name.
logger = setup_logger(__name__)


class VarianceFilter(RequiredStep, Numerical):
    """
    Transformer to remove numerical columns with zero variance.

    Attributes:
        dropped_columns (list): Names of the columns that the most recent
            ``fit`` call identified as having zero variance. Empty until
            ``fit`` has been called.
    """

    def __init__(self):
        """
        Initializes the transformer with empty list of dropped columns.
        """
        self.dropped_columns = []

    def fit(self, X: pd.DataFrame, y: pd.Series = None) -> "VarianceFilter":
        """
        Identifies columns with zero variance and stores them in
        ``dropped_columns``.

        Only numeric columns are examined. Columns whose variance is NaN
        (e.g. a single-row frame) do not compare equal to zero and are
        therefore kept.

        Args:
            X (pd.DataFrame): The input feature data.
            y (pd.Series, optional): Unused.

        Returns:
            VarianceFilter: The fitted transformer instance.

        Raises:
            ValueError: If the variance computation fails; the original
                exception is attached as the cause.
        """
        logger.start_operation("Fitting VarianceFilter")
        try:
            # Restrict to numeric columns and read the names from the variance
            # result's own index. A non-numeric column would otherwise either
            # make var() raise or leave a boolean mask that is shorter than
            # X.columns, depending on the pandas version.
            variances = X.var(numeric_only=True)
            self.dropped_columns = variances.index[variances == 0].tolist()
            logger.debug(
                f"Successfully fitted VarianceFilter, columns with 0 variance: {self.dropped_columns}"
            )
        except Exception as e:
            logger.error(f"Failed to fit VarianceFilter : {e}", exc_info=True)
            raise ValueError(f"Failed to fit VarianceFilter: {e}") from e
        finally:
            # Always close the logging operation opened above.
            logger.end_operation()
        return self

    def transform(self, X: pd.DataFrame, y: pd.Series = None) -> pd.DataFrame:
        """
        Drops the columns identified as zero variance by ``fit``.

        Names in ``dropped_columns`` that are not present in ``X`` are
        ignored rather than raising.

        Args:
            X (pd.DataFrame): The feature data.
            y (pd.Series, optional): Unused.

        Returns:
            pd.DataFrame: The transformed data without dropped columns.

        Raises:
            ValueError: If dropping the columns fails; the original
                exception is attached as the cause.
        """
        logger.start_operation(
            f"Transforming data by dropping {len(self.dropped_columns)} zero variance columns."
        )
        try:
            X_transformed = X.drop(columns=self.dropped_columns, errors="ignore")
            logger.debug(
                f"Successfully dropped zero variance columns : {self.dropped_columns}"
            )
        except Exception as e:
            logger.error(f"Failed to transform VarianceFilter : {e}", exc_info=True)
            raise ValueError(f"Failed to transform VarianceFilter : {e}") from e
        finally:
            logger.end_operation()
        return X_transformed

    def fit_transform(self, X: pd.DataFrame, y: pd.Series = None) -> pd.DataFrame:
        """
        Fits and transforms the data in one step.

        Args:
            X (pd.DataFrame): The feature data.
            y (pd.Series, optional): Unused.

        Returns:
            pd.DataFrame: The transformed data without dropped columns.

        Raises:
            ValueError: If either the fit or the transform step fails; the
                underlying error is attached as the cause.
        """
        logger.start_operation("Fitting and transforming data with zero variance")
        try:
            X_transformed = self.fit(X).transform(X)
            logger.debug(
                f"Successfully fit_transformed zero variance columns : {self.dropped_columns}"
            )
        except Exception as e:
            logger.error(f"Failed to fit_transform VarianceFilter : {e}", exc_info=True)
            raise ValueError(f"Failed to fit_transform VarianceFilter : {e}") from e
        finally:
            logger.end_operation()
        return X_transformed

    def to_tex(self) -> dict:
        """
        Returns a description of the transformer in dictionary format.

        Returns:
            dict: A single ``"desc"`` entry describing the step and listing
                the currently recorded dropped columns.
        """
        return {
            "desc": f"Removes columns with zero variance. Dropped columns: {self.dropped_columns}",
        }