from abc import ABC, abstractmethod
import pandas as pd
from ..utils.abstract import NonRequiredStep, Numerical
from ..utils.logging_config import setup_logger
# Module-level logger obtained from the project's setup_logger helper, keyed by this module's name.
logger = setup_logger(__name__)
class DimentionReducer(NonRequiredStep, Numerical, ABC):
    """
    Abstract class for dimensionality reduction techniques.

    Concrete subclasses must implement ``fit`` and ``to_tex``.

    NOTE: the class name is a misspelling of "Dimension"; it is kept as-is
    because renaming it would break existing importers.

    Attributes:
        reducer: Slot for the underlying reduction object. It is initialised
            to ``None`` here; presumably subclasses assign the concrete
            model - confirm against the subclasses.
    """

    def __init__(self):
        super().__init__()
        self.reducer = None

    @abstractmethod
    def fit(self, X: pd.DataFrame, y: pd.Series = None) -> "DimentionReducer":
        """
        Fit the reducer on the given data.

        Args:
            X (pd.DataFrame): The input feature data.
            y (pd.Series): The target variable (optional, defaults to None).

        Returns:
            DimentionReducer: Per the annotation, the reducer instance.
        """

    @abstractmethod
    def to_tex(self) -> dict:
        """
        Describe this step as a dictionary.

        NOTE(review): the name suggests the dictionary feeds TeX/LaTeX report
        generation; the expected keys are not visible here - confirm against
        the callers.

        Returns:
            dict: The description of this step.
        """
class FeatureImportanceSelector(NonRequiredStep):
    """
    Transformer to select k% (rounded to whole number) of features
    that are most important according to Random Forest model.

    Attributes:
        k (float): The percentage of top features to keep based on their importance.
        selected_columns (list): List of selected columns based on feature importance.
    """

    def __init__(self, k: float = 10.0):
        """
        Initializes the transformer with the percentage of top important features to keep.

        Args:
            k (float): The percentage of features to retain based on their
                importance. Must lie in the inclusive range 0-100.

        Raises:
            ValueError: If k is outside the range 0-100.
        """
        # Run the base-class initialisation, consistent with the other steps
        # in this module that derive from NonRequiredStep.
        super().__init__()
        if not (0 <= k <= 100):
            raise ValueError("k must be between 0 and 100.")
        self.k = k
        self.selected_columns = []

    def fit(self, X: pd.DataFrame, y: pd.Series = None) -> "FeatureImportanceSelector":
        """
        Identifies the top k% (rounded to whole value) of features most important according to the model.

        Args:
            X (pd.DataFrame): The input feature data.
            y (pd.Series): The target variable.

        Returns:
            FeatureImportanceSelector: The fitted transformer instance.
        """
        # TODO: the importance-based selection is not implemented in this
        # method yet, so ``selected_columns`` is left untouched.
        # Return the instance (instead of an implicit None) so the documented
        # return contract holds and calls can be chained.
        return self