Feature selection techniques made simple with Python code
RFE (Recursive Feature Elimination):
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
# Create a logistic regression estimator
estimator = LogisticRegression()
# Select the top 5 features
k = 5
selector = RFE(estimator, n_features_to_select=k)
X_new = selector.fit_transform(X, y)
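After fitting, RFE records which columns survived; support_ and ranking_ are standard attributes of the fitted selector:
# Boolean mask of kept features and the order in which features were eliminated
print(selector.support_)
print(selector.ranking_)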
Recursive Feature Elimination with Cross-Validation (RFECV):
from sklearn.feature_selection import RFECV
from sklearn.linear_model import LogisticRegression
# Create a logistic regression estimator
estimator = LogisticRegression()
# Perform RFECV with cross-validation
selector = RFECV(estimator, step=1, cv=5)
X_new = selector.fit_transform(X, y)
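Unlike plain RFE, RFECV chooses the feature count itself; the fitted selector reports it:
# Number of features judged optimal by cross-validation
print(selector.n_features_)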
Stability Selection:
import numpy as np
from sklearn.linear_model import Lasso
# RandomizedLasso was removed from scikit-learn (0.21+). A minimal
# stability-selection sketch: fit Lasso on bootstrap resamples and keep
# features selected in a large fraction of runs; alpha and the 0.8
# threshold below are illustrative, not tuned.
n_samples, n_features = X.shape
counts = np.zeros(n_features)
for _ in range(100):
    idx = np.random.choice(n_samples, n_samples, replace=True)
    counts += Lasso(alpha=0.01).fit(X[idx], y[idx]).coef_ != 0
stable = counts / 100 >= 0.8  # keep features selected in >= 80% of runs
X_new = X[:, stable]  # assumes X and y are NumPy arrays
Recursive Feature Elimination with a Regularized Estimator:
from sklearn.feature_selection import RFECV
from sklearn.linear_model import LogisticRegressionCV
# Create a logistic regression estimator with cross-validation
estimator = LogisticRegressionCV()
# Run RFECV; LogisticRegressionCV tunes its own regularization strength internally
selector = RFECV(estimator, step=1, cv=5)
X_new = selector.fit_transform(X, y)
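RFECV refits the estimator on the surviving features and stores it as estimator_; since that estimator is a LogisticRegressionCV, the regularization strength it settled on can be inspected:
# Regularization strength chosen on the retained features
print(selector.estimator_.C_)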
Genetic Algorithm for Feature Selection:
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from genetic_selection import GeneticSelectionCV  # pip install sklearn-genetic
# Create a pipeline with feature scaling and classification
pipe = make_pipeline(StandardScaler(), RandomForestClassifier())
# Evolve candidate feature subsets, scoring each by cross-validation
selector = GeneticSelectionCV(pipe, cv=5)
selector.fit(X, y)
X_new = selector.transform(X)
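GeneticSelectionCV follows the scikit-learn selector API, so the evolved subset is available as a support mask:
# Boolean mask over the original feature columns
print(selector.support_)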
Feature Extraction using Autoencoders:
import tensorflow as tf
from tensorflow.keras import layers
# Build an autoencoder model for feature extraction
input_dim = X.shape[1]
encoding_dim = 10
input_layer = layers.Input(shape=(input_dim,))
encoder = layers.Dense(encoding_dim, activation='relu')(input_layer)
decoder = layers.Dense(input_dim, activation='linear')(encoder)  # linear output suits MSE reconstruction
autoencoder = tf.keras.Model(inputs=input_layer, outputs=decoder)
autoencoder.compile(optimizer='adam', loss='mean_squared_error')
# Fit the autoencoder to the data
autoencoder.fit(X, X, epochs=100, batch_size=32)
# Extract the encoded features from the bottleneck layer
encoder_model = tf.keras.Model(inputs=input_layer, outputs=encoder)
X_new = encoder_model.predict(X)
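Autoencoders usually train more stably on scaled inputs; a minimal sketch, assuming min-max scaling suits your data, that would replace the fit call above:
from sklearn.preprocessing import MinMaxScaler
# Scale features to [0, 1] before reconstruction training
X_scaled = MinMaxScaler().fit_transform(X)
autoencoder.fit(X_scaled, X_scaled, epochs=100, batch_size=32)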
Correlation-based Feature Selection:
from sklearn.feature_selection import SelectKBest, f_regression
# f_regression scores each feature by the F-statistic of its linear
# correlation with the target, so no separate correlation matrix is needed
# Select the top 5 features by correlation strength
k = 5
selector = SelectKBest(score_func=f_regression, k=k)
X_new = selector.fit_transform(X, y)
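SelectKBest keeps the per-feature statistics, which makes the ranking easy to sanity-check:
# F-scores and p-values for every original feature
print(selector.scores_)
print(selector.pvalues_)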
Mutual Information:
from sklearn.feature_selection import SelectKBest, mutual_info_classif
# Select the top k features by mutual information with the target
k = 5
selector = SelectKBest(score_func=mutual_info_classif, k=k)
X_new = selector.fit_transform(X, y)
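Unlike the F-test, mutual information also captures nonlinear dependence; the retained column indices come from get_support:
# Integer indices of the selected feature columns
print(selector.get_support(indices=True))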
ReliefF:
from skrebate import ReliefF  # pip install skrebate; ReliefF is not part of scikit-learn
# Create a ReliefF model that keeps the 5 highest-scoring features
model = ReliefF(n_features_to_select=5)
# Perform feature selection using ReliefF (expects NumPy arrays)
X_new = model.fit_transform(X, y)
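After fitting, skrebate stores a per-feature ReliefF relevance score:
# ReliefF relevance scores, one per original column
print(model.feature_importances_)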
Sparse-Learning-Based Feature Selection (NDFS):
from skfeature.function.sparse_learning_based import NDFS
from skfeature.utility.sparse_learning import feature_ranking
# NDFS (Nonnegative Discriminative Feature Selection) learns a feature weight
# matrix that is then ranked; n_clusters=5 here is illustrative, not tuned
W = NDFS.ndfs(X, n_clusters=5)
idx = feature_ranking(W)
X_new = X[:, idx[:k]]  # keep the k top-ranked features