Coverage for src / features / build_features.py: 100%
13 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-26 08:30 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-26 08:30 +0000
1import pandas as pd
2from sklearn.compose import ColumnTransformer
3from sklearn.preprocessing import OneHotEncoder, StandardScaler
def clean_raw_data(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of the raw data with a numeric ``TotalCharges``.

    The input frame is left untouched; all work happens on a copy.
    ``TotalCharges`` is parsed as numeric, and every value that cannot be
    parsed (for example an empty string) ends up as ``0.0``. The original
    author's note says these blanks correspond to rows with ``tenure=0``.

    Args:
        df: Raw data containing a ``TotalCharges`` column.

    Returns:
        A new DataFrame whose ``TotalCharges`` column is numeric with no
        missing values.
    """
    cleaned = df.copy()

    # errors="coerce" turns unparseable entries into NaN instead of raising,
    # so they can be replaced with 0.0 in a single pass afterwards.
    parsed_charges = pd.to_numeric(cleaned["TotalCharges"], errors="coerce")
    cleaned["TotalCharges"] = parsed_charges.fillna(0.0)

    return cleaned
def get_preprocessor(feature_columns: list) -> ColumnTransformer:
    """Build the unfitted ColumnTransformer used to preprocess the features.

    The columns ``tenure``, ``MonthlyCharges`` and ``TotalCharges`` are
    always routed to a ``StandardScaler``. Every other entry of
    ``feature_columns`` is treated as categorical and routed to a
    ``OneHotEncoder``.

    Args:
        feature_columns: Names of the feature columns that will be passed
            to the transformer.

    Returns:
        A configured ``ColumnTransformer`` with a ``"num"`` and a ``"cat"``
        step, in that order.
    """
    scaled_columns = ["tenure", "MonthlyCharges", "TotalCharges"]
    encoded_columns = [
        name for name in feature_columns if name not in scaled_columns
    ]

    scaling_step = ("num", StandardScaler(), scaled_columns)
    # NOTE(review): with drop="first" and handle_unknown="ignore" an unseen
    # category is presumably encoded as all zeros, i.e. the same as the
    # dropped category - confirm this is acceptable for the model.
    encoding_step = (
        "cat",
        OneHotEncoder(handle_unknown="ignore", drop="first"),
        encoded_columns,
    )

    return ColumnTransformer(transformers=[scaling_step, encoding_step])