Coverage for src / features / build_features.py: 100%

13 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-26 08:30 +0000

1import pandas as pd 

2from sklearn.compose import ColumnTransformer 

3from sklearn.preprocessing import OneHotEncoder, StandardScaler 

4 

5 

def clean_raw_data(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of the raw data frame.

    The ``TotalCharges`` column is converted to a numeric dtype. Entries
    that cannot be parsed as numbers (such as blank strings) become NaN
    and are then replaced with ``0.0``. The original author notes that
    these blank entries are the ``tenure=0`` rows; that is not checked
    here.

    The input frame is left untouched; all changes are made on a copy.
    """
    # Unparseable strings are coerced to NaN, then defaulted to zero.
    total_charges = pd.to_numeric(df["TotalCharges"], errors="coerce").fillna(0.0)

    cleaned = df.copy()
    cleaned["TotalCharges"] = total_charges
    return cleaned

18 

19 

def get_preprocessor(feature_columns: list) -> ColumnTransformer:
    """Build the unfitted ColumnTransformer used for feature preprocessing.

    The three fixed numeric columns (``tenure``, ``MonthlyCharges`` and
    ``TotalCharges``) are routed through a ``StandardScaler``. Every name
    in ``feature_columns`` that is not one of those three is treated as
    categorical and routed through a ``OneHotEncoder`` configured with
    ``handle_unknown="ignore"`` and ``drop="first"``.

    Args:
        feature_columns: Names of the feature columns to preprocess.

    Returns:
        A configured ``ColumnTransformer`` with a ``"num"`` and a ``"cat"``
        step, in that order.
    """
    numeric_features = ["tenure", "MonthlyCharges", "TotalCharges"]

    # Anything not explicitly numeric falls into the categorical group,
    # keeping the order in which it appears in feature_columns.
    categorical_features = []
    for name in feature_columns:
        if name in numeric_features:
            continue
        categorical_features.append(name)

    scaling_step = ("num", StandardScaler(), numeric_features)
    encoder = OneHotEncoder(handle_unknown="ignore", drop="first")
    encoding_step = ("cat", encoder, categorical_features)

    return ColumnTransformer(transformers=[scaling_step, encoding_step])