Skip to content

plots

factorize_all_categoricals(df)

Factorize the categorical columns of a dataframe, convert datetime columns to numbers, and min-max scale every column.

Source code in src/nhssynth/modules/plotting/plots.py
def factorize_all_categoricals(
    df: pd.DataFrame,
) -> pd.DataFrame:
    """Encode non-numeric columns as numbers and min-max scale every column.

    Object, string and categorical columns are replaced by their integer
    codes from `pd.factorize` (missing values receive the code -1), and
    datetime columns are converted with `pd.to_numeric`. Every column is then
    rescaled as `(x - min) / (max - min)`.

    Args:
        df: The dataframe to transform. It is modified in place.

    Returns:
        The same dataframe object, with every column rescaled. A column whose
        values are all equal is mapped to 0.0 rather than NaN.
    """
    for col in df.columns:
        series = df[col]
        dtype = series.dtype
        if (
            pd.api.types.is_object_dtype(dtype)
            or pd.api.types.is_string_dtype(dtype)
            or isinstance(dtype, pd.CategoricalDtype)
        ):
            # Keep the original index so the assignment below stays aligned.
            series = pd.Series(pd.factorize(series)[0], index=series.index)
        elif pd.api.types.is_datetime64_any_dtype(dtype):
            # Covers tz-aware and non-nanosecond datetimes as well.
            series = pd.to_numeric(series)

        min_val = series.min()
        max_val = series.max()
        span = max_val - min_val
        if pd.notna(span) and span == 0:
            # Constant column: dividing by the zero range would turn every
            # value into NaN, so map the column to 0.0 instead.
            df[col] = (series - min_val).astype(float)
        else:
            df[col] = (series - min_val) / span

    return df