Skip to content

2_Plots

compute_tsne(data)

Compute t-SNE projection with caching.

Source code in src/nhssynth/modules/dashboard/pages/2_Plots.py
@st.cache_data(show_spinner=False)
def compute_tsne(data: np.ndarray) -> np.ndarray:
    """Return the two-component t-SNE embedding of ``data``.

    The call is wrapped in ``st.cache_data`` with Streamlit's spinner turned
    off. The embedding is initialised from PCA (``init="pca"``).

    Args:
        data: Array handed unchanged to ``TSNE.fit_transform``
            (presumably one row per sample; confirm against the caller).

    Returns:
        The array produced by ``TSNE.fit_transform``.
    """
    embedding = TSNE(init="pca", n_components=2).fit_transform(data)
    return embedding

compute_umap(data)

Compute UMAP projection with caching.

Source code in src/nhssynth/modules/dashboard/pages/2_Plots.py
@st.cache_data(show_spinner=False)
def compute_umap(data: np.ndarray) -> np.ndarray:
    """Return the UMAP embedding of ``data``.

    The call is wrapped in ``st.cache_data`` with Streamlit's spinner turned
    off. The reducer is built with the library's default settings.

    Args:
        data: Array handed unchanged to ``UMAP.fit_transform``
            (presumably one row per sample; confirm against the caller).

    Returns:
        The array produced by ``UMAP.fit_transform``.
    """
    embedding = umap.UMAP().fit_transform(data)
    return embedding

prepare_for_dimensionality(df)

Factorize all categorical columns in a dataframe and normalize values.

Source code in src/nhssynth/modules/dashboard/pages/2_Plots.py
def prepare_for_dimensionality(df: pd.DataFrame) -> pd.DataFrame:
    """Encode every column numerically, min-max scale it, and drop incomplete rows.

    Per-column handling:

    * datetime columns become seconds elapsed since the column's earliest
      timestamp, and timedelta columns become seconds; ``NaT`` stays missing;
    * bool columns become ``0.0`` / ``1.0``;
    * any other non-numeric column (object, category, string) is integer-coded
      with ``pd.factorize``; a missing value receives the code ``-1`` and is
      therefore kept as a level of its own rather than dropped;
    * numeric columns are used as they are.

    Each column is then rescaled to ``[0, 1]``. A column holding a single
    distinct value becomes ``0.0``. Rows that still contain a missing value
    are removed, since UMAP and t-SNE cannot handle NaN.

    Args:
        df: Input frame. It is copied, never modified.

    Returns:
        A new frame with the same columns, scaled values, and no NaN rows.
    """
    types = pd.api.types
    df = df.copy()
    for col in df.columns:
        values = df[col]
        if types.is_datetime64_any_dtype(values):
            # Offsets from the earliest timestamp keep NaT as NaT; casting the
            # timestamps straight to integers would turn NaT into the int64
            # sentinel and make it the column minimum.
            values = values - values.min()
        if types.is_timedelta64_dtype(values):
            values = values.dt.total_seconds()
        elif not types.is_numeric_dtype(values):
            # Covers object, category and string dtypes alike.
            values = pd.Series(pd.factorize(values)[0], index=df.index)
        elif types.is_bool_dtype(values):
            # Boolean arrays do not support subtraction.
            values = values.astype("float64")

        min_val = values.min()
        max_val = values.max()
        # Avoid division by zero when all values are the same
        if max_val != min_val:
            df[col] = (values - min_val) / (max_val - min_val)
        else:
            # Keep missing entries missing so their rows are dropped below,
            # exactly as they are for non-constant columns.
            df[col] = pd.Series(0.0, index=values.index).mask(values.isna())
    # Drop rows with NaN values (required for UMAP/t-SNE)
    return df.dropna()