io

check_input_paths(fn_dataset, fn_typed, fn_synthetic_datasets, fn_sdv_metadata, dir_experiment)

Checks and normalizes the input file paths for the evaluation module.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `fn_dataset` | `str` | The base name of the dataset. | *required* |
| `fn_typed` | `str` | The name of the typed real dataset file. | *required* |
| `fn_synthetic_datasets` | `str` | The filename of the collection of synthetic datasets. | *required* |
| `fn_sdv_metadata` | `str` | The name of the SDV metadata file. | *required* |
| `dir_experiment` | `Path` | The path to the experiment directory. | *required* |

Returns:

| Type | Description |
|------|-------------|
| `tuple[str, str, str, str]` | The dataset name and the checked filenames of the typed data, synthetic datasets and SDV metadata files. |

Source code in `src/nhssynth/modules/evaluation/io.py`

```python
def check_input_paths(
    fn_dataset: str, fn_typed: str, fn_synthetic_datasets: str, fn_sdv_metadata: str, dir_experiment: Path
) -> tuple[str, str, str, str]:
    """
    Checks and normalizes the input file paths for the evaluation module.

    Args:
        fn_dataset: The base name of the dataset.
        fn_typed: The name of the typed real dataset file.
        fn_synthetic_datasets: The filename of the collection of synthetic datasets.
        fn_sdv_metadata: The name of the SDV metadata file.
        dir_experiment: The path to the experiment directory.

    Returns:
        The dataset name and the checked filenames of the typed data, synthetic datasets and SDV metadata files.
    """
    fn_dataset = Path(fn_dataset).stem
    fn_typed, fn_synthetic_datasets, fn_sdv_metadata = io.consistent_endings(
        [fn_typed, fn_synthetic_datasets, fn_sdv_metadata]
    )
    fn_typed, fn_synthetic_datasets, fn_sdv_metadata = io.potential_suffixes(
        [fn_typed, fn_synthetic_datasets, fn_sdv_metadata], fn_dataset
    )
    io.warn_if_path_supplied([fn_typed, fn_synthetic_datasets, fn_sdv_metadata], dir_experiment)
    io.check_exists([fn_typed, fn_synthetic_datasets, fn_sdv_metadata], dir_experiment)
    return fn_dataset, fn_typed, fn_synthetic_datasets, fn_sdv_metadata
```
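As a rough usage sketch (the filenames and experiment directory below are hypothetical, and the expanded `.pkl` names are an assumption about the helpers' naming convention):

```python
from pathlib import Path

from nhssynth.modules.evaluation.io import check_input_paths

# Hypothetical inputs; the io helpers normalise file endings and append the
# dataset stem as a suffix where one is missing.
fn_dataset, fn_typed, fn_synthetic_datasets, fn_sdv_metadata = check_input_paths(
    "support.csv",             # base dataset name; reduced to its stem, "support"
    "typed",                   # expanded to something like "support_typed.pkl"
    "synthetic_datasets",      # e.g. "support_synthetic_datasets.pkl"
    "sdv_metadata",            # e.g. "support_sdv_metadata.pkl"
    Path("experiments/run1"),  # all three files must already exist here
)
```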

load_required_data(args, dir_experiment)

Loads the data from `args` or from disk when the dataloader has not been run previously.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `args` | `Namespace` | The arguments passed to the module, in this case potentially carrying the outputs of the dataloader module. | *required* |
| `dir_experiment` | `Path` | The path to the experiment directory. | *required* |

Returns:

| Type | Description |
|------|-------------|
| `tuple[str, DataFrame, DataFrame, dict[str, dict[str, Any]]]` | The dataset name, the real data, the bundle of synthetic data from the modelling stage, and the SDV metadata. |

Source code in `src/nhssynth/modules/evaluation/io.py`

```python
def load_required_data(
    args: argparse.Namespace, dir_experiment: Path
) -> tuple[str, pd.DataFrame, pd.DataFrame, dict[str, dict[str, Any]]]:
    """
    Loads the data from `args` or from disk when the dataloader has not been run previously.

    Args:
        args: The arguments passed to the module, in this case potentially carrying the outputs of the dataloader module.
        dir_experiment: The path to the experiment directory.

    Returns:
        The dataset name, the real data, the bundle of synthetic data from the modelling stage, and the SDV metadata.
    """
    if all(x in args.module_handover for x in ["dataset", "typed", "synthetic_datasets", "sdv_metadata"]):
        return (
            args.module_handover["dataset"],
            args.module_handover["typed"],
            args.module_handover["synthetic_datasets"],
            args.module_handover["sdv_metadata"],
        )
    else:
        fn_dataset, fn_typed, fn_synthetic_datasets, fn_sdv_metadata = check_input_paths(
            args.dataset, args.typed, args.synthetic_datasets, args.sdv_metadata, dir_experiment
        )
        with open(dir_experiment / fn_typed, "rb") as f:
            real_data = pickle.load(f).contents
        with open(dir_experiment / fn_sdv_metadata, "rb") as f:
            sdv_metadata = pickle.load(f)
        with open(dir_experiment / fn_synthetic_datasets, "rb") as f:
            synthetic_datasets = pickle.load(f).contents

        return fn_dataset, real_data, synthetic_datasets, sdv_metadata
```
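A minimal sketch of exercising the fall-back path directly (the filenames are hypothetical, `module_handover` is left empty to force loading from disk, and the pickled artefacts are assumed to already exist in the experiment directory):

```python
import argparse
from pathlib import Path

from nhssynth.modules.evaluation.io import load_required_data

# Empty module_handover: none of the dataloader outputs are present,
# so the function falls back to reading the pickled files from disk.
args = argparse.Namespace(
    dataset="support.csv",
    typed="typed",
    synthetic_datasets="synthetic_datasets",
    sdv_metadata="sdv_metadata",
    module_handover={},
)
fn_dataset, real_data, synthetic_datasets, sdv_metadata = load_required_data(
    args, Path("experiments/run1")
)
```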

output_eval(evaluations, fn_dataset, fn_evaluations, dir_experiment)

Writes the evaluations to a file in the experiment output directory.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `evaluations` | `DataFrame` | The evaluations to output. | *required* |
| `fn_dataset` | `Path` | The base name of the dataset. | *required* |
| `fn_evaluations` | `str` | The filename of the collection of evaluations. | *required* |
| `dir_experiment` | `Path` | The path to the experiment output directory. | *required* |

Returns:

| Type | Description |
|------|-------------|
| `None` | Nothing is returned; the evaluations are pickled to a file in the experiment directory. |

Source code in `src/nhssynth/modules/evaluation/io.py`

```python
def output_eval(
    evaluations: pd.DataFrame,
    fn_dataset: Path,
    fn_evaluations: str,
    dir_experiment: Path,
) -> None:
    """
    Writes the evaluations to a file in the experiment output directory.

    Args:
        evaluations: The evaluations to output.
        fn_dataset: The base name of the dataset.
        fn_evaluations: The filename of the collection of evaluations.
        dir_experiment: The path to the experiment output directory.

    Returns:
        None; the evaluations are pickled to `dir_experiment / fn_evaluations` as an `Evaluations` object.
    """
    fn_evaluations = io.consistent_ending(fn_evaluations)
    fn_evaluations = io.potential_suffix(fn_evaluations, fn_dataset)
    io.warn_if_path_supplied([fn_evaluations], dir_experiment)
    with open(dir_experiment / fn_evaluations, "wb") as f:
        pickle.dump(Evaluations(evaluations), f)
```
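For illustration, a call might look like the following (the DataFrame contents, dataset stem and output path are hypothetical, and the expanded pickle name assumes the same suffixing convention as above):

```python
import pandas as pd
from pathlib import Path

from nhssynth.modules.evaluation.io import output_eval

# Hypothetical evaluation results assembled earlier in the evaluation module.
evaluations = pd.DataFrame({"metric": ["KSComplement"], "score": [0.92]})

# "support" is the dataset stem; writes something like
# experiments/run1/support_evaluations.pkl
output_eval(evaluations, "support", "evaluations", Path("experiments/run1"))
```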