Skip to content

excel_writer

Excel writing functionality for the devices_rap pipeline.

create_excel_reports(output_workbooks, output_directory, use_multiprocessing)

Create the Excel reports based on the processed data. The function will create an Excel file for each region containing the processed worksheets for that region.

Parameters:

Name Type Description Default
output_workbooks dict

The processed data for each region

required
output_directory Path

The path to save the Excel reports to

required
use_multiprocessing bool

Whether to use multiprocessing for writing the Excel files.

required

Returns:

Type Description
None
Source code in devices_rap/data_io/output/excel_writer.py
def create_excel_reports(
    output_workbooks: dict[str, dict[str, pd.DataFrame]],
    output_directory: Path,
    use_multiprocessing: bool,
):
    """
    Create the Excel reports based on the processed data. The function will create an Excel file for
    each region containing the processed worksheets for that region.

    Parameters
    ----------
    output_workbooks : dict
        The processed data for each region
    output_directory : Path
        The path to save the Excel reports to
    use_multiprocessing : bool
        Whether to use multiprocessing for writing the Excel files.

    Returns
    -------
    None
    """
    logger.info("Creating excel reports")

    if use_multiprocessing:
        with ThreadPoolExecutor() as executor:
            futures = [
                executor.submit(
                    process_region,
                    output_directory=output_directory,
                    region=region,
                    worksheets=worksheets,
                    use_multiprocessing=use_multiprocessing,
                )
                for region, worksheets in output_workbooks.items()
            ]
            for future in tqdm.tqdm(as_completed(futures), total=len(futures), position=0):
                future.result()
    else:
        for region, worksheets in output_workbooks.items():
            process_region(
                output_directory=output_directory,
                region=region,
                worksheets=worksheets,
                use_multiprocessing=use_multiprocessing,
            )

    logger.success("Excel reports created successfully.")

process_region(output_directory, region, worksheets, use_multiprocessing)

Process the data for a region and create the Excel report.

Parameters:

Name Type Description Default
output_directory Path

The path to save the Excel reports to

required
region str

The region to process

required
worksheets dict

The processed data for the region

required
use_multiprocessing bool

Whether to use multiprocessing for writing the Excel file

required

Returns:

Type Description
None
Source code in devices_rap/data_io/output/excel_writer.py
def process_region(
    output_directory: Path,
    region: str,
    worksheets: dict[str, pd.DataFrame],
    use_multiprocessing: bool,
):
    """
    Process the data for a region and create the Excel report.

    Parameters
    ----------
    output_directory : Path
        The path to save the Excel reports to
    region : str
        The region to process
    worksheets : dict
        The processed data for the region
    use_multiprocessing : bool
        Whether to use multiprocessing for writing the Excel file

    Returns
    -------
    None
    """
    output_file = output_directory / f"{region.upper().replace(' ', '_')}_RAG_STATUS_REPORT.xlsx"

    if output_file.exists():
        logger.warning(f"Overwriting the existing Excel file: {output_file}")
        output_file.unlink()
    else:
        logger.info(f"Creating Excel report for {region}")

    create_excel_file(
        output_file=output_file, worksheets=worksheets, use_multiprocessing=use_multiprocessing
    )

    logger.success(f"Excel report for {region} created successfully.")

create_excel_file(output_file, worksheets, use_multiprocessing)

Create an Excel file with the given worksheets.

Parameters:

Name Type Description Default
output_file Path

The path to save the Excel file to

required
worksheets dict

The worksheets to include in the Excel file

required
use_multiprocessing bool

Whether to use multiprocessing for writing the Excel file

required

Returns:

Type Description
None
Source code in devices_rap/data_io/output/excel_writer.py
def create_excel_file(output_file: Path, worksheets: dict[str, pd.DataFrame], use_multiprocessing):
    """
    Create an Excel file with the given worksheets.

    Parameters
    ----------
    output_file : Path
        The path to save the Excel file to
    worksheets : dict
        The worksheets to include in the Excel file
    use_multiprocessing : bool
        Whether to use multiprocessing for writing the Excel file

    Returns
    -------
    None
    """
    logger.debug(f"Creating the Excel file: {output_file}")
    with pd.ExcelWriter(output_file, engine="xlsxwriter") as writer:
        workbook = writer.book
        formats = create_formats(workbook)

        if use_multiprocessing:
            with ThreadPoolExecutor() as executor:
                futures = [
                    executor.submit(
                        write_worksheet,
                        writer=writer,
                        sheet_name=sheet_name,
                        data=data,
                        formats=formats,
                        output_file=output_file,
                    )
                    for sheet_name, data in worksheets.items()
                ]
                for future in tqdm.tqdm(as_completed(futures), total=len(futures), position=1):
                    future.result()
        else:
            for sheet_name, data in worksheets.items():
                write_worksheet(
                    writer=writer,
                    sheet_name=sheet_name,
                    data=data,
                    formats=formats,
                    output_file=output_file,
                )

create_formats(workbook)

Create the formats for the Excel file. This include the formats for the: - Header rows - Total rows

Parameters:

Name Type Description Default
workbook object

The workbook object

required

Returns:

Type Description
FORMATS_DICT(dict[str, object])

A dictionary containing the formats for the Excel file

Source code in devices_rap/data_io/output/excel_writer.py
def create_formats(workbook) -> FormatsDict:
    """
    Create the formats for the Excel file. This include the formats for the:
    - Header rows
    - Total rows

    Parameters
    ----------
    workbook : object
        The workbook object

    Returns
    -------
    FORMATS_DICT (dict[str, object])
        A dictionary containing the formats for the Excel file
    """
    format_config = {
        "default": {},
        "cost": {
            "num_format": "£#,##0",
        },
        "header": {
            "bold": True,
            "text_wrap": True,
            "valign": "top",
            "fg_color": "#D9E1F2",
            "border": 1,
        },
        "total": {
            "bold": True,
            "fg_color": "#C6EFCE",
            "num_format": "£#,##0",
        },
    }

    formats = {}

    for name, config in format_config.items():
        if config:
            formats[name] = workbook.add_format(config)
        else:
            formats[name] = workbook.add_format()

    return formats

apply_excel_formatting(writer, data, formats, sheet_name)

Apply formatting to the Excel worksheet. This includes: - Header formatting - Total row formatting - Column formatting for floats - Autofilter for the header row - Autofit for the columns

Parameters:

Name Type Description Default
writer ExcelWriter

The Excel writer object

required
data DataFrame

The data to write to the worksheet

required
formats dict

The formats to apply to the worksheet

required
sheet_name str

The name of the worksheet

required
Source code in devices_rap/data_io/output/excel_writer.py
def apply_excel_formatting(writer, data: pd.DataFrame, formats: FormatsDict, sheet_name: str):
    """
    Apply formatting to the Excel worksheet. This includes:
    - Header formatting
    - Total row formatting
    - Column formatting for floats
    - Autofilter for the header row
    - Autofit for the columns

    Parameters
    ----------
    writer : pd.ExcelWriter
        The Excel writer object
    data : pd.DataFrame
        The data to write to the worksheet
    formats : dict
        The formats to apply to the worksheet
    sheet_name : str
        The name of the worksheet
    """
    # Write the column headers with the defined format.
    header_format = formats["header"]

    worksheet = writer.sheets[sheet_name]
    for col_num, value in enumerate(data.columns.values):
        worksheet.write(0, col_num, value, header_format)

    worksheet.autofilter(0, 0, data.shape[0], data.shape[1] - 1)

    # Apply formatting for rows with "Total" in the provider code column
    for row_num, (provider_code, region) in enumerate(
        zip(data["Provider Code"], data["Region"], strict=False), start=1
    ):  # Start from row 1 (after header)
        if "Total" in str(provider_code) or "Total" in str(region):
            worksheet.set_row(row_num, None, formats["total"])

    # Apply formatting for columns with floats to be formatted as Accounting "£#,##0"
    for col_num, dtype in enumerate(data.dtypes):
        if dtype in ["float64", "float32"]:
            worksheet.set_column(col_num, col_num, None, formats["cost"])

    worksheet.autofit()

write_worksheet(writer, sheet_name, data, formats, output_file)

Write a worksheet to the Excel file with conditional formatting.

Parameters:

Name Type Description Default
writer ExcelWriter

The Excel writer object

required
sheet_name str

The name of the worksheet

required
data DataFrame

The data to write to the worksheet

required
formats dict

The formats to apply to the worksheet

required
output_file Path

The path to the Excel file that is been written to (only used for logging context)

required

Returns:

Type Description
None
Source code in devices_rap/data_io/output/excel_writer.py
def write_worksheet(
    writer: pd.ExcelWriter,
    sheet_name: str,
    data: pd.DataFrame,
    formats: FormatsDict,
    output_file: Path,
):
    """
    Write a worksheet to the Excel file with conditional formatting.

    Parameters
    ----------
    writer : pd.ExcelWriter
        The Excel writer object
    sheet_name : str
        The name of the worksheet
    data : pd.DataFrame
        The data to write to the worksheet
    formats : dict
        The formats to apply to the worksheet
    output_file : Path
        The path to the Excel file that is been written to (only used for logging context)

    Returns
    -------
    None
    """
    logger.debug(
        f"Writing data with {data.shape[0]} rows and {data.shape[1]} columns to "
        f"the {sheet_name} worksheet to the Excel file, {output_file}."
    )

    # Format datetime columns to %d-%m-%Y
    for col in data.select_dtypes(include=["datetime"]).columns:
        data[col] = data[col].dt.strftime("%d-%m-%Y")

    data.to_excel(
        excel_writer=writer,
        sheet_name=sheet_name,
        index=False,
        startrow=1,
        header=False,
    )

    apply_excel_formatting(
        writer=writer,
        data=data,
        formats=formats,
        sheet_name=sheet_name,
    )

    logger.debug(f"Worksheet {sheet_name} written to the Excel file: {output_file}")

create_excel_zip_reports(output_directory, fin_month, fin_year)

Create a zip file containing all the Excel reports for each region. The zip file will be saved in the output directory with a name that includes the financial year and month.

This function will gather all the Excel files created for each region and compress them into a single zip file.

Parameters:

Name Type Description Default
output_directory Path

The output directory where the Excel files are saved and where the zip file will be created

required
fin_month str

The financial month for which the data is being processed

required
fin_year str

The financial year for which the data is being processed

required

Returns:

Type Description
None
Source code in devices_rap/data_io/output/excel_writer.py
def create_excel_zip_reports(
    output_directory: Path,
    fin_month: str,
    fin_year: str,
):
    """
    Create a zip file containing all the Excel reports for each region. The zip file will be saved in
    the output directory with a name that includes the financial year and month.

    This function will gather all the Excel files created for each region and compress them into a
    single zip file.

    Parameters
    ----------
    output_directory : Path
        The output directory where the Excel files are saved and where the zip file will be created
    fin_month : str
        The financial month for which the data is being processed
    fin_year : str
        The financial year for which the data is being processed

    Returns
    -------
    None
    """
    logger.info("Creating zip file for Excel reports")
    output_file = output_directory / f"{fin_year}_M{fin_month}_AMBER_REPORTS_ALL_REGIONS.zip"
    if output_file.exists():
        logger.warning(f"Overwriting the existing zip file: {output_file}")
        output_file.unlink()
    else:
        logger.info(f"Creating zip file for all regions for {fin_month} {fin_year}")

    excel_files = tqdm.tqdm(list(output_directory.glob("*_RAG_STATUS_REPORT.xlsx")))
    with zipfile.ZipFile(output_file, "w", zipfile.ZIP_DEFLATED) as zip_file:
        for excel_file in excel_files:
            zip_file.write(excel_file, arcname=excel_file.name)

    logger.success(
        f"Zip file for all regions for {fin_month} {fin_year} created successfully at {output_file}."
    )