"""summary.py: summary of the reduced data."""
import csv
import json
import logging
import os
import pandas
# Module metadata.
__author__ = "Yingrui Shang"
__copyright__ = "Copyright 2021, NSD, ORNL"
# Only `generate_report` is exported by `from summary import *`.
__all__ = ["generate_report"]
# Logging goes to two places: a log file and the console.
# NOTE: this runs at import time. `basicConfig` attaches a file handler for
# "file.log" (relative to the current working directory) to the root logger;
# filemode="w" truncates that file each time the module is imported.
# It must run before the console handler below is added, because
# `basicConfig` does nothing once the root logger already has handlers.
logging.basicConfig(filename="file.log", filemode="w", level=logging.INFO)
# Second handler: echo INFO-and-above records to the console
# (a StreamHandler created without arguments writes to sys.stderr).
console = logging.StreamHandler()
console.setLevel(logging.INFO)
# getLogger("") returns the root logger, so every logger in the process is affected.
logging.getLogger("").addHandler(console)
# Running counter appended to truncated sheet names; shared by every call.
suffix = 0


def format_sheet_name(filename: str) -> str:
    """Turn a file name into a string usable as an Excel worksheet name.

    Each of the characters ``[ ] : * ? / \\`` is replaced by an underscore.
    A result longer than 20 characters is cut to its first 20 characters and
    the next value of the module-level ``suffix`` counter is appended, so
    repeated calls with long names yield different sheet names.
    """
    global suffix
    # One pass over the string instead of one ``replace`` per character.
    sheet_name = filename.translate(str.maketrans({ch: "_" for ch in "[]:*?/\\"}))
    if len(sheet_name) <= 20:
        return sheet_name
    suffix += 1
    return sheet_name[:20] + str(suffix)
def get_filenames_from_samples(sample_name: str) -> list[str]:
    """Return the four reduced-data file names derived from a sample name.

    The names are ``UN_<sample>_det_1`` followed by ``.txt``, ``_lb.txt``,
    ``_lbs.txt`` and ``_unscaled.txt``, in that order.

    Raises
    ------
    ValueError
        If ``sample_name`` is empty (or otherwise falsy).
    """
    if not sample_name:
        raise ValueError(f"Sample name is empty or not valid: {sample_name}")
    stem = "UN_" + sample_name + "_det_1"
    return [stem + ending + ".txt" for ending in ("", "_lb", "_lbs", "_unscaled")]
[docs]
def generate_report(config_file_path: str, data_dir: str | None = None, output_dir: str | None = None):
"""Generate report from a reduction config file.
Parameters
----------
config_file_path : str
Path to the configuration file (CSV or JSON).
data_dir : str | None
Directory where the reduced data are stored. If None, use the `reduced` dir in the config file directory.
output_dir : str | None
Where to save the report. If None, use `data_folder/reduced`.
"""
# Validate inputs
if not os.path.exists(config_file_path):
raise FileNotFoundError(f"The file path: {config_file_path} does not exist")
_, ext = os.path.splitext(config_file_path)
if ext.lower() not in [".csv", ".json"]:
raise ValueError(f"Unsupported configuration file format: {ext}")
# Set up directories
if not data_dir:
data_dir = os.path.dirname(config_file_path)
if not output_dir:
output_dir = os.path.join(data_dir, "reduced")
# Make sure the output directory exists
os.makedirs(output_dir, exist_ok=True)
xlsx_writer = pandas.ExcelWriter(os.path.join(output_dir, "summary.xlsx"), engine="xlsxwriter")
# Create a workbook and add chartsheets for different data types
workbook = xlsx_writer.book
# nbFormat = workbook.add_format({"bold": False})
# Create a chart sheet for unscaled data
chartsheet_unscaled = workbook.add_chartsheet("Unscaled")
main_chart_unscaled = workbook.add_chart({"type": "scatter", "subtype": "smooth_with_markers"})
# Create a chart sheet for original data
chartsheet_orig = workbook.add_chartsheet("Original")
main_chart_orig = workbook.add_chart({"type": "scatter", "subtype": "smooth_with_markers"})
# log binned data
chartsheet_log_binned = workbook.add_chartsheet("Log Binned")
main_chart_log_binned = workbook.add_chart({"type": "scatter", "subtype": "smooth_with_markers"})
# log binned data with background removed
chartsheet_subtracted = workbook.add_chartsheet("BG Subtracted")
main_chart_subtracted = workbook.add_chart({"type": "scatter", "subtype": "smooth_with_markers"})
main_chart_unscaled.set_x_axis({"name": "Q (1/A)", "log_base": 10})
main_chart_unscaled.set_y_axis({"name": "I (1/cn)", "log_base": 10})
main_chart_unscaled.set_title({"name": "Unscaled Data"})
main_chart_orig.set_x_axis({"name": "Q (1/A)", "log_base": 10})
main_chart_orig.set_y_axis({"name": "I (1/cn)", "log_base": 10})
main_chart_orig.set_title({"name": "Original Data"})
main_chart_log_binned.set_x_axis({"name": "Q (1/A)", "log_base": 10})
main_chart_log_binned.set_y_axis({"name": "I (1/cn)", "log_base": 10})
main_chart_log_binned.set_title({"name": "Log Binned"})
main_chart_subtracted.set_x_axis({"name": "Q (1/A)", "log_base": 10})
main_chart_subtracted.set_y_axis({"name": "I (1/cn)", "log_base": 10})
main_chart_subtracted.set_title({"name": "Background Subtracted"})
# Collect sample files from the config file
if ext.lower() == ".json":
with open(config_file_path) as json_file:
data = json.load(json_file)
sample_files = []
background = data.get("background", {})
background_name = background.get("name")
if background_name:
sample_files.extend(get_filenames_from_samples(background_name))
for sample in data.get("samples", []):
sample_name = sample.get("name", "")
sample_files.extend(get_filenames_from_samples(sample_name))
else:
sample_files = []
with open(config_file_path, newline="") as csv_file:
csv_reader = csv.reader(csv_file, delimiter=",")
for row in filter(lambda r: len(r) > 1 and not r[0].startswith("#"), csv_reader):
sample_files.extend(get_filenames_from_samples(row[1]))
# Process each sample file and add data to the corresponding charts
for file in sample_files:
fp = os.path.join(output_dir, file)
if not os.path.exists(fp):
logging.info(f"Sample {file} file path does not exist!")
continue
if os.stat(fp).st_size == 0:
logging.warning(f"Sample file {file} is empty and will be skipped. ")
continue
logging.info(f"Reading sample file {file} to summary.xlsx")
df = pandas.read_csv(
fp,
sep=",",
# delim_whitespace=True,
# index_col = 0
names=["Q(1/A)", "I(1/cm)", "dI(1/cm)"],
index_col=False,
)
# drop all nonpositive values for log-log ploting
# df = df.assign(F = (df["Q(1/A)"] > 0) & (df["I(1/cm)"] > 0) )
# Append new columns with non zero values
df = pandas.concat(
[
df,
df[(df["Q(1/A)"] > 0) & (df["I(1/cm)"] > 0) & (df["dI(1/cm)"] > 0)],
],
ignore_index=False,
axis=1,
)
# df.reset_index(drop=True, inplace=True)
cnames = [
"Q(1/A)",
"I(1/cm)",
"dI(1/cm)",
"Q(1/A)_positive",
"I(1/cm)_positive",
"dI(1/cm)_positive",
]
df.columns = cnames
wn = format_sheet_name(file)
df.to_excel(xlsx_writer, sheet_name=wn, index=False)
worksheet = xlsx_writer.sheets[wn]
# worksheet.set_column('A:A', 12, nbFormat)
chart = workbook.add_chart({"type": "scatter", "subtype": "smooth_with_markers"})
chart.add_series(
{
"name": f"{wn}",
"categories": f"={wn}!$D$2:$D$100",
"values": f"={wn}!$E$2:$E$100",
}
)
# Add data series to the main chartsheets
if file.endswith("lbs.txt"):
main_chart_subtracted.add_series(
{
"name": f"{wn}",
"categories": f"={wn}!$D$2:$D$100",
"values": f"={wn}!$E$2:$E$100",
}
)
elif file.endswith("lb.txt"):
main_chart_log_binned.add_series(
{
"name": f"{wn}",
"categories": f"={wn}!$D$2:$D$100",
"values": f"={wn}!$E$2:$E$100",
}
)
elif file.endswith("unscaled.txt"):
main_chart_unscaled.add_series(
{
"name": f"{wn}",
"categories": f"={wn}!$D$2:$D$100",
"values": f"={wn}!$E$2:$E$100",
}
)
else:
main_chart_orig.add_series(
{
"name": f"{wn}",
"categories": f"={wn}!$D$2:$D$100",
"values": f"={wn}!$E$2:$E$100",
}
)
chart.set_x_axis({"name": f"={wn}!$A$1", "log_base": 10})
chart.set_y_axis({"name": f"={wn}!$B$1", "log_base": 10})
# print(f'={file}!$A:$B')
worksheet.insert_chart("F1", chart)
if main_chart_unscaled.series:
chartsheet_unscaled.set_chart(main_chart_unscaled)
if main_chart_orig.series:
chartsheet_orig.set_chart(main_chart_orig)
if main_chart_log_binned.series:
chartsheet_log_binned.set_chart(main_chart_log_binned)
if main_chart_subtracted.series:
chartsheet_subtracted.set_chart(main_chart_subtracted)
chartsheet_subtracted.activate()
# workbook.close()
xlsx_writer.close()
logging.info(f"complete processing {config_file_path}")
return
# Command-line entry point: python summary.py CONFIG [-d DATA_FOLDER] [-o OUTPUT]
if __name__ == "__main__":
    import argparse

    cli = argparse.ArgumentParser(description="Generate a report from a config file.")
    cli.add_argument("config_file", help="Path to the configuration file")
    cli.add_argument("-d", "--data-folder", default=None, help="Folder where the reduced data are stored.")
    cli.add_argument(
        "-o",
        "--output",
        default=None,
        help="Where to save the report. If not provided, a 'reduced' folder will be created in the config file folder.",
    )
    options = cli.parse_args()
    # Both optional arguments are forwarded as None when omitted.
    generate_report(
        config_file_path=options.config_file,
        data_dir=options.data_folder,
        output_dir=options.output,
    )