<a href="https://colab.research.google.com/github/yongchanzzz/enzymology/blob/main/Plot_Bargraph.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Plot bar graph
This notebook draws bar graphs with individual data points.

In [None]:
#@title ### Cell 1: Upload a CSV File
from google.colab import files
import io
import pandas as pd
uploaded = files.upload()  # Upload your CSV file; indexed below as {filename: file bytes}
# If nothing was uploaded (e.g. the dialog was dismissed) fail with a clear
# message instead of an IndexError on the filename lookup.
if not uploaded:
    raise ValueError("No file was uploaded. Re-run this cell and choose a CSV file.")
# Only the first uploaded file is read; any additional files are ignored.
filename = next(iter(uploaded))
data = pd.read_csv(io.BytesIO(uploaded[filename]))

In [None]:
#@title ### Cell 2: User Parameters
# @markdown **User Inputs**
# Column names looked up in the uploaded table (`data`) by the later cells.
# `series_column` and `value_column` must exist in the data (Cell 3 raises
# otherwise). When `category_column` is blank or not a column of the data,
# the later cells fall back to one bar per series.
series_column   = "series"    #@param {type:"string", description:"Name of the primary grouping column"}
category_column = "category"  #@param {type:"string", description:"Secondary grouping column. Leave empty for single bars."}
value_column    = "value"     #@param {type:"string", description:"Name of the observed value column"}

# Data Preprocessing
import numpy as np


In [None]:
#@title ### Cell 3: Prepare Summary (no fitting)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import os, time
import sys, platform

# Pin the process time zone to Asia/Tokyo so local-time calls made after this
# point (including the file-name timestamp) are expressed in that zone.
os.environ.update(TZ='Asia/Tokyo')
time.tzset()  # make the C library re-read TZ
timestamp = format(datetime.datetime.now(), "%Y%m%d_%H%M%S")

# --- mean±SEM helper ---
def _sem(a):
    a = np.asarray(a, dtype=float)
    a = a[~np.isnan(a)]
    if a.size <= 1:
        return np.nan
    return a.std(ddof=1) / np.sqrt(a.size)

# --- Required columns check ---
if series_column not in data.columns or value_column not in data.columns:
    raise ValueError(f"Expected columns '{series_column}' and '{value_column}' in data.")

# --- Detect grouped vs single (same rule as Cell 4) ---
# Grouped mode needs a non-blank category name that is an actual column of the data.
_grouped = bool(isinstance(category_column, str) and category_column.strip() and (category_column in data.columns))

# --- Preserve plot order(s) and share across cells ---
# Labels are kept in order of first appearance. Missing labels (NaN) are
# excluded: groupby() drops NaN keys by default, so a NaN entry in the order
# list has no matching summary row and would make `.loc[series_order]` below
# raise a KeyError.
series_order = list(pd.unique(data[series_column].dropna()))
if _grouped:
    category_order = list(pd.unique(data[category_column].dropna()))

# --- Build summary dataframe(s) ---
if _grouped:
    # per (series, category): mean, SEM and count of non-missing values
    gsummary = (
        data.groupby([series_column, category_column], as_index=False)[value_column]
            .agg(mean='mean', sem=_sem, n='count')
    )
    # Reindex onto the full series x category grid in the desired order;
    # combinations with no rows become all-NaN rows.
    idx = pd.MultiIndex.from_product([series_order, category_order],
                                     names=[series_column, category_column])
    gsummary = (
        gsummary.set_index([series_column, category_column])
                .reindex(idx)
                .reset_index()
    )
    # For compatibility with any downstream use of 'summary_df'
    summary_df = gsummary.copy()
else:
    # per series (original behavior)
    summary_df = (
        data.groupby(series_column)[value_column]
            .agg(mean='mean', sem=_sem, n='count')
            .reset_index()
    )
    # reorder to match series_order (every label in series_order is a group key,
    # because NaN labels were excluded above)
    summary_df = (
        summary_df.set_index(series_column)
                  .loc[series_order]
                  .reset_index()
    )

# --- Build output lines (keeps your style/metadata sections) ---
# `lines` holds the report once; the same text is written to disk and then
# echoed to the cell output.
lines = []
title = "Bar Plot Summary (mean ± SEM by series × category)" if _grouped \
        else "Bar Plot Summary (mean ± SEM by series)"
lines.append(title)
lines.append(f"Date and time: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
lines.append(f"Input file: {filename}")
lines.append("")
lines.append("Calculated statistics:")

if _grouped:
    # Print in series-major order, then category order
    for s in series_order:
        for c in category_order:
            row = summary_df[(summary_df[series_column]==s) &
                             (summary_df[category_column]==c)]
            # Skip combinations that have no row, a missing count, or zero observations.
            if row.empty or pd.isna(row['n'].iloc[0]) or int(row['n'].iloc[0]) == 0:
                continue
            m  = row['mean'].iloc[0]
            se = row['sem'].iloc[0]
            n  = int(row['n'].iloc[0])
            lines.append(f"{str(s):15} / {str(c):12}  mean = {m:.5g},  SEM = {se:.5g},  N = {n}")
else:
    for _, row in summary_df.iterrows():
        s = row[series_column]
        m = row['mean']
        se = row['sem']
        n = int(row['n'])
        lines.append(f"{str(s):15}  mean = {m:.5g},  SEM = {se:.5g},  N = {n}")

# --- Session Info (kept) ---
lines.append("")
lines.append("Session Info:")
lines.append(f"Python version {platform.python_version()} ({sys.version.split()[0]})")
import numpy, pandas, matplotlib
lines.append(f"NumPy version: {numpy.__version__}")
lines.append(f"pandas  version: {pandas.__version__}")
lines.append(f"Matplotlib version: {matplotlib.__version__}")
lines.append(f"Platform: {platform.platform()}")
from datetime import datetime as _dt
now = _dt.now().astimezone()  # timezone-aware local time, used only for the zone line
lines.append(f"Time zone: {now.tzinfo} (UTC{now.utcoffset()})")

# Write summary to file (keeps your filename pattern)
summary_file = f"Barplot_{timestamp}_summary.txt"
# The report contains non-ASCII characters ("±", "×"), so the encoding is fixed
# to UTF-8 rather than left to the platform's locale default.
with open(summary_file, 'w', encoding='utf-8') as f:
    for L in lines:
        f.write(L + "\n")

# Print to stdout
for L in lines:
    print(L)

print(f"\nSummary written to: {summary_file}")


In [None]:
#@title ### Cell 4: Plot SVG — supports grouped (series × category) or single (series only)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Plot appearance settings. Each `#@param` line is a Colab form field; the
# values are read by the plotting code further down in this cell.
# @markdown **Output Plot Size (cm)**
height_cm = 7.2   #@param {type:"number", description:"Figure height in cm"}
width_cm  = 8     #@param {type:"number", description:"Figure width in cm (manual)"}

# When auto_width_by_series is True the figure width is per_series_cm times the
# number of series and width_cm is not used.
# @markdown **Auto width by series (overrides width_cm if set True)**
auto_width_by_series = True   #@param {type:"boolean", description:"Figure width = per_series_cm * N"}
per_series_cm        = 1.7    #@param {type:"number", description:"Width (cm) allocated per series (plot area scale)"}

# Negative ratios are clamped to 0 before use.
# @markdown **Gap between series (relative to one bar)**
series_gap_bar_ratio = 0.8    #@param {type:"number", description:"Gap between series = ratio × one bar width"}

# NOTE(review): axis_title_fontsize is not referenced anywhere in the visible cells.
# @markdown **Font and Text Options**
axis_tick_fontsize   = 8  #@param {type:"number", description:"Font size for axis tick labels"}
axis_title_fontsize  = 9  #@param {type:"number", description:"Font size for axis titles"}

# @markdown **Styling Options**
point_size     = 5       #@param {type:"number", description:"Data point size"}
point_color    = "#808080" #@param {type:"string", description:"Data point color"}
axis_thickness = 1       #@param {type:"number", description:"Axis line/tick thickness"}
show_minor_ticks = False #@param {type:"boolean", description:"Turn minor ticks on/off"}

# bar_edgecolor is also used for the error bars (black if it is None/"None"/"null").
# jitter is capped at half a bar width before it is applied.
# @markdown **Series/value and bar/point options**
bar_edgecolor   = "black"    #@param {type:"string", description:"Bar edge color"}
bar_linewidth   = 1          #@param {type:"number"}
errorbar_linewidth   = 1     #@param {type:"number"}
errorbar_capsize     = 3     #@param {type:"number"}
point_alpha          = 1.0   #@param {type:"number"}
jitter               = 0.2   #@param {type:"number", description:"Horizontal jitter for individual points"}

# bar_fill_colors is used in series-only mode, bar_group_colors in grouped mode.
# @markdown **Bar color controls**
bar_fill_colors  = "#D3D3D3"        #@param {type:"string", description:"one color or comma-separated list cycling across series"}
bar_group_colors = "#D3D3D3, white" #@param {type:"string", description:"one color or comma-separated list cycling across categories"}

# Optional legend (used only when grouped)
show_legend = False           #@param {type:"boolean"}

# @markdown **Tick label rotation**
xtick_rotation = 90  #@param {type:"number", description:"X tick label rotation (deg). e.g., 0, 45, 90"}

# @markdown **Axis baseline**
xaxis_at = "0"  #@param [null, 0.0, -0.5, 1.0] {allow-input: true}
# Use `null`/None to keep default; set a number (e.g., 0.0) to place X-axis at that Y value.

# Prepare plot data (work on a copy of the uploaded table)
plot = data.copy()

# Decide grouped vs single: grouped mode needs a non-blank category column
# name that is an actual column of the data.
_grouped = bool(
    isinstance(category_column, str)
    and category_column.strip()
    and (category_column in plot.columns)
)

# Orders
# Labels in order of first appearance; missing labels (NaN) are left out because
# groupby() drops NaN keys and such rows can never match a bar or a point.
_labels_in_data = list(pd.unique(plot[series_column].dropna()))
# Reuse a `series_order` shared by an earlier cell only when it is a permutation
# of the labels in the current data. A leftover order from a previously uploaded
# file would otherwise silently drop new series and leave empty slots for old ones.
if ('series_order' not in globals()
        or len(series_order) != len(_labels_in_data)
        or set(series_order) != set(_labels_in_data)):
    series_order = _labels_in_data
N = len(series_order)

if _grouped:
    category_order = list(pd.unique(plot[category_column].dropna()))
    K = len(category_order)
else:
    category_order = ["__single__"]  # dummy
    K = 1

# --- Color helpers (simple + explicit) ---
def _parse_color_list(s):
    if not isinstance(s, str):
        return []
    return [x.strip() for x in s.split(",") if x.strip()]

# Parsed palettes: one for series-only mode, one for grouped mode.
_series_colors = _parse_color_list(bar_fill_colors)
_category_colors = _parse_color_list(bar_group_colors)

def _cycle_pick(palette, index):
    """Return palette[index], wrapping around; None when the palette is empty."""
    if not palette:
        return None
    return palette[index % len(palette)]

def _series_color(i):
    """Colour for the i-th series (series-only mode); None if no colours were given."""
    return _cycle_pick(_series_colors, i)

def _category_color(j):
    """Colour for the j-th category (grouped mode); None if no colours were given."""
    return _cycle_pick(_category_colors, j)

# --- Summary dataframe ---
import math
def _sem_series(s):
    s = s.dropna()
    n = len(s)
    if n <= 1: return float("nan")
    return s.std(ddof=1) / math.sqrt(n)

# Aggregate mean / SEM / count per bar. Grouped mode keys on (series, category)
# and is then expanded onto the full series x category grid; series-only mode
# keys on the series alone and receives a placeholder category column.
_summary_keys = [series_column, category_column] if _grouped else [series_column]
gsummary = plot.groupby(_summary_keys, as_index=False)[value_column].agg(
    mean="mean", sem=_sem_series, n="count"
)
if _grouped:
    idx = pd.MultiIndex.from_product([series_order, category_order], names=_summary_keys)
    gsummary = (
        gsummary.set_index(_summary_keys)
                .reindex(idx)
                .reset_index()
    )
else:
    gsummary[category_column] = category_order[0]

# --- Bar geometry ---
# r is the inter-series gap expressed in bar widths (never negative).
r = max(0.0, float(series_gap_bar_ratio))
if K > 0:
    bar_w = 1.0 / (K + r)
else:
    bar_w = 0.5
block_w = K * bar_w            # width occupied by one series' bars
left_edge = -0.5 * block_w     # left edge of a block relative to its centre
# Offset of each category's bar centre from the series centre.
cat_offsets = left_edge + (np.arange(K) + 0.5) * bar_w

# Series centres: consecutive blocks separated by a gap of r bar widths.
x_centers = np.arange(N) * (block_w + r * bar_w)

# --- Figure size (cm converted to inches) and transparent backgrounds ---
if auto_width_by_series:
    width_cm_eff = per_series_cm * N
else:
    width_cm_eff = width_cm
fig, ax = plt.subplots(
    figsize=(width_cm_eff/2.54, height_cm/2.54),
    constrained_layout=True,
)
fig.patch.set_facecolor('none')
ax.set_facecolor('none')

# --- Draw bars + error bars ---
# Error bars follow the bar edge colour unless that is unset, then black.
_err_color = 'black' if bar_edgecolor in (None,"None","null") else bar_edgecolor
for i, s in enumerate(series_order):
    _in_series = gsummary[series_column]==s
    for j, c in enumerate(category_order):
        # Grouped mode narrows the match to the category as well.
        _match = (_in_series & (gsummary[category_column]==c)) if _grouped else _in_series
        row = gsummary[_match]
        if row.empty:
            continue

        _mean_raw = row["mean"].iloc[0]
        _sem_raw = row["sem"].iloc[0]
        m = np.nan if pd.isna(_mean_raw) else float(_mean_raw)
        se = 0.0 if pd.isna(_sem_raw) else float(_sem_raw)  # missing SEM -> zero-length error bar

        if _grouped:
            color = _category_color(j)
        else:
            color = _series_color(i)
        # Fall back to a neutral default if user lists are empty
        if color in (None, "None", "null", ""):
            color = "#D3D3D3"

        _bar_x = x_centers[i] + cat_offsets[j]
        # Only the first series contributes legend labels, one per category.
        _legend_label = c if (_grouped and i==0) else None
        ax.bar(_bar_x, m,
               width=bar_w, color=color,
               edgecolor=bar_edgecolor, linewidth=bar_linewidth,
               zorder=2, label=_legend_label)
        ax.errorbar(_bar_x, m, yerr=se,
                    fmt='none', elinewidth=errorbar_linewidth,
                    capsize=errorbar_capsize,
                    ecolor=_err_color,
                    zorder=3)

# --- Individual points ---
# Fixed seed so the horizontal jitter is reproducible between runs.
rng = np.random.default_rng(12345)
jitter_eff = min(jitter, 0.5*bar_w)  # at most half a bar width
for i, s in enumerate(series_order):
    _series_rows = plot[series_column]==s
    if _grouped:
        # One (x offset, row mask) pair per category, in category order.
        _targets = [
            (cat_offsets[j], _series_rows & (plot[category_column]==c))
            for j, c in enumerate(category_order)
        ]
    else:
        # Series-only mode: a single cloud centred on the series position.
        _targets = [(0.0, _series_rows)]
    for _offset, _rows in _targets:
        vals = plot.loc[_rows, value_column].to_numpy(float)
        if vals.size == 0:
            continue
        xs = x_centers[i] + _offset + rng.uniform(-jitter_eff, jitter_eff, size=vals.size)
        ax.scatter(xs, vals, s=point_size, color=point_color, alpha=point_alpha, zorder=4)

# --- Labels & ticks ---
# One tick per series, placed at the centre of that series' block of bars.
labels = [str(s) for s in series_order]
ax.set_xticks(x_centers, labels=labels)
ax.tick_params(axis='x', labelrotation=float(xtick_rotation), labelsize=axis_tick_fontsize)

# Ticks & spines
ax.tick_params(width=axis_thickness, labelsize=axis_tick_fontsize)
if show_minor_ticks:
    ax.minorticks_on()
else:
    ax.minorticks_off()
for spine in ['bottom','left']:
    ax.spines[spine].set_linewidth(axis_thickness)
ax.spines['top'].set_visible(False); ax.spines['right'].set_visible(False)

# Baseline
# Only the number parsing is guarded: a non-numeric `xaxis_at` is reported and
# the default axis position is kept, while genuine plotting errors are no
# longer swallowed silently.
if xaxis_at not in (None,"None","null",""):
    try:
        y0 = float(xaxis_at)
    except (TypeError, ValueError):
        print(f"Warning: xaxis_at={xaxis_at!r} is not a number; keeping the default X-axis position.")
    else:
        ax.spines['bottom'].set_position(('data',y0))
        ax.xaxis.set_ticks_position('bottom')

if _grouped and show_legend:
    ax.legend(frameon=False, fontsize=axis_tick_fontsize)

# Save through the figure object created above rather than pyplot's implicit
# "current figure".
plot_file = f"Barplot_{timestamp}_plot.svg"
fig.savefig(plot_file, format='svg', transparent=True)
plt.show()


In [None]:
#@title ### Cell 5: Download Results
# Download the summary text file first, then the SVG plot, both produced by
# the earlier cells.
for _result_path in (summary_file, plot_file):
    files.download(_result_path)