<a href="https://colab.research.google.com/github/yongchanzzz/enzymology/blob/main/Fit_Hill.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Km and Hill Coefficient Calculation Using Hill's Equation
This notebook fits the Hill equation to substrate-concentration vs. velocity data and reports Vmax, K0.5 (the Km analogue) and the Hill coefficient *n*, each with a 95% confidence interval.

In [None]:
#@title ### Cell 1: Upload a CSV File
from google.colab import files
import io
import pandas as pd
uploaded = files.upload()  # Upload your CSV file
# `uploaded` maps each uploaded file name to its raw bytes. If nothing was
# uploaded (e.g. the dialog was dismissed) the mapping is empty, and indexing
# into it would fail with an unhelpful IndexError, so stop with a clear message.
if not uploaded:
    raise ValueError("No file was uploaded. Re-run this cell and choose a CSV file.")
# Only the first uploaded file is used; any additional files are ignored.
filename = next(iter(uploaded))
data = pd.read_csv(io.BytesIO(uploaded[filename]))

In [None]:
#@title ### Cell 2: User Parameters
# @markdown **User Inputs**
unit_in_M = 1e-3  #@param {type:"number", description:"Factor that converts the uploaded concentration unit to M (e.g. 1e-3 for mM, 1e-6 for uM, 1e-9 for nM)"}
substrate_column = "substrate"  #@param {type:"string", description:"Name of the substrate concentration column"}
velocity_column  = "velocity"   #@param {type:"string", description:"Name of the observed rate/velocity column"}

# Data Preprocessing
import numpy as np
# Convert substrate concentrations from the uploaded unit to M.
# The values exactly as uploaded are stashed in a companion column the first
# time this cell runs, and the conversion always starts from that column.
# This keeps the cell idempotent: re-running it (or changing unit_in_M and
# re-running) never applies the factor twice to already-converted values.
raw_substrate_column = f"{substrate_column}__as_uploaded"
if raw_substrate_column not in data.columns:
    data[raw_substrate_column] = data[substrate_column]
data[substrate_column] = data[raw_substrate_column] * unit_in_M

In [None]:
#@title ### Cell 3: Define Model and Fit
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
from scipy.stats import t
import matplotlib.pyplot as plt
import datetime
import os, time
import sys, platform

# Switch the process time zone to Asia/Tokyo so every timestamp taken below is in JST
os.environ['TZ'] = 'Asia/Tokyo'
time.tzset()
# Compact YYYYMMDD_HHMMSS stamp, reused in the names of the output files
timestamp = f"{datetime.datetime.now():%Y%m%d_%H%M%S}"
# Define Hill equation
def hill_eq(S, Vmax, K0_5, n):
    """
    Evaluate the Hill equation V = Vmax * [S]^n / (K0.5^n + [S]^n).

    S is the substrate concentration (scalar or array-like that supports
    ``**``), Vmax the limiting velocity, K0_5 the half-saturation
    concentration and n the Hill coefficient. The result has the same
    shape as S.
    """
    substrate_term = S**n
    half_saturation_term = K0_5**n
    return Vmax * substrate_term / (half_saturation_term + substrate_term)

# Starting values for the optimiser: the largest observed velocity for Vmax,
# the median substrate concentration for K0.5, and n = 1.
Vmax0  = data[velocity_column].max()
K0_5_0 = data[substrate_column].median()
n0     = 1.0
p0     = [Vmax0, K0_5_0, n0]
# Every parameter is constrained to the range [0, +inf)
bounds = (0, np.inf)

# Least-squares fit of hill_eq to the (substrate, velocity) observations
popt, pcov = curve_fit(hill_eq, data[substrate_column], data[velocity_column],
                       p0=p0, bounds=bounds)

# Map each parameter name to its fitted value
param_names = ['Vmax', 'K0_5', 'Hill_n']
params = {name: value for name, value in zip(param_names, popt)}

# 95% confidence intervals: estimate +/- t(1 - alpha/2, dof) * standard error,
# with the standard errors taken from the diagonal of the covariance matrix
alpha = 0.05
dof   = max(0, len(data) - len(popt))
sigma = np.sqrt(np.diag(pcov))
t_val = t.ppf(1 - alpha/2, dof)
ci = {}
for name, estimate, std_err in zip(param_names, popt, sigma):
    ci[name] = (estimate - t_val * std_err,
                estimate + t_val * std_err)

# --- Build output lines ---
# Header: title, wall-clock time, the uploaded file name and the fitted model
lines = [
    "Hill Equation Fit Results",
    f"Date and time: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
    f"Input file: {filename}",
    "Equation fitted: V = Vmax * [S]^n / (K0.5^n + [S]^n)",
    "",
    "Calculated parameters:",
]

# One line per parameter: point estimate followed by its 95% confidence bounds
lines.extend(
    f"{name:10} = {params[name]:.5g} (95% CI: {ci[name][0]:.5g} – {ci[name][1]:.5g})"
    for name in param_names
)

# --- Goodness‑of‑fit metrics (GraphPad Prism style) ---
# Residuals are observed minus predicted velocities at the fitted parameters.
residuals = data[velocity_column] - hill_eq(data[substrate_column], *popt)
ss_res    = np.sum(residuals**2)
n_points  = len(data)
p_params  = len(popt)
dof       = n_points - p_params
# sy.x = sqrt(SS / df) is undefined without residual degrees of freedom;
# report NaN rather than dividing by zero or taking the root of a negative.
syx       = np.sqrt(ss_res / dof) if dof > 0 else np.nan
ss_tot    = np.sum((data[velocity_column] - data[velocity_column].mean())**2)
r2        = 1 - ss_res / ss_tot
# AICc = N*ln(SS/N) + 2K + 2K(K+1)/(N-K-1).
# Prism defines K as the number of fitted parameters plus one, because the
# residual variance is estimated in addition to the model parameters.
k_aicc    = p_params + 1
if n_points - k_aicc - 1 > 0:
    aicc = (
        n_points * np.log(ss_res / n_points)
      + 2 * k_aicc
      + 2 * k_aicc * (k_aicc + 1) / (n_points - k_aicc - 1)
    )
else:
    # Too few points for the small-sample correction (N must exceed K + 1);
    # the correction term would divide by zero or change sign.
    aicc = np.nan

lines.append("")
lines.append("Goodness‑of‑fit metrics:")
lines.append(f"SSR (SSE)  = {ss_res:.5g}")
lines.append(f"sy.x       = {syx:.5g}")
lines.append(f"R²         = {r2:.5f}")
lines.append(f"AICc       = {aicc:.5f}")

# --- Replicate counts per substrate concentration ---
from collections import defaultdict
# Number of rows recorded at each distinct substrate concentration
rep_counts = data.groupby(substrate_column).size()
# Invert the mapping: replicate count -> concentrations measured that many times
count_map  = defaultdict(list)
for conc, cnt in rep_counts.items():
    count_map[cnt].append(conc)

lines.extend(["", "Number of replicates:"])
# Largest replicate count first; concentrations ascending within each count
for cnt, conc_list in sorted(count_map.items(), key=lambda pair: pair[0], reverse=True):
    lines.append(f"N={cnt}: " + ", ".join(format(c, ".3g") for c in sorted(conc_list)))

# --- Session Info ---
# Record the software environment so that a fit can be reproduced later.
import sys, platform
from datetime import datetime as _dt

lines.append("")
lines.append("Session Info:")
# Version number plus interpreter implementation (previously the same version
# string was printed twice).
lines.append(f"Python version {platform.python_version()} ({platform.python_implementation()})")
import numpy, pandas, matplotlib
import scipy  # SciPy performs the actual fit (curve_fit), so its version matters most
lines.append(f"NumPy version: {numpy.__version__}")
lines.append(f"SciPy version: {scipy.__version__}")
lines.append(f"pandas  version: {pandas.__version__}")
lines.append(f"Matplotlib version: {matplotlib.__version__}")
lines.append(f"Platform: {platform.platform()}")
now = _dt.now().astimezone()
# strftime('%z') yields a signed offset such as "+0900"; reshape it to "+09:00".
# Formatting now.utcoffset() directly would print a timedelta ("9:00:00")
# with no sign.
raw_offset = now.strftime("%z")
lines.append(f"Time zone: {now.tzinfo} (UTC{raw_offset[:3]}:{raw_offset[3:5]})")

# Write summary to file
summary_file = f"FitHill_{timestamp}_summary.txt"
# The report contains non-ASCII characters (en dash, superscript two,
# non-breaking hyphen), so the encoding is fixed to UTF-8 instead of relying
# on the platform default, which may be unable to encode them.
with open(summary_file, 'w', encoding='utf-8') as f:
    f.writelines(f"{entry}\n" for entry in lines)

# Print to stdout
for entry in lines:
    print(entry)

print(f"\nSummary written to: {summary_file}")


In [None]:
#@title ### Cell 4: Plot SVG
import matplotlib.pyplot as plt
import numpy as np

# @markdown **Output Plot Size (cm)**
# Figure dimensions in centimetres; they are divided by 2.54 when the figure
# is created because matplotlib's figsize is expressed in inches.
width_cm = 8        #@param {type:"number", description:"Width in cm"}
height_cm = 6       #@param {type:"number", description:"Height in cm"}
# @markdown **Font and Text Options**
# Font sizes passed to tick_params(labelsize=...) and set_xlabel/set_ylabel(fontsize=...).
axis_tick_fontsize = 8      #@param {type:"number", description:"Font size for axis tick labels"}
axis_title_fontsize = 9     #@param {type:"number", description:"Font size for axis titles"}
# @markdown **Styling Options**
# point_size is passed to ax.scatter as `s`, which matplotlib interprets as
# marker area in points^2 (not a diameter).
point_size = 5                   #@param {type:"number", description:"Data point size"}
point_color = "black"            #@param {type:"string", description:"Data point color"}
curve_color = "black"            #@param {type:"string", description:"Fit curve color"}
curve_thickness = 1              #@param {type:"number", description:"Fit curve line thickness"}
# axis_thickness sets both the tick width and the width of the bottom/left spines.
axis_thickness = 1               #@param {type:"number", description:"Axis line/tick thickness"}
show_minor_ticks = False         #@param {type:"boolean", description:"Turn minor ticks on/off"}

# Prepare plot data (work on a copy so the fitted table itself is untouched)
plot = data.copy()
observed_substrate = plot[substrate_column]
observed_velocity = plot[velocity_column]

# Evaluate the fitted Hill curve on 300 evenly spaced concentrations that span
# the measured substrate range
substrate_range = np.linspace(data[substrate_column].min(), data[substrate_column].max(), 300)
y_fit = hill_eq(substrate_range, *(params[key] for key in ('Vmax', 'K0_5', 'Hill_n')))

# Create the figure; sizes are given in cm and converted to inches for matplotlib
CM_PER_INCH = 2.54
fig, ax = plt.subplots(figsize=(width_cm / CM_PER_INCH, height_cm / CM_PER_INCH))
# Transparent figure and axes backgrounds
fig.patch.set_facecolor('none')
ax.set_facecolor('none')

# Observations as points, fitted model as a line
ax.scatter(observed_substrate, observed_velocity, s=point_size, color=point_color, label='Data')
ax.plot(substrate_range, y_fit, color=curve_color, linewidth=curve_thickness, label='Hill fit')

# Axis titles
ax.set_xlabel('[Substrate] (M)', fontsize=axis_title_fontsize)
ax.set_ylabel('Velocity', fontsize=axis_title_fontsize)

# Tick appearance; minor ticks follow the form toggle
ax.tick_params(width=axis_thickness, labelsize=axis_tick_fontsize)
if not show_minor_ticks:
    ax.minorticks_off()
else:
    ax.minorticks_on()

# Keep only the bottom and left spines, drawn at the requested thickness
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_linewidth(axis_thickness)
ax.spines['left'].set_linewidth(axis_thickness)

fig.tight_layout()
# Save as a transparent SVG named after the run timestamp, then display
plot_file = f"FitHill_{timestamp}_plot.svg"
fig.savefig(plot_file, format='svg', transparent=True)
plt.show()


In [None]:
#@title ### Cell 5: Download Results
# Offer both generated artefacts for download: the text summary first, then the SVG plot
for result_path in (summary_file, plot_file):
    files.download(result_path)