In [3]:
import openml

def generate_latex_table(dataset_ids):
    """Generates a LaTeX table with an overview of OpenML datasets."""
    table_header = r"""\begin{table}[h]
\centering
\begin{tabular}{|c|c|c|c|c|}
\hline
ID & Name & Features & Classes & Data Points \\
\hline"""
    table_rows = []
    
    for dataset_id in dataset_ids:
        try:
            dataset = openml.datasets.get_dataset(dataset_id)
            name = dataset.name
            features = len(dataset.features)
            classes = len(dataset.retrieve_class_labels()) if dataset.retrieve_class_labels() else "N/A"
            datapoints = dataset.qualities.get("NumberOfInstances", "N/A")
            
            row = f"{dataset_id} & {name} & {features} & {classes} & {datapoints} \\\hline"
            table_rows.append(row)
        except Exception as e:
            print(f"Failed to fetch dataset {dataset_id}: {e}")
    
    table_footer = """\end{tabular}
\caption{Overview of OpenML datasets}
\label{tab:openml_datasets}
\end{table}"""
    
    latex_table = "\n".join([table_header] + table_rows + [table_footer])
    return latex_table

# Example usage
dataset_ids = [35,41,54,61, 187, 15, 31, 4534]  # Replace with desired OpenML dataset IDs
latex_code = generate_latex_table(dataset_ids)
print(latex_code)


\begin{table}[h]
\centering
\begin{tabular}{|c|c|c|c|c|}
\hline
ID & Name & Features & Classes & Data Points \\
\hline
35 & dermatology & 35 & 6 & 366.0 \\hline
41 & glass & 10 & N/A & 214.0 \\hline
54 & vehicle & 19 & N/A & 846.0 \\hline
61 & iris & 5 & 3 & 150.0 \\hline
187 & wine & 14 & 3 & 178.0 \\hline
15 & breast-w & 10 & N/A & 699.0 \\hline
31 & credit-g & 21 & 2 & 1000.0 \\hline
4534 & PhishingWebsites & 31 & N/A & 11055.0 \\hline
\end{tabular}
\caption{Overview of OpenML datasets}
\label{tab:openml_datasets}
\end{table}


In [11]:
import openml

def generate_latex_table(dataset_ids):
    """Generates a LaTeX table with an overview of OpenML datasets suitable for a single column in a two-column document."""
    table_header = r"""\begin{table}[h]
\centering
\resizebox{\columnwidth}{!}{%
\begin{tabular}{|c|c|c|c|c|}
\hline
ID & Name & Feat. & Classes & Inst. \\
\hline"""
    table_rows = []
    
    dataset_info = []
    
    for dataset_id in dataset_ids:
        try:
            dataset = openml.datasets.get_dataset(dataset_id)
            name = dataset.name
            features = len(dataset.features)
            qualities = dataset.qualities
            classes = qualities.get("NumberOfClasses", "N/A")
            datapoints = qualities.get("NumberOfInstances", "N/A")
            classes = int(classes)
            datapoints = int(datapoints)
            
            dataset_info.append((dataset_id, name, features, classes, datapoints))
        except Exception as e:
            print(f"Failed to fetch dataset {dataset_id}: {e}")
    
    dataset_info.sort(key=lambda x: (x[3] if isinstance(x[3], (int, float)) else float('inf')))
    
    for dataset_id, name, features, classes, datapoints in dataset_info:
        row = f"{dataset_id} & {name} & {features} & {classes} & {datapoints} \\\hline"
        table_rows.append(row)
    
    table_footer = r"""\end{tabular}}
\caption{Overview of OpenML datasets sorted by number of classes}
\label{tab:openml_datasets}
\end{table}"""
    
    latex_table = "\n".join([table_header] + table_rows + [table_footer])
    return latex_table

# Example usage
dataset_ids = [35,41,54,61, 187, 15, 31, 4534]  # Replace with desired OpenML dataset IDs
latex_code = generate_latex_table(dataset_ids)
print(latex_code)


\begin{table}[h]
\centering
\resizebox{\columnwidth}{!}{%
\begin{tabular}{|c|c|c|c|c|}
\hline
ID & Name & Feat. & Classes & Inst. \\
\hline
15 & breast-w & 10 & 2 & 699 \\hline
31 & credit-g & 21 & 2 & 1000 \\hline
4534 & PhishingWebsites & 31 & 2 & 11055 \\hline
61 & iris & 5 & 3 & 150 \\hline
187 & wine & 14 & 3 & 178 \\hline
54 & vehicle & 19 & 4 & 846 \\hline
35 & dermatology & 35 & 6 & 366 \\hline
41 & glass & 10 & 6 & 214 \\hline
\end{tabular}}
\caption{Overview of OpenML datasets sorted by number of classes}
\label{tab:openml_datasets}
\end{table}
