# SciTeX Scholar Module - Managing Academic Papers

This notebook demonstrates how to use SciTeX's Scholar module to search, download, and manage academic papers.

## 1. Setup and Import

In [None]:
from pathlib import Path

import pandas as pd

import scitex
from scitex.scholar import Paper, Papers, Scholar

## 2. Initialize Scholar

In [None]:
# Create the Scholar instance with its default configuration.
scholar = Scholar()

# Alternative: pass a custom output directory instead.
# scholar = Scholar(output_dir="./my_papers")

## 3. Search for Papers

In [None]:
# Run a free-text search, asking for at most five results.
query = "machine learning neuroscience"
papers = scholar.search(query, max_results=5)

# Report the number of hits, then show a preview of the result set.
print(f"Found {len(papers)} papers")
papers.preview()

In [None]:
# Look up one specific paper through its DOI.
doi = "10.1038/s41586-020-2649-2"
paper_by_doi = scholar.search_by_doi(doi)

# Print details only when the lookup returned a truthy result.
if paper_by_doi:
    print(f"Title: {paper_by_doi.title}")
    # Only the first three authors are listed.
    print(f"Authors: {', '.join(paper_by_doi.authors[:3])}...")
    print(f"Year: {paper_by_doi.year}")

## 4. Working with Papers Collection

In [None]:
# Build a Papers collection from the first three search results.
my_papers = Papers(papers.papers[:3])

# Construct a Paper by hand and add it to the collection.
# (Paper is imported with the other dependencies in the import cell,
# so this cell carries no import of its own.)
new_paper = Paper(
    title="Deep Learning in Neuroscience",
    authors=["John Doe", "Jane Smith"],
    year=2024,
    doi="10.1234/example.2024"
)
my_papers.add(new_paper)

print(f"Total papers: {len(my_papers)}")

## 5. Export Papers

In [None]:
# Make sure the export directory exists before writing into it.
output_dir = Path("./scholar_output")
output_dir.mkdir(exist_ok=True)

# Write the collection out as a BibTeX file.
bibtex_file = output_dir.joinpath("my_papers.bib")
my_papers.to_bibtex(bibtex_file)
print(f"Saved to {bibtex_file}")

# Convert the collection to a DataFrame and show a few key columns.
df = my_papers.to_dataframe()
summary_columns = ['title', 'year', 'doi']
print("\nPapers DataFrame:")
print(df[summary_columns].head())

## 6. Filter and Sort Papers

In [None]:
# Filter the collection using a minimum year of 2020.
recent_papers = my_papers.filter_by_year(min_year=2020)
print(f"Papers from 2020 onwards: {len(recent_papers)}")

# Sort newest-first and list up to three entries, truncating long titles.
sorted_papers = my_papers.sort_by_year(ascending=False)
print("\nPapers sorted by year (newest first):")
newest_three = sorted_papers.papers[:3]
for paper in newest_three:
    print(f"- {paper.year}: {paper.title[:50]}...")

## 7. Download PDFs (if available)

In [None]:
# PDF retrieval needs either proper authentication or open-access papers;
# this cell mainly demonstrates the interface.

# Create a "pdfs" subfolder inside the export directory.
pdf_dir = output_dir / "pdfs"
pdf_dir.mkdir(exist_ok=True)

# Request PDFs for every paper in the collection.
results = scholar.download_pdfs(my_papers, output_dir=pdf_dir)

# Count the entries flagged as successful and report the ratio.
# NOTE(review): assumes each result supports r['success'] lookup — confirm
# against the return value of download_pdfs.
successful = len([r for r in results if r['success']])
print(f"Successfully downloaded: {successful}/{len(results)} PDFs")

## 8. Load Papers from BibTeX

In [None]:
# Reload the collection from the BibTeX file, provided it exists on disk.
if bibtex_file.exists():
    loaded_papers = Papers.from_bibtex(bibtex_file)
    print(f"Loaded {len(loaded_papers)} papers from BibTeX")

    # Spot-check the first two entries (title, first two authors, year).
    first_two = loaded_papers.papers[:2]
    for paper in first_two:
        print(f"\n- {paper.title}")
        print(f"  Authors: {', '.join(paper.authors[:2])}...")
        print(f"  Year: {paper.year}")

## 9. Integration with Other SciTeX Features

In [None]:
# Collect the publication years once; papers with a falsy year are skipped.
known_years = [p.year for p in my_papers.papers if p.year]

# Bundle the serialized papers with summary metadata for later analysis.
papers_data = {
    'papers': [p.to_dict() for p in my_papers.papers],
    'metadata': {
        'search_query': query,
        'total_papers': len(my_papers),
        # min()/max() raise ValueError on an empty sequence, so fall back to
        # (None, None) when no paper carries a usable year.
        'date_range': (
            (min(known_years), max(known_years)) if known_years else (None, None)
        ),
    }
}

# Persist the bundle as JSON through SciTeX's unified save function.
scitex.io.save(papers_data, output_dir / 'papers_data.json')
print(f"Saved papers data to {output_dir / 'papers_data.json'}")

## 10. Visualize Paper Statistics

In [None]:
# Plot the number of papers per publication year.
df = my_papers.to_dataframe()
has_year_data = (not df.empty) and ('year' in df.columns)
if has_year_data:
    # Tally papers per year, sorted by the year values in the index.
    year_counts = df['year'].value_counts().sort_index()

    fig, ax = scitex.plt.subplots(figsize=(8, 5))
    ax.bar(year_counts.index, year_counts.values)
    ax.set_xlabel('Year')
    ax.set_ylabel('Number of Papers')
    ax.set_title('Papers by Year')

    # Write the figure into the export directory.
    # NOTE(review): relies on the figure object returned by
    # scitex.plt.subplots exposing a .save method — confirm.
    fig.save(output_dir / 'papers_by_year.png')
    print("Saved year distribution plot")

## Summary

This notebook demonstrated:
- Searching for papers by query and DOI
- Managing paper collections with the Papers class
- Exporting to BibTeX and DataFrame formats
- Filtering and sorting papers
- Downloading PDFs (when available)
- Loading papers back from a BibTeX file
- Integration with other SciTeX features
- Visualizing paper statistics by year

For more advanced features like OpenAthens authentication or custom search engines, refer to the Scholar module documentation.