In [1]:
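# Sample input for PowerPointGenerator.parse_markdown: '## ' lines name an accounting type, '### ' lines name an account title, and every other non-blank line is description text.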
md_content = """

## Current Assets
### Cash and Cash Equivalents
Currency in checking/savings accounts Short-term Treasury bills (maturing <3 months) Commercial paper from AAA-rated corporations Money market funds with daily liquidity 
Petty cash reserves for office expenses Foreign currency holdings in major currencies Undeposited checks from customers Cash in transit between bank accounts
### Marketable Securities
Corporate bonds with <1yr maturity
Government agency securities Certificates of deposit (CDs)
Bankers' acceptances Commercial paper holdings
Treasury notes maturing within 12 months
Highly liquid ETF positions
### Accounts Receivable
Trade receivables from normal operations
Installment receivables from long-term contracts
Receivables from affiliated companies
Allowance for doubtful accounts calculation
Aging schedule analysis (30/60/90 days)
Credit memo adjustments
Factored receivables disclosure
Unbilled receivables from progress contracts

## Non-Current Assets
### Property, Plant & Equipment
Land acquisition costs (original purchase) Building improvements capitalization
Machinery installation costs Equipment depreciation schedules
Leasehold improvement amortization
Construction-in-progress accounts
Capitalized interest during construction
### Intangible Assets
Patent acquisition and amortization
Trademark registration/maintenance costs
Customer list valuations
Non-compete agreement valuations
Software development costs
Licensing agreements fair value
Goodwill impairment testing methodology
### Long-Term Investments
Held-to-maturity securities portfolio
Equity method investment accounting
Real estate held for appreciation
Venture capital fund investments
Convertible debt instruments
Restricted stock holdings
Investments in subsidiaries

## Current Liabilities
### Accounts Payable
Trade payables to suppliers
Accrued purchases for goods received
Third-party processor withholdings Construction retainage payable
Dividends declared but unpaid
Customer deposits/advance payments
Escheat liability estimates
### Short-Term Debt
Commercial paper outstanding
Revolving credit facility draws
Current portion of long-term debt
Bank overdraft facilities used
Short-term lease liabilities
Vendor financing arrangements
Convertible debt equity component

## Long-Term Liabilities
### Bonds Payable
Corporate bond issuance at premium/discount
Debenture conversion features
Sinking fund requirements
Unamortized bond issuance costs
Fair value hedge adjustments
Callable bond provisions
Convertible bond accounting
### Pension Liabilities
Defined benefit obligation calculations
Actuarial gains/losses recognition
Plan asset valuations
Curtailment/settlement accounting
Multi-employer plan disclosures
Post-employment benefits accrual
Termination benefit provisions

## Shareholders' Equity
### Common Stock
Par value per share disclosure
Authorized shares vs outstanding
Treasury stock accounting method
Stock split adjustments
Stock option pool reserves
Restricted stock unit accruals
Dividend reinvestment plan shares
### Retained Earnings
Prior period adjustments
Dividend declaration accounting
ESOP allocation impacts
Foreign currency translation adjustments
Hedging reserve balances
Revaluation surplus accounts
Accumulated other comprehensive income
"""


In [2]:
from pptx import Presentation
from pptx.util import Pt, Inches
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN, MSO_AUTO_SIZE
from dataclasses import dataclass
from typing import List, Tuple
import textwrap
import logging

# Configure the root logger at INFO so the slide-removal and success messages
# emitted through logging.info() below are shown.
logging.basicConfig(level=logging.INFO)

@dataclass
class FinancialItem:
    """One account entry: accounting type, account title and description rows."""
    # Text of the enclosing '## ' heading (filled in by parse_markdown).
    accounting_type: str
    # Text of the enclosing '### ' heading; empty when none has been seen yet.
    account_title: str
    # Description text, already wrapped by parse_markdown into row-sized strings.
    descriptions: List[str]
    # When true, _populate_section appends " (continued)" to the type header.
    layer1_continued: bool = False
    # Set by _apply_continuation_markers / the planner; no rendering code in
    # this file reads it.
    layer2_continued: bool = False

class PowerPointGenerator:
    def __init__(self, template_path: str):
        """Load the template, validate it and set the layout constants.

        Args:
            template_path: Path passed straight to ``Presentation``.

        Raises:
            ValueError: From ``_validate_template`` when a required shape is
                missing on one of the first two slides.
        """
        self.prs = Presentation(template_path)
        self._validate_template()

        # Cursor state used while filling slides.  Slide 0 only has the 'c'
        # section (see _plan_content_distribution), so that is the start value.
        self.current_slide_index = 0
        self.current_section = 'c'

        # LINE_HEIGHT must exist before _calculate_max_rows() is called.
        self.LINE_HEIGHT = Pt(9)
        self.ROWS_PER_SECTION = self._calculate_max_rows()
        self.CHARS_PER_ROW = 50
        self.BULLET_CHAR = chr(0x25A0) + ' '
        self.DARK_BLUE = RGBColor(0, 50, 150)
        self.DARK_GREY = RGBColor(169, 169, 169)

        # Cross-section tracking state.  prev_item and partial_item are read by
        # _add_continuation_marks and _calculate_chunk_size before those
        # methods ever assign them, so they must be defined here.
        self.prev_layer1 = None
        self.prev_layer2 = None
        self.prev_item = None
        self.partial_item = None
        
    def _calculate_max_rows(self):
        """Dynamically calculate max rows based on template"""
        slide = self.prs.slides[0]
        shape = next(s for s in slide.shapes if s.name == "textMainBullets")
        return int(shape.height / self.LINE_HEIGHT) #- 4  # Account for margins
        
    def _apply_paragraph_formatting(self, paragraph):
        """Left-align *paragraph* with a hanging indent and fixed spacing.

        Objects that expose ``paragraph_format`` are formatted through it,
        exactly as before.  python-pptx's documented paragraph API offers
        ``alignment``, ``space_before`` and ``line_spacing`` but no
        ``paragraph_format``, ``left_indent`` or ``first_line_indent``; assigning
        the latter two only creates unused Python attributes.  For such
        paragraphs the indent is therefore written to the ``a:pPr`` element as
        the DrawingML ``marL`` / ``indent`` attributes (values in EMU).
        """
        pf = getattr(paragraph, "paragraph_format", None)
        if pf is not None:
            pf.alignment = PP_ALIGN.LEFT
            pf.left_indent = Inches(0.21)
            pf.first_line_indent = Inches(-0.19)
            pf.space_before = Pt(6.06)
            pf.line_spacing = 1.0
            return

        paragraph.alignment = PP_ALIGN.LEFT
        paragraph.space_before = Pt(6.06)
        paragraph.line_spacing = 1.0

        # Left margin plus a negative first-line indent gives the hanging indent.
        pPr = paragraph._p.get_or_add_pPr()
        pPr.set("marL", str(int(Inches(0.21))))
        pPr.set("indent", str(int(Inches(-0.19))))

    def _validate_template(self):
        """Main version validation logic"""
        required_shapes = {
            0: ["textMainBullets"],
            1: ["textMainBullets_L", "textMainBullets_R"]
        }
        for slide_idx, slide in enumerate(self.prs.slides):
            if slide_idx in required_shapes:
                missing = [name for name in required_shapes[slide_idx]
                          if not any(s.name == name for s in slide.shapes)]
                if missing:
                    raise ValueError(f"Missing shapes on slide {slide_idx+1}: {', '.join(missing)}")

    def _validate_content_placement(self, distribution):
        """Anti-duplication safeguard"""
        seen = set()
        for slide_idx, section, items in distribution:
            for item in items:
                key = (item.accounting_type, item.account_title, tuple(item.descriptions))
                if key in seen:
                    raise ValueError(f"Duplicate content detected: {key}")
                seen.add(key)


    def _calculate_chunk_size(self, items: List[FinancialItem]) -> Tuple[List[FinancialItem], FinancialItem]:
        """Collect the items that fit in one section, splitting the first one that overflows.

        Each item is costed at 2 lines (type + title) plus one line per
        description row.  Items are taken in order until the next one would
        exceed ``self.ROWS_PER_SECTION``; that item's rows are then cut so the
        head goes into this chunk and the tail is stored in ``self.partial_item``.

        Returns:
            ``(chunk, partial)`` where ``partial`` is ``self.partial_item`` at exit.

        NOTE(review): this method reads ``item.remaining_lines`` and
        ``item.continued`` and passes ``continued=`` / ``remaining_lines=`` to
        ``FinancialItem``, none of which the dataclass in this file defines, and
        it reads ``self.partial_item`` which ``__init__`` does not set.  As
        written it looks like it would raise before returning; no caller is
        visible in this file.  Confirm intent before using it.
        """
        lines_used = 0
        capacity = []
        
        for item in items:
            # Handle partial items from previous section
            # NOTE(review): the carried-over partial replaces the current
            # element of `items` rather than being processed before it --
            # verify this is intended.
            if self.partial_item:
                item = self.partial_item
                self.partial_item = None

            # Calculate lines needed including remaining lines
            item_lines = 2  # Type + title
            if item.remaining_lines:
                item_lines += len(item.remaining_lines)
            else:
                item_lines += len(item.descriptions)
            
            if lines_used + item_lines > self.ROWS_PER_SECTION:
                # Split descriptions if possible
                if lines_used < self.ROWS_PER_SECTION:
                    available_lines = self.ROWS_PER_SECTION - lines_used - 2  # Reserve for type/title
                    if available_lines > 0:
                        partial_desc = (item.remaining_lines or item.descriptions)[:available_lines]
                        remaining_desc = (item.remaining_lines or item.descriptions)[available_lines:]
                        
                        # Create partial item for next section
                        self.partial_item = FinancialItem(
                            item.accounting_type,
                            item.account_title,
                            [],
                            continued=True,
                            remaining_lines=remaining_desc
                        )
                        
                        # Add partial item to current chunk
                        # The fourth positional argument lands in the
                        # dataclass field `layer1_continued`.
                        capacity.append(FinancialItem(
                            item.accounting_type,
                            item.account_title,
                            partial_desc,
                            item.continued
                        ))
                        lines_used += 2 + len(partial_desc)
                # The section is full (or the item could not be split): stop.
                break
                
            lines_used += item_lines
            capacity.append(item)
            self.partial_item = None  # Reset partial after full consumption
            
        return capacity, self.partial_item

    def parse_markdown(self, md_content: str) -> List[FinancialItem]:
        """Convert the markdown outline into a flat list of FinancialItem objects.

        '## ' lines set the accounting type (and reset the title), '### ' lines
        set the account title, and every other non-blank line is wrapped to
        ``self.CHARS_PER_ROW`` characters and collected as description rows.
        A heading or a blank line closes the rows collected so far into one
        item; headings that never receive description text produce no item.
        """
        row_wrapper = textwrap.TextWrapper(
            width=self.CHARS_PER_ROW,
            break_long_words=True,
            replace_whitespace=False,
        )

        parsed = []
        section_type = ""
        section_title = ""
        pending_rows = []

        def flush():
            # Emit the rows gathered so far (if any) and start a fresh list.
            nonlocal pending_rows
            if pending_rows:
                parsed.append(FinancialItem(section_type, section_title, pending_rows))
                pending_rows = []

        for raw_line in md_content.strip().split('\n'):
            text = raw_line.rstrip()

            if text.startswith('## '):
                flush()
                section_type = text[3:].strip()
                section_title = ""
            elif text.startswith('### '):
                flush()
                section_title = text[4:].strip()
            elif not text:
                # A blank line separates two items under the same headings.
                flush()
            else:
                pending_rows.extend(row_wrapper.wrap(text))

        flush()
        return parsed

    
    def _plan_content_distribution(self, items: List[FinancialItem]):
        """Distribution planning without splitting individual descriptions"""
        distribution = []
        content_queue = items.copy()
        slide_idx = 0
        prev_item = None

        while content_queue:
            sections = ['c'] if slide_idx == 0 else ['b', 'c']
            
            for section in sections:
                if not content_queue:
                    break
                    
                section_items = []
                lines_used = 0
                
                while content_queue and lines_used < self.ROWS_PER_SECTION:
                    item = content_queue[0]
                    item_lines = self._calculate_item_lines(item)
                    
                    # Determine continuation status
                    layer1_cont = (prev_item and 
                                item.accounting_type == prev_item.accounting_type and
                                len(distribution) > 0)
                    
                    # Check if entire item fits
                    if lines_used + item_lines <= self.ROWS_PER_SECTION:
                        modified_item = FinancialItem(
                            item.accounting_type,
                            item.account_title,
                            item.descriptions,  # Keep all descriptions intact
                            layer1_cont,
                            False
                        )
                        section_items.append(modified_item)
                        content_queue.pop(0)
                        lines_used += item_lines
                        prev_item = item
                    else:
                        # Item doesn't fit - move to next section
                        # DO NOT split descriptions
                        break
                        
                if section_items:
                    distribution.append((slide_idx, section, section_items))
            
            slide_idx += 1
            
            # Safety check
            if slide_idx > 10:
                break
        
        return distribution


    def _split_descriptions(self, descriptions: List[str], max_lines: int, account_title: str = "", remaining: bool = False) -> List[str]:
        """Split descriptions with compensated width calculation"""
        if not descriptions:
            return []
        
        collected = []
        lines_used = 0
        
        if not remaining and account_title:
            # Handle first description with compensated width
            effective_width = self._calculate_effective_width_for_description(account_title)
            
            wrapper = textwrap.TextWrapper(
                width=effective_width,
                break_long_words=True,
                replace_whitespace=False
            )
            
            first_desc_wrapped = wrapper.wrap(descriptions[0])
            
            if len(first_desc_wrapped) <= max_lines:
                collected.append(descriptions[0])
                lines_used += len(first_desc_wrapped)
                start_idx = 1
            else:
                # Take only what fits in available lines
                if max_lines > 0:
                    truncated_lines = first_desc_wrapped[:max_lines]
                    truncated_desc = ' '.join(truncated_lines)
                    collected.append(truncated_desc)
                    lines_used += len(truncated_lines)
                start_idx = 1
        else:
            start_idx = 0
        
        # Handle remaining descriptions with full width
        wrapper_full = textwrap.TextWrapper(
            width=self.CHARS_PER_ROW,
            break_long_words=True,
            replace_whitespace=False
        )
        
        for desc in descriptions[start_idx:]:
            wrapped = wrapper_full.wrap(desc)
            if lines_used + len(wrapped) <= max_lines:
                collected.append(desc)
                lines_used += len(wrapped)
            else:
                remaining_space = max_lines - lines_used
                if remaining_space > 0:
                    truncated = '\n'.join(wrapped[:remaining_space])
                    collected.append(truncated)
                break
        
        return collected

    def _calculate_section_capacity(self, items):
        """Bulk item processing without per-item line counting"""
        return items[:self.ROWS_PER_SECTION//3]  # Approximate 3 lines per item

    def _wrap_text(self, text: str) -> List[str]:
        return textwrap.wrap(text, width=self.CHARS_PER_ROW, break_long_words=True)

    def _calculate_wrapped_lines(self, text: str) -> int:
        """Calculate actual wrapped lines using textwrap"""
        wrapper = textwrap.TextWrapper(
            width=self.CHARS_PER_ROW,
            break_long_words=True,
            replace_whitespace=False
        )
        return len(wrapper.wrap(text))
    
    def _calculate_effective_width_for_description(self, account_title: str) -> int:
        """Calculate available character width for description after accounting for bullet and title"""
        bullet_overhead = len(self.BULLET_CHAR)  # "■ " = 2 characters
        separator_overhead = len(" - ")  # " - " = 3 characters
        title_overhead = len(account_title)
        
        total_overhead = bullet_overhead + title_overhead + separator_overhead
        effective_width = self.CHARS_PER_ROW - total_overhead
        
        # Ensure minimum width for description
        return max(effective_width, 10)  # At least 10 chars for description

    def _calculate_item_lines(self, item: FinancialItem) -> int:
        """Calculate lines accounting for natural text wrapping for distribution planning"""
        lines = 0
        
        # Layer 1: Account type lines (with wrapping)
        lines += self._calculate_wrapped_lines(item.accounting_type)
        
        # Layer 2+3: Combined first line + wrapped continuation lines
        if item.descriptions:
            # Calculate as if joined together
            joined_descriptions = " ".join(item.descriptions)
            combined_text = f"{self.BULLET_CHAR}{item.account_title} - {joined_descriptions}"
            lines += self._calculate_wrapped_lines(combined_text)
        else:
            # Just account title with bullet
            title_with_bullet = f"{self.BULLET_CHAR}{item.account_title}"
            lines += self._calculate_wrapped_lines(title_with_bullet)
        
        return lines



    def _get_section_shape(self, slide, section: str):
        """Handle both placeholders and regular textboxes"""
        if self.current_slide_index == 0 and section == 'c':
            target_name = "textMainBullets"
        elif self.current_slide_index > 0:
            suffix = 'L' if section == 'b' else 'R'
            target_name = f"textMainBullets_{suffix}"
        else:
            return None
        
        # Find the shape by name
        shape = next((s for s in slide.shapes if s.name == target_name), None)
        if not shape:
            raise ValueError(f"Template missing required shape: {target_name}")
        
        # Check if it's a placeholder and needs replacement
        if hasattr(shape, 'is_placeholder') and shape.is_placeholder:
            return self._replace_placeholder_with_textbox(slide, shape)
        else:
            # It's already a regular textbox, use it directly
            return shape
    
    def _clear_placeholder_text(self, shape):
        """Properly clear placeholder text including default prompt text"""
        if hasattr(shape, 'text_frame') and shape.text_frame:
            # Clear all existing content
            shape.text_frame.clear()
            
            # For placeholders, also check for prompt text
            if hasattr(shape, 'placeholder_format'):
                # Remove any default placeholder text
                for paragraph in shape.text_frame.paragraphs:
                    paragraph.clear()
        
        # Alternative method - directly set empty text
        if hasattr(shape, 'text'):
            shape.text = ""
    
    def _replace_placeholder_with_textbox(self, slide, placeholder_shape):
        """Convert placeholder to regular textbox"""
        try:
            # Create new textbox with same properties
            left = placeholder_shape.left
            top = placeholder_shape.top
            width = placeholder_shape.width
            height = placeholder_shape.height
            textbox = slide.shapes.add_textbox(left, top, width, height)
            textbox.name = placeholder_shape.name  # Preserve the name
            
            # Copy text frame properties if they exist
            if hasattr(placeholder_shape, 'text_frame'):
                textbox.text_frame.word_wrap = placeholder_shape.text_frame.word_wrap
                if hasattr(placeholder_shape.text_frame, 'vertical_anchor'):
                    textbox.text_frame.vertical_anchor = placeholder_shape.text_frame.vertical_anchor
            
            # Remove original placeholder
            placeholder_shape._element.getparent().remove(placeholder_shape._element)
            
            return textbox
        except Exception as e:
            raise ValueError(f"Failed to replace placeholder: {str(e)}")

    def _apply_layer_formatting(self, paragraph, layer: int):
        """Format a layer-1 (accounting type) or layer-2 (account title) paragraph.

        Layer 1 becomes bold dark blue with 4 pt space after; layer 2 is made
        non-italic with a 0.25 inch left indent.  Other layer values are left
        untouched.

        Objects that expose ``paragraph_format`` are formatted through it.
        python-pptx's documented paragraph API has no such attribute, so for
        those paragraphs ``space_after`` is set on the paragraph itself and the
        left indent is written as the ``marL`` attribute of ``a:pPr`` (EMU).
        """
        fmt = getattr(paragraph, "paragraph_format", None)

        if layer == 1:
            paragraph.font.bold = True
            # RGB (0, 32, 96) is #002060.
            # NOTE(review): this differs from self.DARK_BLUE (0, 50, 150) used
            # by _populate_section -- confirm which shade is intended.
            paragraph.font.color.rgb = RGBColor(0, 32, 96)
            target = fmt if fmt is not None else paragraph
            target.space_after = Pt(4)
        elif layer == 2:
            paragraph.font.italic = False
            if fmt is not None:
                fmt.left_indent = Inches(0.25)
            else:
                paragraph._p.get_or_add_pPr().set("marL", str(int(Inches(0.25))))
            
    from pptx.oxml.xmlchemy import OxmlElement

    def _apply_bullet_formatting(self, paragraph):
        """Give *paragraph* a solid-square bullet by editing its ``a:pPr`` XML.

        Any existing bullet size, font or character definition is removed
        first.  The new elements are inserted ahead of ``a:tabLst`` /
        ``a:defRPr`` / ``a:extLst`` when those are present, which is where the
        DrawingML schema expects bullet children.  Indentation is expressed
        with the ``marL`` / ``indent`` attributes of ``a:pPr``.
        """
        # Imported here because the import in the class body only creates a
        # class attribute, not a name visible inside methods.
        from pptx.oxml.xmlchemy import OxmlElement

        pPr = paragraph._p.get_or_add_pPr()

        # Drop every existing member of the bullet size / font / char groups so
        # the paragraph never ends up with two competing definitions.
        stale_bullets = pPr.xpath(
            "./a:buSzTx|./a:buSzPct|./a:buSzPts"
            "|./a:buFontTx|./a:buFont"
            "|./a:buNone|./a:buAutoNum|./a:buChar|./a:buBlip"
        )
        for elem in stale_bullets:
            pPr.remove(elem)

        # Bullet size: 350% of the text size (value is in 1/1000 percent).
        bullet_size = OxmlElement("a:buSzPct")
        bullet_size.set("val", "350000")

        # Bullet font configuration.
        bullet_font = OxmlElement("a:buFont")
        bullet_font.set("typeface", "Calibri")
        bullet_font.set("panose", "020F0502020204030204")
        bullet_font.set("pitchFamily", "34")
        bullet_font.set("charset", "0")

        # Solid square bullet character.
        bullet_char = OxmlElement("a:buChar")
        bullet_char.set("char", "\u25A0")

        # Inserting in this order keeps size, font, char in schema order.
        for child in (bullet_size, bullet_font, bullet_char):
            pPr.insert_element_before(child, "a:tabLst", "a:defRPr", "a:extLst")

        # Hanging indent: 302400 EMU left margin (about 0.33 in) with the first
        # line pulled back by 201600 EMU (about 0.22 in).
        pPr.set("marL", "302400")
        pPr.set("indent", "-201600")

                
    def _apply_continuation_markers(self, distribution):
        """Precise continuation tracking"""
        prev_type = prev_title = None
        for slide_idx, section, items in distribution:
            for idx, item in enumerate(items):
                item.layer1_continued = (item.accounting_type == prev_type)
                item.layer2_continued = (item.account_title == prev_title) and item.layer1_continued
                
                if idx == len(items)-1:
                    prev_type = item.accounting_type
                    prev_title = item.account_title

    def _populate_section(self, shape, items: List[FinancialItem]):
        """Write *items* into the text frame of *shape*.

        A bold dark-blue header paragraph is written whenever the accounting
        type changes, with " (continued)" appended when the item is flagged
        ``layer1_continued``.  Each item with descriptions then gets one
        justified paragraph of the form "<bullet><title> - <descriptions>",
        the descriptions joined with spaces.  Items without descriptions add
        nothing beyond a possible header.

        ``TextFrame.clear()`` leaves a single empty paragraph behind; it is
        reused for the first line so the section does not open with a blank row.
        """
        tf = shape.text_frame
        tf.clear()
        tf.word_wrap = True  # let PowerPoint wrap the long joined paragraphs

        # The paragraph that survives clear(), handed out before adding new ones.
        spare = list(tf.paragraphs[:1])

        def next_paragraph():
            return spare.pop() if spare else tf.add_paragraph()

        current_accounting_type = None

        for item in items:
            # Layer 1: accounting type header, once per run of equal types.
            if item.accounting_type != current_accounting_type:
                p = next_paragraph()
                run = p.add_run()
                run.text = f"{item.accounting_type} (continued)" if item.layer1_continued else item.accounting_type
                run.font.size = Pt(9)
                run.font.bold = True
                run.font.color.rgb = self.DARK_BLUE
                current_accounting_type = item.accounting_type

            # Layers 2 + 3: title and all descriptions as one flowing paragraph.
            if item.descriptions:
                joined_descriptions = " ".join(item.descriptions)

                p = next_paragraph()
                run = p.add_run()
                run.text = f"{self.BULLET_CHAR}{item.account_title} - {joined_descriptions}"
                run.font.size = Pt(9)
                run.font.color.rgb = RGBColor(0, 0, 0)

                # Justify through paragraph_format when the object offers one,
                # otherwise on the paragraph itself.
                getattr(p, "paragraph_format", p).alignment = PP_ALIGN.JUSTIFY



    def generate(self, md_content: str, output_path: str):
        """Render *md_content* into the template and save it to *output_path*.

        The markdown is parsed, distributed over slide sections, checked for
        duplicates and written into the matching shapes.  Slides reported as
        unused are removed before saving.  Any failure is logged and re-raised.

        Raises:
            ValueError: "Insufficient slides in template" when the plan targets
                a slide index the template does not have; helpers may raise too.
        """
        try:
            parsed_items = self.parse_markdown(md_content)
            plan = self._plan_content_distribution(parsed_items)
            self._validate_content_placement(plan)

            # Fill each planned section.
            for slide_no, section_key, bucket in plan:
                if not slide_no < len(self.prs.slides):
                    raise ValueError("Insufficient slides in template")
                target_slide = self.prs.slides[slide_no]
                # _get_section_shape picks the shape name from this index.
                self.current_slide_index = slide_no
                target_shape = self._get_section_shape(target_slide, section_key)
                if not target_shape:
                    continue
                self._populate_section(target_shape, bucket)

            # Drop template slides that received no content.
            stale_slides = self._detect_unused_slides(plan)
            if stale_slides:
                logging.info(f"Removing unused slides: {[idx+1 for idx in stale_slides]}")
                self._remove_slides(stale_slides)

            self.prs.save(output_path)
            logging.info(f"Successfully generated PowerPoint with {len(self.prs.slides)} slides")

        except Exception as e:
            logging.error(f"Generation failed: {str(e)}")
            raise
    
    def _add_continuation_marks(self, items: List[FinancialItem]):
        """Mark items that continue across sections"""
        if not items:
            return
        
        # Track previous item across calls
        if self.prev_item and items:  # Add safety check
            if items[0].accounting_type == self.prev_item.accounting_type:  # FIX: Access first item
                items[0].continued = True  # FIX: Set attribute on item, not list
                if items[0].account_title == self.prev_item.account_title:  # FIX: Access first item
                    items[0].continued = True  # This line is redundant but corrected
        
        # Update previous item tracking
        self.prev_item = items[-1] if items else None


    def _advance_section(self):
        if self.current_slide_index == 0:
            # Move from Slide 0 to Slide 1 _L
            self.current_slide_index = 1
            self.current_section = 'b'
        else:
            if self.current_section == 'b':
                # Switch to _R on same slide
                self.current_section = 'c'
            else:
                # Move to next slide's _L
                self.current_slide_index += 1
                self.current_section = 'b'
                
    def _detect_unused_slides(self, distribution):
        """Detect which slides are not used based on content distribution"""
        used_slides = set()
        total_sections = 0
        
        for slide_idx, section, section_items in distribution:
            used_slides.add(slide_idx)
            total_sections += 1
        
        # Check conditions for slide removal
        max_slide_used = max(used_slides) if used_slides else 0
        remove_slides = []
        
        # Condition 1: If total sections <= 5, we only need 2 slides max
        if total_sections <= 5:
            for slide_idx in range(2, len(self.prs.slides)):
                remove_slides.append(slide_idx)
        else:
            # Condition 2: Remove slides 3,4 if they're not used
            for slide_idx in range(2, len(self.prs.slides)):
                if slide_idx not in used_slides:
                    remove_slides.append(slide_idx)
        
        return sorted(remove_slides, reverse=True)  # Remove from end to avoid index shifting
    
    def _remove_slides(self, slide_indices):
        """Remove slides by indices (must be in descending order)"""
        for slide_idx in slide_indices:
            if slide_idx < len(self.prs.slides):
                # Method 1: Using _sldIdLst (more reliable)
                xml_slides = self.prs.slides._sldIdLst
                slides = list(xml_slides)
                
                # Remove relationship
                rId = slides[slide_idx].rId
                self.prs.part.drop_rel(rId)
                
                # Remove from slide list
                xml_slides.remove(slides[slide_idx])
                
                logging.info(f"Removed slide {slide_idx + 1}")


# Usage: build "financial_report.pptx" from the md_content sample defined above,
# using "template.pptx" as the slide template.
if __name__ == "__main__":
    # Constructed outside the try block, so a template that cannot be loaded
    # or fails validation raises here without being logged.
    generator = PowerPointGenerator("template.pptx")
    
    try:
        generator.generate(md_content, "financial_report.pptx")
    except Exception as e:
        # generate() has already logged the failure before re-raising; this
        # handler logs it once more and swallows the exception.
        logging.error(f"Generation failed: {str(e)}")


INFO:root:Removing unused slides: [4, 3]
INFO:root:Removed slide 4
INFO:root:Removed slide 3
INFO:root:Successfully generated PowerPoint with 2 slides
