Source code for cocopack.figure_ops

import os, re
import glob
import subprocess
import platform
from copy import copy
from PIL import Image, ImageChops

# Names exported by a star-import of this module. Several other public
# functions defined below (keynote_to_images, powerpoint_to_images,
# crop_whitespace, reformat_image_filenames) are not listed here and must be
# imported by name.
__all__ = [
    'slides_to_images',
    'convert_to_pdf',
    'convert_images_to_pdf',
    'mogrify_images_to_pdf',
]

# Core Functions ------------------------------------------------------------


[docs]
def slides_to_images(input_path, output_path, filename_format='figure{:01d}.png',
                     crop_images=True, margin_size='1cm', dpi=300):
    """Export every slide of a presentation as an image, optionally cropped.

    The presentation type is detected from the file extension and the export
    is delegated to the matching backend; an unsupported extension raises
    ``ValueError`` before anything is exported.

    Args:
        input_path (str): Path to the presentation file (.ppt, .pptx, or .key).
        output_path (str): Directory that receives the exported images.
        filename_format (str, optional): Format string used to name the
            exported files. Defaults to 'figure{:01d}.png'.
        crop_images (bool, optional): When true, surrounding whitespace is
            cropped from the images in ``output_path``. Defaults to True.
        margin_size (str, optional): Margin re-added around each cropped
            image. Defaults to '1cm'.
        dpi (int, optional): Resolution used to convert the margin to
            pixels. Defaults to 300.
    """
    extension = _check_slides_extension(input_path)

    # The extension is one of the three supported values at this point, so
    # exactly one exporter runs.
    if extension == '.key':
        keynote_to_images(input_path, output_path, filename_format)
    elif extension in ('.ppt', '.pptx'):
        powerpoint_to_images(input_path, output_path, filename_format)

    if not crop_images:
        return

    crop_whitespace(output_path, margin_size=margin_size, dpi=dpi)



[docs]
def keynote_to_images(input_path, output_path, filename_format='figure{:01d}.png'):
    """Convert Keynote slides to image files using AppleScript.

    The Keynote document is opened through ``osascript`` and exported as PNG
    slide images into ``output_path`` (created if missing). The exported files
    are then renamed according to ``filename_format``.

    Args:
        input_path (str): Path to the Keynote file.
        output_path (str): Directory path where the images will be saved.
        filename_format (str, optional): Format string for the output
            filenames. Pass a falsy value to keep Keynote's own names.
            Defaults to 'figure{:01d}.png'.

    Note:
        This function only works on macOS systems with Keynote installed.
        Source: https://iworkautomation.com/keynote/document-export.html
    """
    def _applescript_literal(text):
        # Inside an AppleScript double-quoted string, backslash and double
        # quote must be backslash-escaped; otherwise a path containing either
        # character terminates the literal early and corrupts the script.
        return text.replace('\\', '\\\\').replace('"', '\\"')

    input_path = os.path.abspath(input_path)
    output_path = os.path.abspath(output_path)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_path, exist_ok=True)

    script_input = _applescript_literal(input_path)
    script_output = _applescript_literal(output_path)

    applescript = f'''
    tell application "Keynote"
        set theDocument to open "{script_input}"
        set documentName to the name of theDocument
        set targetFolderHFSPath to POSIX file "{script_output}" as string

        export theDocument as slide images to file targetFolderHFSPath with properties {{image format:PNG, skipped slides:FALSE}}
    end tell
    '''

    # The exit status is deliberately not checked (best-effort export, as
    # before); a failed export simply leaves nothing new to rename.
    subprocess.run(['osascript', '-e', applescript])

    if filename_format:
        reformat_image_filenames(output_path, filename_format)



[docs]
def powerpoint_to_images(input_path, output_path, filename_format='figure{:01d}.png'):
    """Convert PowerPoint slides to image files.

    Args:
        input_path (str): Path to the PowerPoint file (.ppt or .pptx).
        output_path (str): Directory path where the images will be saved
            (created if missing).
        filename_format (str, optional): Format string for the output
            filenames. Pass a falsy value to keep the exporter's own names.
            Defaults to 'figure{:01d}.png'.

    Note:
        The export backend depends on the operating system:
        - On macOS: AppleScript driving Microsoft PowerPoint.
        - On Windows: ``win32com.client`` (pywin32) driving PowerPoint.
        - On other platforms: the LibreOffice ``soffice`` command line.
          NOTE(review): LibreOffice's direct PNG conversion may produce a
          single image rather than one per slide -- confirm before relying
          on it.

        Missing backends are reported with a printed message and the
        function returns without renaming anything.
    """
    def _applescript_literal(text):
        # Backslash and double quote must be escaped inside an AppleScript
        # string literal, otherwise such a path breaks the generated script.
        return text.replace('\\', '\\\\').replace('"', '\\"')

    input_path = os.path.abspath(input_path)
    output_path = os.path.abspath(output_path)
    os.makedirs(output_path, exist_ok=True)

    system = platform.system()

    if system == 'Darwin':  # macOS
        script_input = _applescript_literal(input_path)
        script_output = _applescript_literal(output_path)
        applescript = f'''
        tell application "Microsoft PowerPoint"
            open "{script_input}"
            set thePresentation to active presentation
            
            set slideCount to count of slides in thePresentation
            repeat with i from 1 to slideCount
                set current slide of thePresentation to slide i of thePresentation
                set slideFile to "{script_output}/Slide" & i & ".png"
                save thePresentation in slideFile as save as PNG
            end repeat
            
            close thePresentation saving no
        end tell
        '''
        subprocess.run(['osascript', '-e', applescript])

    elif system == 'Windows':
        try:
            import win32com.client
        except ImportError:
            print("Error: win32com is required for Windows. Install with 'pip install pywin32'")
            return

        ppt = None
        presentation = None
        try:
            # Initialize PowerPoint and open the presentation.
            ppt = win32com.client.Dispatch("PowerPoint.Application")
            ppt.Visible = True
            presentation = ppt.Presentations.Open(input_path)

            # Export slides as images (PowerPoint slide indices are 1-based).
            for i in range(1, presentation.Slides.Count + 1):
                slide_path = os.path.join(output_path, f"Slide{i}.png")
                presentation.Slides(i).Export(slide_path, "PNG")
        except Exception as e:
            print(f"Error exporting PowerPoint slides: {e}")
            return
        finally:
            # Always release the COM objects, so a failed export does not
            # leave a PowerPoint instance running in the background.
            for com_object, method_name in ((presentation, 'Close'), (ppt, 'Quit')):
                if com_object is None:
                    continue
                try:
                    getattr(com_object, method_name)()
                except Exception as cleanup_error:
                    print(f"Warning: PowerPoint cleanup failed: {cleanup_error}")

    else:
        # LibreOffice does the conversion here; python-pptx cannot render
        # slides to images, so it is intentionally not required.
        libreoffice_cmd = ['soffice', '--headless', '--convert-to', 'png', '--outdir', output_path, input_path]
        try:
            subprocess.run(libreoffice_cmd, check=True)
        except FileNotFoundError:
            # Raised when the 'soffice' executable is not on PATH.
            print("Error: LibreOffice ('soffice') was not found.")
            print("Install LibreOffice for better platform-independent conversion.")
            return
        except subprocess.CalledProcessError:
            print("Warning: LibreOffice conversion failed. Limited functionality available.")
            print("Install LibreOffice for better platform-independent conversion.")

    if filename_format:
        reformat_image_filenames(output_path, filename_format)


# Helper Functions ------------------------------------------------------------

def _check_slides_extension(input_path):
    """Validate a presentation path and return its extension in lower case.

    The comparison is case-insensitive, so ``Deck.PPTX`` is accepted and
    reported as ``'.pptx'``.

    Args:
        input_path (str): Path to the presentation file.

    Returns:
        str: One of '.key', '.ppt' or '.pptx'.

    Raises:
        ValueError: If the extension is not one of the supported formats.
    """
    supported_exts = ('.key', '.ppt', '.pptx')
    input_ext = os.path.splitext(input_path)[1]
    normalized_ext = input_ext.lower()

    if normalized_ext not in supported_exts:
        # A single message string: passing two arguments to ValueError makes
        # the error render as a tuple.
        raise ValueError(
            f"Unsupported file extension: {input_ext!r}. "
            "Supported extensions: .key, .ppt, .pptx"
        )

    return normalized_ext

def reformat_image_filenames(output_path, reformat_pattern):
    """Rename the PNG files in a directory according to a numbered pattern.

    The slide number is taken from the last run of digits in each file's
    name (extension excluded), so exports such as ``Slide3.png`` or
    ``Deck2024.003.png`` both map to slide 3. Files whose names contain no
    digits are left untouched.

    Args:
        output_path (str): Directory containing the image files.
        reformat_pattern (str): Format string for the new filenames
            (e.g., 'figure{:01d}.png'). It must contain one placeholder,
            which receives the slide number as an ``int``.
    """
    # Escape the directory so glob metacharacters in its name ('[', '*', '?')
    # are matched literally; sort for a deterministic rename order.
    search_pattern = os.path.join(glob.escape(output_path), '*.png')
    image_files = sorted(glob.glob(search_pattern))

    for image_file in image_files:
        stem = os.path.splitext(os.path.basename(image_file))[0]
        digit_runs = re.findall(r'\d+', stem)
        if not digit_runs:
            # No slide number to extract; skip instead of crashing.
            continue

        new_filename = reformat_pattern.format(int(digit_runs[-1]))
        new_filepath = os.path.join(output_path, new_filename)
        if new_filepath == image_file:
            continue  # Already has the requested name.

        # os.replace overwrites an existing target on every platform, whereas
        # os.rename raises on Windows when the target exists.
        os.replace(image_file, new_filepath)


[docs]
def crop_whitespace(image_path, output_path=None, margin_size='1cm', dpi=300):
    """Crop whitespace around images and add a specified margin.

    Transparent pixels are treated as white when locating the content. The
    saved image keeps its alpha channel, except for JPEG outputs, which are
    flattened onto white because JPEG cannot store transparency.

    Args:
        image_path (str): Path to an image file or a directory containing
            image files (.png, .jpg, .jpeg).
        output_path (str, optional): Where the cropped images are saved: a
            directory (created if missing) when ``image_path`` is a
            directory, otherwise a file path. If None, the original files
            are overwritten. Defaults to None.
        margin_size (str | float, optional): Margin to add around cropped
            images, in centimetres, either as a number or as a string such
            as '1cm'. A falsy value adds no margin. Defaults to '1cm'.
        dpi (int, optional): Resolution used to convert the margin from
            centimetres to pixels. Defaults to 300.

    Raises:
        ValueError: If ``margin_size`` cannot be parsed as a number.
    """
    white_rgb = (255, 255, 255)
    white_rgba = (255, 255, 255, 255)

    # The margin is the same for every image, so resolve it once up front.
    margin_pixels = None
    if margin_size:
        if isinstance(margin_size, str):
            margin_cm = float(margin_size.strip().strip('cm'))
        else:
            margin_cm = float(margin_size)
        margin_pixels = int(margin_cm * dpi / 2.54)  # 2.54 cm per inch

    def add_margin(image, margin):
        # Paste the image centred on a larger opaque-white canvas.
        width, height = image.size
        canvas = Image.new("RGBA", (width + 2 * margin, height + 2 * margin), white_rgba)
        canvas.paste(image, (margin, margin))
        return canvas

    def crop_single_image(source_file, output_file):
        # convert() returns an independent, fully loaded copy, so the source
        # file can be closed before it is possibly overwritten below.
        with Image.open(source_file) as opened:
            image = opened.convert("RGBA")

        # Composite onto white so transparent regions count as whitespace.
        background = Image.new("RGBA", image.size, white_rgba)
        background.paste(image, mask=image.split()[3])
        image_rgb = background.convert("RGB")

        # Any pixel differing from pure white belongs to the content.
        # getbbox() gives None for an all-white image; crop(None) is then
        # expected to keep the full frame (as the original code relied on).
        difference = ImageChops.difference(image_rgb, Image.new("RGB", image.size, white_rgb))
        bounds = difference.getbbox()
        cropped_image = image.crop(bounds)

        if margin_pixels is not None:
            cropped_image = add_margin(cropped_image, margin_pixels)

        # JPEG has no alpha channel; saving an RGBA image as JPEG fails, so
        # flatten onto white first.
        if os.fspath(output_file).lower().endswith(('.jpg', '.jpeg')):
            flattened = Image.new("RGB", cropped_image.size, white_rgb)
            flattened.paste(cropped_image, mask=cropped_image.split()[3])
            cropped_image = flattened

        cropped_image.save(output_file)

    if os.path.isdir(image_path):
        if output_path:
            os.makedirs(output_path, exist_ok=True)
        for filename in sorted(os.listdir(image_path)):
            if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
                continue
            source_file = os.path.join(image_path, filename)
            if output_path:
                output_file = os.path.join(output_path, filename)
            else:
                output_file = source_file
            crop_single_image(source_file, output_file)
    else:
        if output_path is None:
            output_path = image_path
        crop_single_image(image_path, output_path)



[docs]
def _save_image_as_pdf(source_file, output_file, dpi):
    """Write one raster image to ``output_file`` as a PDF at ``dpi``."""
    with Image.open(source_file) as opened:
        image = opened
        # Images with an alpha channel are flattened onto a white background
        # before being written to the PDF.
        if opened.mode in ('RGBA', 'LA'):
            image = Image.new('RGB', opened.size, (255, 255, 255))
            image.paste(opened, mask=opened.split()[-1])
        image.save(output_file, 'PDF', resolution=dpi)


def convert_to_pdf(image_path, output_path=None, dpi=300, **kwargs):
    """Convert {PNG, JPEG, TIFF} images to high-quality PDF files.

    Args:
        image_path (str): Path to an image file or a directory containing
            image files. In directory mode only files ending in .png, .jpg,
            .jpeg, .tiff or .tif (case-insensitive) are converted.
        output_path (str, optional): Where the PDF files are saved. For a
            directory input this is the target directory (created if
            missing). For a single file it is either an existing directory
            or a file path whose extension is replaced by '.pdf'.
            If None, uses the same location as the input. Defaults to None.
        dpi (int, optional): DPI for the output PDF files. Defaults to 300.
        **kwargs: Additional keyword arguments.
            pdf_only (bool): If True, removes each original image file after
                it has been converted. Defaults to False.

    Returns:
        None
    """
    image_exts = ('.png', '.jpg', '.jpeg', '.tiff', '.tif')
    pdf_only = kwargs.get('pdf_only', False)

    if output_path is None:
        output_path = image_path

    # Sources that were converted successfully; only these are removed when
    # pdf_only is set (never the directory itself).
    converted_sources = []

    if os.path.isdir(image_path):
        os.makedirs(output_path, exist_ok=True)
        for filename in sorted(os.listdir(image_path)):
            # str.endswith takes the alternatives as a single tuple.
            if not filename.lower().endswith(image_exts):
                continue
            source_file = os.path.join(image_path, filename)
            output_file = os.path.join(output_path, os.path.splitext(filename)[0] + '.pdf')
            print(f'Converting {source_file} to {output_file}...')
            _save_image_as_pdf(source_file, output_file, dpi)
            converted_sources.append(source_file)
    else:
        if os.path.isdir(output_path):
            # A directory target: keep the image's own base name.
            stem = os.path.splitext(os.path.basename(image_path))[0]
            output_file = os.path.join(output_path, stem + '.pdf')
        else:
            output_file = os.path.splitext(output_path)[0] + '.pdf'
        _save_image_as_pdf(image_path, output_file, dpi)
        converted_sources.append(image_path)

    if pdf_only:
        for source_file in converted_sources:
            os.remove(source_file)



[docs]
def convert_images_to_pdf(input_path, dpi=300, **kwargs):
    """Convert all {PNG, JPEG, TIFF} images in a directory tree to PDF files.

    The directory and all of its subdirectories are searched for files ending
    in .png, .jpg, .jpeg, .tiff or .tif (case-insensitive). Each PDF is
    written next to its source image.

    Args:
        input_path (str): Path to the directory containing {PNG, JPEG, TIFF} images.
        dpi (int, optional): DPI for the output PDF files. Defaults to 300.
        **kwargs: Additional keyword arguments passed to convert_to_pdf.
            pdf_only (bool): If True, removes the original image files. Defaults to False.
    """
    image_exts = ('.png', '.jpg', '.jpeg', '.tiff', '.tif')

    # glob has no "{a,b}" alternation, so a single comma-joined pattern never
    # matches anything; walk the tree and filter by extension instead. The
    # list is built before converting so new or removed files cannot disturb
    # the traversal.
    image_files = sorted(
        os.path.join(dirpath, filename)
        for dirpath, _dirnames, filenames in os.walk(input_path)
        for filename in filenames
        if filename.lower().endswith(image_exts)
    )

    for image_file in image_files:
        convert_to_pdf(image_file, None, dpi, **kwargs)



[docs]
def mogrify_images_to_pdf(input_path, **kwargs):
    """Convert {PNG, JPEG, TIFF} images to PDF using ImageMagick's mogrify command.

    The directory and all of its subdirectories are searched for files ending
    in .png, .jpg, .jpeg, .tiff or .tif (case-insensitive), and ``mogrify`` is
    run once per file.

    Args:
        input_path (str): Path to the directory containing {PNG, JPEG, TIFF} images.
        **kwargs: Additional keyword arguments.
            pdf_only (bool): If True, removes each original image file once
                its conversion has succeeded. Defaults to False.
            dpi (int): Value passed to mogrify's ``-density`` option.
                Defaults to 300.

    Note:
        This function requires ImageMagick to be installed on the system.
    """
    image_exts = ('.png', '.jpg', '.jpeg', '.tiff', '.tif')
    pdf_only = kwargs.get('pdf_only', False)
    density = str(kwargs.get('dpi', 300))

    # glob has no "{a,b}" alternation, so a single comma-joined pattern never
    # matches anything; walk the tree and filter by extension instead.
    image_files = sorted(
        os.path.join(dirpath, filename)
        for dirpath, _dirnames, filenames in os.walk(input_path)
        for filename in filenames
        if filename.lower().endswith(image_exts)
    )

    for image_file in image_files:
        result = subprocess.run(
            ['mogrify', '-format', 'pdf', '-quality', '100', '-density', density, image_file]
        )
        if result.returncode != 0:
            # Never delete a source whose conversion failed.
            print(f'Warning: mogrify failed for {image_file}; keeping the original file.')
            continue
        if pdf_only:
            os.remove(image_file)