import asyncio
import tempfile
import os
from weakref import WeakValueDictionary
from . import _openxlsx
from ._openxlsx import XLProperty, XLLineStyle, XLPatternType
from .worksheet import Worksheet
from .styles import Style
class DocumentProperties:
"""
High-level wrapper for Excel document properties.
Supports both Core and App/Extended properties.
"""
_PROPERTY_MAP = {
"title": XLProperty.Title,
"subject": XLProperty.Subject,
"creator": XLProperty.Creator,
"keywords": XLProperty.Keywords,
"description": XLProperty.Description,
"last_modified_by": XLProperty.LastModifiedBy,
"last_printed": XLProperty.LastPrinted,
"created": XLProperty.CreationDate,
"modified": XLProperty.ModificationDate,
"category": XLProperty.Category,
"application": XLProperty.Application,
"doc_security": XLProperty.DocSecurity,
"scale_crop": XLProperty.ScaleCrop,
"manager": XLProperty.Manager,
"company": XLProperty.Company,
"links_up_to_date": XLProperty.LinksUpToDate,
"shared_doc": XLProperty.SharedDoc,
"hyperlink_base": XLProperty.HyperlinkBase,
"hyperlinks_changed": XLProperty.HyperlinksChanged,
"app_version": XLProperty.AppVersion,
}
def __init__(self, doc):
self._doc = doc
def __getitem__(self, key):
if isinstance(key, XLProperty):
return self._doc.property(key)
prop = self._PROPERTY_MAP.get(key.lower().replace(" ", "_"))
if prop is not None:
return self._doc.property(prop)
# Fallback to string-based lookup in AppProperties (most flexible)
return self._doc.app_properties().property(key)
def __setitem__(self, key, value):
if isinstance(key, XLProperty):
self._doc.set_property(key, str(value))
return
prop = self._PROPERTY_MAP.get(key.lower().replace(" ", "_"))
if prop is not None:
self._doc.set_property(prop, str(value))
else:
# Fallback to string-based set in AppProperties
self._doc.app_properties().set_property(key, str(value))
def __delitem__(self, key):
if isinstance(key, XLProperty):
self._doc.delete_property(key)
return
prop = self._PROPERTY_MAP.get(key.lower().replace(" ", "_"))
if prop is not None:
self._doc.delete_property(prop)
else:
self._doc.app_properties().delete_property(key)
@property
def title(self):
return self[XLProperty.Title]
@title.setter
def title(self, value):
self[XLProperty.Title] = value
@property
def creator(self):
return self[XLProperty.Creator]
@creator.setter
def creator(self, value):
self[XLProperty.Creator] = value
@property
def last_modified_by(self):
return self[XLProperty.LastModifiedBy]
@last_modified_by.setter
def last_modified_by(self, value):
self[XLProperty.LastModifiedBy] = value
@property
def subject(self):
return self[XLProperty.Subject]
@subject.setter
def subject(self, value):
self[XLProperty.Subject] = value
@property
def description(self):
return self[XLProperty.Description]
@description.setter
def description(self, value):
self[XLProperty.Description] = value
@property
def keywords(self):
return self[XLProperty.Keywords]
@keywords.setter
def keywords(self, value):
self[XLProperty.Keywords] = value
@property
def category(self):
return self[XLProperty.Category]
@category.setter
def category(self, value):
self[XLProperty.Category] = value
@property
def company(self):
return self[XLProperty.Company]
@company.setter
def company(self, value):
self[XLProperty.Company] = value
class CustomProperties:
"""
Proxy for custom document properties.
"""
def __init__(self, doc):
self._doc = doc
def __getitem__(self, name):
return self._doc.custom_property(name)
def __setitem__(self, name, value):
self._doc.set_custom_property(name, str(value))
def __delitem__(self, name):
self._doc.delete_custom_property(name)
def __contains__(self, name):
try:
val = self._doc.custom_property(name)
return val != ""
except Exception:
return False
[docs]
class Workbook:
"""
Represents an Excel workbook.
Uses WeakValueDictionary for worksheet caching to allow garbage collection
of Worksheet objects when they are no longer referenced elsewhere.
"""
def __init__(self, filename=None, force_overwrite=True, password=None):
self._doc = _openxlsx.XLDocument()
self._temp_file = None # Track temp file for cleanup
if filename:
if password is not None:
self._doc.open(str(filename), password)
else:
self._doc.open(str(filename))
self._filename = str(filename)
else:
# Use a temporary file to avoid polluting the current directory
# OpenXLSX's create() writes to disk immediately
fd, temp_path = tempfile.mkstemp(suffix=".xlsx", prefix="pyopenxlsx_")
os.close(fd) # Close the file descriptor, XLDocument will manage the file
self._temp_file = temp_path
self._doc.create(temp_path, force_overwrite)
self._filename = None
self._wb = self._doc.workbook()
# Use WeakValueDictionary to avoid keeping Worksheet objects alive indefinitely
# Worksheets will be garbage collected when no external references remain
self._sheets = WeakValueDictionary()
self._styles = None
self._date_format_cache = {}
@property
def has_macro(self):
"""Check if the loaded document contains a VBA macro project."""
return self._doc.has_macro()
[docs]
def save(self, filename=None, force_overwrite=True, password=None):
if filename:
if password is not None:
self._doc.save_as(str(filename), force_overwrite, password)
else:
self._doc.save_as(str(filename), force_overwrite)
elif self._filename:
# OpenXLSX's save() doesn't take a password, but save_as does
if password is not None:
self._doc.save_as(self._filename, force_overwrite, password)
else:
self._doc.save()
else:
raise ValueError("No filename specified")
[docs]
async def save_async(self, filename=None, force_overwrite=True, password=None):
await asyncio.to_thread(self.save, filename, force_overwrite, password)
[docs]
def close(self):
self._doc.close()
# Clean up temporary file if it was created
if self._temp_file and os.path.exists(self._temp_file):
try:
os.unlink(self._temp_file)
except OSError:
pass # Ignore errors during cleanup
self._temp_file = None
[docs]
async def close_async(self):
await asyncio.to_thread(self.close)
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
async def __aenter__(self):
return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
await self.close_async()
@property
def styles(self):
if self._styles is None:
self._styles = self._doc.styles()
return self._styles
@property
def defined_names(self):
"""
Access the collection of defined names (named ranges) in the workbook.
"""
return self._wb.defined_names()
@property
def properties(self):
if not hasattr(self, "_properties"):
self._properties = DocumentProperties(self._doc)
return self._properties
@property
def custom_properties(self):
"""
Get the custom document properties.
"""
if not hasattr(self, "_custom_properties"):
self._custom_properties = CustomProperties(self._doc)
return self._custom_properties
[docs]
def add_style(
self,
font=None,
fill=None,
border=None,
alignment=None,
number_format=None,
protection=None,
):
style_obj = None
if isinstance(font, Style):
style_obj = font
font = style_obj.font
fill = style_obj.fill
border = style_obj.border
alignment = style_obj.alignment
number_format = style_obj.number_format
protection = style_obj.protection
# Create a new cell format entry (default)
index = self.styles.cell_formats().create()
xf = self.styles.cell_formats().cell_format_by_index(index)
if font is not None:
if isinstance(font, int):
xf.set_font_index(font)
else:
fonts = self.styles.fonts()
idx = fonts.create()
target_font = fonts.font_by_index(idx)
target_font.set_name(font.name())
target_font.set_size(font.size())
target_font.set_bold(font.bold())
target_font.set_italic(font.italic())
# TODO: Handle underline, etc. if added to Font class
if font.color():
target_font.set_color(font.color())
xf.set_font_index(idx)
xf.set_apply_font(True)
if fill is not None:
if isinstance(fill, int):
xf.set_fill_index(fill)
else:
fills = self.styles.fills()
idx = fills.create()
target_fill = fills.fill_by_index(idx)
# Check for None pattern
p_type = fill.pattern_type()
if p_type != getattr(XLPatternType, "None"):
target_fill.set_pattern_type(p_type)
if fill.color():
target_fill.set_color(fill.color())
if fill.background_color():
target_fill.set_background_color(fill.background_color())
xf.set_fill_index(idx)
xf.set_apply_fill(True)
if border is not None:
if isinstance(border, int):
xf.set_border_index(border)
else:
borders = self.styles.borders()
idx = borders.create()
target_border = borders.border_by_index(idx)
line_none = getattr(XLLineStyle, "None")
left_side = border.left()
if left_side and left_side.style() and left_side.style() != line_none:
target_border.set_left(left_side.style(), left_side.color())
r = border.right()
if r and r.style() and r.style() != line_none:
target_border.set_right(r.style(), r.color())
t = border.top()
if t and t.style() and t.style() != line_none:
target_border.set_top(t.style(), t.color())
b = border.bottom()
if b and b.style() and b.style() != line_none:
target_border.set_bottom(b.style(), b.color())
d = border.diagonal()
if d and d.style() and d.style() != line_none:
target_border.set_diagonal(d.style(), d.color())
xf.set_border_index(idx)
xf.set_apply_border(True)
if alignment:
target_align = xf.alignment(True)
if alignment.horizontal():
target_align.set_horizontal(alignment.horizontal())
if alignment.vertical():
target_align.set_vertical(alignment.vertical())
target_align.set_wrap_text(alignment.wrap_text())
xf.set_apply_alignment(True)
if number_format:
if isinstance(number_format, int):
# Assume it's a numberFormatId
xf.set_number_format_id(number_format)
elif isinstance(number_format, str):
# Check if this format code already exists
nfs = self.styles.number_formats()
found = False
target_id = 0
count = nfs.count()
for i in range(count):
nf = nfs.number_format_by_index(i)
if nf.format_code() == number_format:
target_id = nf.number_format_id()
found = True
break
if found:
xf.set_number_format_id(target_id)
else:
# Create new custom format
max_id = 163
for i in range(count):
nf = nfs.number_format_by_index(i)
if nf.number_format_id() > max_id:
max_id = nf.number_format_id()
new_id = max_id + 1
# Create new empty number format entry
nfs.create()
# Retrieve it (assume appended)
nf = nfs.number_format_by_index(nfs.count() - 1)
nf.set_number_format_id(new_id)
nf.set_format_code(number_format)
xf.set_number_format_id(new_id)
xf.set_apply_number_format(True)
if protection:
target_prot = xf
if hasattr(protection, "locked"):
target_prot.set_locked(protection.locked)
if hasattr(protection, "hidden"):
target_prot.set_hidden(protection.hidden)
xf.set_apply_protection(True)
if style_obj:
style_obj.style_index = index
return index
[docs]
async def add_style_async(
self,
font=None,
fill=None,
border=None,
alignment=None,
number_format=None,
protection=None,
):
return await asyncio.to_thread(
self.add_style, font, fill, border, alignment, number_format, protection
)
@property
def workbook(self):
return self._wb
@property
def active(self):
try:
names = self.sheetnames
for name in names:
ws = self.workbook.worksheet(name)
if ws.is_active():
return self[name] # Use cache via __getitem__
except Exception:
pass
if self.sheetnames:
return self[self.sheetnames[0]]
return None
@active.setter
def active(self, ws):
if not isinstance(ws, Worksheet):
raise TypeError("Must be a Worksheet object")
self.workbook.clear_active_tab()
ws._sheet.set_active()
[docs]
def create_sheet(self, title=None, index=None):
if title is None:
i = 1
while f"Sheet{i}" in self.sheetnames:
i += 1
title = f"Sheet{i}"
self.workbook.add_worksheet(title)
ws = self[title]
if index is not None:
ws._sheet.set_index(index + 1)
return ws
[docs]
async def create_sheet_async(self, title=None, index=None):
return await asyncio.to_thread(self.create_sheet, title, index)
[docs]
def remove(self, worksheet):
self.workbook.delete_sheet(worksheet.title)
[docs]
async def remove_async(self, worksheet):
await asyncio.to_thread(self.remove, worksheet)
[docs]
def copy_worksheet(self, from_worksheet):
new_name = f"{from_worksheet.title} Copy"
i = 1
while new_name in self.sheetnames:
new_name = f"{from_worksheet.title} Copy{i}"
i += 1
self.workbook.clone_sheet(from_worksheet.title, new_name)
return self[new_name]
[docs]
async def copy_worksheet_async(self, from_worksheet):
return await asyncio.to_thread(self.copy_worksheet, from_worksheet)
@property
def sheetnames(self):
return list(self.workbook.worksheet_names())
def __getitem__(self, key):
if key in self._sheets:
return self._sheets[key]
if self.workbook.sheet_exists(key):
ws = Worksheet(self.workbook.worksheet(key), self)
self._sheets[key] = ws
return ws
raise KeyError(f"Worksheet {key} does not exist")
def __delitem__(self, key):
if self.workbook.sheet_exists(key):
self.workbook.delete_sheet(key)
if key in self._sheets:
del self._sheets[key]
else:
raise KeyError(f"Worksheet {key} does not exist")
def __iter__(self):
for name in self.sheetnames:
yield self[name]
def __len__(self):
return self.workbook.sheet_count()
def __contains__(self, key):
return self.workbook.sheet_exists(key)
[docs]
def get_archive_entries(self):
"""
Get a list of all files/directories in the underlying zip archive.
Returns:
list[str]: List of entry paths.
"""
return self._doc.get_archive_entries()
[docs]
def has_archive_entry(self, path):
"""
Check if the underlying zip archive contains an entry with the given path.
Args:
path: Path in the archive (e.g., 'xl/workbook.xml').
Returns:
bool: True if the entry exists.
"""
return self._doc.has_archive_entry(path)
[docs]
def get_archive_entry(self, path):
"""
Get the raw bytes of an entry from the underlying zip archive.
Args:
path: Path in the archive (e.g., 'xl/workbook.xml').
Returns:
bytes: The raw binary data of the entry.
Raises:
RuntimeError: If the entry is not found in the archive.
"""
return self._doc.get_archive_entry(path)
[docs]
def get_embedded_images(self):
"""
Get a list of all embedded images in the workbook.
Returns:
list[ImageInfo]: List of ImageInfo objects with name, path, and extension attributes.
Example:
>>> wb = Workbook("test.xlsx")
>>> images = wb.get_embedded_images()
>>> for img in images:
... print(f"Found image: {img.name}")
"""
return self._doc.get_embedded_images()
[docs]
def get_image_data(self, image_path_or_name):
"""
Get the binary data of an embedded image.
Args:
image_path_or_name: Either the full path (e.g., 'xl/media/image1.png')
or just the filename (e.g., 'image1.png')
Returns:
bytes: The raw binary data of the image.
Raises:
RuntimeError: If the image is not found in the archive.
Example:
>>> wb = Workbook("test.xlsx")
>>> images = wb.get_embedded_images()
>>> if images:
... data = wb.get_image_data(images[0].name)
... with open("extracted_image.png", "wb") as f:
... f.write(data)
"""
return self._doc.get_image_data(image_path_or_name)
def __del__(self):
# Ensure temporary file is cleaned up even if close() was not called
if hasattr(self, "_temp_file") and self._temp_file:
try:
os.unlink(self._temp_file)
except (OSError, FileNotFoundError):
pass
[docs]
def load_workbook(filename, password=None):
return Workbook(filename, password=password)
[docs]
async def load_workbook_async(filename, password=None):
return await asyncio.to_thread(load_workbook, filename, password)