taulu
Taulu - segment tables from images
Taulu is a Python package designed to segment images of tables into their constituent rows and columns (and cells).
To use this package, you first need to make an annotation of the headers in your table images. The idea is that these headers will be similar across your full set of images, and they will be used as a starting point for the search algorithm that finds the table grid.
Here is an example python script of how to use Taulu:
from taulu import Taulu
import os
def setup():
# create an Annotation file of the headers in the image
# (one for the left header, one for the right)
# and store them in the examples directory
print("Annotating the LEFT header...")
Taulu.annotate("../data/table_00.png", "table_00_header_left.png")
print("Annotating the RIGHT header...")
Taulu.annotate("../data/table_00.png", "table_00_header_right.png")
def main():
taulu = Taulu(("table_00_header_left.png", "table_00_header_right.png"))
table = taulu.segment_table("../data/table_00.png", cell_height_factor=0.8, debug_view=True)
table.show_cells("../data/table_00.png")
if __name__ == "__main__":
if os.path.exists("table_00_header_left.png") and os.path.exists(
"table_00_header_right.png"
):
main()
else:
setup()
main()
If you want a high-level overview of how to use Taulu, see the documentation of the `Taulu` class (`taulu.Taulu`).
1""" 2Taulu - *segment tables from images* 3 4Taulu is a Python package designed to segment images of tables into their constituent rows and columns (and cells). 5 6To use this package, you first need to make an annotation of the headers in your table images. 7The idea is that these headers will be similar across your full set of images, and they will be 8used as a starting point for the search algorithm that finds the table grid. 9 10Here is an example python script of how to use Taulu: 11```python 12from taulu import Taulu 13import os 14 15 16def setup(): 17 # create an Annotation file of the headers in the image 18 # (one for the left header, one for the right) 19 # and store them in the examples directory 20 print("Annotating the LEFT header...") 21 Taulu.annotate("../data/table_00.png", "table_00_header_left.png") 22 23 print("Annotating the RIGHT header...") 24 Taulu.annotate("../data/table_00.png", "table_00_header_right.png") 25 26 27def main(): 28 taulu = Taulu(("table_00_header_left.png", "table_00_header_right.png")) 29 table = taulu.segment_table("../data/table_00.png", cell_height_factor=0.8, debug_view=True) 30 31 table.show_cells("../data/table_00.png") 32 33 34if __name__ == "__main__": 35 if os.path.exists("table_00_header_left.png") and os.path.exists( 36 "table_00_header_right.png" 37 ): 38 main() 39 else: 40 setup() 41 main() 42 43``` 44 45If you want a high-level overview of how to use Taulu, see [the Taulu class](./taulu.html#taulu.taulu.Taulu) 46""" 47 48from .grid import GridDetector, TableGrid 49from .header_aligner import HeaderAligner 50from .header_template import HeaderTemplate 51from .table_indexer import TableIndexer 52from .split import Split 53from .taulu import Taulu 54 55__pdoc__ = {} 56__pdoc__["constants"] = False 57__pdoc__["main"] = False 58__pdoc__["decorators"] = False 59__pdoc__["error"] = False 60__pdoc__["types"] = False 61__pdoc__["img_util"] = False 62 63__all__ = [ 64 "GridDetector", 65 "TableGrid", 66 "HeaderAligner", 
67 "HeaderTemplate", 68 "TableIndexer", 69 "Split", 70 "Taulu", 71] 72 73try: 74 from . import gpu 75 76 __all__.append("gpu") 77except ImportError: 78 pass
class GridDetector:
    """
    Detects table grid intersections using morphological filtering and template matching.

    This detector implements a multi-stage pipeline:

    1. **Binarization**: Sauvola adaptive thresholding to handle varying lighting
    2. **Morphological operations**: Dilation to connect broken rule segments
    3. **Cross-kernel matching**: Template matching with a cross-shaped kernel to find
       rule intersections where horizontal and vertical lines meet
    4. **Grid growing**: Iterative point detection starting from a known seed point

    The cross-kernel is designed to match the specific geometry of your table rules.
    It should be sized so that after morphology, it aligns with actual corner shapes.

    ## Tuning Guidelines

    - **kernel_size**: Increase if you need more selectivity (fewer false positives)
    - **cross_width/height**: Should match rule thickness after morphology
    - **morph_size**: Increase to connect more broken lines, but this thickens rules
    - **sauvola_k**: Increase to threshold more aggressively (remove noise)
    - **search_region**: Increase for documents with more warping/distortion
    - **distance_penalty**: Increase to prefer corners closer to expected positions

    ## Visual Debugging

    Set `visual=True` in methods to see intermediate results and tune parameters.
    """

    def __init__(
        self,
        kernel_size: int = 21,
        cross_width: int = 6,
        cross_height: Optional[int] = None,
        morph_size: Optional[int] = None,
        sauvola_k: float = 0.04,
        sauvola_window: int = 15,
        scale: float = 1.0,
        search_region: int = 40,
        distance_penalty: float = 0.4,
        skip_astar_threshold: float = 0.2,
        min_rows: int = 5,
        grow_threshold: float = 0.3,
        look_distance: int = 4,
        cuts: int = 3,
        cut_fraction: float = 0.5,
    ):
        """
        Args:
            kernel_size (int): the size of the cross kernel
                a larger kernel size often means that more penalty is applied, often leading
                to more sparse results
            cross_width (int): the width of one of the edges in the cross filter, should be
                roughly equal to the width of the rules in the image after morphology is applied
            cross_height (int | None): useful if the horizontal rules and vertical rules
                have different sizes
            morph_size (int | None): the size of the morphology operators that are applied before
                the cross kernel. 'bridges the gaps' of broken-up lines
            sauvola_k (float): threshold parameter for sauvola thresholding
            sauvola_window (int): window_size parameter for sauvola thresholding
            scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
            search_region (int): area in which to search for a new max value in `find_nearest` etc.
            distance_penalty (float): how much the point finding algorithm penalizes points that are further in the region [0, 1]
            skip_astar_threshold (float): minimum confidence score during table growing (based on a
                heuristic jump) above which astar pathfinding is skipped
            min_rows (int): minimum number of rows to find before stopping the table finding algorithm
            grow_threshold (float): the threshold for accepting a new point when growing the table
            look_distance (int): how many points away to look when calculating the median slope
            cuts (int): The amount of cuts (large deletions) to do in the grid during table growing
            cut_fraction (float): The portion of the already-chosen corner points to delete during cutting
        """
        self._validate_parameters(
            kernel_size,
            cross_width,
            cross_height,
            morph_size,
            search_region,
            sauvola_k,
            sauvola_window,
            distance_penalty,
            skip_astar_threshold,
            cuts,
            cut_fraction,
        )

        self._kernel_size = kernel_size
        self._cross_width = cross_width
        # Both default to cross_width when not given explicitly.
        self._cross_height = cross_width if cross_height is None else cross_height
        self._morph_size = morph_size if morph_size is not None else cross_width
        self._search_region = search_region
        self._sauvola_k = sauvola_k
        self._sauvola_window = sauvola_window
        self._distance_penalty = distance_penalty
        self._scale = scale
        self._skip_astar_threshold = skip_astar_threshold
        self._min_rows = min_rows
        self._grow_threshold = grow_threshold
        self._look_distance = look_distance
        self._cuts = cuts
        self._cut_fraction = cut_fraction

        self._cross_kernel = self._create_cross_kernel()

    def _validate_parameters(
        self,
        kernel_size: int,
        cross_width: int,
        cross_height: Optional[int],
        morph_size: Optional[int],
        search_region: int,
        sauvola_k: float,
        sauvola_window: int,
        distance_penalty: float,
        skip_astar_threshold: float,
        cuts: int,
        cut_fraction: float,
    ) -> None:
        """Validate initialization parameters, raising ValueError on any invalid value."""
        if kernel_size % 2 == 0:
            raise ValueError("kernel_size must be odd")
        if (
            kernel_size <= 0
            or cross_width <= 0
            or search_region <= 0
            or sauvola_window <= 0
        ):
            raise ValueError("Size parameters must be positive")
        if cross_height is not None and cross_height <= 0:
            raise ValueError("cross_height must be positive")
        if morph_size is not None and morph_size <= 0:
            raise ValueError("morph_size must be positive")
        if not 0 <= distance_penalty <= 1:
            raise ValueError("distance_penalty must be in [0, 1]")
        if sauvola_k <= 0:
            raise ValueError("sauvola_k must be positive")
        if skip_astar_threshold < 0 or skip_astar_threshold > 1:
            raise ValueError("skip_astar_threshold must be in [0, 1]")
        if cut_fraction < 0 or cut_fraction > 1:
            raise ValueError("cut_fraction must be in [0, 1]")
        if cuts < 0:
            raise ValueError("cuts must be zero or positive")

    def _create_gaussian_weights(self, region_size: int) -> NDArray:
        """
        Create a square 2D Gaussian weight mask used by `find_nearest` to penalize
        candidate points far from the expected position.

        Args:
            region_size (int): side length of the square mask, in pixels

        Returns:
            NDArray: float32 mask of shape (region_size, region_size) peaking in the
                center; all ones when `distance_penalty` is 0 (no penalty applied)
        """
        if self._distance_penalty == 0:
            return np.ones((region_size, region_size), dtype=np.float32)

        y = np.linspace(-1, 1, region_size)
        x = np.linspace(-1, 1, region_size)
        xv, yv = np.meshgrid(x, y)
        dist_squared = xv**2 + yv**2

        # Prevent log(0) when distance_penalty is 1
        if self._distance_penalty >= 0.999:
            sigma = 0.1  # Small sigma for very sharp peak
        else:
            # Chosen so the mask value at unit distance equals 1 - distance_penalty.
            sigma = np.sqrt(-1 / (2 * np.log(1 - self._distance_penalty)))

        weights = np.exp(-dist_squared / (2 * sigma**2))

        return weights.astype(np.float32)

    def _create_cross_kernel(self) -> NDArray:
        """Build the cross-shaped uint8 template (0/255) used for intersection matching."""
        kernel = np.zeros((self._kernel_size, self._kernel_size), dtype=np.uint8)
        center = self._kernel_size // 2

        # Create horizontal bar
        h_start = max(0, center - self._cross_height // 2)
        h_end = min(self._kernel_size, center + (self._cross_height + 1) // 2)
        kernel[h_start:h_end, :] = 255

        # Create vertical bar
        v_start = max(0, center - self._cross_width // 2)
        v_end = min(self._kernel_size, center + (self._cross_width + 1) // 2)
        kernel[:, v_start:v_end] = 255

        return kernel

    def _apply_morphology(self, binary: MatLike) -> MatLike:
        """Dilate horizontally then vertically to bridge gaps in broken rule lines."""
        # Define a horizontal kernel (adjust width as needed)
        kernel_hor = cv.getStructuringElement(cv.MORPH_RECT, (self._morph_size, 1))
        kernel_ver = cv.getStructuringElement(cv.MORPH_RECT, (1, self._morph_size))

        # Apply dilation
        dilated = cv.dilate(binary, kernel_hor, iterations=1)
        dilated = cv.dilate(dilated, kernel_ver, iterations=1)

        return dilated

    def _apply_cross_matching(self, img: MatLike) -> MatLike:
        """Apply cross kernel template matching."""
        # Pad so the response map keeps the same size as the input image.
        pad_y = self._cross_kernel.shape[0] // 2
        pad_x = self._cross_kernel.shape[1] // 2

        padded = cv.copyMakeBorder(
            img, pad_y, pad_y, pad_x, pad_x, borderType=cv.BORDER_CONSTANT, value=0
        )

        filtered = cv.matchTemplate(padded, self._cross_kernel, cv.TM_SQDIFF_NORMED)
        # Invert and normalize to 0-255 range (TM_SQDIFF_NORMED: lower = better match)
        filtered = cv.normalize(1.0 - filtered, None, 0, 255, cv.NORM_MINMAX)
        return filtered.astype(np.uint8)

    def apply(self, img: MatLike, visual: bool = False) -> MatLike:
        """
        Apply the grid detection filter to the input image.

        Args:
            img (MatLike): the input image
            visual (bool): whether to show intermediate steps

        Returns:
            MatLike: the filtered image, with high values (whiter pixels) at intersections of horizontal and vertical rules

        Raises:
            ValueError: if the input image is empty or None
        """

        if img is None or img.size == 0:
            raise ValueError("Input image is empty or None")

        binary = imu.sauvola(img, k=self._sauvola_k, window_size=self._sauvola_window)

        if visual:
            imu.show(binary, title="thresholded")

        binary = self._apply_morphology(binary)

        if visual:
            imu.show(binary, title="dilated")

        filtered = self._apply_cross_matching(binary)

        return filtered

    @log_calls(level=logging.DEBUG, include_return=True)
    def find_nearest(
        self, filtered: MatLike, point: Point, region: Optional[int] = None
    ) -> Tuple[Point, float]:
        """
        Find the nearest 'corner match' in the image, along with its score [0,1]

        Args:
            filtered (MatLike): the filtered image (obtained through `apply`)
            point (tuple[int, int]): the approximate target point (x, y)
            region (None | int): alternative value for search region,
                overwriting the `__init__` parameter `region`

        Returns:
            Tuple[Point, float]: the best-matching point in image coordinates and its
                Gaussian-weighted confidence; returns the input point with confidence
                0.0 when the search region falls outside the image
        """

        if filtered is None or filtered.size == 0:
            raise ValueError("Filtered image is empty or None")

        region_size = region if region is not None else self._search_region
        x, y = point

        # Calculate crop boundaries (clamped to the image)
        crop_x = max(0, x - region_size // 2)
        crop_y = max(0, y - region_size // 2)
        crop_width = min(region_size, filtered.shape[1] - crop_x)
        crop_height = min(region_size, filtered.shape[0] - crop_y)

        # Handle edge cases
        if crop_width <= 0 or crop_height <= 0:
            logger.warning(f"Point {point} is outside image bounds")
            return point, 0.0

        cropped = filtered[crop_y : crop_y + crop_height, crop_x : crop_x + crop_width]

        if cropped.size == 0:
            return point, 0.0

        # Always apply Gaussian weighting by extending crop if needed
        if cropped.shape[0] == region_size and cropped.shape[1] == region_size:
            # Perfect size - apply weights directly
            weights = self._create_gaussian_weights(region_size)
            weighted = cropped.astype(np.float32) * weights
        else:
            # Extend crop to match region_size, apply weights, then restore
            extended = np.zeros((region_size, region_size), dtype=cropped.dtype)

            # Calculate offset to center the cropped region in extended array
            offset_y = (region_size - cropped.shape[0]) // 2
            offset_x = (region_size - cropped.shape[1]) // 2

            # Place cropped region in center of extended array
            extended[
                offset_y : offset_y + cropped.shape[0],
                offset_x : offset_x + cropped.shape[1],
            ] = cropped

            # Apply Gaussian weights to extended array
            weights = self._create_gaussian_weights(region_size)
            weighted_extended = extended.astype(np.float32) * weights

            # Extract the original region back out
            weighted = weighted_extended[
                offset_y : offset_y + cropped.shape[0],
                offset_x : offset_x + cropped.shape[1],
            ]

        best_idx = np.argmax(weighted)
        best_y, best_x = np.unravel_index(best_idx, cropped.shape)

        # Translate back from crop-local to full-image coordinates.
        result_point = (
            int(crop_x + best_x),
            int(crop_y + best_y),
        )
        # Response map is uint8, so normalize the weighted score to [0, 1].
        result_confidence = float(weighted[best_y, best_x]) / 255.0

        return result_point, result_confidence

    def find_table_points(
        self,
        img: MatLike | PathLike[str],
        top_row: list[Point | None],
        cell_widths: list[int],
        cell_heights: list[int] | int,
        visual: bool = False,
        window: str = WINDOW,
        goals_width: Optional[int] = None,
        filtered: Optional[MatLike | PathLike[str]] = None,
        smooth: bool = False,
    ) -> "TableGrid":
        """
        Parse the image to a `TableGrid` structure that holds all of the
        intersections between horizontal and vertical rules, starting near the `left_top` point

        Args:
            img (MatLike): the input image of a table
            top_row (list[tuple[int, int]]): initial guess at topmost row of corner points (from template matching)
            cell_widths (list[int]): the expected widths of the cells (based on a header template)
            cell_heights (list[int]): the expected height of the rows of data.
                The last value from this list is used until the image has no more vertical space.
            visual (bool): whether to show intermediate steps
            window (str): the name of the OpenCV window to use for visualization
            goals_width (int | None): the width of the goal region when searching for the next point.
                If None, defaults to 1.5 * search_region
            filtered (MatLike | PathLike[str] | None): if provided, this image is used instead of
                calculating the filtered image from scratch
            smooth (bool): if True, smooth the grid after detection, using local heuristics

        Returns:
            a TableGrid object

        Raises:
            ValueError: if `cell_widths` is empty
        """

        if goals_width is None:
            goals_width = self._search_region * 3 // 2

        if not cell_widths:
            raise ValueError("cell_widths must contain at least one value")

        if not isinstance(img, np.ndarray):
            img = cv.imread(os.fspath(img))

        if filtered is None:
            filtered = self.apply(img, visual)
        else:
            if not isinstance(filtered, np.ndarray):
                filtered = cv.imread(os.fspath(filtered))

            filtered = ensure_gray(filtered)

        if visual:
            imu.show(filtered, window=window)

        if isinstance(cell_heights, int):
            cell_heights = [cell_heights]

        # Snap each provided top-row candidate to its nearest response peak; drop
        # candidates whose match confidence is too low.
        for i in range(len(top_row)):
            if top_row[i] is None:
                continue

            adjusted, confidence = self.find_nearest(
                filtered, top_row[i], int(self._search_region * 2)
            )

            if confidence < 0.15:
                top_row[i] = None
            else:
                top_row[i] = adjusted

        if not any(top_row):
            logger.error("No good starting candidates given")

        # resize all parameters according to scale
        img = cv.resize(img, None, fx=self._scale, fy=self._scale)

        if visual:
            imu.push(img)

        filtered = cv.resize(filtered, None, fx=self._scale, fy=self._scale)
        cell_widths = [int(w * self._scale) for w in cell_widths]
        cell_heights = [int(h * self._scale) for h in cell_heights]
        top_row = [
            (int(p[0] * self._scale), int(p[1] * self._scale))
            if p is not None
            else None
            for p in top_row
        ]
        search_region = int(self._search_region * self._scale)

        img_gray = ensure_gray(img)
        filtered_gray = ensure_gray(filtered)

        table_grower = TableGrower(
            filtered_gray,
            cell_widths,  # pyright: ignore
            cell_heights,  # pyright: ignore
            top_row,
            search_region,
            self._distance_penalty,
            self._look_distance,
            self._grow_threshold,
            self._skip_astar_threshold,
            self._min_rows,
            self._cuts,
            self._cut_fraction,
        )

        def show_grower_progress(wait: bool = False):
            # Overlay accepted corners (red) and current edge candidates (greenish,
            # brightness scaled by score) on a copy of the working image.
            img_orig = np.copy(img)
            corners = table_grower.get_all_corners()
            for y in range(len(corners)):
                for x in range(len(corners[y])):
                    if corners[y][x] is not None:
                        img_orig = imu.draw_points(
                            img_orig,
                            [corners[y][x]],
                            color=(0, 0, 255),
                            thickness=30,
                        )

            edge = table_grower.get_edge_points()

            for point, score in edge:
                color = (100, int(clamp(score * 255, 0, 255)), 100)
                imu.draw_point(img_orig, point, color=color, thickness=20)

            imu.show(img_orig, wait=wait)

        if visual:
            threshold = self._grow_threshold
            look_distance = self._look_distance

            # python implementation of rust loops, for visualization purposes
            # note this is a LOT slower
            while table_grower.grow_point(img_gray, filtered_gray) is not None:
                show_grower_progress()

            show_grower_progress(True)

            original_threshold = threshold

            loops_without_change = 0

            while not table_grower.is_table_complete():
                loops_without_change += 1

                # Give up after too many iterations without progress.
                if loops_without_change > 50:
                    break

                if table_grower.extrapolate_one(img_gray, filtered_gray) is not None:
                    show_grower_progress()

                    loops_without_change = 0

                    grown = False
                    while table_grower.grow_point(img_gray, filtered_gray) is not None:
                        show_grower_progress()
                        grown = True
                        # Relax the threshold back towards its original value.
                        threshold = min(0.1 + 0.9 * threshold, original_threshold)
                        table_grower.set_threshold(threshold)

                    if not grown:
                        threshold *= 0.9
                        table_grower.set_threshold(threshold)

                else:
                    # Extrapolation failed: lower the bar and retry growing.
                    threshold *= 0.9
                    table_grower.set_threshold(threshold)

                    if table_grower.grow_point(img_gray, filtered_gray) is not None:
                        show_grower_progress()
                        loops_without_change = 0

        else:
            table_grower.grow_table(img_gray, filtered_gray)

        if smooth:
            table_grower.smooth_grid()
        corners = table_grower.get_all_corners()
        logger.info(
            f"Table growth complete, found {len(corners)} rows and {len(corners[0])} columns"
        )
        # rescale corners back to original size
        if self._scale != 1.0:
            for y in range(len(corners)):
                for x in range(len(corners[y])):
                    if corners[y][x] is not None:
                        corners[y][x] = (
                            int(corners[y][x][0] / self._scale),  # pyright:ignore
                            int(corners[y][x][1] / self._scale),  # pyright:ignore
                        )

        return TableGrid(corners)  # pyright: ignore

    def _visualize_grid(self, img: MatLike, points: List[List[Point]]) -> None:
        """Visualize the detected grid points."""
        all_points = [point for row in points for point in row]
        drawn = imu.draw_points(img, all_points)
        imu.show(drawn, wait=True)

    def _visualize_path_finding(
        self,
        path: List[Point],
        current: Point,
        next_point: Point,
        previous_row_target: Optional[Point] = None,
        region_center: Optional[Point] = None,
        region_size: Optional[int] = None,
    ) -> None:
        """Visualize the path finding process for debugging."""
        global show_time

        screen = imu.pop()

        # if gray, convert to BGR
        if len(screen.shape) == 2 or screen.shape[2] == 1:
            debug_img = cv.cvtColor(screen, cv.COLOR_GRAY2BGR)
        else:
            debug_img = cast(MatLike, screen)

        debug_img = imu.draw_points(debug_img, path, color=(200, 200, 0), thickness=2)
        debug_img = imu.draw_points(
            debug_img, [current], color=(0, 255, 0), thickness=3
        )
        debug_img = imu.draw_points(
            debug_img, [next_point], color=(0, 0, 255), thickness=2
        )

        # Draw previous row target if available
        if previous_row_target is not None:
            debug_img = imu.draw_points(
                debug_img, [previous_row_target], color=(255, 0, 255), thickness=2
            )

        # Draw search region if available
        if region_center is not None and region_size is not None:
            top_left = (
                max(0, region_center[0] - region_size // 2),
                max(0, region_center[1] - region_size // 2),
            )
            bottom_right = (
                min(debug_img.shape[1], region_center[0] + region_size // 2),
                min(debug_img.shape[0], region_center[1] + region_size // 2),
            )
            cv.rectangle(
                debug_img,
                top_left,
                bottom_right,
                color=(255, 0, 0),
                thickness=2,
                lineType=cv.LINE_AA,
            )

        imu.push(debug_img)

        # Only refresh the on-screen window every 10th call to keep things fast.
        show_time += 1
        if show_time % 10 != 1:
            return

        imu.show(debug_img, title="Next column point", wait=False)
        # time.sleep(0.003)

    @log_calls(level=logging.DEBUG, include_return=True)
    def _astar(
        self,
        img: np.ndarray,
        start: tuple[int, int],
        goals: list[tuple[int, int]],
        direction: str,
    ) -> Optional[List[Point]]:
        """
        Find the best path between the start point and one of the goal points on the image

        The search runs on a cropped (and optionally scaled) region around the points
        for speed; the resulting path is mapped back to original image coordinates.
        Returns None when no goals are given, the region is degenerate, or no path exists.
        """

        if not goals:
            return None

        if self._scale != 1.0:
            img = cv.resize(img, None, fx=self._scale, fy=self._scale)
            start = (int(start[0] * self._scale), int(start[1] * self._scale))
            goals = [(int(g[0] * self._scale), int(g[1] * self._scale)) for g in goals]

        # calculate bounding box with margin
        all_points = goals + [start]
        xs = [p[0] for p in all_points]
        ys = [p[1] for p in all_points]

        margin = 30
        top_left = (max(0, min(xs) - margin), max(0, min(ys) - margin))
        bottom_right = (
            min(img.shape[1], max(xs) + margin),
            min(img.shape[0], max(ys) + margin),
        )

        # check bounds
        if (
            top_left[0] >= bottom_right[0]
            or top_left[1] >= bottom_right[1]
            or top_left[0] >= img.shape[1]
            or top_left[1] >= img.shape[0]
        ):
            return None

        # transform coordinates to cropped image
        start_local = (start[0] - top_left[0], start[1] - top_left[1])
        goals_local = [(g[0] - top_left[0], g[1] - top_left[1]) for g in goals]

        cropped = img[top_left[1] : bottom_right[1], top_left[0] : bottom_right[0]]

        if cropped.size == 0:
            return None

        path = rust_astar(cropped, start_local, goals_local, direction)

        if path is None:
            return None

        if self._scale != 1.0:
            path = [(int(p[0] / self._scale), int(p[1] / self._scale)) for p in path]
            top_left = (int(top_left[0] / self._scale), int(top_left[1] / self._scale))

        return [(p[0] + top_left[0], p[1] + top_left[1]) for p in path]
Detects table grid intersections using morphological filtering and template matching.
This detector implements a multi-stage pipeline:
- Binarization: Sauvola adaptive thresholding to handle varying lighting
- Morphological operations: Dilation to connect broken rule segments
- Cross-kernel matching: Template matching with a cross-shaped kernel to find rule intersections where horizontal and vertical lines meet
- Grid growing: Iterative point detection starting from a known seed point
The cross-kernel is designed to match the specific geometry of your table rules. It should be sized so that after morphology, it aligns with actual corner shapes.
Tuning Guidelines
- kernel_size: Increase if you need more selectivity (fewer false positives)
- cross_width/height: Should match rule thickness after morphology
- morph_size: Increase to connect more broken lines, but this thickens rules
- sauvola_k: Increase to threshold more aggressively (remove noise)
- search_region: Increase for documents with more warping/distortion
- distance_penalty: Increase to prefer corners closer to expected positions
Visual Debugging
Set visual=True in methods to see intermediate results and tune parameters.
    def __init__(
        self,
        kernel_size: int = 21,
        cross_width: int = 6,
        cross_height: Optional[int] = None,
        morph_size: Optional[int] = None,
        sauvola_k: float = 0.04,
        sauvola_window: int = 15,
        scale: float = 1.0,
        search_region: int = 40,
        distance_penalty: float = 0.4,
        skip_astar_threshold: float = 0.2,
        min_rows: int = 5,
        grow_threshold: float = 0.3,
        look_distance: int = 4,
        cuts: int = 3,
        cut_fraction: float = 0.5,
    ):
        """
        Args:
            kernel_size (int): the size of the cross kernel
                a larger kernel size often means that more penalty is applied, often leading
                to more sparse results
            cross_width (int): the width of one of the edges in the cross filter, should be
                roughly equal to the width of the rules in the image after morphology is applied
            cross_height (int | None): useful if the horizontal rules and vertical rules
                have different sizes
            morph_size (int | None): the size of the morphology operators that are applied before
                the cross kernel. 'bridges the gaps' of broken-up lines
            sauvola_k (float): threshold parameter for sauvola thresholding
            sauvola_window (int): window_size parameter for sauvola thresholding
            scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
            search_region (int): area in which to search for a new max value in `find_nearest` etc.
            distance_penalty (float): how much the point finding algorithm penalizes points that are further in the region [0, 1]
            skip_astar_threshold (float): minimum confidence score during table growing (based on a
                heuristic jump) above which astar pathfinding is skipped
            min_rows (int): minimum number of rows to find before stopping the table finding algorithm
            grow_threshold (float): the threshold for accepting a new point when growing the table
            look_distance (int): how many points away to look when calculating the median slope
            cuts (int): The amount of cuts (large deletions) to do in the grid during table growing
            cut_fraction (float): The portion of the already-chosen corner points to delete during cutting
        """
        self._validate_parameters(
            kernel_size,
            cross_width,
            cross_height,
            morph_size,
            search_region,
            sauvola_k,
            sauvola_window,
            distance_penalty,
            skip_astar_threshold,
            cuts,
            cut_fraction,
        )

        self._kernel_size = kernel_size
        self._cross_width = cross_width
        # Both default to cross_width when not given explicitly.
        self._cross_height = cross_width if cross_height is None else cross_height
        self._morph_size = morph_size if morph_size is not None else cross_width
        self._search_region = search_region
        self._sauvola_k = sauvola_k
        self._sauvola_window = sauvola_window
        self._distance_penalty = distance_penalty
        self._scale = scale
        self._skip_astar_threshold = skip_astar_threshold
        self._min_rows = min_rows
        self._grow_threshold = grow_threshold
        self._look_distance = look_distance
        self._cuts = cuts
        self._cut_fraction = cut_fraction

        self._cross_kernel = self._create_cross_kernel()
Arguments:
- kernel_size (int): the size of the cross kernel a larger kernel size often means that more penalty is applied, often leading to more sparse results
- cross_width (int): the width of one of the edges in the cross filter, should be roughly equal to the width of the rules in the image after morphology is applied
- cross_height (int | None): useful if the horizontal rules and vertical rules have different sizes
- morph_size (int | None): the size of the morphology operators that are applied before the cross kernel. 'bridges the gaps' of broken-up lines
- sauvola_k (float): threshold parameter for sauvola thresholding
- sauvola_window (int): window_size parameter for sauvola thresholding
- scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
- search_region (int): area in which to search for a new max value in `find_nearest` etc.
- distance_penalty (float): how much the point finding algorithm penalizes points that are further in the region [0, 1]
- skip_astar_threshold (float): minimum confidence score during table growing (based on a heuristic jump) above which astar pathfinding is skipped
- min_rows (int): minimum number of rows to find before stopping the table finding algorithm
- grow_threshold (float): the threshold for accepting a new point when growing the table
- look_distance (int): how many points away to look when calculating the median slope
- cuts (int): The amount of cuts (large deletions) to do in the grid during table growing
- cut_fraction (float): The portion of the already-chosen corner points to delete during cutting
    def apply(self, img: MatLike, visual: bool = False) -> MatLike:
        """
        Apply the grid detection filter to the input image.

        Args:
            img (MatLike): the input image
            visual (bool): whether to show intermediate steps

        Returns:
            MatLike: the filtered image, with high values (whiter pixels) at intersections of horizontal and vertical rules

        Raises:
            ValueError: if the input image is empty or None
        """

        if img is None or img.size == 0:
            raise ValueError("Input image is empty or None")

        binary = imu.sauvola(img, k=self._sauvola_k, window_size=self._sauvola_window)

        if visual:
            imu.show(binary, title="thresholded")

        binary = self._apply_morphology(binary)

        if visual:
            imu.show(binary, title="dilated")

        filtered = self._apply_cross_matching(binary)

        return filtered
Apply the grid detection filter to the input image.
Arguments:
- img (MatLike): the input image
- visual (bool): whether to show intermediate steps
Returns:
MatLike: the filtered image, with high values (whiter pixels) at intersections of horizontal and vertical rules
    @log_calls(level=logging.DEBUG, include_return=True)
    def find_nearest(
        self, filtered: MatLike, point: Point, region: Optional[int] = None
    ) -> Tuple[Point, float]:
        """
        Find the nearest 'corner match' in the image, along with its score [0,1]

        Args:
            filtered (MatLike): the filtered image (obtained through `apply`)
            point (tuple[int, int]): the approximate target point (x, y)
            region (None | int): alternative value for search region,
                overwriting the `__init__` parameter `region`

        Returns:
            Tuple[Point, float]: the best-matching point in image coordinates and its
                Gaussian-weighted confidence; returns the input point with confidence
                0.0 when the search region falls outside the image
        """

        if filtered is None or filtered.size == 0:
            raise ValueError("Filtered image is empty or None")

        region_size = region if region is not None else self._search_region
        x, y = point

        # Calculate crop boundaries (clamped to the image)
        crop_x = max(0, x - region_size // 2)
        crop_y = max(0, y - region_size // 2)
        crop_width = min(region_size, filtered.shape[1] - crop_x)
        crop_height = min(region_size, filtered.shape[0] - crop_y)

        # Handle edge cases
        if crop_width <= 0 or crop_height <= 0:
            logger.warning(f"Point {point} is outside image bounds")
            return point, 0.0

        cropped = filtered[crop_y : crop_y + crop_height, crop_x : crop_x + crop_width]

        if cropped.size == 0:
            return point, 0.0

        # Always apply Gaussian weighting by extending crop if needed
        if cropped.shape[0] == region_size and cropped.shape[1] == region_size:
            # Perfect size - apply weights directly
            weights = self._create_gaussian_weights(region_size)
            weighted = cropped.astype(np.float32) * weights
        else:
            # Extend crop to match region_size, apply weights, then restore
            extended = np.zeros((region_size, region_size), dtype=cropped.dtype)

            # Calculate offset to center the cropped region in extended array
            offset_y = (region_size - cropped.shape[0]) // 2
            offset_x = (region_size - cropped.shape[1]) // 2

            # Place cropped region in center of extended array
            extended[
                offset_y : offset_y + cropped.shape[0],
                offset_x : offset_x + cropped.shape[1],
            ] = cropped

            # Apply Gaussian weights to extended array
            weights = self._create_gaussian_weights(region_size)
            weighted_extended = extended.astype(np.float32) * weights

            # Extract the original region back out
            weighted = weighted_extended[
                offset_y : offset_y + cropped.shape[0],
                offset_x : offset_x + cropped.shape[1],
            ]

        best_idx = np.argmax(weighted)
        best_y, best_x = np.unravel_index(best_idx, cropped.shape)

        # Translate back from crop-local to full-image coordinates.
        result_point = (
            int(crop_x + best_x),
            int(crop_y + best_y),
        )
        # Response map is uint8, so normalize the weighted score to [0, 1].
        result_confidence = float(weighted[best_y, best_x]) / 255.0

        return result_point, result_confidence
Find the nearest 'corner match' in the image, along with its score [0,1]
Arguments:
436 def find_table_points( 437 self, 438 img: MatLike | PathLike[str], 439 top_row: list[Point | None], 440 cell_widths: list[int], 441 cell_heights: list[int] | int, 442 visual: bool = False, 443 window: str = WINDOW, 444 goals_width: Optional[int] = None, 445 filtered: Optional[MatLike | PathLike[str]] = None, 446 smooth: bool = False, 447 ) -> "TableGrid": 448 """ 449 Parse the image to a `TableGrid` structure that holds all of the 450 intersections between horizontal and vertical rules, starting near the `left_top` point 451 452 Args: 453 img (MatLike): the input image of a table 454 top_row (list[tuple[int, int]]): initial guess at topmost row of corner points (from template matching) 455 cell_widths (list[int]): the expected widths of the cells (based on a header template) 456 cell_heights (list[int]): the expected height of the rows of data. 457 The last value from this list is used until the image has no more vertical space. 458 visual (bool): whether to show intermediate steps 459 window (str): the name of the OpenCV window to use for visualization 460 goals_width (int | None): the width of the goal region when searching for the next point. 
461 If None, defaults to 1.5 * search_region 462 filtered (MatLike | PathLike[str] | None): if provided, this image is used instead of 463 calculating the filtered image from scratch 464 smooth (bool): if True, smooth the grid after detection, using local heuristics 465 466 Returns: 467 a TableGrid object 468 """ 469 470 if goals_width is None: 471 goals_width = self._search_region * 3 // 2 472 473 if not cell_widths: 474 raise ValueError("cell_widths must contain at least one value") 475 476 if not isinstance(img, np.ndarray): 477 img = cv.imread(os.fspath(img)) 478 479 if filtered is None: 480 filtered = self.apply(img, visual) 481 else: 482 if not isinstance(filtered, np.ndarray): 483 filtered = cv.imread(os.fspath(filtered)) 484 485 filtered = ensure_gray(filtered) 486 487 if visual: 488 imu.show(filtered, window=window) 489 490 if isinstance(cell_heights, int): 491 cell_heights = [cell_heights] 492 493 for i in range(len(top_row)): 494 if top_row[i] is None: 495 continue 496 497 adjusted, confidence = self.find_nearest( 498 filtered, top_row[i], int(self._search_region * 2) 499 ) 500 501 if confidence < 0.15: 502 top_row[i] = None 503 else: 504 top_row[i] = adjusted 505 506 if not any(top_row): 507 logger.error("No good starting candidates given") 508 509 # resize all parameters according to scale 510 img = cv.resize(img, None, fx=self._scale, fy=self._scale) 511 512 if visual: 513 imu.push(img) 514 515 filtered = cv.resize(filtered, None, fx=self._scale, fy=self._scale) 516 cell_widths = [int(w * self._scale) for w in cell_widths] 517 cell_heights = [int(h * self._scale) for h in cell_heights] 518 top_row = [ 519 (int(p[0] * self._scale), int(p[1] * self._scale)) 520 if p is not None 521 else None 522 for p in top_row 523 ] 524 search_region = int(self._search_region * self._scale) 525 526 img_gray = ensure_gray(img) 527 filtered_gray = ensure_gray(filtered) 528 529 table_grower = TableGrower( 530 filtered_gray, 531 cell_widths, # pyright: ignore 532 
cell_heights, # pyright: ignore 533 top_row, 534 search_region, 535 self._distance_penalty, 536 self._look_distance, 537 self._grow_threshold, 538 self._skip_astar_threshold, 539 self._min_rows, 540 self._cuts, 541 self._cut_fraction, 542 ) 543 544 def show_grower_progress(wait: bool = False): 545 img_orig = np.copy(img) 546 corners = table_grower.get_all_corners() 547 for y in range(len(corners)): 548 for x in range(len(corners[y])): 549 if corners[y][x] is not None: 550 img_orig = imu.draw_points( 551 img_orig, 552 [corners[y][x]], 553 color=(0, 0, 255), 554 thickness=30, 555 ) 556 557 edge = table_grower.get_edge_points() 558 559 for point, score in edge: 560 color = (100, int(clamp(score * 255, 0, 255)), 100) 561 imu.draw_point(img_orig, point, color=color, thickness=20) 562 563 imu.show(img_orig, wait=wait) 564 565 if visual: 566 threshold = self._grow_threshold 567 look_distance = self._look_distance 568 569 # python implementation of rust loops, for visualization purposes 570 # note this is a LOT slower 571 while table_grower.grow_point(img_gray, filtered_gray) is not None: 572 show_grower_progress() 573 574 show_grower_progress(True) 575 576 original_threshold = threshold 577 578 loops_without_change = 0 579 580 while not table_grower.is_table_complete(): 581 loops_without_change += 1 582 583 if loops_without_change > 50: 584 break 585 586 if table_grower.extrapolate_one(img_gray, filtered_gray) is not None: 587 show_grower_progress() 588 589 loops_without_change = 0 590 591 grown = False 592 while table_grower.grow_point(img_gray, filtered_gray) is not None: 593 show_grower_progress() 594 grown = True 595 threshold = min(0.1 + 0.9 * threshold, original_threshold) 596 table_grower.set_threshold(threshold) 597 598 if not grown: 599 threshold *= 0.9 600 table_grower.set_threshold(threshold) 601 602 else: 603 threshold *= 0.9 604 table_grower.set_threshold(threshold) 605 606 if table_grower.grow_point(img_gray, filtered_gray) is not None: 607 
show_grower_progress() 608 loops_without_change = 0 609 610 else: 611 table_grower.grow_table(img_gray, filtered_gray) 612 613 if smooth: 614 table_grower.smooth_grid() 615 corners = table_grower.get_all_corners() 616 logger.info( 617 f"Table growth complete, found {len(corners)} rows and {len(corners[0])} columns" 618 ) 619 # rescale corners back to original size 620 if self._scale != 1.0: 621 for y in range(len(corners)): 622 for x in range(len(corners[y])): 623 if corners[y][x] is not None: 624 corners[y][x] = ( 625 int(corners[y][x][0] / self._scale), # pyright:ignore 626 int(corners[y][x][1] / self._scale), # pyright:ignore 627 ) 628 629 return TableGrid(corners) # pyright: ignore
Parse the image to a TableGrid structure that holds all of the
intersections between horizontal and vertical rules, starting near the points given in top_row
Arguments:
- img (MatLike): the input image of a table
- top_row (list[tuple[int, int]]): initial guess at topmost row of corner points (from template matching)
- cell_widths (list[int]): the expected widths of the cells (based on a header template)
- cell_heights (list[int]): the expected height of the rows of data. The last value from this list is used until the image has no more vertical space.
- visual (bool): whether to show intermediate steps
- window (str): the name of the OpenCV window to use for visualization
- goals_width (int | None): the width of the goal region when searching for the next point. If None, defaults to 1.5 * search_region
- filtered (MatLike | PathLike[str] | None): if provided, this image is used instead of calculating the filtered image from scratch
- smooth (bool): if True, smooth the grid after detection, using local heuristics
Returns:
a TableGrid object
761class TableGrid(TableIndexer): 762 """ 763 A data class that allows segmenting the image into cells 764 """ 765 766 _right_offset: int | None = None 767 768 def __init__(self, points: list[list[Point]], right_offset: Optional[int] = None): 769 """ 770 Args: 771 points: a 2D list of intersections between hor. and vert. rules 772 """ 773 self._points = points 774 self._right_offset = right_offset 775 776 @property 777 def points(self) -> list[list[Point]]: 778 return self._points 779 780 def row(self, i: int) -> list[Point]: 781 assert 0 <= i and i < len(self._points) 782 return self._points[i] 783 784 @property 785 def cols(self) -> int: 786 if self._right_offset is not None: 787 return len(self.row(0)) - 2 788 else: 789 return len(self.row(0)) - 1 790 791 @property 792 def rows(self) -> int: 793 return len(self._points) - 1 794 795 @property 796 def right_offset(self) -> int | None: 797 return self._right_offset 798 799 @staticmethod 800 def from_split( 801 split_grids: Split["TableGrid"], offsets: Split[Point] 802 ) -> "TableGrid": 803 """ 804 Convert two ``TableGrid`` objects into one, that is able to segment the original (non-cropped) image 805 Args: 806 split_grids (Split[TableGrid]): a Split of TableGrid objects of the left and right part of the table 807 offsets (Split[tuple[int, int]]): a Split of the offsets in the image where the crop happened 808 """ 809 810 def offset_points(points, offset): 811 return [ 812 [ 813 (p[0] + offset[0], p[1] + offset[1]) if p is not None else None 814 for p in row 815 ] 816 for row in points 817 ] 818 819 split_points = split_grids.apply( 820 lambda grid, offset: offset_points(grid.points, offset), offsets 821 ) 822 points = [] 823 rows = min(split_grids.left.rows, split_grids.right.rows) 824 for row in range(rows + 1): 825 left_row = split_points.left[row] 826 right_row = split_points.right[row] 827 828 # Skip rows that contain None values 829 if any(p is None for p in left_row) or any(p is None for p in right_row): 830 
logger.warning( 831 f"Skipping row {row} in from_split due to incomplete grid data" 832 ) 833 continue 834 835 row_points = [] 836 row_points.extend(left_row) 837 row_points.extend(right_row) 838 points.append(row_points) 839 if not points: 840 raise ValueError( 841 "Cannot create TableGrid from split: no complete rows found in both grids" 842 ) 843 table_grid = TableGrid(points, split_grids.left.cols) 844 return table_grid 845 846 def save(self, path: str | Path): 847 """ 848 Persist the table grid to a JSON file. 849 850 Saves the grid corner points and right_offset (for split tables) to disk, 851 allowing the grid to be reloaded later without re-running detection. 852 853 Args: 854 path: Path to save the JSON file. 855 856 Example: 857 >>> grid = taulu.segment_table("table.png") 858 >>> grid.save("grid.json") 859 """ 860 with open(path, "w") as f: 861 json.dump({"points": self.points, "right_offset": self._right_offset}, f) 862 863 @staticmethod 864 def from_saved(path: str | Path) -> "TableGrid": 865 """ 866 Load a previously saved TableGrid from a JSON file. 867 868 Args: 869 path: Path to the JSON file created by `save()`. 870 871 Returns: 872 A TableGrid instance with the saved corner points. 
873 874 Example: 875 >>> grid = TableGrid.from_saved("grid.json") 876 >>> cell = grid.crop_cell(image, (0, 0)) 877 """ 878 with open(path, "r") as f: 879 points = json.load(f) 880 right_offset = points.get("right_offset", None) 881 points = [[(p[0], p[1]) for p in pointes] for pointes in points["points"]] 882 return TableGrid(points, right_offset) 883 884 def add_left_col(self, width: int): 885 for row in self._points: 886 first = row[0] 887 new_first = (first[0] - width, first[1]) 888 row.insert(0, new_first) 889 890 def add_top_row(self, height: int): 891 new_row = [] 892 for point in self._points[0]: 893 new_row.append((point[0], point[1] - height)) 894 895 self.points.insert(0, new_row) 896 897 def _surrounds(self, rect: list[Point], point: tuple[float, float]) -> bool: 898 """point: x, y""" 899 lt, rt, rb, lb = rect 900 x, y = point 901 902 top = _Rule(*lt, *rt) 903 if top._y_at_x(x) > y: 904 return False 905 906 right = _Rule(*rt, *rb) 907 if right._x_at_y(y) < x: 908 return False 909 910 bottom = _Rule(*lb, *rb) 911 if bottom._y_at_x(x) < y: 912 return False 913 914 left = _Rule(*lb, *lt) 915 if left._x_at_y(y) > x: 916 return False 917 918 return True 919 920 def cell(self, point: tuple[float, float]) -> tuple[int, int]: 921 """ 922 Get the cell indices (row, col) containing a pixel coordinate. 923 924 Searches through all cells to find which one contains the given point, 925 accounting for the non-rectangular (perspective-warped) cell boundaries. 926 927 Args: 928 point: Pixel coordinates (x, y) in the original image. 929 930 Returns: 931 (row, col) indices of the containing cell, or (-1, -1) if the point 932 is outside all cells. 933 934 Example: 935 >>> grid = taulu.segment_table("table.png") 936 >>> row, col = grid.cell((150, 200)) 937 >>> if row >= 0: 938 ... 
print(f"Point is in cell ({row}, {col})") 939 """ 940 for r in range(len(self._points) - 1): 941 offset = 0 942 for c in range(len(self.row(0)) - 1): 943 if self._right_offset is not None and c == self._right_offset: 944 offset = -1 945 continue 946 947 if self._surrounds( 948 [ 949 self._points[r][c], 950 self._points[r][c + 1], 951 self._points[r + 1][c + 1], 952 self._points[r + 1][c], 953 ], 954 point, 955 ): 956 return (r, c + offset) 957 958 return (-1, -1) 959 960 def cell_polygon(self, cell: tuple[int, int]) -> tuple[Point, Point, Point, Point]: 961 """ 962 Get the four corner coordinates of a cell. 963 964 Returns the corners in clockwise order starting from top-left, 965 suitable for use with OpenCV drawing functions. 966 967 Args: 968 cell: Cell indices as (row, col). 969 970 Returns: 971 Four corner points as ((x,y), (x,y), (x,y), (x,y)) in order: 972 top-left, top-right, bottom-right, bottom-left. 973 974 Raises: 975 TauluException: If row or col indices are out of bounds. 976 977 Example: 978 >>> lt, rt, rb, lb = grid.cell_polygon((0, 0)) 979 >>> pts = np.array([lt, rt, rb, lb], dtype=np.int32) 980 >>> cv2.polylines(image, [pts], True, (0, 255, 0), 2) 981 """ 982 r, c = cell 983 984 self._check_row_idx(r) 985 self._check_col_idx(c) 986 987 if self._right_offset is not None and c >= self._right_offset: 988 c = c + 1 989 990 return ( 991 self._points[r][c], 992 self._points[r][c + 1], 993 self._points[r + 1][c + 1], 994 self._points[r + 1][c], 995 ) 996 997 def region( 998 self, start: tuple[int, int], end: tuple[int, int] 999 ) -> tuple[Point, Point, Point, Point]: 1000 """ 1001 Get the bounding polygon for a rectangular region of cells. 1002 1003 Returns the four corner coordinates that enclose all cells from 1004 start to end (inclusive). 1005 1006 Args: 1007 start: Top-left cell as (row, col). 1008 end: Bottom-right cell as (row, col). 
1009 1010 Returns: 1011 Four corner points (lt, rt, rb, lb) enclosing the region, 1012 each as (x, y) pixel coordinates. 1013 1014 Raises: 1015 TauluException: If any row or col indices are out of bounds. 1016 1017 Example: 1018 >>> # Get bounding box for cells (0,0) through (2,3) 1019 >>> lt, rt, rb, lb = grid.region((0, 0), (2, 3)) 1020 """ 1021 r0, c0 = start 1022 r1, c1 = end 1023 1024 self._check_row_idx(r0) 1025 self._check_row_idx(r1) 1026 self._check_col_idx(c0) 1027 self._check_col_idx(c1) 1028 1029 if self._right_offset is not None and c0 >= self._right_offset: 1030 c0 = c0 + 1 1031 1032 if self._right_offset is not None and c1 >= self._right_offset: 1033 c1 = c1 + 1 1034 1035 lt = self._points[r0][c0] 1036 rt = self._points[r0][c1 + 1] 1037 rb = self._points[r1 + 1][c1 + 1] 1038 lb = self._points[r1 + 1][c0] 1039 1040 return lt, rt, rb, lb 1041 1042 def visualize_points(self, img: MatLike): 1043 """ 1044 Draw the detected table points on the image for visual verification 1045 """ 1046 import colorsys 1047 1048 def clr(index, total_steps): 1049 hue = index / total_steps # Normalized hue between 0 and 1 1050 r, g, b = colorsys.hsv_to_rgb(hue, 1.0, 1.0) 1051 return int(r * 255), int(g * 255), int(b * 255) 1052 1053 for i, row in enumerate(self._points): 1054 for p in row: 1055 cv.circle(img, p, 4, clr(i, len(self._points)), -1) 1056 1057 imu.show(img) 1058 1059 def text_regions( 1060 self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -3 1061 ) -> list[tuple[tuple[int, int], tuple[int, int]]]: 1062 def vertical_rule_crop(row: int, col: int): 1063 self._check_col_idx(col) 1064 self._check_row_idx(row) 1065 1066 if self._right_offset is not None and col >= self._right_offset: 1067 col = col + 1 1068 1069 top = self._points[row][col] 1070 bottom = self._points[row + 1][col] 1071 1072 left = int(min(top[0], bottom[0])) 1073 right = int(max(top[0], bottom[0])) 1074 1075 return img[ 1076 int(top[1]) - margin_y : int(bottom[1]) + margin_y, 1077 left 
- margin_x : right + margin_x, 1078 ] 1079 1080 result = [] 1081 1082 start = None 1083 for col in range(self.cols): 1084 crop = vertical_rule_crop(row, col) 1085 text_over_score = imu.text_presence_score(crop) 1086 text_over = text_over_score > -0.10 1087 1088 if not text_over: 1089 if start is not None: 1090 result.append(((row, start), (row, col - 1))) 1091 start = col 1092 1093 if start is not None: 1094 result.append(((row, start), (row, self.cols - 1))) 1095 1096 return result
A data class that allows segmenting the image into cells
768 def __init__(self, points: list[list[Point]], right_offset: Optional[int] = None): 769 """ 770 Args: 771 points: a 2D list of intersections between hor. and vert. rules 772 """ 773 self._points = points 774 self._right_offset = right_offset
Arguments:
- points: a 2D list of intersections between hor. and vert. rules
799 @staticmethod 800 def from_split( 801 split_grids: Split["TableGrid"], offsets: Split[Point] 802 ) -> "TableGrid": 803 """ 804 Convert two ``TableGrid`` objects into one, that is able to segment the original (non-cropped) image 805 Args: 806 split_grids (Split[TableGrid]): a Split of TableGrid objects of the left and right part of the table 807 offsets (Split[tuple[int, int]]): a Split of the offsets in the image where the crop happened 808 """ 809 810 def offset_points(points, offset): 811 return [ 812 [ 813 (p[0] + offset[0], p[1] + offset[1]) if p is not None else None 814 for p in row 815 ] 816 for row in points 817 ] 818 819 split_points = split_grids.apply( 820 lambda grid, offset: offset_points(grid.points, offset), offsets 821 ) 822 points = [] 823 rows = min(split_grids.left.rows, split_grids.right.rows) 824 for row in range(rows + 1): 825 left_row = split_points.left[row] 826 right_row = split_points.right[row] 827 828 # Skip rows that contain None values 829 if any(p is None for p in left_row) or any(p is None for p in right_row): 830 logger.warning( 831 f"Skipping row {row} in from_split due to incomplete grid data" 832 ) 833 continue 834 835 row_points = [] 836 row_points.extend(left_row) 837 row_points.extend(right_row) 838 points.append(row_points) 839 if not points: 840 raise ValueError( 841 "Cannot create TableGrid from split: no complete rows found in both grids" 842 ) 843 table_grid = TableGrid(points, split_grids.left.cols) 844 return table_grid
Convert two TableGrid objects into one, that is able to segment the original (non-cropped) image
Arguments:
- split_grids (Split[TableGrid]): a Split of TableGrid objects of the left and right part of the table
- offsets (Split[tuple[int, int]]): a Split of the offsets in the image where the crop happened
846 def save(self, path: str | Path): 847 """ 848 Persist the table grid to a JSON file. 849 850 Saves the grid corner points and right_offset (for split tables) to disk, 851 allowing the grid to be reloaded later without re-running detection. 852 853 Args: 854 path: Path to save the JSON file. 855 856 Example: 857 >>> grid = taulu.segment_table("table.png") 858 >>> grid.save("grid.json") 859 """ 860 with open(path, "w") as f: 861 json.dump({"points": self.points, "right_offset": self._right_offset}, f)
Persist the table grid to a JSON file.
Saves the grid corner points and right_offset (for split tables) to disk, allowing the grid to be reloaded later without re-running detection.
Arguments:
- path: Path to save the JSON file.
Example:
>>> grid = taulu.segment_table("table.png")
>>> grid.save("grid.json")
863 @staticmethod 864 def from_saved(path: str | Path) -> "TableGrid": 865 """ 866 Load a previously saved TableGrid from a JSON file. 867 868 Args: 869 path: Path to the JSON file created by `save()`. 870 871 Returns: 872 A TableGrid instance with the saved corner points. 873 874 Example: 875 >>> grid = TableGrid.from_saved("grid.json") 876 >>> cell = grid.crop_cell(image, (0, 0)) 877 """ 878 with open(path, "r") as f: 879 points = json.load(f) 880 right_offset = points.get("right_offset", None) 881 points = [[(p[0], p[1]) for p in pointes] for pointes in points["points"]] 882 return TableGrid(points, right_offset)
Load a previously saved TableGrid from a JSON file.
Arguments:
- path: Path to the JSON file created by `save()`.
Returns:
A TableGrid instance with the saved corner points.
Example:
>>> grid = TableGrid.from_saved("grid.json")
>>> cell = grid.crop_cell(image, (0, 0))
920 def cell(self, point: tuple[float, float]) -> tuple[int, int]: 921 """ 922 Get the cell indices (row, col) containing a pixel coordinate. 923 924 Searches through all cells to find which one contains the given point, 925 accounting for the non-rectangular (perspective-warped) cell boundaries. 926 927 Args: 928 point: Pixel coordinates (x, y) in the original image. 929 930 Returns: 931 (row, col) indices of the containing cell, or (-1, -1) if the point 932 is outside all cells. 933 934 Example: 935 >>> grid = taulu.segment_table("table.png") 936 >>> row, col = grid.cell((150, 200)) 937 >>> if row >= 0: 938 ... print(f"Point is in cell ({row}, {col})") 939 """ 940 for r in range(len(self._points) - 1): 941 offset = 0 942 for c in range(len(self.row(0)) - 1): 943 if self._right_offset is not None and c == self._right_offset: 944 offset = -1 945 continue 946 947 if self._surrounds( 948 [ 949 self._points[r][c], 950 self._points[r][c + 1], 951 self._points[r + 1][c + 1], 952 self._points[r + 1][c], 953 ], 954 point, 955 ): 956 return (r, c + offset) 957 958 return (-1, -1)
Get the cell indices (row, col) containing a pixel coordinate.
Searches through all cells to find which one contains the given point, accounting for the non-rectangular (perspective-warped) cell boundaries.
Arguments:
- point: Pixel coordinates (x, y) in the original image.
Returns:
(row, col) indices of the containing cell, or (-1, -1) if the point is outside all cells.
Example:
>>> grid = taulu.segment_table("table.png")
>>> row, col = grid.cell((150, 200))
>>> if row >= 0:
...     print(f"Point is in cell ({row}, {col})")
960 def cell_polygon(self, cell: tuple[int, int]) -> tuple[Point, Point, Point, Point]: 961 """ 962 Get the four corner coordinates of a cell. 963 964 Returns the corners in clockwise order starting from top-left, 965 suitable for use with OpenCV drawing functions. 966 967 Args: 968 cell: Cell indices as (row, col). 969 970 Returns: 971 Four corner points as ((x,y), (x,y), (x,y), (x,y)) in order: 972 top-left, top-right, bottom-right, bottom-left. 973 974 Raises: 975 TauluException: If row or col indices are out of bounds. 976 977 Example: 978 >>> lt, rt, rb, lb = grid.cell_polygon((0, 0)) 979 >>> pts = np.array([lt, rt, rb, lb], dtype=np.int32) 980 >>> cv2.polylines(image, [pts], True, (0, 255, 0), 2) 981 """ 982 r, c = cell 983 984 self._check_row_idx(r) 985 self._check_col_idx(c) 986 987 if self._right_offset is not None and c >= self._right_offset: 988 c = c + 1 989 990 return ( 991 self._points[r][c], 992 self._points[r][c + 1], 993 self._points[r + 1][c + 1], 994 self._points[r + 1][c], 995 )
Get the four corner coordinates of a cell.
Returns the corners in clockwise order starting from top-left, suitable for use with OpenCV drawing functions.
Arguments:
- cell: Cell indices as (row, col).
Returns:
Four corner points as ((x,y), (x,y), (x,y), (x,y)) in order: top-left, top-right, bottom-right, bottom-left.
Raises:
- TauluException: If row or col indices are out of bounds.
Example:
>>> lt, rt, rb, lb = grid.cell_polygon((0, 0))
>>> pts = np.array([lt, rt, rb, lb], dtype=np.int32)
>>> cv2.polylines(image, [pts], True, (0, 255, 0), 2)
997 def region( 998 self, start: tuple[int, int], end: tuple[int, int] 999 ) -> tuple[Point, Point, Point, Point]: 1000 """ 1001 Get the bounding polygon for a rectangular region of cells. 1002 1003 Returns the four corner coordinates that enclose all cells from 1004 start to end (inclusive). 1005 1006 Args: 1007 start: Top-left cell as (row, col). 1008 end: Bottom-right cell as (row, col). 1009 1010 Returns: 1011 Four corner points (lt, rt, rb, lb) enclosing the region, 1012 each as (x, y) pixel coordinates. 1013 1014 Raises: 1015 TauluException: If any row or col indices are out of bounds. 1016 1017 Example: 1018 >>> # Get bounding box for cells (0,0) through (2,3) 1019 >>> lt, rt, rb, lb = grid.region((0, 0), (2, 3)) 1020 """ 1021 r0, c0 = start 1022 r1, c1 = end 1023 1024 self._check_row_idx(r0) 1025 self._check_row_idx(r1) 1026 self._check_col_idx(c0) 1027 self._check_col_idx(c1) 1028 1029 if self._right_offset is not None and c0 >= self._right_offset: 1030 c0 = c0 + 1 1031 1032 if self._right_offset is not None and c1 >= self._right_offset: 1033 c1 = c1 + 1 1034 1035 lt = self._points[r0][c0] 1036 rt = self._points[r0][c1 + 1] 1037 rb = self._points[r1 + 1][c1 + 1] 1038 lb = self._points[r1 + 1][c0] 1039 1040 return lt, rt, rb, lb
Get the bounding polygon for a rectangular region of cells.
Returns the four corner coordinates that enclose all cells from start to end (inclusive).
Arguments:
- start: Top-left cell as (row, col).
- end: Bottom-right cell as (row, col).
Returns:
Four corner points (lt, rt, rb, lb) enclosing the region, each as (x, y) pixel coordinates.
Raises:
- TauluException: If any row or col indices are out of bounds.
Example:
>>> # Get bounding box for cells (0,0) through (2,3)
>>> lt, rt, rb, lb = grid.region((0, 0), (2, 3))
1042 def visualize_points(self, img: MatLike): 1043 """ 1044 Draw the detected table points on the image for visual verification 1045 """ 1046 import colorsys 1047 1048 def clr(index, total_steps): 1049 hue = index / total_steps # Normalized hue between 0 and 1 1050 r, g, b = colorsys.hsv_to_rgb(hue, 1.0, 1.0) 1051 return int(r * 255), int(g * 255), int(b * 255) 1052 1053 for i, row in enumerate(self._points): 1054 for p in row: 1055 cv.circle(img, p, 4, clr(i, len(self._points)), -1) 1056 1057 imu.show(img)
Draw the detected table points on the image for visual verification
1059 def text_regions( 1060 self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -3 1061 ) -> list[tuple[tuple[int, int], tuple[int, int]]]: 1062 def vertical_rule_crop(row: int, col: int): 1063 self._check_col_idx(col) 1064 self._check_row_idx(row) 1065 1066 if self._right_offset is not None and col >= self._right_offset: 1067 col = col + 1 1068 1069 top = self._points[row][col] 1070 bottom = self._points[row + 1][col] 1071 1072 left = int(min(top[0], bottom[0])) 1073 right = int(max(top[0], bottom[0])) 1074 1075 return img[ 1076 int(top[1]) - margin_y : int(bottom[1]) + margin_y, 1077 left - margin_x : right + margin_x, 1078 ] 1079 1080 result = [] 1081 1082 start = None 1083 for col in range(self.cols): 1084 crop = vertical_rule_crop(row, col) 1085 text_over_score = imu.text_presence_score(crop) 1086 text_over = text_over_score > -0.10 1087 1088 if not text_over: 1089 if start is not None: 1090 result.append(((row, start), (row, col - 1))) 1091 start = col 1092 1093 if start is not None: 1094 result.append(((row, start), (row, self.cols - 1))) 1095 1096 return result
Split the row into regions of continuous text
Returns list[tuple[tuple[int, int], tuple[int, int]]]: a list of spans as ((row, start col), (row, end col)) index pairs
23class HeaderAligner: 24 """ 25 Aligns table header templates to subject images using feature-based registration. 26 27 This class uses ORB (Oriented FAST and Rotated BRIEF) feature detection and 28 matching to compute a homography transformation that maps points from a header 29 template image to their corresponding locations in full table images. 30 31 ## How it Works 32 33 1. **Feature Detection**: Extracts ORB keypoints from both template and subject 34 2. **Feature Matching**: Finds correspondences using Hamming distance 35 3. **Filtering**: Keeps top matches and prunes based on spatial consistency 36 4. **Homography Estimation**: Computes perspective transform using RANSAC 37 38 The computed homography can then transform any point from template space to 39 image space, allowing you to locate table structures based on your annotation. 40 41 ## Preprocessing Options 42 43 - Set `k` parameter to apply Sauvola thresholding before feature detection. 44 This can improve matching on documents with variable lighting. 45 - Set `k=None` to use raw images (just extract blue channel for BGR images) 46 47 ## Tuning Guidelines 48 49 - **max_features**: Increase if matching fails on complex templates 50 - **match_fraction**: Decrease if you get many incorrect matches 51 - **max_dist**: Increase for documents with more warping/distortion 52 - **scale**: Decrease (<1.0) to speed up on high-resolution images 53 54 Args: 55 template (MatLike | PathLike[str] | str | None): Header template image or path. 56 This should contain a clear, representative view of the table header. 57 max_features (int): Maximum ORB features to detect. More features = slower 58 but potentially more robust matching. 59 patch_size (int): ORB patch size for feature extraction. 60 match_fraction (float): Fraction [0, 1] of matches to keep after sorting by 61 quality. Higher = more matches but potentially more outliers. 62 scale (float): Image downscaling factor (0, 1] for processing speed. 
63 max_dist (float): Maximum allowed distance (relative to image size) between 64 matched keypoints. Filters out spatially inconsistent matches. 65 k (float | None): Sauvola threshold parameter for preprocessing. If None, 66 no thresholding is applied. Typical range: 0.03-0.15. 67 """ 68 69 def __init__( 70 self, 71 template: None | MatLike | PathLike[str] | str = None, 72 max_features: int = 25_000, 73 patch_size: int = 31, 74 match_fraction: float = 0.6, 75 scale: float = 1.0, 76 max_dist: float = 1.00, 77 k: float | None = 0.05, 78 ): 79 """ 80 Args: 81 template (MatLike | str): (path of) template image, with the table template clearly visible 82 max_features (int): maximal number of features that will be extracted by ORB 83 patch_size (int): for ORB feature extractor 84 match_fraction (float): best fraction of matches that are kept 85 scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly) 86 max_dist (float): maximum distance (relative to image size) of matched features. 87 Increase this value if the warping between image and template needs to be more agressive 88 k (float | None): sauvola thresholding threshold value. If None, no sauvola thresholding is done 89 """ 90 91 if type(template) is str or type(template) is PathLike: 92 value = cv.imread(fspath(template)) 93 template = value 94 95 self._k = k 96 if scale > 1.0: 97 raise TauluException( 98 "Scaling up the image for header alignment is useless. 
Use 0 < scale <= 1.0" 99 ) 100 if scale == 0: 101 raise TauluException("Use 0 < scale <= 1.0") 102 103 self._scale = scale 104 self._template = self._scale_img(cast(MatLike, template)) 105 self._template_orig: None | MatLike = None 106 self._preprocess_template() 107 self._max_features = max_features 108 self._patch_size = patch_size 109 self._match_fraction = match_fraction 110 self._max_dist = max_dist 111 112 def _scale_img(self, img: MatLike) -> MatLike: 113 if self._scale == 1.0: 114 return img 115 116 return cv.resize(img, None, fx=self._scale, fy=self._scale) 117 118 def _unscale_img(self, img: MatLike) -> MatLike: 119 if self._scale == 1.0: 120 return img 121 122 return cv.resize(img, None, fx=1 / self._scale, fy=1 / self._scale) 123 124 def _unscale_homography(self, h: np.ndarray) -> np.ndarray: 125 if self._scale == 1.0: 126 return h 127 128 scale_matrix = np.diag([self._scale, self._scale, 1.0]) 129 # inv_scale_matrix = np.linalg.inv(scale_matrix) 130 inv_scale_matrix = np.diag([1.0 / self._scale, 1.0 / self._scale, 1.0]) 131 # return inv_scale_matrix @ h @ scale_matrix 132 return inv_scale_matrix @ h @ scale_matrix 133 134 @property 135 def template(self): 136 """The template image that subject images are aligned to""" 137 return self._template 138 139 @template.setter 140 def template(self, value: MatLike | str): 141 """Set the template image as a path or an image""" 142 143 if type(value) is str: 144 value = cv.imread(value) 145 self._template = value 146 147 # TODO: check if the image has the right properties (dimensions etc.) 
148 self._template = cast(MatLike, value) 149 150 self._preprocess_template() 151 152 def _preprocess_template(self): 153 self._template_orig = cv.cvtColor(self._template, cv.COLOR_BGR2GRAY) 154 if self._k is not None: 155 self._template = imu.sauvola(self._template, self._k) 156 self._template = cv.bitwise_not(self._template) 157 else: 158 _, _, self._template = cv.split(self._template) 159 160 def _preprocess_image(self, img: MatLike): 161 if self._template_orig is None: 162 raise TauluException("process the template first") 163 164 if self._k is not None: 165 img = imu.sauvola(img, self._k) 166 img = cv.bitwise_not(img) 167 else: 168 _, _, img = cv.split(img) 169 170 return img 171 172 @log_calls(level=logging.DEBUG, include_return=True) 173 def _find_transform_of_template_on( 174 self, im: MatLike, visual: bool = False, window: str = WINDOW 175 ): 176 im = self._scale_img(im) 177 # Detect ORB features and compute descriptors. 178 orb = cv.ORB_create( 179 self._max_features, # type:ignore 180 patchSize=self._patch_size, 181 ) 182 keypoints_im, descriptors_im = orb.detectAndCompute(im, None) 183 keypoints_tg, descriptors_tg = orb.detectAndCompute(self._template, None) 184 185 # Match features 186 matcher = cv.BFMatcher(cv.NORM_HAMMING, crossCheck=True) 187 matches = matcher.match(descriptors_im, descriptors_tg) 188 189 # Sort matches by score 190 matches = sorted(matches, key=lambda x: x.distance) 191 192 # Remove not so good matches 193 numGoodMatches = int(len(matches) * self._match_fraction) 194 matches = matches[:numGoodMatches] 195 196 if visual: 197 final_img_filtered = cv.drawMatches( 198 im, 199 keypoints_im, 200 self._template, 201 keypoints_tg, 202 matches[:10], 203 None, # type:ignore 204 cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS, 205 ) 206 imu.show(final_img_filtered, title="matches", window=window) 207 208 # Extract location of good matches 209 points1 = np.zeros((len(matches), 2), dtype=np.float32) 210 points2 = np.zeros((len(matches), 2), 
dtype=np.float32) 211 212 for i, match in enumerate(matches): 213 points1[i, :] = keypoints_tg[match.trainIdx].pt 214 points2[i, :] = keypoints_im[match.queryIdx].pt 215 216 # Prune reference points based upon distance between 217 # key points. This assumes a fairly good alignment to start with 218 # due to the protocol used (location of the sheets) 219 p1 = pd.DataFrame(data=points1) 220 p2 = pd.DataFrame(data=points2) 221 refdist = abs(p1 - p2) 222 223 mask_x = refdist.loc[:, 0] < (im.shape[0] * self._max_dist) 224 mask_y = refdist.loc[:, 1] < (im.shape[1] * self._max_dist) 225 mask = mask_x & mask_y 226 points1 = points1[mask.to_numpy()] 227 points2 = points2[mask.to_numpy()] 228 229 # Find homography 230 h, _ = cv.findHomography(points1, points2, cv.RANSAC) 231 232 return self._unscale_homography(h) 233 234 def view_alignment(self, img: MatLike, h: NDArray): 235 """ 236 Show the alignment of the template on the given image 237 by transforming it using the supplied transformation matrix `h` 238 and visualising both on different channels 239 240 Args: 241 img (MatLike): the image on which the template is transformed 242 h (NDArray): the transformation matrix 243 """ 244 245 im = imu.ensure_gray(img) 246 header = imu.ensure_gray(self._unscale_img(self._template)) 247 height, width = im.shape 248 249 header_warped = cv.warpPerspective(header, h, (width, height)) 250 251 merged = np.full((height, width, 3), 255, dtype=np.uint8) 252 253 merged[..., 1] = im 254 merged[..., 2] = header_warped 255 256 return imu.show(merged) 257 258 @log_calls(level=logging.DEBUG, include_return=True) 259 def align( 260 self, img: MatLike | str, visual: bool = False, window: str = WINDOW 261 ) -> NDArray: 262 """ 263 Calculates a homogeneous transformation matrix that maps pixels of 264 the template to the given image 265 """ 266 267 logger.info("Aligning header with supplied table image") 268 269 if type(img) is str: 270 img = cv.imread(img) 271 img = cast(MatLike, img) 272 273 img = 
self._preprocess_image(img) 274 275 h = self._find_transform_of_template_on(img, visual, window) 276 277 if visual: 278 self.view_alignment(img, h) 279 280 return h 281 282 def template_to_img(self, h: NDArray, point: Iterable[int]) -> tuple[int, int]: 283 """ 284 Transform the given point (in template-space) using the transformation h 285 (obtained through the `align` method) 286 287 Args: 288 h (NDArray): transformation matrix of shape (3, 3) 289 point (Iterable[int]): the to-be-transformed point, should conform to (x, y) 290 """ 291 292 point = np.array([[point[0], point[1], 1]]) # type:ignore 293 transformed = np.dot(h, point.T) # type:ignore 294 295 transformed /= transformed[2] 296 297 return int(transformed[0][0]), int(transformed[1][0])
Aligns table header templates to subject images using feature-based registration.
This class uses ORB (Oriented FAST and Rotated BRIEF) feature detection and matching to compute a homography transformation that maps points from a header template image to their corresponding locations in full table images.
How it Works
- Feature Detection: Extracts ORB keypoints from both template and subject
- Feature Matching: Finds correspondences using Hamming distance
- Filtering: Keeps top matches and prunes based on spatial consistency
- Homography Estimation: Computes perspective transform using RANSAC
The computed homography can then transform any point from template space to image space, allowing you to locate table structures based on your annotation.
Preprocessing Options
- Set the `k` parameter to apply Sauvola thresholding before feature detection. This can improve matching on documents with variable lighting.
- Set `k=None` to use raw images (just extract the blue channel for BGR images)
Tuning Guidelines
- max_features: Increase if matching fails on complex templates
- match_fraction: Decrease if you get many incorrect matches
- max_dist: Increase for documents with more warping/distortion
- scale: Decrease (<1.0) to speed up on high-resolution images
Arguments:
- template (MatLike | PathLike[str] | str | None): Header template image or path. This should contain a clear, representative view of the table header.
- max_features (int): Maximum ORB features to detect. More features = slower but potentially more robust matching.
- patch_size (int): ORB patch size for feature extraction.
- match_fraction (float): Fraction [0, 1] of matches to keep after sorting by quality. Higher = more matches but potentially more outliers.
- scale (float): Image downscaling factor (0, 1] for processing speed.
- max_dist (float): Maximum allowed distance (relative to image size) between matched keypoints. Filters out spatially inconsistent matches.
- k (float | None): Sauvola threshold parameter for preprocessing. If None, no thresholding is applied. Typical range: 0.03-0.15.
69 def __init__( 70 self, 71 template: None | MatLike | PathLike[str] | str = None, 72 max_features: int = 25_000, 73 patch_size: int = 31, 74 match_fraction: float = 0.6, 75 scale: float = 1.0, 76 max_dist: float = 1.00, 77 k: float | None = 0.05, 78 ): 79 """ 80 Args: 81 template (MatLike | str): (path of) template image, with the table template clearly visible 82 max_features (int): maximal number of features that will be extracted by ORB 83 patch_size (int): for ORB feature extractor 84 match_fraction (float): best fraction of matches that are kept 85 scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly) 86 max_dist (float): maximum distance (relative to image size) of matched features. 87 Increase this value if the warping between image and template needs to be more agressive 88 k (float | None): sauvola thresholding threshold value. If None, no sauvola thresholding is done 89 """ 90 91 if type(template) is str or type(template) is PathLike: 92 value = cv.imread(fspath(template)) 93 template = value 94 95 self._k = k 96 if scale > 1.0: 97 raise TauluException( 98 "Scaling up the image for header alignment is useless. Use 0 < scale <= 1.0" 99 ) 100 if scale == 0: 101 raise TauluException("Use 0 < scale <= 1.0") 102 103 self._scale = scale 104 self._template = self._scale_img(cast(MatLike, template)) 105 self._template_orig: None | MatLike = None 106 self._preprocess_template() 107 self._max_features = max_features 108 self._patch_size = patch_size 109 self._match_fraction = match_fraction 110 self._max_dist = max_dist
Arguments:
- template (MatLike | str): (path of) template image, with the table template clearly visible
- max_features (int): maximal number of features that will be extracted by ORB
- patch_size (int): for ORB feature extractor
- match_fraction (float): best fraction of matches that are kept
- scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
- max_dist (float): maximum distance (relative to image size) of matched features. Increase this value if the warping between image and template needs to be more aggressive
- k (float | None): sauvola thresholding threshold value. If None, no sauvola thresholding is done
    @property
    def template(self):
        """The template image that subject images are aligned to.

        Assigning to this property (see the setter) accepts either an image
        or a string path and re-runs template preprocessing.
        """
        return self._template
The template image that subject images are aligned to
234 def view_alignment(self, img: MatLike, h: NDArray): 235 """ 236 Show the alignment of the template on the given image 237 by transforming it using the supplied transformation matrix `h` 238 and visualising both on different channels 239 240 Args: 241 img (MatLike): the image on which the template is transformed 242 h (NDArray): the transformation matrix 243 """ 244 245 im = imu.ensure_gray(img) 246 header = imu.ensure_gray(self._unscale_img(self._template)) 247 height, width = im.shape 248 249 header_warped = cv.warpPerspective(header, h, (width, height)) 250 251 merged = np.full((height, width, 3), 255, dtype=np.uint8) 252 253 merged[..., 1] = im 254 merged[..., 2] = header_warped 255 256 return imu.show(merged)
Show the alignment of the template on the given image
by transforming it using the supplied transformation matrix h
and visualising both on different channels
Arguments:
- img (MatLike): the image on which the template is transformed
- h (NDArray): the transformation matrix
258 @log_calls(level=logging.DEBUG, include_return=True) 259 def align( 260 self, img: MatLike | str, visual: bool = False, window: str = WINDOW 261 ) -> NDArray: 262 """ 263 Calculates a homogeneous transformation matrix that maps pixels of 264 the template to the given image 265 """ 266 267 logger.info("Aligning header with supplied table image") 268 269 if type(img) is str: 270 img = cv.imread(img) 271 img = cast(MatLike, img) 272 273 img = self._preprocess_image(img) 274 275 h = self._find_transform_of_template_on(img, visual, window) 276 277 if visual: 278 self.view_alignment(img, h) 279 280 return h
Calculates a homogeneous transformation matrix that maps pixels of the template to the given image
282 def template_to_img(self, h: NDArray, point: Iterable[int]) -> tuple[int, int]: 283 """ 284 Transform the given point (in template-space) using the transformation h 285 (obtained through the `align` method) 286 287 Args: 288 h (NDArray): transformation matrix of shape (3, 3) 289 point (Iterable[int]): the to-be-transformed point, should conform to (x, y) 290 """ 291 292 point = np.array([[point[0], point[1], 1]]) # type:ignore 293 transformed = np.dot(h, point.T) # type:ignore 294 295 transformed /= transformed[2] 296 297 return int(transformed[0][0]), int(transformed[1][0])
Transform the given point (in template-space) using the transformation h
(obtained through the align method)
Arguments:
- h (NDArray): transformation matrix of shape (3, 3)
- point (Iterable[int]): the to-be-transformed point, should conform to (x, y)
class HeaderTemplate(TableIndexer):
    """
    A collection of rules (annotated lines) describing a table header, with
    methods for finding cell positions in a table image that adheres to it.
    """

    def __init__(self, rules: Iterable[Iterable[int]]):
        """
        A TableTemplate is a collection of rules of a table. This class implements methods
        for finding cell positions in a table image, given the template the image adheres to.

        Args:
            rules: 2D array of lines, where each line is represented as [x0, y0, x1, y1]
        """

        super().__init__()
        self._rules = [_Rule(*rule) for rule in rules]
        # horizontal rules sorted top-to-bottom, vertical ones left-to-right,
        # so that rule i borders row/column i
        self._h_rules = sorted(
            [rule for rule in self._rules if rule._is_horizontal()], key=lambda r: r._y
        )
        self._v_rules = sorted(
            [rule for rule in self._rules if rule._is_vertical()], key=lambda r: r._x
        )

    @log_calls(level=logging.DEBUG)
    def save(self, path: PathLike[str]):
        """
        Save the HeaderTemplate to the given path, as a json
        """

        data = {"rules": [r.to_dict() for r in self._rules]}

        with open(path, "w") as f:
            json.dump(data, f)

    @staticmethod
    @log_calls(level=logging.DEBUG)
    def from_saved(path: PathLike[str]) -> "HeaderTemplate":
        """Load a HeaderTemplate previously stored with `save`."""
        with open(path, "r") as f:
            data = json.load(f)
        rules = [[r["x0"], r["y0"], r["x1"], r["y1"]] for r in data["rules"]]

        return HeaderTemplate(rules)

    @property
    def cols(self) -> int:
        """Number of columns delimited by the vertical rules."""
        return len(self._v_rules) - 1

    @property
    def rows(self) -> int:
        """Number of rows delimited by the horizontal rules."""
        return len(self._h_rules) - 1

    @staticmethod
    @log_calls(level=logging.DEBUG)
    def annotate_image(
        template: MatLike | str, crop: Optional[PathLike[str]] = None, margin: int = 10
    ) -> "HeaderTemplate":
        """
        Utility method that allows users to create a template from a template image.

        The user is asked to click to annotate lines (two clicks per line);
        a right click cancels the pending start point, or removes the last
        finished line when no start point is pending.

        Args:
            template: the image on which to annotate the header lines
            crop (str | None): if str, crop the template image first, then do the annotation.
                The cropped image will be stored at the supplied path
            margin (int): margin to add around the cropping of the header
        """

        if isinstance(template, str):
            template = cv.imread(template)
        template = cast(MatLike, template)

        if crop is not None:
            # interactively crop first, and persist the crop for later reuse
            cropped = HeaderTemplate._crop(template, margin)
            cv.imwrite(os.fspath(crop), cropped)
            template = cropped

        start_point = None  # first click of the line in progress, as (y, x)
        lines: list[list[int]] = []

        anno_template = np.copy(template)

        def get_point(event, x, y, flags, params):
            nonlocal lines, start_point, anno_template
            _ = flags
            _ = params
            if event == cv.EVENT_LBUTTONDOWN:
                if start_point is not None:
                    # second click: finish the line as [x0, y0, x1, y1]
                    line: list[int] = [start_point[1], start_point[0], x, y]

                    cv.line(  # type:ignore
                        anno_template,  # type:ignore
                        (start_point[1], start_point[0]),
                        (x, y),
                        (0, 255, 0),
                        2,
                        cv.LINE_AA,
                    )
                    cv.imshow(constants.WINDOW, anno_template)  # type:ignore

                    lines.append(line)
                    start_point = None
                else:
                    start_point = (y, x)
            elif event == cv.EVENT_RBUTTONDOWN:
                start_point = None

                # remove the last annotation
                lines = lines[:-1]

                anno_template = np.copy(anno_template)

                # NOTE(review): this redraw writes onto `template` rather than
                # a fresh copy, so the removed line may remain visible in
                # `anno_template` — confirm whether the undo preview should be
                # rebuilt from a pristine image instead
                for line in lines:
                    cv.line(
                        template,
                        (line[0], line[1]),
                        (line[2], line[3]),
                        (0, 255, 0),
                        2,
                        cv.LINE_AA,
                    )

                cv.imshow(constants.WINDOW, template)

        print(ANNO_HELP)

        imu.show(anno_template, get_point, title="annotate the header")

        return HeaderTemplate(lines)

    @staticmethod
    @log_calls(level=logging.DEBUG, include_return=True)
    def _crop(template: MatLike, margin: int = 10) -> MatLike:
        """
        Crop the image to contain only the annotations, such that it can be used as the header image in the taulu workflow.

        The user clicks the four corners of the header region; the
        axis-aligned bounding box of those clicks (plus `margin`) is cut out.
        """

        points = []
        anno_template = np.copy(template)

        def get_point(event, x, y, flags, params):
            nonlocal points, anno_template
            _ = flags
            _ = params
            if event == cv.EVENT_LBUTTONDOWN:
                point = (x, y)

                cv.circle(  # type:ignore
                    anno_template,  # type:ignore
                    (x, y),
                    4,
                    (0, 255, 0),
                    2,
                )
                cv.imshow(constants.WINDOW, anno_template)  # type:ignore

                points.append(point)
            elif event == cv.EVENT_RBUTTONDOWN:
                # remove the last annotation
                points = points[:-1]

                anno_template = np.copy(anno_template)

                for p in points:
                    cv.circle(
                        anno_template,
                        p,
                        4,
                        (0, 255, 0),
                        2,
                    )

                cv.imshow(constants.WINDOW, anno_template)

        print(CROP_HELP)

        imu.show(anno_template, get_point, title="crop the header")

        assert len(points) == 4, (
            "you need to annotate the four corners of the table in order to crop it"
        )

        # axis-aligned bounding box of the four clicked corners
        points_np = np.array(points)
        x_min = max(int(np.min(points_np[:, 0])), 0)
        y_min = max(int(np.min(points_np[:, 1])), 0)
        width = int(np.max(points_np[:, 0])) - x_min
        height = int(np.max(points_np[:, 1])) - y_min

        # clamp the slice starts: a margin larger than the distance to the
        # image border previously produced a negative start index, which
        # NumPy interprets as counting from the end and crops a wrong region
        y0 = max(y_min - margin, 0)
        x0 = max(x_min - margin, 0)
        cropped = template[
            y0 : y_min + height + margin,
            x0 : x_min + width + margin,
        ]

        return cropped

    @staticmethod
    def from_vgg_annotation(annotation: str) -> "HeaderTemplate":
        """
        Create a TableTemplate from annotations made in [vgg](https://annotate.officialstatistics.org/), using the polylines tool.

        Args:
            annotation (str): the path of the annotation csv file
        """

        rules = []
        with open(annotation, "r") as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                shape_attributes = json.loads(row["region_shape_attributes"])
                if shape_attributes["name"] == "polyline":
                    x_points = shape_attributes["all_points_x"]
                    y_points = shape_attributes["all_points_y"]
                    # only straight two-point polylines define a rule
                    if len(x_points) == 2 and len(y_points) == 2:
                        rules.append(
                            [x_points[0], y_points[0], x_points[1], y_points[1]]
                        )

        return HeaderTemplate(rules)

    def cell_width(self, i: int) -> int:
        """Width in pixels of column `i` (gap between its vertical rules)."""
        self._check_col_idx(i)
        return int(self._v_rules[i + 1]._x - self._v_rules[i]._x)

    def cell_widths(self, start: int = 0) -> list[int]:
        """Widths of all columns from `start` up to the last one."""
        return [self.cell_width(i) for i in range(start, self.cols)]

    def cell_height(self, header_factor: float = 0.8) -> int:
        """Distance between the first two horizontal rules, scaled by `header_factor`."""
        return int((self._h_rules[1]._y - self._h_rules[0]._y) * header_factor)

    def cell_heights(self, header_factors: list[float] | float) -> list[int]:
        """Like `cell_height`, but for a list of factors (one height per factor)."""
        if isinstance(header_factors, float):
            header_factors = [header_factors]
        header_factors = cast(list, header_factors)
        return [
            int((self._h_rules[1]._y - self._h_rules[0]._y) * f) for f in header_factors
        ]

    def intersection(self, index: tuple[int, int]) -> tuple[float, float]:
        """
        Returns the intersection of the index[0]th horizontal rule and the
        index[1]th vertical rule
        """

        ints = self._h_rules[index[0]].intersection(self._v_rules[index[1]])
        assert ints is not None
        return ints

    def cell(self, point: tuple[float, float]) -> tuple[int, int]:
        """
        Get the cell index (row, col) that corresponds with the point (x, y) in the template image

        Args:
            point (tuple[float, float]): the coordinates in the template image

        Returns:
            tuple[int, int]: (row, col), or (-1, -1) when the point lies outside the grid
        """

        x, y = point

        row = -1
        col = -1

        for i in range(self.rows):
            y0 = self._h_rules[i]._y_at_x(x)
            y1 = self._h_rules[i + 1]._y_at_x(x)
            if min(y0, y1) <= y <= max(y0, y1):
                row = i
                break

        for i in range(self.cols):
            x0 = self._v_rules[i]._x_at_y(y)
            x1 = self._v_rules[i + 1]._x_at_y(y)
            if min(x0, x1) <= x <= max(x0, x1):
                col = i
                break

        if row == -1 or col == -1:
            return (-1, -1)

        return (row, col)

    def cell_polygon(
        self, cell: tuple[int, int]
    ) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
        """
        Return points (x,y) that make up a polygon around the requested cell
        (top left, top right, bottom right, bottom left)
        """

        row, col = cell

        self._check_col_idx(col)
        self._check_row_idx(row)

        top_rule = self._h_rules[row]
        bottom_rule = self._h_rules[row + 1]
        left_rule = self._v_rules[col]
        right_rule = self._v_rules[col + 1]

        # corner points are the pairwise intersections of the border rules
        top_left = top_rule.intersection(left_rule)
        top_right = top_rule.intersection(right_rule)
        bottom_left = bottom_rule.intersection(left_rule)
        bottom_right = bottom_rule.intersection(right_rule)

        if not all(
            point is not None
            for point in [top_left, top_right, bottom_left, bottom_right]
        ):
            raise TauluException("the lines around this cell do not intersect")

        return top_left, top_right, bottom_right, bottom_left  # type:ignore

    def region(
        self, start: tuple[int, int], end: tuple[int, int]
    ) -> tuple[Point, Point, Point, Point]:
        """
        Get the bounding box for the rectangular region that goes from start to end

        Returns:
            4 points: lt, rt, rb, lb, in format (x, y)
        """
        self._check_row_idx(start[0])
        self._check_row_idx(end[0])
        self._check_col_idx(start[1])
        self._check_col_idx(end[1])

        # the rules that surround this region
        top_rule = self._h_rules[start[0]]
        bottom_rule = self._h_rules[end[0] + 1]
        left_rule = self._v_rules[start[1]]
        right_rule = self._v_rules[end[1] + 1]

        # four points that will be the bounding polygon of the result,
        # which needs to be rectified
        top_left = top_rule.intersection(left_rule)
        top_right = top_rule.intersection(right_rule)
        bottom_left = bottom_rule.intersection(left_rule)
        bottom_right = bottom_rule.intersection(right_rule)

        if (
            top_left is None
            or top_right is None
            or bottom_left is None
            or bottom_right is None
        ):
            raise TauluException("the lines around this row do not intersect properly")

        def to_point(pnt) -> Point:
            return (int(pnt[0]), int(pnt[1]))

        return (
            to_point(top_left),
            to_point(top_right),
            to_point(bottom_right),
            to_point(bottom_left),
        )

    def text_regions(
        self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -20
    ) -> list[tuple[tuple[int, int], tuple[int, int]]]:
        """A header has no text rows, so this is invalid on a HeaderTemplate.

        Raises:
            TauluException: always
        """
        raise TauluException("text_regions should not be called on a HeaderTemplate")
Subclasses implement methods for going from a pixel in the input image to a table cell index, and cropping an image to the given table cell index.
152 def __init__(self, rules: Iterable[Iterable[int]]): 153 """ 154 A TableTemplate is a collection of rules of a table. This class implements methods 155 for finding cell positions in a table image, given the template the image adheres to. 156 157 Args: 158 rules: 2D array of lines, where each line is represented as [x0, y0, x1, y1] 159 """ 160 161 super().__init__() 162 self._rules = [_Rule(*rule) for rule in rules] 163 self._h_rules = sorted( 164 [rule for rule in self._rules if rule._is_horizontal()], key=lambda r: r._y 165 ) 166 self._v_rules = sorted( 167 [rule for rule in self._rules if rule._is_vertical()], key=lambda r: r._x 168 )
A TableTemplate is a collection of rules of a table. This class implements methods for finding cell positions in a table image, given the template the image adheres to.
Arguments:
- rules: 2D array of lines, where each line is represented as [x0, y0, x1, y1]
170 @log_calls(level=logging.DEBUG) 171 def save(self, path: PathLike[str]): 172 """ 173 Save the HeaderTemplate to the given path, as a json 174 """ 175 176 data = {"rules": [r.to_dict() for r in self._rules]} 177 178 with open(path, "w") as f: 179 json.dump(data, f)
Save the HeaderTemplate to the given path, as a json
    @staticmethod
    @log_calls(level=logging.DEBUG)
    def annotate_image(
        template: MatLike | str, crop: Optional[PathLike[str]] = None, margin: int = 10
    ) -> "HeaderTemplate":
        """
        Utility method that allows users to create a template from a template image.

        The user is asked to click to annotate lines (two clicks per line).
        A right click cancels the pending start point or, when none is
        pending, removes the last finished line.

        Args:
            template: the image on which to annotate the header lines
            crop (str | None): if str, crop the template image first, then do the annotation.
                The cropped image will be stored at the supplied path
            margin (int): margin to add around the cropping of the header
        """

        if type(template) is str:
            value = cv.imread(template)
            template = value
        template = cast(MatLike, template)

        if crop is not None:
            # interactively crop first, and persist the crop for later reuse
            cropped = HeaderTemplate._crop(template, margin)
            cv.imwrite(os.fspath(crop), cropped)
            template = cropped

        # first click of the line currently being annotated, stored as (y, x)
        start_point = None
        lines: list[list[int]] = []

        anno_template = np.copy(template)

        def get_point(event, x, y, flags, params):
            nonlocal lines, start_point, anno_template
            _ = flags
            _ = params
            if event == cv.EVENT_LBUTTONDOWN:
                if start_point is not None:
                    # second click: store the finished line as [x0, y0, x1, y1]
                    line: list[int] = [start_point[1], start_point[0], x, y]

                    cv.line(  # type:ignore
                        anno_template,  # type:ignore
                        (start_point[1], start_point[0]),
                        (x, y),
                        (0, 255, 0),
                        2,
                        cv.LINE_AA,
                    )
                    cv.imshow(constants.WINDOW, anno_template)  # type:ignore

                    lines.append(line)
                    start_point = None
                else:
                    start_point = (y, x)
            elif event == cv.EVENT_RBUTTONDOWN:
                start_point = None

                # remove the last annotation
                lines = lines[:-1]

                anno_template = np.copy(anno_template)

                # NOTE(review): this redraw writes onto `template` (not a
                # fresh copy), so the removed line can remain visible in
                # `anno_template` — confirm whether the undo preview should
                # be rebuilt from a pristine image instead
                for line in lines:
                    cv.line(
                        template,
                        (line[0], line[1]),
                        (line[2], line[3]),
                        (0, 255, 0),
                        2,
                        cv.LINE_AA,
                    )

                cv.imshow(constants.WINDOW, template)

        print(ANNO_HELP)

        imu.show(anno_template, get_point, title="annotate the header")

        return HeaderTemplate(lines)
Utility method that allows users to create a template from a template image.
The user is asked to click to annotate lines (two clicks per line).
Arguments:
- template: the image on which to annotate the header lines
- crop (str | None): if str, crop the template image first, then do the annotation. The cropped image will be stored at the supplied path
- margin (int): margin to add around the cropping of the header
359 @staticmethod 360 def from_vgg_annotation(annotation: str) -> "HeaderTemplate": 361 """ 362 Create a TableTemplate from annotations made in [vgg](https://annotate.officialstatistics.org/), using the polylines tool. 363 364 Args: 365 annotation (str): the path of the annotation csv file 366 """ 367 368 rules = [] 369 with open(annotation, "r") as csvfile: 370 reader = csv.DictReader(csvfile) 371 for row in reader: 372 shape_attributes = json.loads(row["region_shape_attributes"]) 373 if shape_attributes["name"] == "polyline": 374 x_points = shape_attributes["all_points_x"] 375 y_points = shape_attributes["all_points_y"] 376 if len(x_points) == 2 and len(y_points) == 2: 377 rules.append( 378 [x_points[0], y_points[0], x_points[1], y_points[1]] 379 ) 380 381 return HeaderTemplate(rules)
Create a TableTemplate from annotations made in vgg, using the polylines tool.
Arguments:
- annotation (str): the path of the annotation csv file
393 def cell_heights(self, header_factors: list[float] | float) -> list[int]: 394 if isinstance(header_factors, float): 395 header_factors = [header_factors] 396 header_factors = cast(list, header_factors) 397 return [ 398 int((self._h_rules[1]._y - self._h_rules[0]._y) * f) for f in header_factors 399 ]
    def intersection(self, index: tuple[int, int]) -> tuple[float, float]:
        """
        Returns the intersection of the index[0]th horizontal rule and the
        index[1]th vertical rule
        """

        ints = self._h_rules[index[0]].intersection(self._v_rules[index[1]])
        # the header rules are annotated to cross, so a missing intersection
        # indicates a malformed template
        assert ints is not None
        return ints
Returns the intersection of the index[0]th horizontal rule and the index[1]th vertical rule
411 def cell(self, point: tuple[float, float]) -> tuple[int, int]: 412 """ 413 Get the cell index (row, col) that corresponds with the point (x, y) in the template image 414 415 Args: 416 point (tuple[float, float]): the coordinates in the template image 417 418 Returns: 419 tuple[int, int]: (row, col) 420 """ 421 422 x, y = point 423 424 row = -1 425 col = -1 426 427 for i in range(self.rows): 428 y0 = self._h_rules[i]._y_at_x(x) 429 y1 = self._h_rules[i + 1]._y_at_x(x) 430 if min(y0, y1) <= y <= max(y0, y1): 431 row = i 432 break 433 434 for i in range(self.cols): 435 x0 = self._v_rules[i]._x_at_y(y) 436 x1 = self._v_rules[i + 1]._x_at_y(y) 437 if min(x0, x1) <= x <= max(x0, x1): 438 col = i 439 break 440 441 if row == -1 or col == -1: 442 return (-1, -1) 443 444 return (row, col)
Get the cell index (row, col) that corresponds with the point (x, y) in the template image
Arguments:
- point (tuple[float, float]): the coordinates in the template image
Returns:
tuple[int, int]: (row, col)
446 def cell_polygon( 447 self, cell: tuple[int, int] 448 ) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]: 449 """ 450 Return points (x,y) that make up a polygon around the requested cell 451 (top left, top right, bottom right, bottom left) 452 """ 453 454 row, col = cell 455 456 self._check_col_idx(col) 457 self._check_row_idx(row) 458 459 top_rule = self._h_rules[row] 460 bottom_rule = self._h_rules[row + 1] 461 left_rule = self._v_rules[col] 462 right_rule = self._v_rules[col + 1] 463 464 # Calculate corner points using intersections 465 top_left = top_rule.intersection(left_rule) 466 top_right = top_rule.intersection(right_rule) 467 bottom_left = bottom_rule.intersection(left_rule) 468 bottom_right = bottom_rule.intersection(right_rule) 469 470 if not all( 471 [ 472 point is not None 473 for point in [top_left, top_right, bottom_left, bottom_right] 474 ] 475 ): 476 raise TauluException("the lines around this cell do not intersect") 477 478 return top_left, top_right, bottom_right, bottom_left # type:ignore
Return points (x,y) that make up a polygon around the requested cell (top left, top right, bottom right, bottom left)
480 def region( 481 self, start: tuple[int, int], end: tuple[int, int] 482 ) -> tuple[Point, Point, Point, Point]: 483 self._check_row_idx(start[0]) 484 self._check_row_idx(end[0]) 485 self._check_col_idx(start[1]) 486 self._check_col_idx(end[1]) 487 488 # the rules that surround this row 489 top_rule = self._h_rules[start[0]] 490 bottom_rule = self._h_rules[end[0] + 1] 491 left_rule = self._v_rules[start[1]] 492 right_rule = self._v_rules[end[1] + 1] 493 494 # four points that will be the bounding polygon of the result, 495 # which needs to be rectified 496 top_left = top_rule.intersection(left_rule) 497 top_right = top_rule.intersection(right_rule) 498 bottom_left = bottom_rule.intersection(left_rule) 499 bottom_right = bottom_rule.intersection(right_rule) 500 501 if ( 502 top_left is None 503 or top_right is None 504 or bottom_left is None 505 or bottom_right is None 506 ): 507 raise TauluException("the lines around this row do not intersect properly") 508 509 def to_point(pnt) -> Point: 510 return (int(pnt[0]), int(pnt[1])) 511 512 return ( 513 to_point(top_left), 514 to_point(top_right), 515 to_point(bottom_right), 516 to_point(bottom_left), 517 )
Get the bounding box for the rectangular region that goes from start to end
Returns:
4 points: lt, rt, rb, lb, in format (x, y)
519 def text_regions( 520 self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -20 521 ) -> list[tuple[tuple[int, int], tuple[int, int]]]: 522 raise TauluException("text_regions should not be called on a HeaderTemplate")
Split the row into regions of continuous text
Returns list[tuple[int, int]]: a list of spans (start col, end col)
72class TableIndexer(ABC): 73 """ 74 Subclasses implement methods for going from a pixel in the input image to a table cell index, 75 and cropping an image to the given table cell index. 76 """ 77 78 def __init__(self): 79 self._col_offset = 0 80 81 @property 82 def col_offset(self) -> int: 83 return self._col_offset 84 85 @col_offset.setter 86 def col_offset(self, value: int): 87 assert value >= 0 88 self._col_offset = value 89 90 @property 91 @abstractmethod 92 def cols(self) -> int: 93 pass 94 95 @property 96 @abstractmethod 97 def rows(self) -> int: 98 pass 99 100 def cells(self) -> Generator[tuple[int, int], None, None]: 101 """ 102 Generate all cell indices in row-major order. 103 104 Yields (row, col) tuples for every cell in the table, iterating 105 through each row from left to right, top to bottom. 106 107 Yields: 108 tuple[int, int]: Cell indices as (row, col). 109 110 Example: 111 >>> for row, col in grid.cells(): 112 ... cell_img = grid.crop_cell(image, (row, col)) 113 ... process(cell_img) 114 """ 115 for row in range(self.rows): 116 for col in range(self.cols): 117 yield (row, col) 118 119 def _check_row_idx(self, row: int): 120 if row < 0: 121 raise TauluException("row number needs to be positive or zero") 122 if row >= self.rows: 123 raise TauluException(f"row number too high: {row} >= {self.rows}") 124 125 def _check_col_idx(self, col: int): 126 if col < 0: 127 raise TauluException("col number needs to be positive or zero") 128 if col >= self.cols: 129 raise TauluException(f"col number too high: {col} >= {self.cols}") 130 131 @abstractmethod 132 def cell(self, point: tuple[float, float]) -> tuple[int, int]: 133 """ 134 Returns the coordinate (row, col) of the cell that contains the given position 135 136 Args: 137 point (tuple[float, float]): a location in the input image 138 139 Returns: 140 tuple[int, int]: the cell index (row, col) that contains the given point 141 """ 142 pass 143 144 @abstractmethod 145 def cell_polygon( 146 self, cell: 
tuple[int, int] 147 ) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]: 148 """returns the polygon (used in e.g. opencv) that enscribes the cell at the given cell position""" 149 pass 150 151 def _highlight_cell( 152 self, 153 image: MatLike, 154 cell: tuple[int, int], 155 color: tuple[int, int, int] = (0, 0, 255), 156 thickness: int = 2, 157 ): 158 polygon = self.cell_polygon(cell) 159 points = np.int32(list(polygon)) # type:ignore 160 cv.polylines(image, [points], True, color, thickness, cv.LINE_AA) # type:ignore 161 cv.putText( 162 image, 163 str(cell), 164 (int(polygon[3][0] + 10), int(polygon[3][1] - 10)), 165 cv.FONT_HERSHEY_PLAIN, 166 2.0, 167 (255, 255, 255), 168 2, 169 ) 170 171 def highlight_all_cells( 172 self, 173 image: MatLike, 174 color: tuple[int, int, int] = (0, 0, 255), 175 thickness: int = 1, 176 ) -> MatLike: 177 img = np.copy(image) 178 179 for cell in self.cells(): 180 self._highlight_cell(img, cell, color, thickness) 181 182 return img 183 184 def select_one_cell( 185 self, 186 image: MatLike, 187 window: str = WINDOW, 188 color: tuple[int, int, int] = (255, 0, 0), 189 thickness: int = 2, 190 ) -> tuple[int, int] | None: 191 clicked = None 192 193 def click_event(event, x, y, flags, params): 194 nonlocal clicked 195 196 img = np.copy(image) 197 _ = flags 198 _ = params 199 if event == cv.EVENT_LBUTTONDOWN: 200 cell = self.cell((x, y)) 201 if cell[0] >= 0: 202 clicked = cell 203 else: 204 return 205 self._highlight_cell(img, cell, color, thickness) 206 cv.imshow(window, img) 207 208 imu.show(image, click_event=click_event, title="select one cell", window=window) 209 210 return clicked 211 212 def show_cells( 213 self, image: MatLike | os.PathLike[str] | str, window: str = WINDOW 214 ) -> list[tuple[int, int]]: 215 if not isinstance(image, np.ndarray): 216 image = cv.imread(os.fspath(image)) 217 218 img = np.copy(image) 219 220 cells = [] 221 222 def click_event(event, x, y, flags, params): 223 _ = flags 224 _ = 
params 225 if event == cv.EVENT_LBUTTONDOWN: 226 cell = self.cell((x, y)) 227 if cell[0] >= 0: 228 cells.append(cell) 229 else: 230 return 231 self._highlight_cell(img, cell) 232 cv.imshow(window, img) 233 234 imu.show( 235 img, 236 click_event=click_event, 237 title="click to highlight cells", 238 window=window, 239 ) 240 241 return cells 242 243 @abstractmethod 244 def region( 245 self, 246 start: tuple[int, int], 247 end: tuple[int, int], 248 ) -> tuple[Point, Point, Point, Point]: 249 """ 250 Get the bounding box for the rectangular region that goes from start to end 251 252 Returns: 253 4 points: lt, rt, rb, lb, in format (x, y) 254 """ 255 pass 256 257 def crop_region( 258 self, 259 image: MatLike, 260 start: tuple[int, int], 261 end: tuple[int, int], 262 margin: int = 0, 263 margin_top: int | None = None, 264 margin_bottom: int | None = None, 265 margin_left: int | None = None, 266 margin_right: int | None = None, 267 margin_y: int | None = None, 268 margin_x: int | None = None, 269 ) -> MatLike: 270 """ 271 Extract a multi-cell region from the image with perspective correction. 272 273 Crops the image to include all cells from start to end (inclusive), 274 applying a perspective transform to produce a rectangular output. 275 276 Args: 277 image: Source image (BGR or grayscale). 278 start: Top-left cell as (row, col). 279 end: Bottom-right cell as (row, col). 280 margin: Uniform margin in pixels (default 0). 281 margin_top: Override top margin. 282 margin_bottom: Override bottom margin. 283 margin_left: Override left margin. 284 margin_right: Override right margin. 285 margin_y: Override vertical margins (top and bottom). 286 margin_x: Override horizontal margins (left and right). 287 288 Returns: 289 Cropped and perspective-corrected image. 
290 291 Example: 292 >>> # Extract a 3x2 region starting at cell (1, 0) 293 >>> region_img = grid.crop_region(image, (1, 0), (3, 1)) 294 """ 295 296 region = self.region(start, end) 297 298 lt, rt, rb, lb = _apply_margin( 299 *region, 300 margin=margin, 301 margin_top=margin_top, 302 margin_bottom=margin_bottom, 303 margin_left=margin_left, 304 margin_right=margin_right, 305 margin_y=margin_y, 306 margin_x=margin_x, 307 ) 308 309 # apply margins according to priority: 310 # margin_top > margin_y > margin (etc.) 311 312 w = (rt[0] - lt[0] + rb[0] - lb[0]) / 2 313 h = (rb[1] - rt[1] + lb[1] - lt[1]) / 2 314 315 # crop by doing a perspective transform to the desired quad 316 src_pts = np.array([lt, rt, rb, lb], dtype="float32") 317 dst_pts = np.array([[0, 0], [w, 0], [w, h], [0, h]], dtype="float32") 318 M = cv.getPerspectiveTransform(src_pts, dst_pts) 319 warped = cv.warpPerspective(image, M, (int(w), int(h))) # type:ignore 320 321 return warped 322 323 @abstractmethod 324 def text_regions( 325 self, img: MatLike, row: int, margin_x: int = 0, margin_y: int = 0 326 ) -> list[tuple[tuple[int, int], tuple[int, int]]]: 327 """ 328 Split the row into regions of continuous text 329 330 Returns 331 list[tuple[int, int]]: a list of spans (start col, end col) 332 """ 333 334 pass 335 336 def crop_cell(self, image, cell: tuple[int, int], margin: int = 0) -> MatLike: 337 """ 338 Extract a single cell from the image with perspective correction. 339 340 Convenience method equivalent to `crop_region(image, cell, cell, margin)`. 341 342 Args: 343 image: Source image (BGR or grayscale). 344 cell: Cell indices as (row, col). 345 margin: Padding in pixels around the cell (default 0). 346 347 Returns: 348 Cropped and perspective-corrected cell image. 349 350 Example: 351 >>> cell_img = grid.crop_cell(image, (0, 0)) 352 >>> cv2.imwrite("cell_0_0.png", cell_img) 353 """ 354 return self.crop_region(image, cell, cell, margin)
Subclasses implement methods for going from a pixel in the input image to a table cell index, and cropping an image to the given table cell index.
100 def cells(self) -> Generator[tuple[int, int], None, None]: 101 """ 102 Generate all cell indices in row-major order. 103 104 Yields (row, col) tuples for every cell in the table, iterating 105 through each row from left to right, top to bottom. 106 107 Yields: 108 tuple[int, int]: Cell indices as (row, col). 109 110 Example: 111 >>> for row, col in grid.cells(): 112 ... cell_img = grid.crop_cell(image, (row, col)) 113 ... process(cell_img) 114 """ 115 for row in range(self.rows): 116 for col in range(self.cols): 117 yield (row, col)
Generate all cell indices in row-major order.
Yields (row, col) tuples for every cell in the table, iterating through each row from left to right, top to bottom.
Yields:
tuple[int, int]: Cell indices as (row, col).
Example:
>>> for row, col in grid.cells():
...     cell_img = grid.crop_cell(image, (row, col))
...     process(cell_img)
131 @abstractmethod 132 def cell(self, point: tuple[float, float]) -> tuple[int, int]: 133 """ 134 Returns the coordinate (row, col) of the cell that contains the given position 135 136 Args: 137 point (tuple[float, float]): a location in the input image 138 139 Returns: 140 tuple[int, int]: the cell index (row, col) that contains the given point 141 """ 142 pass
Returns the coordinate (row, col) of the cell that contains the given position
Arguments:
- point (tuple[float, float]): a location in the input image
Returns:
tuple[int, int]: the cell index (row, col) that contains the given point
144 @abstractmethod 145 def cell_polygon( 146 self, cell: tuple[int, int] 147 ) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]: 148 """returns the polygon (used in e.g. opencv) that enscribes the cell at the given cell position""" 149 pass
returns the polygon (used in e.g. opencv) that encloses the cell at the given cell position
184 def select_one_cell( 185 self, 186 image: MatLike, 187 window: str = WINDOW, 188 color: tuple[int, int, int] = (255, 0, 0), 189 thickness: int = 2, 190 ) -> tuple[int, int] | None: 191 clicked = None 192 193 def click_event(event, x, y, flags, params): 194 nonlocal clicked 195 196 img = np.copy(image) 197 _ = flags 198 _ = params 199 if event == cv.EVENT_LBUTTONDOWN: 200 cell = self.cell((x, y)) 201 if cell[0] >= 0: 202 clicked = cell 203 else: 204 return 205 self._highlight_cell(img, cell, color, thickness) 206 cv.imshow(window, img) 207 208 imu.show(image, click_event=click_event, title="select one cell", window=window) 209 210 return clicked
212 def show_cells( 213 self, image: MatLike | os.PathLike[str] | str, window: str = WINDOW 214 ) -> list[tuple[int, int]]: 215 if not isinstance(image, np.ndarray): 216 image = cv.imread(os.fspath(image)) 217 218 img = np.copy(image) 219 220 cells = [] 221 222 def click_event(event, x, y, flags, params): 223 _ = flags 224 _ = params 225 if event == cv.EVENT_LBUTTONDOWN: 226 cell = self.cell((x, y)) 227 if cell[0] >= 0: 228 cells.append(cell) 229 else: 230 return 231 self._highlight_cell(img, cell) 232 cv.imshow(window, img) 233 234 imu.show( 235 img, 236 click_event=click_event, 237 title="click to highlight cells", 238 window=window, 239 ) 240 241 return cells
243 @abstractmethod 244 def region( 245 self, 246 start: tuple[int, int], 247 end: tuple[int, int], 248 ) -> tuple[Point, Point, Point, Point]: 249 """ 250 Get the bounding box for the rectangular region that goes from start to end 251 252 Returns: 253 4 points: lt, rt, rb, lb, in format (x, y) 254 """ 255 pass
Get the bounding box for the rectangular region that goes from start to end
Returns:
4 points: lt, rt, rb, lb, in format (x, y)
257 def crop_region( 258 self, 259 image: MatLike, 260 start: tuple[int, int], 261 end: tuple[int, int], 262 margin: int = 0, 263 margin_top: int | None = None, 264 margin_bottom: int | None = None, 265 margin_left: int | None = None, 266 margin_right: int | None = None, 267 margin_y: int | None = None, 268 margin_x: int | None = None, 269 ) -> MatLike: 270 """ 271 Extract a multi-cell region from the image with perspective correction. 272 273 Crops the image to include all cells from start to end (inclusive), 274 applying a perspective transform to produce a rectangular output. 275 276 Args: 277 image: Source image (BGR or grayscale). 278 start: Top-left cell as (row, col). 279 end: Bottom-right cell as (row, col). 280 margin: Uniform margin in pixels (default 0). 281 margin_top: Override top margin. 282 margin_bottom: Override bottom margin. 283 margin_left: Override left margin. 284 margin_right: Override right margin. 285 margin_y: Override vertical margins (top and bottom). 286 margin_x: Override horizontal margins (left and right). 287 288 Returns: 289 Cropped and perspective-corrected image. 290 291 Example: 292 >>> # Extract a 3x2 region starting at cell (1, 0) 293 >>> region_img = grid.crop_region(image, (1, 0), (3, 1)) 294 """ 295 296 region = self.region(start, end) 297 298 lt, rt, rb, lb = _apply_margin( 299 *region, 300 margin=margin, 301 margin_top=margin_top, 302 margin_bottom=margin_bottom, 303 margin_left=margin_left, 304 margin_right=margin_right, 305 margin_y=margin_y, 306 margin_x=margin_x, 307 ) 308 309 # apply margins according to priority: 310 # margin_top > margin_y > margin (etc.) 
311 312 w = (rt[0] - lt[0] + rb[0] - lb[0]) / 2 313 h = (rb[1] - rt[1] + lb[1] - lt[1]) / 2 314 315 # crop by doing a perspective transform to the desired quad 316 src_pts = np.array([lt, rt, rb, lb], dtype="float32") 317 dst_pts = np.array([[0, 0], [w, 0], [w, h], [0, h]], dtype="float32") 318 M = cv.getPerspectiveTransform(src_pts, dst_pts) 319 warped = cv.warpPerspective(image, M, (int(w), int(h))) # type:ignore 320 321 return warped
Extract a multi-cell region from the image with perspective correction.
Crops the image to include all cells from start to end (inclusive), applying a perspective transform to produce a rectangular output.
Arguments:
- image: Source image (BGR or grayscale).
- start: Top-left cell as (row, col).
- end: Bottom-right cell as (row, col).
- margin: Uniform margin in pixels (default 0).
- margin_top: Override top margin.
- margin_bottom: Override bottom margin.
- margin_left: Override left margin.
- margin_right: Override right margin.
- margin_y: Override vertical margins (top and bottom).
- margin_x: Override horizontal margins (left and right).
Returns:
Cropped and perspective-corrected image.
Example:
>>> # Extract a 3x2 region starting at cell (1, 0)
>>> region_img = grid.crop_region(image, (1, 0), (3, 1))
323 @abstractmethod 324 def text_regions( 325 self, img: MatLike, row: int, margin_x: int = 0, margin_y: int = 0 326 ) -> list[tuple[tuple[int, int], tuple[int, int]]]: 327 """ 328 Split the row into regions of continuous text 329 330 Returns 331 list[tuple[int, int]]: a list of spans (start col, end col) 332 """ 333 334 pass
Split the row into regions of continuous text
Returns list[tuple[tuple[int, int], tuple[int, int]]]: a list of spans, each given as a (start, end) pair of points
336 def crop_cell(self, image, cell: tuple[int, int], margin: int = 0) -> MatLike: 337 """ 338 Extract a single cell from the image with perspective correction. 339 340 Convenience method equivalent to `crop_region(image, cell, cell, margin)`. 341 342 Args: 343 image: Source image (BGR or grayscale). 344 cell: Cell indices as (row, col). 345 margin: Padding in pixels around the cell (default 0). 346 347 Returns: 348 Cropped and perspective-corrected cell image. 349 350 Example: 351 >>> cell_img = grid.crop_cell(image, (0, 0)) 352 >>> cv2.imwrite("cell_0_0.png", cell_img) 353 """ 354 return self.crop_region(image, cell, cell, margin)
Extract a single cell from the image with perspective correction.
Convenience method equivalent to crop_region(image, cell, cell, margin).
Arguments:
- image: Source image (BGR or grayscale).
- cell: Cell indices as (row, col).
- margin: Padding in pixels around the cell (default 0).
Returns:
Cropped and perspective-corrected cell image.
Example:
>>> cell_img = grid.crop_cell(image, (0, 0))
>>> cv2.imwrite("cell_0_0.png", cell_img)
15class Split(Generic[T]): 16 """ 17 Container for paired left/right data with convenient manipulation methods. 18 19 The Split class is designed for working with table images that span two pages 20 or have distinct left and right sections. It allows you to: 21 - Store related data for both sides 22 - Apply functions to both sides simultaneously 23 - Access attributes/methods of contained objects transparently 24 25 Examples: 26 >>> # Create a split with different parameters for each side 27 >>> thresholds = Split(0.25, 0.30) 28 >>> 29 >>> # Apply a function to both sides 30 >>> images = Split(left_img, right_img) 31 >>> processed = images.apply(lambda img: cv2.blur(img, (5, 5))) 32 >>> 33 >>> # Use with different parameters per side 34 >>> results = images.apply( 35 ... lambda img, k: sauvola_threshold(img, k), 36 ... k=thresholds # k.left used for left img, k.right for right 37 ... ) 38 >>> 39 >>> # Access methods of contained objects directly 40 >>> templates = Split(template_left, template_right) 41 >>> widths = templates.cell_widths(0) # Calls on both templates 42 43 Type Parameters: 44 T: The type of objects stored in left and right 45 """ 46 47 def __init__(self, left: T | None = None, right: T | None = None): 48 """ 49 Initialize a Split container. 50 51 Args: 52 left: Data for the left side 53 right: Data for the right side 54 55 Note: 56 Both can initially be None. Use the `append` method or set 57 properties directly to populate. 
58 """ 59 self._left = left 60 self._right = right 61 62 @property 63 def left(self) -> T: 64 assert self._left is not None 65 return self._left 66 67 @left.setter 68 def left(self, value: T): 69 self._left = value 70 71 @property 72 def right(self) -> T: 73 assert self._right is not None 74 return self._right 75 76 @right.setter 77 def right(self, value: T): 78 self._right = value 79 80 def append(self, value: T): 81 if self._left is None: 82 self._left = value 83 else: 84 self._right = value 85 86 def __repr__(self) -> str: 87 return f"left: {self._left}, right: {self._right}" 88 89 def __iter__(self): 90 assert self._left is not None 91 assert self._right is not None 92 return iter((self._left, self._right)) 93 94 def __getitem__(self, index: bool) -> T: 95 assert self._left is not None 96 assert self._right is not None 97 if int(index) == 0: 98 return self._left 99 else: 100 return self._right 101 102 def apply( 103 self, 104 funcs: "Split[Callable[[T, *Any], V]] | Callable[[T, *Any], V]", 105 *args, 106 **kwargs, 107 ) -> "Split[V]": 108 if not isinstance(funcs, Split): 109 funcs = Split(funcs, funcs) 110 111 def get_arg(side: str, arg): 112 if isinstance(arg, Split): 113 return getattr(arg, side) 114 return arg 115 116 def call(side: str): 117 func = getattr(funcs, side) 118 target = getattr(self, side) 119 120 side_args = [get_arg(side, arg) for arg in args] 121 side_kwargs = {k: get_arg(side, v) for k, v in kwargs.items()} 122 123 return func(target, *side_args, **side_kwargs) 124 125 return Split(call("left"), call("right")) 126 127 def __getattr__(self, attr_name: str): 128 if attr_name in self.__dict__: 129 return getattr(self, attr_name) 130 131 def wrapper(*args, **kwargs): 132 return self.apply( 133 Split( 134 getattr(self.left.__class__, attr_name), 135 getattr(self.right.__class__, attr_name), 136 ), 137 *args, 138 **kwargs, 139 ) 140 141 return wrapper
Container for paired left/right data with convenient manipulation methods.
The Split class is designed for working with table images that span two pages or have distinct left and right sections. It allows you to:
- Store related data for both sides
- Apply functions to both sides simultaneously
- Access attributes/methods of contained objects transparently
Examples:
>>> # Create a split with different parameters for each side
>>> thresholds = Split(0.25, 0.30)
>>>
>>> # Apply a function to both sides
>>> images = Split(left_img, right_img)
>>> processed = images.apply(lambda img: cv2.blur(img, (5, 5)))
>>>
>>> # Use with different parameters per side
>>> results = images.apply(
...     lambda img, k: sauvola_threshold(img, k),
...     k=thresholds  # k.left used for left img, k.right for right
... )
>>>
>>> # Access methods of contained objects directly
>>> templates = Split(template_left, template_right)
>>> widths = templates.cell_widths(0)  # Calls on both templates
Type Parameters:
T: The type of objects stored in left and right
47 def __init__(self, left: T | None = None, right: T | None = None): 48 """ 49 Initialize a Split container. 50 51 Args: 52 left: Data for the left side 53 right: Data for the right side 54 55 Note: 56 Both can initially be None. Use the `append` method or set 57 properties directly to populate. 58 """ 59 self._left = left 60 self._right = right
Initialize a Split container.
Arguments:
- left: Data for the left side
- right: Data for the right side
Note:
Both can initially be None. Use the `append` method or set properties directly to populate.
102 def apply( 103 self, 104 funcs: "Split[Callable[[T, *Any], V]] | Callable[[T, *Any], V]", 105 *args, 106 **kwargs, 107 ) -> "Split[V]": 108 if not isinstance(funcs, Split): 109 funcs = Split(funcs, funcs) 110 111 def get_arg(side: str, arg): 112 if isinstance(arg, Split): 113 return getattr(arg, side) 114 return arg 115 116 def call(side: str): 117 func = getattr(funcs, side) 118 target = getattr(self, side) 119 120 side_args = [get_arg(side, arg) for arg in args] 121 side_kwargs = {k: get_arg(side, v) for k, v in kwargs.items()} 122 123 return func(target, *side_args, **side_kwargs) 124 125 return Split(call("left"), call("right"))
41class Taulu: 42 """ 43 High-level API for table segmentation from images. 44 45 Taulu orchestrates header alignment, grid detection, and table segmentation 46 into a single workflow. 47 48 Workflow: 49 1. Create annotated header images via `Taulu.annotate()` 50 2. Initialize Taulu with header(s) and parameters 51 3. Call `segment_table()` to get a `TableGrid` with cell boundaries 52 53 For two-page tables, use `Split[T]` to provide different parameters for 54 left and right sides. 55 56 Example: 57 >>> from taulu import Taulu 58 >>> Taulu.annotate("table_image.png", "header.png") 59 >>> taulu = Taulu("header.png") 60 >>> grid = taulu.segment_table("table_page_01.png") 61 >>> cell_image = grid.crop_cell(cv2.imread("table_page_01.png"), (0, 0)) 62 """ 63 64 def __init__( 65 self, 66 header_image_path: Splittable[PathLike[str]] | Splittable[str], 67 cell_height_factor: Splittable[float] | Splittable[list[float]] = [1.0], 68 header_anno_path: Splittable[PathLike[str]] | Splittable[str] | None = None, 69 sauvola_k: Splittable[float] = 0.25, 70 search_region: Splittable[int] = 60, 71 distance_penalty: Splittable[float] = 0.4, 72 cross_width: Splittable[int] = 10, 73 morph_size: Splittable[int] = 4, 74 kernel_size: Splittable[int] = 41, 75 processing_scale: Splittable[float] = 1.0, 76 skip_astar_threshold: Splittable[float] = 0.2, 77 min_rows: Splittable[int] = 5, 78 look_distance: Splittable[int] = 3, 79 grow_threshold: Splittable[float] = 0.3, 80 smooth_grid: bool = False, 81 cuts: Splittable[int] = 3, 82 cut_fraction: Splittable[float] = 0.5, 83 ): 84 """ 85 Args: 86 header_image_path: Path to header template image(s). Use `Split` for two-page tables. 87 cell_height_factor: Row height relative to header (e.g., 0.8 for 80%). Default: [1.0] 88 header_anno_path: Explicit annotation JSON path. Default: inferred from image path. 89 sauvola_k: Binarization threshold (0.0-1.0). Higher = less noise. Default: 0.25 90 search_region: Corner search area in pixels. 
Default: 60 91 distance_penalty: Position penalty weight [0, 1]. Default: 0.4 92 cross_width: Cross-kernel width matching line thickness. Default: 10 93 morph_size: Morphological dilation size. Default: 4 94 kernel_size: Cross-kernel size (odd). Default: 41 95 processing_scale: Image downscale factor (0, 1]. Default: 1.0 96 skip_astar_threshold: Confidence to skip A* pathfinding. Default: 0.2 97 min_rows: Minimum rows before completion. Default: 5 98 look_distance: Rows to examine for extrapolation. Default: 3 99 grow_threshold: Corner acceptance confidence [0, 1]. Default: 0.3 100 smooth_grid: Apply grid smoothing after detection. Default: False 101 cuts: Number of grid cuts during growing. Default: 3 102 cut_fraction: Fraction of points to delete per cut. Default: 0.5 103 """ 104 self._processing_scale = processing_scale 105 self._cell_height_factor = cell_height_factor 106 self._smooth = smooth_grid 107 108 if isinstance(header_image_path, Split) or isinstance(header_anno_path, Split): 109 header = Split(Path(header_image_path.left), Path(header_image_path.right)) 110 111 if not exists(header.left.with_suffix(".png")) or not exists( 112 header.right.with_suffix(".png") 113 ): 114 raise TauluException( 115 "The header images you provided do not exist (or they aren't .png files)" 116 ) 117 118 if header_anno_path is None: 119 if not exists(header.left.with_suffix(".json")) or not exists( 120 header.right.with_suffix(".json") 121 ): 122 raise TauluException( 123 "You need to annotate the headers of your table first\n\nsee the Taulu.annotate method" 124 ) 125 126 template_left = HeaderTemplate.from_saved( 127 header.left.with_suffix(".json") 128 ) 129 template_right = HeaderTemplate.from_saved( 130 header.right.with_suffix(".json") 131 ) 132 133 else: 134 if not exists(header_anno_path.left) or not exists( 135 header_anno_path.right 136 ): 137 raise TauluException( 138 "The header annotation files you provided do not exist (or they aren't .json files)" 139 ) 140 141 
template_left = HeaderTemplate.from_saved(header_anno_path.left) 142 template_right = HeaderTemplate.from_saved(header_anno_path.right) 143 144 self._header = Split( 145 cv2.imread(os.fspath(header.left)), cv2.imread(os.fspath(header.right)) 146 ) 147 148 self._aligner = Split( 149 HeaderAligner( 150 self._header.left, scale=get_param(self._processing_scale, "left") 151 ), 152 HeaderAligner( 153 self._header.right, scale=get_param(self._processing_scale, "right") 154 ), 155 ) 156 157 self._template = Split(template_left, template_right) 158 159 self._cell_heights = Split( 160 self._template.left.cell_heights(get_param(cell_height_factor, "left")), 161 self._template.right.cell_heights( 162 get_param(cell_height_factor, "right") 163 ), 164 ) 165 166 # Create GridDetector for left and right with potentially different parameters 167 self._grid_detector = Split( 168 GridDetector( 169 kernel_size=get_param(kernel_size, "left"), 170 cross_width=get_param(cross_width, "left"), 171 morph_size=get_param(morph_size, "left"), 172 search_region=get_param(search_region, "left"), 173 sauvola_k=get_param(sauvola_k, "left"), 174 distance_penalty=get_param(distance_penalty, "left"), 175 scale=get_param(self._processing_scale, "left"), 176 skip_astar_threshold=get_param(skip_astar_threshold, "left"), 177 min_rows=get_param(min_rows, "left"), 178 look_distance=get_param(look_distance, "left"), 179 grow_threshold=get_param(grow_threshold, "left"), 180 cuts=get_param(cuts, "left"), 181 cut_fraction=get_param(cut_fraction, "left"), 182 ), 183 GridDetector( 184 kernel_size=get_param(kernel_size, "right"), 185 cross_width=get_param(cross_width, "right"), 186 morph_size=get_param(morph_size, "right"), 187 search_region=get_param(search_region, "right"), 188 sauvola_k=get_param(sauvola_k, "right"), 189 distance_penalty=get_param(distance_penalty, "right"), 190 scale=get_param(self._processing_scale, "right"), 191 skip_astar_threshold=get_param(skip_astar_threshold, "right"), 192 
min_rows=get_param(min_rows, "right"), 193 look_distance=get_param(look_distance, "right"), 194 grow_threshold=get_param(grow_threshold, "right"), 195 cuts=get_param(cuts, "right"), 196 cut_fraction=get_param(cut_fraction, "right"), 197 ), 198 ) 199 200 else: 201 header_image_path = Path(header_image_path) 202 self._header = cv2.imread(os.fspath(header_image_path)) 203 self._aligner = HeaderAligner(self._header) 204 self._template = HeaderTemplate.from_saved( 205 header_image_path.with_suffix(".json") 206 ) 207 208 # For single header, parameters should not be Split objects 209 if any( 210 isinstance(param, Split) 211 for param in [ 212 sauvola_k, 213 search_region, 214 distance_penalty, 215 cross_width, 216 morph_size, 217 kernel_size, 218 processing_scale, 219 min_rows, 220 look_distance, 221 grow_threshold, 222 cell_height_factor, 223 cuts, 224 cut_fraction, 225 ] 226 ): 227 raise TauluException( 228 "Split parameters can only be used with split headers (tuple header_path)" 229 ) 230 231 self._cell_heights = self._template.cell_heights(self._cell_height_factor) 232 233 self._grid_detector = GridDetector( 234 kernel_size=kernel_size, # ty: ignore 235 cross_width=cross_width, # ty: ignore 236 morph_size=morph_size, # ty: ignore 237 search_region=search_region, # ty: ignore 238 sauvola_k=sauvola_k, # ty: ignore 239 distance_penalty=distance_penalty, # ty: ignore 240 scale=self._processing_scale, # ty: ignore 241 skip_astar_threshold=skip_astar_threshold, # ty: ignore 242 min_rows=min_rows, # ty: ignore 243 look_distance=look_distance, # ty: ignore 244 grow_threshold=grow_threshold, # ty: ignore 245 cuts=cuts, 246 cut_fraction=cut_fraction, 247 ) 248 249 @staticmethod 250 def annotate(image_path: PathLike[str] | str, output_path: PathLike[str] | str): 251 """ 252 Interactive tool to create header annotations for table segmentation. 253 254 This method guides you through a two-step annotation process: 255 256 1. 
**Crop the header**: Click four corners to define the header region 257 2. **Annotate lines**: Click pairs of points to define each vertical and 258 horizontal line in the header 259 260 The annotations are saved as: 261 - A cropped header image (.png) at `output_path` 262 - A JSON file (.json) containing line coordinates 263 264 ## Annotation Guidelines 265 266 **Which lines to annotate:** 267 - All vertical lines that extend into the table body (column separators) 268 - The top horizontal line of the header 269 - The bottom horizontal line of the header (top of data rows) 270 271 **Order doesn't matter** - annotate lines in any order that's convenient. 272 273 **To annotate a line:** 274 1. Click once at one endpoint 275 2. Click again at the other endpoint 276 3. A green line appears showing your annotation 277 278 **To undo:** 279 - Right-click anywhere to remove the last line you drew 280 281 **When finished:** 282 - Press 'n' to save and exit 283 - Press 'q' to quit without saving 284 285 Args: 286 image_path (PathLike[str] | str): Path to a table image containing 287 a clear view of the header. This can be a full table image. 288 output_path (PathLike[str] | str): Where to save the cropped header 289 image. The annotation JSON will be saved with the same name but 290 .json extension. 
291 292 Raises: 293 TauluException: If image_path doesn't exist or output_path is a directory 294 295 Examples: 296 Annotate a single header: 297 298 >>> from taulu import Taulu 299 >>> Taulu.annotate("scan_page_01.png", "header.png") 300 # Interactive window opens 301 # After annotation: creates header.png and header.json 302 303 Annotate left and right headers for a split table: 304 305 >>> Taulu.annotate("scan_page_01.png", "header_left.png") 306 >>> Taulu.annotate("scan_page_01.png", "header_right.png") 307 # Creates header_left.{png,json} and header_right.{png,json} 308 309 Notes: 310 - The header image doesn't need to be perfectly cropped initially - 311 the tool will help you crop it precisely 312 - Annotation accuracy is important: misaligned lines will cause 313 segmentation errors 314 - You can re-run this method to update annotations if needed 315 """ 316 317 if not exists(image_path): 318 raise TauluException(f"Image path {image_path} does not exist") 319 320 if os.path.isdir(output_path): 321 raise TauluException("Output path should be a file") 322 323 output_path = Path(output_path) 324 325 template = HeaderTemplate.annotate_image( 326 os.fspath(image_path), crop=output_path.with_suffix(".png") 327 ) 328 329 template.save(output_path.with_suffix(".json")) 330 331 def segment_table( 332 self, 333 image: MatLike | PathLike[str] | str, 334 filtered: Optional[MatLike | PathLike[str] | str] = None, 335 debug_view: bool = False, 336 ) -> TableGrid: 337 """ 338 Segment a table image into a grid of cells. 339 340 Orchestrates header alignment, grid detection, corner growing, and 341 extrapolation to produce a complete grid structure. 342 343 Args: 344 image: Table image to segment (file path or numpy array). 345 filtered: Optional pre-filtered binary image for corner detection. 346 If provided, binarization parameters are ignored. 347 debug_view: Show intermediate processing steps. Press 'n' to advance, 348 'q' to quit. 
Default: False 349 350 Returns: 351 TableGrid: Grid structure with methods for cell access (`crop_cell`, 352 `cell_polygon`), visualization (`show_cells`), and persistence 353 (`save`, `from_saved`). 354 355 Raises: 356 TauluException: If image cannot be loaded or grid detection fails. 357 """ 358 359 if not isinstance(image, MatLike): 360 image = cast(str | PathLike[str], image) 361 image = cv2.imread(os.fspath(image)) 362 363 now = perf_counter() 364 h = self._aligner.align(image, visual=debug_view) 365 align_time = perf_counter() - now 366 logger.info(f"Header alignment took {align_time:.2f} seconds") 367 368 # find the starting point for the table grid algorithm 369 370 def make_top_row(template: HeaderTemplate, aligner: HeaderAligner, h: NDArray): 371 top_row = [] 372 for x in range(template.cols + 1): 373 on_template = template.intersection((1, x)) 374 on_template = (int(on_template[0]), int(on_template[1])) 375 376 on_img = aligner.template_to_img(h, on_template) 377 378 top_row.append(on_img) 379 380 return top_row 381 382 if isinstance(self._aligner, Split): 383 top_row = Split( 384 make_top_row(self._template.left, self._aligner.left, h.left), # ty:ignore 385 make_top_row(self._template.right, self._aligner.right, h.right), # ty:ignore 386 ) 387 else: 388 top_row = make_top_row(self._template, self._aligner, h) # ty:ignore 389 390 now = perf_counter() 391 table = self._grid_detector.find_table_points( 392 image, # ty:ignore 393 top_row, # ty:ignore 394 self._template.cell_widths(0), 395 self._cell_heights, # ty:ignore 396 visual=debug_view, 397 filtered=filtered, # ty:ignore 398 smooth=self._smooth, 399 ) 400 grid_time = perf_counter() - now 401 logger.info(f"Grid detection took {grid_time:.2f} seconds") 402 403 if isinstance(table, Split): 404 table = TableGrid.from_split(table, (0, 0)) # ty: ignore 405 406 return table
High-level API for table segmentation from images.
Taulu orchestrates header alignment, grid detection, and table segmentation into a single workflow.
Workflow:
- Create annotated header images via `Taulu.annotate()`
- Initialize `Taulu` with the header(s) and parameters
- Call `segment_table()` to get a `TableGrid` with cell boundaries
For two-page tables, use Split[T] to provide different parameters for
left and right sides.
Example:
>>> from taulu import Taulu
>>> Taulu.annotate("table_image.png", "header.png")
>>> taulu = Taulu("header.png")
>>> grid = taulu.segment_table("table_page_01.png")
>>> cell_image = grid.crop_cell(cv2.imread("table_page_01.png"), (0, 0))
64 def __init__( 65 self, 66 header_image_path: Splittable[PathLike[str]] | Splittable[str], 67 cell_height_factor: Splittable[float] | Splittable[list[float]] = [1.0], 68 header_anno_path: Splittable[PathLike[str]] | Splittable[str] | None = None, 69 sauvola_k: Splittable[float] = 0.25, 70 search_region: Splittable[int] = 60, 71 distance_penalty: Splittable[float] = 0.4, 72 cross_width: Splittable[int] = 10, 73 morph_size: Splittable[int] = 4, 74 kernel_size: Splittable[int] = 41, 75 processing_scale: Splittable[float] = 1.0, 76 skip_astar_threshold: Splittable[float] = 0.2, 77 min_rows: Splittable[int] = 5, 78 look_distance: Splittable[int] = 3, 79 grow_threshold: Splittable[float] = 0.3, 80 smooth_grid: bool = False, 81 cuts: Splittable[int] = 3, 82 cut_fraction: Splittable[float] = 0.5, 83 ): 84 """ 85 Args: 86 header_image_path: Path to header template image(s). Use `Split` for two-page tables. 87 cell_height_factor: Row height relative to header (e.g., 0.8 for 80%). Default: [1.0] 88 header_anno_path: Explicit annotation JSON path. Default: inferred from image path. 89 sauvola_k: Binarization threshold (0.0-1.0). Higher = less noise. Default: 0.25 90 search_region: Corner search area in pixels. Default: 60 91 distance_penalty: Position penalty weight [0, 1]. Default: 0.4 92 cross_width: Cross-kernel width matching line thickness. Default: 10 93 morph_size: Morphological dilation size. Default: 4 94 kernel_size: Cross-kernel size (odd). Default: 41 95 processing_scale: Image downscale factor (0, 1]. Default: 1.0 96 skip_astar_threshold: Confidence to skip A* pathfinding. Default: 0.2 97 min_rows: Minimum rows before completion. Default: 5 98 look_distance: Rows to examine for extrapolation. Default: 3 99 grow_threshold: Corner acceptance confidence [0, 1]. Default: 0.3 100 smooth_grid: Apply grid smoothing after detection. Default: False 101 cuts: Number of grid cuts during growing. Default: 3 102 cut_fraction: Fraction of points to delete per cut. 
Default: 0.5 103 """ 104 self._processing_scale = processing_scale 105 self._cell_height_factor = cell_height_factor 106 self._smooth = smooth_grid 107 108 if isinstance(header_image_path, Split) or isinstance(header_anno_path, Split): 109 header = Split(Path(header_image_path.left), Path(header_image_path.right)) 110 111 if not exists(header.left.with_suffix(".png")) or not exists( 112 header.right.with_suffix(".png") 113 ): 114 raise TauluException( 115 "The header images you provided do not exist (or they aren't .png files)" 116 ) 117 118 if header_anno_path is None: 119 if not exists(header.left.with_suffix(".json")) or not exists( 120 header.right.with_suffix(".json") 121 ): 122 raise TauluException( 123 "You need to annotate the headers of your table first\n\nsee the Taulu.annotate method" 124 ) 125 126 template_left = HeaderTemplate.from_saved( 127 header.left.with_suffix(".json") 128 ) 129 template_right = HeaderTemplate.from_saved( 130 header.right.with_suffix(".json") 131 ) 132 133 else: 134 if not exists(header_anno_path.left) or not exists( 135 header_anno_path.right 136 ): 137 raise TauluException( 138 "The header annotation files you provided do not exist (or they aren't .json files)" 139 ) 140 141 template_left = HeaderTemplate.from_saved(header_anno_path.left) 142 template_right = HeaderTemplate.from_saved(header_anno_path.right) 143 144 self._header = Split( 145 cv2.imread(os.fspath(header.left)), cv2.imread(os.fspath(header.right)) 146 ) 147 148 self._aligner = Split( 149 HeaderAligner( 150 self._header.left, scale=get_param(self._processing_scale, "left") 151 ), 152 HeaderAligner( 153 self._header.right, scale=get_param(self._processing_scale, "right") 154 ), 155 ) 156 157 self._template = Split(template_left, template_right) 158 159 self._cell_heights = Split( 160 self._template.left.cell_heights(get_param(cell_height_factor, "left")), 161 self._template.right.cell_heights( 162 get_param(cell_height_factor, "right") 163 ), 164 ) 165 166 # Create 
GridDetector for left and right with potentially different parameters 167 self._grid_detector = Split( 168 GridDetector( 169 kernel_size=get_param(kernel_size, "left"), 170 cross_width=get_param(cross_width, "left"), 171 morph_size=get_param(morph_size, "left"), 172 search_region=get_param(search_region, "left"), 173 sauvola_k=get_param(sauvola_k, "left"), 174 distance_penalty=get_param(distance_penalty, "left"), 175 scale=get_param(self._processing_scale, "left"), 176 skip_astar_threshold=get_param(skip_astar_threshold, "left"), 177 min_rows=get_param(min_rows, "left"), 178 look_distance=get_param(look_distance, "left"), 179 grow_threshold=get_param(grow_threshold, "left"), 180 cuts=get_param(cuts, "left"), 181 cut_fraction=get_param(cut_fraction, "left"), 182 ), 183 GridDetector( 184 kernel_size=get_param(kernel_size, "right"), 185 cross_width=get_param(cross_width, "right"), 186 morph_size=get_param(morph_size, "right"), 187 search_region=get_param(search_region, "right"), 188 sauvola_k=get_param(sauvola_k, "right"), 189 distance_penalty=get_param(distance_penalty, "right"), 190 scale=get_param(self._processing_scale, "right"), 191 skip_astar_threshold=get_param(skip_astar_threshold, "right"), 192 min_rows=get_param(min_rows, "right"), 193 look_distance=get_param(look_distance, "right"), 194 grow_threshold=get_param(grow_threshold, "right"), 195 cuts=get_param(cuts, "right"), 196 cut_fraction=get_param(cut_fraction, "right"), 197 ), 198 ) 199 200 else: 201 header_image_path = Path(header_image_path) 202 self._header = cv2.imread(os.fspath(header_image_path)) 203 self._aligner = HeaderAligner(self._header) 204 self._template = HeaderTemplate.from_saved( 205 header_image_path.with_suffix(".json") 206 ) 207 208 # For single header, parameters should not be Split objects 209 if any( 210 isinstance(param, Split) 211 for param in [ 212 sauvola_k, 213 search_region, 214 distance_penalty, 215 cross_width, 216 morph_size, 217 kernel_size, 218 processing_scale, 219 
min_rows, 220 look_distance, 221 grow_threshold, 222 cell_height_factor, 223 cuts, 224 cut_fraction, 225 ] 226 ): 227 raise TauluException( 228 "Split parameters can only be used with split headers (tuple header_path)" 229 ) 230 231 self._cell_heights = self._template.cell_heights(self._cell_height_factor) 232 233 self._grid_detector = GridDetector( 234 kernel_size=kernel_size, # ty: ignore 235 cross_width=cross_width, # ty: ignore 236 morph_size=morph_size, # ty: ignore 237 search_region=search_region, # ty: ignore 238 sauvola_k=sauvola_k, # ty: ignore 239 distance_penalty=distance_penalty, # ty: ignore 240 scale=self._processing_scale, # ty: ignore 241 skip_astar_threshold=skip_astar_threshold, # ty: ignore 242 min_rows=min_rows, # ty: ignore 243 look_distance=look_distance, # ty: ignore 244 grow_threshold=grow_threshold, # ty: ignore 245 cuts=cuts, 246 cut_fraction=cut_fraction, 247 )
Arguments:
- header_image_path: Path to header template image(s). Use `Split` for two-page tables.
- cell_height_factor: Row height relative to header (e.g., 0.8 for 80%). Default: [1.0]
- header_anno_path: Explicit annotation JSON path. Default: inferred from image path.
- sauvola_k: Binarization threshold (0.0-1.0). Higher = less noise. Default: 0.25
- search_region: Corner search area in pixels. Default: 60
- distance_penalty: Position penalty weight [0, 1]. Default: 0.4
- cross_width: Cross-kernel width matching line thickness. Default: 10
- morph_size: Morphological dilation size. Default: 4
- kernel_size: Cross-kernel size (odd). Default: 41
- processing_scale: Image downscale factor (0, 1]. Default: 1.0
- skip_astar_threshold: Confidence to skip A* pathfinding. Default: 0.2
- min_rows: Minimum rows before completion. Default: 5
- look_distance: Rows to examine for extrapolation. Default: 3
- grow_threshold: Corner acceptance confidence [0, 1]. Default: 0.3
- smooth_grid: Apply grid smoothing after detection. Default: False
- cuts: Number of grid cuts during growing. Default: 3
- cut_fraction: Fraction of points to delete per cut. Default: 0.5
249 @staticmethod 250 def annotate(image_path: PathLike[str] | str, output_path: PathLike[str] | str): 251 """ 252 Interactive tool to create header annotations for table segmentation. 253 254 This method guides you through a two-step annotation process: 255 256 1. **Crop the header**: Click four corners to define the header region 257 2. **Annotate lines**: Click pairs of points to define each vertical and 258 horizontal line in the header 259 260 The annotations are saved as: 261 - A cropped header image (.png) at `output_path` 262 - A JSON file (.json) containing line coordinates 263 264 ## Annotation Guidelines 265 266 **Which lines to annotate:** 267 - All vertical lines that extend into the table body (column separators) 268 - The top horizontal line of the header 269 - The bottom horizontal line of the header (top of data rows) 270 271 **Order doesn't matter** - annotate lines in any order that's convenient. 272 273 **To annotate a line:** 274 1. Click once at one endpoint 275 2. Click again at the other endpoint 276 3. A green line appears showing your annotation 277 278 **To undo:** 279 - Right-click anywhere to remove the last line you drew 280 281 **When finished:** 282 - Press 'n' to save and exit 283 - Press 'q' to quit without saving 284 285 Args: 286 image_path (PathLike[str] | str): Path to a table image containing 287 a clear view of the header. This can be a full table image. 288 output_path (PathLike[str] | str): Where to save the cropped header 289 image. The annotation JSON will be saved with the same name but 290 .json extension. 
291 292 Raises: 293 TauluException: If image_path doesn't exist or output_path is a directory 294 295 Examples: 296 Annotate a single header: 297 298 >>> from taulu import Taulu 299 >>> Taulu.annotate("scan_page_01.png", "header.png") 300 # Interactive window opens 301 # After annotation: creates header.png and header.json 302 303 Annotate left and right headers for a split table: 304 305 >>> Taulu.annotate("scan_page_01.png", "header_left.png") 306 >>> Taulu.annotate("scan_page_01.png", "header_right.png") 307 # Creates header_left.{png,json} and header_right.{png,json} 308 309 Notes: 310 - The header image doesn't need to be perfectly cropped initially - 311 the tool will help you crop it precisely 312 - Annotation accuracy is important: misaligned lines will cause 313 segmentation errors 314 - You can re-run this method to update annotations if needed 315 """ 316 317 if not exists(image_path): 318 raise TauluException(f"Image path {image_path} does not exist") 319 320 if os.path.isdir(output_path): 321 raise TauluException("Output path should be a file") 322 323 output_path = Path(output_path) 324 325 template = HeaderTemplate.annotate_image( 326 os.fspath(image_path), crop=output_path.with_suffix(".png") 327 ) 328 329 template.save(output_path.with_suffix(".json"))
Interactive tool to create header annotations for table segmentation.
This method guides you through a two-step annotation process:
- Crop the header: Click four corners to define the header region
- Annotate lines: Click pairs of points to define each vertical and horizontal line in the header
The annotations are saved as:
- A cropped header image (.png) at `output_path`
- A JSON file (.json) containing line coordinates
Annotation Guidelines
Which lines to annotate:
- All vertical lines that extend into the table body (column separators)
- The top horizontal line of the header
- The bottom horizontal line of the header (top of data rows)
Order doesn't matter - annotate lines in any order that's convenient.
To annotate a line:
- Click once at one endpoint
- Click again at the other endpoint
- A green line appears showing your annotation
To undo:
- Right-click anywhere to remove the last line you drew
When finished:
- Press 'n' to save and exit
- Press 'q' to quit without saving
Arguments:
- image_path (PathLike[str] | str): Path to a table image containing a clear view of the header. This can be a full table image.
- output_path (PathLike[str] | str): Where to save the cropped header image. The annotation JSON will be saved with the same name but .json extension.
Raises:
- TauluException: If image_path doesn't exist or output_path is a directory
Examples:
Annotate a single header:
>>> from taulu import Taulu >>> Taulu.annotate("scan_page_01.png", "header.png") <h1 id="interactive-window-opens">Interactive window opens</h1>After annotation: creates header.png and header.json
Annotate left and right headers for a split table:
>>> Taulu.annotate("scan_page_01.png", "header_left.png")
>>> Taulu.annotate("scan_page_01.png", "header_right.png")
# Creates header_left.{png,json} and header_right.{png,json}
Notes:
- The header image doesn't need to be perfectly cropped initially - the tool will help you crop it precisely
- Annotation accuracy is important: misaligned lines will cause segmentation errors
- You can re-run this method to update annotations if needed
331 def segment_table( 332 self, 333 image: MatLike | PathLike[str] | str, 334 filtered: Optional[MatLike | PathLike[str] | str] = None, 335 debug_view: bool = False, 336 ) -> TableGrid: 337 """ 338 Segment a table image into a grid of cells. 339 340 Orchestrates header alignment, grid detection, corner growing, and 341 extrapolation to produce a complete grid structure. 342 343 Args: 344 image: Table image to segment (file path or numpy array). 345 filtered: Optional pre-filtered binary image for corner detection. 346 If provided, binarization parameters are ignored. 347 debug_view: Show intermediate processing steps. Press 'n' to advance, 348 'q' to quit. Default: False 349 350 Returns: 351 TableGrid: Grid structure with methods for cell access (`crop_cell`, 352 `cell_polygon`), visualization (`show_cells`), and persistence 353 (`save`, `from_saved`). 354 355 Raises: 356 TauluException: If image cannot be loaded or grid detection fails. 357 """ 358 359 if not isinstance(image, MatLike): 360 image = cast(str | PathLike[str], image) 361 image = cv2.imread(os.fspath(image)) 362 363 now = perf_counter() 364 h = self._aligner.align(image, visual=debug_view) 365 align_time = perf_counter() - now 366 logger.info(f"Header alignment took {align_time:.2f} seconds") 367 368 # find the starting point for the table grid algorithm 369 370 def make_top_row(template: HeaderTemplate, aligner: HeaderAligner, h: NDArray): 371 top_row = [] 372 for x in range(template.cols + 1): 373 on_template = template.intersection((1, x)) 374 on_template = (int(on_template[0]), int(on_template[1])) 375 376 on_img = aligner.template_to_img(h, on_template) 377 378 top_row.append(on_img) 379 380 return top_row 381 382 if isinstance(self._aligner, Split): 383 top_row = Split( 384 make_top_row(self._template.left, self._aligner.left, h.left), # ty:ignore 385 make_top_row(self._template.right, self._aligner.right, h.right), # ty:ignore 386 ) 387 else: 388 top_row = make_top_row(self._template, 
self._aligner, h) # ty:ignore 389 390 now = perf_counter() 391 table = self._grid_detector.find_table_points( 392 image, # ty:ignore 393 top_row, # ty:ignore 394 self._template.cell_widths(0), 395 self._cell_heights, # ty:ignore 396 visual=debug_view, 397 filtered=filtered, # ty:ignore 398 smooth=self._smooth, 399 ) 400 grid_time = perf_counter() - now 401 logger.info(f"Grid detection took {grid_time:.2f} seconds") 402 403 if isinstance(table, Split): 404 table = TableGrid.from_split(table, (0, 0)) # ty: ignore 405 406 return table
Segment a table image into a grid of cells.
Orchestrates header alignment, grid detection, corner growing, and extrapolation to produce a complete grid structure.
Arguments:
- image: Table image to segment (file path or numpy array).
- filtered: Optional pre-filtered binary image for corner detection. If provided, binarization parameters are ignored.
- debug_view: Show intermediate processing steps. Press 'n' to advance, 'q' to quit. Default: False
Returns:
TableGrid: Grid structure with methods for cell access (`crop_cell`,
`cell_polygon`), visualization (`show_cells`), and persistence (`save`, `from_saved`).
Raises:
- TauluException: If image cannot be loaded or grid detection fails.