taulu

Taulu - segment tables from images

Taulu is a Python package designed to segment images of tables into their constituent rows and columns (and cells).

To use this package, you first need to make an annotation of the headers in your table images. The idea is that these headers will be similar across your full set of images, and they will be used as a starting point for the search algorithm that finds the table grid.

Here is an example python script of how to use Taulu:

from taulu import Taulu
import os


def setup():
    # create an Annotation file of the headers in the image
    # (one for the left header, one for the right)
    # and store them in the examples directory
    print("Annotating the LEFT header...")
    Taulu.annotate("../data/table_00.png", "table_00_header_left.png")

    print("Annotating the RIGHT header...")
    Taulu.annotate("../data/table_00.png", "table_00_header_right.png")


def main():
    taulu = Taulu(("table_00_header_left.png", "table_00_header_right.png"))
    table = taulu.segment_table("../data/table_00.png",  cell_height_factor=0.8, debug_view=True)

    table.show_cells("../data/table_00.png")


if __name__ == "__main__":
    if os.path.exists("table_00_header_left.png") and os.path.exists(
        "table_00_header_right.png"
    ):
        main()
    else:
        setup()
        main()

If you want a high-level overview of how to use Taulu, see the Taulu class (`taulu.taulu.Taulu`).

 1"""
 2Taulu - *segment tables from images*
 3
 4Taulu is a Python package designed to segment images of tables into their constituent rows and columns (and cells).
 5
 6To use this package, you first need to make an annotation of the headers in your table images.
 7The idea is that these headers will be similar across your full set of images, and they will be
 8used as a starting point for the search algorithm that finds the table grid.
 9
10Here is an example python script of how to use Taulu:
11```python
12from taulu import Taulu
13import os
14
15
16def setup():
17    # create an Annotation file of the headers in the image
18    # (one for the left header, one for the right)
19    # and store them in the examples directory
20    print("Annotating the LEFT header...")
21    Taulu.annotate("../data/table_00.png", "table_00_header_left.png")
22
23    print("Annotating the RIGHT header...")
24    Taulu.annotate("../data/table_00.png", "table_00_header_right.png")
25
26
27def main():
28    taulu = Taulu(("table_00_header_left.png", "table_00_header_right.png"))
29    table = taulu.segment_table("../data/table_00.png",  cell_height_factor=0.8, debug_view=True)
30
31    table.show_cells("../data/table_00.png")
32
33
34if __name__ == "__main__":
35    if os.path.exists("table_00_header_left.png") and os.path.exists(
36        "table_00_header_right.png"
37    ):
38        main()
39    else:
40        setup()
41        main()
42
43```
44
45If you want a high-level overview of how to use Taulu, see [the Taulu class](./taulu.html#taulu.taulu.Taulu)
46"""
47
48from .grid import GridDetector, TableGrid
49from .header_aligner import HeaderAligner
50from .header_template import HeaderTemplate
51from .table_indexer import TableIndexer
52from .split import Split
53from .taulu import Taulu
54
55__pdoc__ = {}
56__pdoc__["constants"] = False
57__pdoc__["main"] = False
58__pdoc__["decorators"] = False
59__pdoc__["error"] = False
60__pdoc__["types"] = False
61__pdoc__["img_util"] = False
62
63__all__ = [
64    "GridDetector",
65    "TableGrid",
66    "HeaderAligner",
67    "HeaderTemplate",
68    "TableIndexer",
69    "Split",
70    "Taulu",
71]
72
73try:
74    from . import gpu
75
76    __all__.append("gpu")
77except ImportError:
78    pass
class GridDetector:
    """
    Detects table grid intersections using morphological filtering and template matching.

    This detector implements a multi-stage pipeline:

    1. **Binarization**: Sauvola adaptive thresholding to handle varying lighting
    2. **Morphological operations**: Dilation to connect broken rule segments
    3. **Cross-kernel matching**: Template matching with a cross-shaped kernel to find
       rule intersections where horizontal and vertical lines meet
    4. **Grid growing**: Iterative point detection starting from a known seed point

    The cross-kernel is designed to match the specific geometry of your table rules.
    It should be sized so that after morphology, it aligns with actual corner shapes.

    ## Tuning Guidelines

    - **kernel_size**: Increase if you need more selectivity (fewer false positives)
    - **cross_width/height**: Should match rule thickness after morphology
    - **morph_size**: Increase to connect more broken lines, but this thickens rules
    - **sauvola_k**: Increase to threshold more aggressively (remove noise)
    - **search_region**: Increase for documents with more warping/distortion
    - **distance_penalty**: Increase to prefer corners closer to expected positions

    ## Visual Debugging

    Set `visual=True` in methods to see intermediate results and tune parameters.
    """

    def __init__(
        self,
        kernel_size: int = 21,
        cross_width: int = 6,
        cross_height: Optional[int] = None,
        morph_size: Optional[int] = None,
        sauvola_k: float = 0.04,
        sauvola_window: int = 15,
        scale: float = 1.0,
        search_region: int = 40,
        distance_penalty: float = 0.4,
        skip_astar_threshold: float = 0.2,
        min_rows: int = 5,
        grow_threshold: float = 0.3,
        look_distance: int = 4,
        cuts: int = 3,
        cut_fraction: float = 0.5,
    ):
        """
        Args:
            kernel_size (int): the size of the cross kernel
                a larger kernel size often means that more penalty is applied, often leading
                to more sparse results
            cross_width (int): the width of one of the edges in the cross filter, should be
                roughly equal to the width of the rules in the image after morphology is applied
            cross_height (int | None): useful if the horizontal rules and vertical rules
                have different sizes
            morph_size (int | None): the size of the morphology operators that are applied before
                the cross kernel. 'bridges the gaps' of broken-up lines
            sauvola_k (float): threshold parameter for sauvola thresholding
            sauvola_window (int): window_size parameter for sauvola thresholding
            scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
            search_region (int): area in which to search for a new max value in `find_nearest` etc.
            distance_penalty (float): how much the point finding algorithm penalizes points that are further in the region [0, 1]
            skip_astar_threshold (float): minimum confidence score during table growing based on heuristic jump on which to skip astar pathfinding
            min_rows (int): minimum number of rows to find before stopping the table finding algorithm
            grow_threshold (float): the threshold for accepting a new point when growing the table
            look_distance (int): how many points away to look when calculating the median slope
            cuts (int): The amount of cuts (large deletions) to do in the grid during table growing
            cut_fraction (float): The portion of the already-chosen corner points to delete during cutting
        """
        self._validate_parameters(
            kernel_size,
            cross_width,
            cross_height,
            morph_size,
            search_region,
            sauvola_k,
            sauvola_window,
            distance_penalty,
            skip_astar_threshold,
            cuts,
            cut_fraction,
        )

        self._kernel_size = kernel_size
        self._cross_width = cross_width
        # When no separate height is given, the cross is square.
        self._cross_height = cross_width if cross_height is None else cross_height
        self._morph_size = morph_size if morph_size is not None else cross_width
        self._search_region = search_region
        self._sauvola_k = sauvola_k
        self._sauvola_window = sauvola_window
        self._distance_penalty = distance_penalty
        self._scale = scale
        self._skip_astar_threshold = skip_astar_threshold
        self._min_rows = min_rows
        self._grow_threshold = grow_threshold
        self._look_distance = look_distance
        self._cuts = cuts
        self._cut_fraction = cut_fraction

        self._cross_kernel = self._create_cross_kernel()

    def _validate_parameters(
        self,
        kernel_size: int,
        cross_width: int,
        cross_height: Optional[int],
        morph_size: Optional[int],
        search_region: int,
        sauvola_k: float,
        sauvola_window: int,
        distance_penalty: float,
        skip_astar_threshold: float,
        cuts: int,
        cut_fraction: float,
    ) -> None:
        """Validate initialization parameters, raising ValueError on bad input."""
        if kernel_size % 2 == 0:
            raise ValueError("kernel_size must be odd")
        if (
            kernel_size <= 0
            or cross_width <= 0
            or search_region <= 0
            or sauvola_window <= 0
        ):
            raise ValueError("Size parameters must be positive")
        if cross_height is not None and cross_height <= 0:
            raise ValueError("cross_height must be positive")
        if morph_size is not None and morph_size <= 0:
            raise ValueError("morph_size must be positive")
        if not 0 <= distance_penalty <= 1:
            raise ValueError("distance_penalty must be in [0, 1]")
        if sauvola_k <= 0:
            raise ValueError("sauvola_k must be positive")
        if skip_astar_threshold < 0 or skip_astar_threshold > 1:
            raise ValueError("skip_astar_threshold must be in [0, 1]")
        if cut_fraction < 0 or cut_fraction > 1:
            raise ValueError("cut_fraction must be in [0, 1]")
        if cuts < 0:
            raise ValueError("cuts must be zero or positive")

    def _create_gaussian_weights(self, region_size: int) -> NDArray:
        """
        Create a 2D Gaussian weight mask used to penalize matches far from
        the region center (controlled by `distance_penalty`).

        Args:
            region_size (int): side length of the square weight mask

        Returns:
            NDArray: Gaussian weight mask of shape (region_size, region_size)
        """
        if self._distance_penalty == 0:
            # No penalty requested: uniform weights.
            return np.ones((region_size, region_size), dtype=np.float32)

        y = np.linspace(-1, 1, region_size)
        x = np.linspace(-1, 1, region_size)
        xv, yv = np.meshgrid(x, y)
        dist_squared = xv**2 + yv**2

        # Prevent log(0) when distance_penalty is 1
        if self._distance_penalty >= 0.999:
            sigma = 0.1  # Small sigma for very sharp peak
        else:
            # Chosen so the weight at unit distance equals 1 - distance_penalty.
            sigma = np.sqrt(-1 / (2 * np.log(1 - self._distance_penalty)))

        weights = np.exp(-dist_squared / (2 * sigma**2))

        return weights.astype(np.float32)

    def _create_cross_kernel(self) -> NDArray:
        """Build the cross-shaped template (uint8, 255 on the cross arms)."""
        kernel = np.zeros((self._kernel_size, self._kernel_size), dtype=np.uint8)
        center = self._kernel_size // 2

        # Create horizontal bar
        h_start = max(0, center - self._cross_height // 2)
        h_end = min(self._kernel_size, center + (self._cross_height + 1) // 2)
        kernel[h_start:h_end, :] = 255

        # Create vertical bar
        v_start = max(0, center - self._cross_width // 2)
        v_end = min(self._kernel_size, center + (self._cross_width + 1) // 2)
        kernel[:, v_start:v_end] = 255

        return kernel

    def _apply_morphology(self, binary: MatLike) -> MatLike:
        """Dilate horizontally then vertically to bridge gaps in broken rules."""
        # Define a horizontal kernel (adjust width as needed)
        kernel_hor = cv.getStructuringElement(cv.MORPH_RECT, (self._morph_size, 1))
        kernel_ver = cv.getStructuringElement(cv.MORPH_RECT, (1, self._morph_size))

        # Apply dilation
        dilated = cv.dilate(binary, kernel_hor, iterations=1)
        dilated = cv.dilate(dilated, kernel_ver, iterations=1)

        return dilated

    def _apply_cross_matching(self, img: MatLike) -> MatLike:
        """Apply cross kernel template matching."""
        # Pad so the match map keeps the input's size (template matching
        # otherwise shrinks the output by the kernel extent).
        pad_y = self._cross_kernel.shape[0] // 2
        pad_x = self._cross_kernel.shape[1] // 2

        padded = cv.copyMakeBorder(
            img, pad_y, pad_y, pad_x, pad_x, borderType=cv.BORDER_CONSTANT, value=0
        )

        filtered = cv.matchTemplate(padded, self._cross_kernel, cv.TM_SQDIFF_NORMED)
        # Invert and normalize to 0-255 range
        filtered = cv.normalize(1.0 - filtered, None, 0, 255, cv.NORM_MINMAX)
        return filtered.astype(np.uint8)

    def apply(self, img: MatLike, visual: bool = False) -> MatLike:
        """
        Apply the grid detection filter to the input image.

        Args:
            img (MatLike): the input image
            visual (bool): whether to show intermediate steps

        Returns:
            MatLike: the filtered image, with high values (whiter pixels) at intersections of horizontal and vertical rules

        Raises:
            ValueError: if `img` is None or empty
        """

        if img is None or img.size == 0:
            raise ValueError("Input image is empty or None")

        binary = imu.sauvola(img, k=self._sauvola_k, window_size=self._sauvola_window)

        if visual:
            imu.show(binary, title="thresholded")

        binary = self._apply_morphology(binary)

        if visual:
            imu.show(binary, title="dilated")

        filtered = self._apply_cross_matching(binary)

        return filtered

    @log_calls(level=logging.DEBUG, include_return=True)
    def find_nearest(
        self, filtered: MatLike, point: Point, region: Optional[int] = None
    ) -> Tuple[Point, float]:
        """
        Find the nearest 'corner match' in the image, along with its score [0,1]

        Args:
            filtered (MatLike): the filtered image (obtained through `apply`)
            point (tuple[int, int]): the approximate target point (x, y)
            region (None | int): alternative value for search region,
                overwriting the `__init__` parameter `region`

        Raises:
            ValueError: if `filtered` is None or empty
        """

        if filtered is None or filtered.size == 0:
            raise ValueError("Filtered image is empty or None")

        region_size = region if region is not None else self._search_region
        x, y = point

        # Calculate crop boundaries
        crop_x = max(0, x - region_size // 2)
        crop_y = max(0, y - region_size // 2)
        crop_width = min(region_size, filtered.shape[1] - crop_x)
        crop_height = min(region_size, filtered.shape[0] - crop_y)

        # Handle edge cases
        if crop_width <= 0 or crop_height <= 0:
            logger.warning(f"Point {point} is outside image bounds")
            return point, 0.0

        cropped = filtered[crop_y : crop_y + crop_height, crop_x : crop_x + crop_width]

        if cropped.size == 0:
            return point, 0.0

        # Always apply Gaussian weighting by extending crop if needed
        if cropped.shape[0] == region_size and cropped.shape[1] == region_size:
            # Perfect size - apply weights directly
            weights = self._create_gaussian_weights(region_size)
            weighted = cropped.astype(np.float32) * weights
        else:
            # Extend crop to match region_size, apply weights, then restore
            extended = np.zeros((region_size, region_size), dtype=cropped.dtype)

            # Calculate offset to center the cropped region in extended array
            offset_y = (region_size - cropped.shape[0]) // 2
            offset_x = (region_size - cropped.shape[1]) // 2

            # Place cropped region in center of extended array
            extended[
                offset_y : offset_y + cropped.shape[0],
                offset_x : offset_x + cropped.shape[1],
            ] = cropped

            # Apply Gaussian weights to extended array
            weights = self._create_gaussian_weights(region_size)
            weighted_extended = extended.astype(np.float32) * weights

            # Extract the original region back out
            weighted = weighted_extended[
                offset_y : offset_y + cropped.shape[0],
                offset_x : offset_x + cropped.shape[1],
            ]

        best_idx = np.argmax(weighted)
        best_y, best_x = np.unravel_index(best_idx, cropped.shape)

        result_point = (
            int(crop_x + best_x),
            int(crop_y + best_y),
        )
        # Filtered image is uint8, so 255 maps to confidence 1.0.
        result_confidence = float(weighted[best_y, best_x]) / 255.0

        return result_point, result_confidence

    def find_table_points(
        self,
        img: MatLike | PathLike[str],
        top_row: list[Point | None],
        cell_widths: list[int],
        cell_heights: list[int] | int,
        visual: bool = False,
        window: str = WINDOW,
        goals_width: Optional[int] = None,
        filtered: Optional[MatLike | PathLike[str]] = None,
        smooth: bool = False,
    ) -> "TableGrid":
        """
        Parse the image to a `TableGrid` structure that holds all of the
        intersections between horizontal and vertical rules, starting near the `left_top` point

        Args:
            img (MatLike): the input image of a table
            top_row (list[tuple[int, int]]): initial guess at topmost row of corner points (from template matching)
            cell_widths (list[int]): the expected widths of the cells (based on a header template)
            cell_heights (list[int]): the expected height of the rows of data.
                The last value from this list is used until the image has no more vertical space.
            visual (bool): whether to show intermediate steps
            window (str): the name of the OpenCV window to use for visualization
            goals_width (int | None): the width of the goal region when searching for the next point.
                If None, defaults to 1.5 * search_region
            filtered (MatLike | PathLike[str] | None): if provided, this image is used instead of
                calculating the filtered image from scratch
            smooth (bool): if True, smooth the grid after detection, using local heuristics

        Returns:
            a TableGrid object

        Raises:
            ValueError: if `cell_widths` is empty
        """

        if goals_width is None:
            goals_width = self._search_region * 3 // 2

        if not cell_widths:
            raise ValueError("cell_widths must contain at least one value")

        if not isinstance(img, np.ndarray):
            img = cv.imread(os.fspath(img))

        if filtered is None:
            filtered = self.apply(img, visual)
        else:
            if not isinstance(filtered, np.ndarray):
                filtered = cv.imread(os.fspath(filtered))

            filtered = ensure_gray(filtered)

        if visual:
            imu.show(filtered, window=window)

        if isinstance(cell_heights, int):
            cell_heights = [cell_heights]

        # Snap each initial guess to the strongest nearby corner response;
        # drop guesses whose best match is too weak to trust.
        for i in range(len(top_row)):
            if top_row[i] is None:
                continue

            adjusted, confidence = self.find_nearest(
                filtered, top_row[i], int(self._search_region * 2)
            )

            if confidence < 0.15:
                top_row[i] = None
            else:
                top_row[i] = adjusted

        if not any(top_row):
            logger.error("No good starting candidates given")

        # resize all parameters according to scale
        img = cv.resize(img, None, fx=self._scale, fy=self._scale)

        if visual:
            imu.push(img)

        filtered = cv.resize(filtered, None, fx=self._scale, fy=self._scale)
        cell_widths = [int(w * self._scale) for w in cell_widths]
        cell_heights = [int(h * self._scale) for h in cell_heights]
        top_row = [
            (int(p[0] * self._scale), int(p[1] * self._scale))
            if p is not None
            else None
            for p in top_row
        ]
        search_region = int(self._search_region * self._scale)

        img_gray = ensure_gray(img)
        filtered_gray = ensure_gray(filtered)

        table_grower = TableGrower(
            filtered_gray,
            cell_widths,  # pyright: ignore
            cell_heights,  # pyright: ignore
            top_row,
            search_region,
            self._distance_penalty,
            self._look_distance,
            self._grow_threshold,
            self._skip_astar_threshold,
            self._min_rows,
            self._cuts,
            self._cut_fraction,
        )

        def show_grower_progress(wait: bool = False):
            # Overlay accepted corners (red) and the current growth edge
            # (green intensity encodes the candidate score) on the image.
            img_orig = np.copy(img)
            corners = table_grower.get_all_corners()
            for y in range(len(corners)):
                for x in range(len(corners[y])):
                    if corners[y][x] is not None:
                        img_orig = imu.draw_points(
                            img_orig,
                            [corners[y][x]],
                            color=(0, 0, 255),
                            thickness=30,
                        )

            edge = table_grower.get_edge_points()

            for point, score in edge:
                color = (100, int(clamp(score * 255, 0, 255)), 100)
                imu.draw_point(img_orig, point, color=color, thickness=20)

            imu.show(img_orig, wait=wait)

        if visual:
            threshold = self._grow_threshold
            look_distance = self._look_distance

            # python implementation of rust loops, for visualization purposes
            # note this is a LOT slower
            while table_grower.grow_point(img_gray, filtered_gray) is not None:
                show_grower_progress()

            show_grower_progress(True)

            original_threshold = threshold

            loops_without_change = 0

            while not table_grower.is_table_complete():
                loops_without_change += 1

                if loops_without_change > 50:
                    break

                if table_grower.extrapolate_one(img_gray, filtered_gray) is not None:
                    show_grower_progress()

                    loops_without_change = 0

                    grown = False
                    while table_grower.grow_point(img_gray, filtered_gray) is not None:
                        show_grower_progress()
                        grown = True
                        # Relax the threshold back toward its original value
                        # as long as growing keeps succeeding.
                        threshold = min(0.1 + 0.9 * threshold, original_threshold)
                        table_grower.set_threshold(threshold)

                    if not grown:
                        threshold *= 0.9
                        table_grower.set_threshold(threshold)

                else:
                    threshold *= 0.9
                    table_grower.set_threshold(threshold)

                    if table_grower.grow_point(img_gray, filtered_gray) is not None:
                        show_grower_progress()
                        loops_without_change = 0

        else:
            table_grower.grow_table(img_gray, filtered_gray)

        if smooth:
            table_grower.smooth_grid()
        corners = table_grower.get_all_corners()
        logger.info(
            f"Table growth complete, found {len(corners)} rows and {len(corners[0])} columns"
        )
        # rescale corners back to original size
        if self._scale != 1.0:
            for y in range(len(corners)):
                for x in range(len(corners[y])):
                    if corners[y][x] is not None:
                        corners[y][x] = (
                            int(corners[y][x][0] / self._scale),  # pyright:ignore
                            int(corners[y][x][1] / self._scale),  # pyright:ignore
                        )

        return TableGrid(corners)  # pyright: ignore

    def _visualize_grid(self, img: MatLike, points: List[List[Point]]) -> None:
        """Visualize the detected grid points."""
        all_points = [point for row in points for point in row]
        drawn = imu.draw_points(img, all_points)
        imu.show(drawn, wait=True)

    def _visualize_path_finding(
        self,
        path: List[Point],
        current: Point,
        next_point: Point,
        previous_row_target: Optional[Point] = None,
        region_center: Optional[Point] = None,
        region_size: Optional[int] = None,
    ) -> None:
        """Visualize the path finding process for debugging."""
        global show_time

        screen = imu.pop()

        # if gray, convert to BGR
        if len(screen.shape) == 2 or screen.shape[2] == 1:
            debug_img = cv.cvtColor(screen, cv.COLOR_GRAY2BGR)
        else:
            debug_img = cast(MatLike, screen)

        debug_img = imu.draw_points(debug_img, path, color=(200, 200, 0), thickness=2)
        debug_img = imu.draw_points(
            debug_img, [current], color=(0, 255, 0), thickness=3
        )
        debug_img = imu.draw_points(
            debug_img, [next_point], color=(0, 0, 255), thickness=2
        )

        # Draw previous row target if available
        if previous_row_target is not None:
            debug_img = imu.draw_points(
                debug_img, [previous_row_target], color=(255, 0, 255), thickness=2
            )

        # Draw search region if available
        if region_center is not None and region_size is not None:
            top_left = (
                max(0, region_center[0] - region_size // 2),
                max(0, region_center[1] - region_size // 2),
            )
            bottom_right = (
                min(debug_img.shape[1], region_center[0] + region_size // 2),
                min(debug_img.shape[0], region_center[1] + region_size // 2),
            )
            cv.rectangle(
                debug_img,
                top_left,
                bottom_right,
                color=(255, 0, 0),
                thickness=2,
                lineType=cv.LINE_AA,
            )

        imu.push(debug_img)

        # Only refresh the window every 10th call to keep visualization fast.
        show_time += 1
        if show_time % 10 != 1:
            return

        imu.show(debug_img, title="Next column point", wait=False)
        # time.sleep(0.003)

    @log_calls(level=logging.DEBUG, include_return=True)
    def _astar(
        self,
        img: np.ndarray,
        start: tuple[int, int],
        goals: list[tuple[int, int]],
        direction: str,
    ) -> Optional[List[Point]]:
        """
        Find the best path between the start point and one of the goal points on the image

        Returns:
            the path as a list of points in original-image coordinates, or
            None if no goals were given, the region is out of bounds, or no
            path was found
        """

        if not goals:
            return None

        if self._scale != 1.0:
            img = cv.resize(img, None, fx=self._scale, fy=self._scale)
            start = (int(start[0] * self._scale), int(start[1] * self._scale))
            goals = [(int(g[0] * self._scale), int(g[1] * self._scale)) for g in goals]

        # calculate bounding box with margin
        all_points = goals + [start]
        xs = [p[0] for p in all_points]
        ys = [p[1] for p in all_points]

        margin = 30
        top_left = (max(0, min(xs) - margin), max(0, min(ys) - margin))
        bottom_right = (
            min(img.shape[1], max(xs) + margin),
            min(img.shape[0], max(ys) + margin),
        )

        # check bounds
        if (
            top_left[0] >= bottom_right[0]
            or top_left[1] >= bottom_right[1]
            or top_left[0] >= img.shape[1]
            or top_left[1] >= img.shape[0]
        ):
            return None

        # transform coordinates to cropped image
        start_local = (start[0] - top_left[0], start[1] - top_left[1])
        goals_local = [(g[0] - top_left[0], g[1] - top_left[1]) for g in goals]

        cropped = img[top_left[1] : bottom_right[1], top_left[0] : bottom_right[0]]

        if cropped.size == 0:
            return None

        path = rust_astar(cropped, start_local, goals_local, direction)

        if path is None:
            return None

        if self._scale != 1.0:
            path = [(int(p[0] / self._scale), int(p[1] / self._scale)) for p in path]
            top_left = (int(top_left[0] / self._scale), int(top_left[1] / self._scale))

        return [(p[0] + top_left[0], p[1] + top_left[1]) for p in path]

Detects table grid intersections using morphological filtering and template matching.

This detector implements a multi-stage pipeline:

  1. Binarization: Sauvola adaptive thresholding to handle varying lighting
  2. Morphological operations: Dilation to connect broken rule segments
  3. Cross-kernel matching: Template matching with a cross-shaped kernel to find rule intersections where horizontal and vertical lines meet
  4. Grid growing: Iterative point detection starting from a known seed point

The cross-kernel is designed to match the specific geometry of your table rules. It should be sized so that after morphology, it aligns with actual corner shapes.

Tuning Guidelines

  • kernel_size: Increase if you need more selectivity (fewer false positives)
  • cross_width/height: Should match rule thickness after morphology
  • morph_size: Increase to connect more broken lines, but this thickens rules
  • sauvola_k: Increase to threshold more aggressively (remove noise)
  • search_region: Increase for documents with more warping/distortion
  • distance_penalty: Increase to prefer corners closer to expected positions

Visual Debugging

Set visual=True in methods to see intermediate results and tune parameters.

GridDetector( kernel_size: int = 21, cross_width: int = 6, cross_height: Optional[int] = None, morph_size: Optional[int] = None, sauvola_k: float = 0.04, sauvola_window: int = 15, scale: float = 1.0, search_region: int = 40, distance_penalty: float = 0.4, skip_astar_threshold: float = 0.2, min_rows: int = 5, grow_threshold: float = 0.3, look_distance: int = 4, cuts: int = 3, cut_fraction: float = 0.5)
149    def __init__(
150        self,
151        kernel_size: int = 21,
152        cross_width: int = 6,
153        cross_height: Optional[int] = None,
154        morph_size: Optional[int] = None,
155        sauvola_k: float = 0.04,
156        sauvola_window: int = 15,
157        scale: float = 1.0,
158        search_region: int = 40,
159        distance_penalty: float = 0.4,
160        skip_astar_threshold: float = 0.2,
161        min_rows: int = 5,
162        grow_threshold: float = 0.3,
163        look_distance: int = 4,
164        cuts: int = 3,
165        cut_fraction: float = 0.5,
166    ):
167        """
168        Args:
169            kernel_size (int): the size of the cross kernel;
170                a larger kernel size often means that more penalty is applied, often leading
171                to more sparse results
172            cross_width (int): the width of one of the edges in the cross filter, should be
173                roughly equal to the width of the rules in the image after morphology is applied
174            cross_height (int | None): useful if the horizontal rules and vertical rules
175                have different sizes
176            morph_size (int | None): the size of the morphology operators that are applied before
177                the cross kernel. 'bridges the gaps' of broken-up lines
178            sauvola_k (float): threshold parameter for sauvola thresholding
179            sauvola_window (int): window_size parameter for sauvola thresholding
180            scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
181            search_region (int): area in which to search for a new max value in `find_nearest` etc.
182            distance_penalty (float): how much the point finding algorithm penalizes points that are farther away, in the range [0, 1]
183            skip_astar_threshold (float): minimum confidence score during table growing based on heuristic jump on which to skip A* pathfinding
184            min_rows (int): minimum number of rows to find before stopping the table finding algorithm
185            grow_threshold (float): the threshold for accepting a new point when growing the table
186            look_distance (int): how many points away to look when calculating the median slope
187            cuts (int): The number of cuts (large deletions) to perform in the grid during table growing
188            cut_fraction (float): The portion of the already-chosen corner points to delete during cutting
189        """
190        self._validate_parameters(
191            kernel_size,
192            cross_width,
193            cross_height,
194            morph_size,
195            search_region,
196            sauvola_k,
197            sauvola_window,
198            distance_penalty,
199            skip_astar_threshold,
200            cuts,
201            cut_fraction,
202        )
203
204        self._kernel_size = kernel_size
205        self._cross_width = cross_width
206        self._cross_height = cross_width if cross_height is None else cross_height
207        self._morph_size = morph_size if morph_size is not None else cross_width
208        self._search_region = search_region
209        self._sauvola_k = sauvola_k
210        self._sauvola_window = sauvola_window
211        self._distance_penalty = distance_penalty
212        self._scale = scale
213        self._skip_astar_threshold = skip_astar_threshold
214        self._min_rows = min_rows
215        self._grow_threshold = grow_threshold
216        self._look_distance = look_distance
217        self._cuts = cuts
218        self._cut_fraction = cut_fraction
219
220        self._cross_kernel = self._create_cross_kernel()
Arguments:
  • kernel_size (int): the size of the cross kernel; a larger kernel size often means that more penalty is applied, often leading to more sparse results
  • cross_width (int): the width of one of the edges in the cross filter, should be roughly equal to the width of the rules in the image after morphology is applied
  • cross_height (int | None): useful if the horizontal rules and vertical rules have different sizes
  • morph_size (int | None): the size of the morphology operators that are applied before the cross kernel. 'bridges the gaps' of broken-up lines
  • sauvola_k (float): threshold parameter for sauvola thresholding
  • sauvola_window (int): window_size parameter for sauvola thresholding
  • scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
  • search_region (int): area in which to search for a new max value in find_nearest etc.
  • distance_penalty (float): how much the point finding algorithm penalizes points that are farther away, in the range [0, 1]
  • skip_astar_threshold (float): minimum confidence score during table growing based on heuristic jump on which to skip A* pathfinding
  • min_rows (int): minimum number of rows to find before stopping the table finding algorithm
  • grow_threshold (float): the threshold for accepting a new point when growing the table
  • look_distance (int): how many points away to look when calculating the median slope
  • cuts (int): The number of cuts (large deletions) to perform in the grid during table growing
  • cut_fraction (float): The portion of the already-chosen corner points to delete during cutting
def apply( self, img: Union[cv2.Mat, numpy.ndarray], visual: bool = False) -> Union[cv2.Mat, numpy.ndarray]:
331    def apply(self, img: MatLike, visual: bool = False) -> MatLike:
332        """
333        Apply the grid detection filter to the input image.
334
335        Args:
336            img (MatLike): the input image
337            visual (bool): whether to show intermediate steps
338
339        Returns:
340            MatLike: the filtered image, with high values (whiter pixels) at intersections of horizontal and vertical rules
341        """
342
343        if img is None or img.size == 0:
344            raise ValueError("Input image is empty or None")
345
346        binary = imu.sauvola(img, k=self._sauvola_k, window_size=self._sauvola_window)
347
348        if visual:
349            imu.show(binary, title="thresholded")
350
351        binary = self._apply_morphology(binary)
352
353        if visual:
354            imu.show(binary, title="dilated")
355
356        filtered = self._apply_cross_matching(binary)
357
358        return filtered

Apply the grid detection filter to the input image.

Arguments:
  • img (MatLike): the input image
  • visual (bool): whether to show intermediate steps
Returns:

MatLike: the filtered image, with high values (whiter pixels) at intersections of horizontal and vertical rules

@log_calls(level=logging.DEBUG, include_return=True)
def find_nearest( self, filtered: Union[cv2.Mat, numpy.ndarray], point: Tuple[int, int], region: Optional[int] = None) -> Tuple[Tuple[int, int], float]:
360    @log_calls(level=logging.DEBUG, include_return=True)
361    def find_nearest(
362        self, filtered: MatLike, point: Point, region: Optional[int] = None
363    ) -> Tuple[Point, float]:
364        """
365        Find the nearest 'corner match' in the image, along with its score [0,1]
366
367        Args:
368            filtered (MatLike): the filtered image (obtained through `apply`)
369            point (tuple[int, int]): the approximate target point (x, y)
370            region (None | int): alternative value for search region,
371                overwriting the `__init__` parameter `region`
372        """
373
374        if filtered is None or filtered.size == 0:
375            raise ValueError("Filtered image is empty or None")
376
377        region_size = region if region is not None else self._search_region
378        x, y = point
379
380        # Calculate crop boundaries
381        crop_x = max(0, x - region_size // 2)
382        crop_y = max(0, y - region_size // 2)
383        crop_width = min(region_size, filtered.shape[1] - crop_x)
384        crop_height = min(region_size, filtered.shape[0] - crop_y)
385
386        # Handle edge cases
387        if crop_width <= 0 or crop_height <= 0:
388            logger.warning(f"Point {point} is outside image bounds")
389            return point, 0.0
390
391        cropped = filtered[crop_y : crop_y + crop_height, crop_x : crop_x + crop_width]
392
393        if cropped.size == 0:
394            return point, 0.0
395
396        # Always apply Gaussian weighting by extending crop if needed
397        if cropped.shape[0] == region_size and cropped.shape[1] == region_size:
398            # Perfect size - apply weights directly
399            weights = self._create_gaussian_weights(region_size)
400            weighted = cropped.astype(np.float32) * weights
401        else:
402            # Extend crop to match region_size, apply weights, then restore
403            extended = np.zeros((region_size, region_size), dtype=cropped.dtype)
404
405            # Calculate offset to center the cropped region in extended array
406            offset_y = (region_size - cropped.shape[0]) // 2
407            offset_x = (region_size - cropped.shape[1]) // 2
408
409            # Place cropped region in center of extended array
410            extended[
411                offset_y : offset_y + cropped.shape[0],
412                offset_x : offset_x + cropped.shape[1],
413            ] = cropped
414
415            # Apply Gaussian weights to extended array
416            weights = self._create_gaussian_weights(region_size)
417            weighted_extended = extended.astype(np.float32) * weights
418
419            # Extract the original region back out
420            weighted = weighted_extended[
421                offset_y : offset_y + cropped.shape[0],
422                offset_x : offset_x + cropped.shape[1],
423            ]
424
425        best_idx = np.argmax(weighted)
426        best_y, best_x = np.unravel_index(best_idx, cropped.shape)
427
428        result_point = (
429            int(crop_x + best_x),
430            int(crop_y + best_y),
431        )
432        result_confidence = float(weighted[best_y, best_x]) / 255.0
433
434        return result_point, result_confidence

Find the nearest 'corner match' in the image, along with its score [0,1]

Arguments:
  • filtered (MatLike): the filtered image (obtained through apply)
  • point (tuple[int, int]): the approximate target point (x, y)
  • region (None | int): alternative value for search region, overwriting the __init__ parameter region
def find_table_points( self, img: Union[cv2.Mat, numpy.ndarray, os.PathLike[str]], top_row: list[typing.Optional[typing.Tuple[int, int]]], cell_widths: list[int], cell_heights: list[int] | int, visual: bool = False, window: str = 'taulu', goals_width: Optional[int] = None, filtered: Union[cv2.Mat, numpy.ndarray, os.PathLike[str], NoneType] = None, smooth: bool = False) -> TableGrid:
436    def find_table_points(
437        self,
438        img: MatLike | PathLike[str],
439        top_row: list[Point | None],
440        cell_widths: list[int],
441        cell_heights: list[int] | int,
442        visual: bool = False,
443        window: str = WINDOW,
444        goals_width: Optional[int] = None,
445        filtered: Optional[MatLike | PathLike[str]] = None,
446        smooth: bool = False,
447    ) -> "TableGrid":
448        """
449        Parse the image to a `TableGrid` structure that holds all of the
450        intersections between horizontal and vertical rules, starting near the `top_row` points
451
452        Args:
453            img (MatLike): the input image of a table
454            top_row (list[tuple[int, int]]): initial guess at topmost row of corner points (from template matching)
455            cell_widths (list[int]): the expected widths of the cells (based on a header template)
456            cell_heights (list[int]): the expected height of the rows of data.
457                The last value from this list is used until the image has no more vertical space.
458            visual (bool): whether to show intermediate steps
459            window (str): the name of the OpenCV window to use for visualization
460            goals_width (int | None): the width of the goal region when searching for the next point.
461                If None, defaults to 1.5 * search_region
462            filtered (MatLike | PathLike[str] | None): if provided, this image is used instead of
463                calculating the filtered image from scratch
464            smooth (bool): if True, smooth the grid after detection, using local heuristics
465
466        Returns:
467            a TableGrid object
468        """
469
470        if goals_width is None:
471            goals_width = self._search_region * 3 // 2
472
473        if not cell_widths:
474            raise ValueError("cell_widths must contain at least one value")
475
476        if not isinstance(img, np.ndarray):
477            img = cv.imread(os.fspath(img))
478
479        if filtered is None:
480            filtered = self.apply(img, visual)
481        else:
482            if not isinstance(filtered, np.ndarray):
483                filtered = cv.imread(os.fspath(filtered))
484
485            filtered = ensure_gray(filtered)
486
487        if visual:
488            imu.show(filtered, window=window)
489
490        if isinstance(cell_heights, int):
491            cell_heights = [cell_heights]
492
493        for i in range(len(top_row)):
494            if top_row[i] is None:
495                continue
496
497            adjusted, confidence = self.find_nearest(
498                filtered, top_row[i], int(self._search_region * 2)
499            )
500
501            if confidence < 0.15:
502                top_row[i] = None
503            else:
504                top_row[i] = adjusted
505
506        if not any(top_row):
507            logger.error("No good starting candidates given")
508
509        # resize all parameters according to scale
510        img = cv.resize(img, None, fx=self._scale, fy=self._scale)
511
512        if visual:
513            imu.push(img)
514
515        filtered = cv.resize(filtered, None, fx=self._scale, fy=self._scale)
516        cell_widths = [int(w * self._scale) for w in cell_widths]
517        cell_heights = [int(h * self._scale) for h in cell_heights]
518        top_row = [
519            (int(p[0] * self._scale), int(p[1] * self._scale))
520            if p is not None
521            else None
522            for p in top_row
523        ]
524        search_region = int(self._search_region * self._scale)
525
526        img_gray = ensure_gray(img)
527        filtered_gray = ensure_gray(filtered)
528
529        table_grower = TableGrower(
530            filtered_gray,
531            cell_widths,  # pyright: ignore
532            cell_heights,  # pyright: ignore
533            top_row,
534            search_region,
535            self._distance_penalty,
536            self._look_distance,
537            self._grow_threshold,
538            self._skip_astar_threshold,
539            self._min_rows,
540            self._cuts,
541            self._cut_fraction,
542        )
543
544        def show_grower_progress(wait: bool = False):
545            img_orig = np.copy(img)
546            corners = table_grower.get_all_corners()
547            for y in range(len(corners)):
548                for x in range(len(corners[y])):
549                    if corners[y][x] is not None:
550                        img_orig = imu.draw_points(
551                            img_orig,
552                            [corners[y][x]],
553                            color=(0, 0, 255),
554                            thickness=30,
555                        )
556
557            edge = table_grower.get_edge_points()
558
559            for point, score in edge:
560                color = (100, int(clamp(score * 255, 0, 255)), 100)
561                imu.draw_point(img_orig, point, color=color, thickness=20)
562
563            imu.show(img_orig, wait=wait)
564
565        if visual:
566            threshold = self._grow_threshold
567            look_distance = self._look_distance
568
569            # python implementation of rust loops, for visualization purposes
570            # note this is a LOT slower
571            while table_grower.grow_point(img_gray, filtered_gray) is not None:
572                show_grower_progress()
573
574            show_grower_progress(True)
575
576            original_threshold = threshold
577
578            loops_without_change = 0
579
580            while not table_grower.is_table_complete():
581                loops_without_change += 1
582
583                if loops_without_change > 50:
584                    break
585
586                if table_grower.extrapolate_one(img_gray, filtered_gray) is not None:
587                    show_grower_progress()
588
589                    loops_without_change = 0
590
591                    grown = False
592                    while table_grower.grow_point(img_gray, filtered_gray) is not None:
593                        show_grower_progress()
594                        grown = True
595                        threshold = min(0.1 + 0.9 * threshold, original_threshold)
596                        table_grower.set_threshold(threshold)
597
598                    if not grown:
599                        threshold *= 0.9
600                        table_grower.set_threshold(threshold)
601
602                else:
603                    threshold *= 0.9
604                    table_grower.set_threshold(threshold)
605
606                    if table_grower.grow_point(img_gray, filtered_gray) is not None:
607                        show_grower_progress()
608                        loops_without_change = 0
609
610        else:
611            table_grower.grow_table(img_gray, filtered_gray)
612
613        if smooth:
614            table_grower.smooth_grid()
615        corners = table_grower.get_all_corners()
616        logger.info(
617            f"Table growth complete, found {len(corners)} rows and {len(corners[0])} columns"
618        )
619        # rescale corners back to original size
620        if self._scale != 1.0:
621            for y in range(len(corners)):
622                for x in range(len(corners[y])):
623                    if corners[y][x] is not None:
624                        corners[y][x] = (
625                            int(corners[y][x][0] / self._scale),  # pyright:ignore
626                            int(corners[y][x][1] / self._scale),  # pyright:ignore
627                        )
628
629        return TableGrid(corners)  # pyright: ignore

Parse the image to a TableGrid structure that holds all of the intersections between horizontal and vertical rules, starting near the top_row points

Arguments:
  • img (MatLike): the input image of a table
  • top_row (list[tuple[int, int]]): initial guess at topmost row of corner points (from template matching)
  • cell_widths (list[int]): the expected widths of the cells (based on a header template)
  • cell_heights (list[int]): the expected height of the rows of data. The last value from this list is used until the image has no more vertical space.
  • visual (bool): whether to show intermediate steps
  • window (str): the name of the OpenCV window to use for visualization
  • goals_width (int | None): the width of the goal region when searching for the next point. If None, defaults to 1.5 * search_region
  • filtered (MatLike | PathLike[str] | None): if provided, this image is used instead of calculating the filtered image from scratch
  • smooth (bool): if True, smooth the grid after detection, using local heuristics
Returns:

a TableGrid object

class TableGrid(taulu.TableIndexer):
 761class TableGrid(TableIndexer):
 762    """
 763    A data class that allows segmenting the image into cells
 764    """
 765
 766    _right_offset: int | None = None
 767
 768    def __init__(self, points: list[list[Point]], right_offset: Optional[int] = None):
 769        """
 770        Args:
 771            points: a 2D list of intersections between hor. and vert. rules
 772        """
 773        self._points = points
 774        self._right_offset = right_offset
 775
 776    @property
 777    def points(self) -> list[list[Point]]:
 778        return self._points
 779
 780    def row(self, i: int) -> list[Point]:
 781        assert 0 <= i and i < len(self._points)
 782        return self._points[i]
 783
 784    @property
 785    def cols(self) -> int:
 786        if self._right_offset is not None:
 787            return len(self.row(0)) - 2
 788        else:
 789            return len(self.row(0)) - 1
 790
 791    @property
 792    def rows(self) -> int:
 793        return len(self._points) - 1
 794
 795    @property
 796    def right_offset(self) -> int | None:
 797        return self._right_offset
 798
 799    @staticmethod
 800    def from_split(
 801        split_grids: Split["TableGrid"], offsets: Split[Point]
 802    ) -> "TableGrid":
 803        """
 804        Convert two ``TableGrid`` objects into one, that is able to segment the original (non-cropped) image
 805        Args:
 806            split_grids (Split[TableGrid]): a Split of TableGrid objects of the left and right part of the table
 807            offsets (Split[tuple[int, int]]): a Split of the offsets in the image where the crop happened
 808        """
 809
 810        def offset_points(points, offset):
 811            return [
 812                [
 813                    (p[0] + offset[0], p[1] + offset[1]) if p is not None else None
 814                    for p in row
 815                ]
 816                for row in points
 817            ]
 818
 819        split_points = split_grids.apply(
 820            lambda grid, offset: offset_points(grid.points, offset), offsets
 821        )
 822        points = []
 823        rows = min(split_grids.left.rows, split_grids.right.rows)
 824        for row in range(rows + 1):
 825            left_row = split_points.left[row]
 826            right_row = split_points.right[row]
 827
 828            # Skip rows that contain None values
 829            if any(p is None for p in left_row) or any(p is None for p in right_row):
 830                logger.warning(
 831                    f"Skipping row {row} in from_split due to incomplete grid data"
 832                )
 833                continue
 834
 835            row_points = []
 836            row_points.extend(left_row)
 837            row_points.extend(right_row)
 838            points.append(row_points)
 839        if not points:
 840            raise ValueError(
 841                "Cannot create TableGrid from split: no complete rows found in both grids"
 842            )
 843        table_grid = TableGrid(points, split_grids.left.cols)
 844        return table_grid
 845
 846    def save(self, path: str | Path):
 847        """
 848        Persist the table grid to a JSON file.
 849
 850        Saves the grid corner points and right_offset (for split tables) to disk,
 851        allowing the grid to be reloaded later without re-running detection.
 852
 853        Args:
 854            path: Path to save the JSON file.
 855
 856        Example:
 857            >>> grid = taulu.segment_table("table.png")
 858            >>> grid.save("grid.json")
 859        """
 860        with open(path, "w") as f:
 861            json.dump({"points": self.points, "right_offset": self._right_offset}, f)
 862
 863    @staticmethod
 864    def from_saved(path: str | Path) -> "TableGrid":
 865        """
 866        Load a previously saved TableGrid from a JSON file.
 867
 868        Args:
 869            path: Path to the JSON file created by `save()`.
 870
 871        Returns:
 872            A TableGrid instance with the saved corner points.
 873
 874        Example:
 875            >>> grid = TableGrid.from_saved("grid.json")
 876            >>> cell = grid.crop_cell(image, (0, 0))
 877        """
 878        with open(path, "r") as f:
 879            points = json.load(f)
 880            right_offset = points.get("right_offset", None)
 881            points = [[(p[0], p[1]) for p in pointes] for pointes in points["points"]]
 882            return TableGrid(points, right_offset)
 883
 884    def add_left_col(self, width: int):
 885        for row in self._points:
 886            first = row[0]
 887            new_first = (first[0] - width, first[1])
 888            row.insert(0, new_first)
 889
 890    def add_top_row(self, height: int):
 891        new_row = []
 892        for point in self._points[0]:
 893            new_row.append((point[0], point[1] - height))
 894
 895        self.points.insert(0, new_row)
 896
 897    def _surrounds(self, rect: list[Point], point: tuple[float, float]) -> bool:
 898        """point: x, y"""
 899        lt, rt, rb, lb = rect
 900        x, y = point
 901
 902        top = _Rule(*lt, *rt)
 903        if top._y_at_x(x) > y:
 904            return False
 905
 906        right = _Rule(*rt, *rb)
 907        if right._x_at_y(y) < x:
 908            return False
 909
 910        bottom = _Rule(*lb, *rb)
 911        if bottom._y_at_x(x) < y:
 912            return False
 913
 914        left = _Rule(*lb, *lt)
 915        if left._x_at_y(y) > x:
 916            return False
 917
 918        return True
 919
 920    def cell(self, point: tuple[float, float]) -> tuple[int, int]:
 921        """
 922        Get the cell indices (row, col) containing a pixel coordinate.
 923
 924        Searches through all cells to find which one contains the given point,
 925        accounting for the non-rectangular (perspective-warped) cell boundaries.
 926
 927        Args:
 928            point: Pixel coordinates (x, y) in the original image.
 929
 930        Returns:
 931            (row, col) indices of the containing cell, or (-1, -1) if the point
 932            is outside all cells.
 933
 934        Example:
 935            >>> grid = taulu.segment_table("table.png")
 936            >>> row, col = grid.cell((150, 200))
 937            >>> if row >= 0:
 938            ...     print(f"Point is in cell ({row}, {col})")
 939        """
 940        for r in range(len(self._points) - 1):
 941            offset = 0
 942            for c in range(len(self.row(0)) - 1):
 943                if self._right_offset is not None and c == self._right_offset:
 944                    offset = -1
 945                    continue
 946
 947                if self._surrounds(
 948                    [
 949                        self._points[r][c],
 950                        self._points[r][c + 1],
 951                        self._points[r + 1][c + 1],
 952                        self._points[r + 1][c],
 953                    ],
 954                    point,
 955                ):
 956                    return (r, c + offset)
 957
 958        return (-1, -1)
 959
 960    def cell_polygon(self, cell: tuple[int, int]) -> tuple[Point, Point, Point, Point]:
 961        """
 962        Get the four corner coordinates of a cell.
 963
 964        Returns the corners in clockwise order starting from top-left,
 965        suitable for use with OpenCV drawing functions.
 966
 967        Args:
 968            cell: Cell indices as (row, col).
 969
 970        Returns:
 971            Four corner points as ((x,y), (x,y), (x,y), (x,y)) in order:
 972            top-left, top-right, bottom-right, bottom-left.
 973
 974        Raises:
 975            TauluException: If row or col indices are out of bounds.
 976
 977        Example:
 978            >>> lt, rt, rb, lb = grid.cell_polygon((0, 0))
 979            >>> pts = np.array([lt, rt, rb, lb], dtype=np.int32)
 980            >>> cv2.polylines(image, [pts], True, (0, 255, 0), 2)
 981        """
 982        r, c = cell
 983
 984        self._check_row_idx(r)
 985        self._check_col_idx(c)
 986
 987        if self._right_offset is not None and c >= self._right_offset:
 988            c = c + 1
 989
 990        return (
 991            self._points[r][c],
 992            self._points[r][c + 1],
 993            self._points[r + 1][c + 1],
 994            self._points[r + 1][c],
 995        )
 996
 997    def region(
 998        self, start: tuple[int, int], end: tuple[int, int]
 999    ) -> tuple[Point, Point, Point, Point]:
1000        """
1001        Get the bounding polygon for a rectangular region of cells.
1002
1003        Returns the four corner coordinates that enclose all cells from
1004        start to end (inclusive).
1005
1006        Args:
1007            start: Top-left cell as (row, col).
1008            end: Bottom-right cell as (row, col).
1009
1010        Returns:
1011            Four corner points (lt, rt, rb, lb) enclosing the region,
1012            each as (x, y) pixel coordinates.
1013
1014        Raises:
1015            TauluException: If any row or col indices are out of bounds.
1016
1017        Example:
1018            >>> # Get bounding box for cells (0,0) through (2,3)
1019            >>> lt, rt, rb, lb = grid.region((0, 0), (2, 3))
1020        """
1021        r0, c0 = start
1022        r1, c1 = end
1023
1024        self._check_row_idx(r0)
1025        self._check_row_idx(r1)
1026        self._check_col_idx(c0)
1027        self._check_col_idx(c1)
1028
1029        if self._right_offset is not None and c0 >= self._right_offset:
1030            c0 = c0 + 1
1031
1032        if self._right_offset is not None and c1 >= self._right_offset:
1033            c1 = c1 + 1
1034
1035        lt = self._points[r0][c0]
1036        rt = self._points[r0][c1 + 1]
1037        rb = self._points[r1 + 1][c1 + 1]
1038        lb = self._points[r1 + 1][c0]
1039
1040        return lt, rt, rb, lb
1041
1042    def visualize_points(self, img: MatLike):
1043        """
1044        Draw the detected table points on the image for visual verification
1045        """
1046        import colorsys
1047
1048        def clr(index, total_steps):
1049            hue = index / total_steps  # Normalized hue between 0 and 1
1050            r, g, b = colorsys.hsv_to_rgb(hue, 1.0, 1.0)
1051            return int(r * 255), int(g * 255), int(b * 255)
1052
1053        for i, row in enumerate(self._points):
1054            for p in row:
1055                cv.circle(img, p, 4, clr(i, len(self._points)), -1)
1056
1057        imu.show(img)
1058
1059    def text_regions(
1060        self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -3
1061    ) -> list[tuple[tuple[int, int], tuple[int, int]]]:
1062        def vertical_rule_crop(row: int, col: int):
1063            self._check_col_idx(col)
1064            self._check_row_idx(row)
1065
1066            if self._right_offset is not None and col >= self._right_offset:
1067                col = col + 1
1068
1069            top = self._points[row][col]
1070            bottom = self._points[row + 1][col]
1071
1072            left = int(min(top[0], bottom[0]))
1073            right = int(max(top[0], bottom[0]))
1074
1075            return img[
1076                int(top[1]) - margin_y : int(bottom[1]) + margin_y,
1077                left - margin_x : right + margin_x,
1078            ]
1079
1080        result = []
1081
1082        start = None
1083        for col in range(self.cols):
1084            crop = vertical_rule_crop(row, col)
1085            text_over_score = imu.text_presence_score(crop)
1086            text_over = text_over_score > -0.10
1087
1088            if not text_over:
1089                if start is not None:
1090                    result.append(((row, start), (row, col - 1)))
1091                start = col
1092
1093        if start is not None:
1094            result.append(((row, start), (row, self.cols - 1)))
1095
1096        return result

A data class that allows segmenting the image into cells

TableGrid( points: list[list[typing.Tuple[int, int]]], right_offset: Optional[int] = None)
768    def __init__(self, points: list[list[Point]], right_offset: Optional[int] = None):
769        """
770        Args:
771            points: a 2D list of intersections between hor. and vert. rules
772        """
773        self._points = points
774        self._right_offset = right_offset
Arguments:
  • points: a 2D list of intersections between hor. and vert. rules
points: list[list[typing.Tuple[int, int]]]
776    @property
777    def points(self) -> list[list[Point]]:
778        return self._points
def row(self, i: int) -> list[typing.Tuple[int, int]]:
def row(self, i: int) -> list[Point]:
    """Return the intersection points of the i-th horizontal rule."""
    assert 0 <= i < len(self._points)
    return self._points[i]
cols: int
@property
def cols(self) -> int:
    """Number of cell columns (one extra point column is skipped for split tables)."""
    rules = len(self.row(0))
    return rules - 2 if self._right_offset is not None else rules - 1
rows: int
@property
def rows(self) -> int:
    """Number of cell rows (one fewer than the number of horizontal rules)."""
    return len(self._points) - 1
right_offset: int | None
@property
def right_offset(self) -> int | None:
    """Column offset of the right part (None when not built from a split)."""
    return self._right_offset
@staticmethod
def from_split( split_grids: Split[TableGrid], offsets: Split[typing.Tuple[int, int]]) -> TableGrid:
@staticmethod
def from_split(
    split_grids: Split["TableGrid"], offsets: Split[Point]
) -> "TableGrid":
    """
    Convert two ``TableGrid`` objects into one, that is able to segment the original (non-cropped) image
    Args:
        split_grids (Split[TableGrid]): a Split of TableGrid objects of the left and right part of the table
        offsets (Split[tuple[int, int]]): a Split of the offsets in the image where the crop happened
    """

    def shift(points, offset):
        # translate every (non-None) point by the crop offset
        dx, dy = offset[0], offset[1]
        return [
            [None if p is None else (p[0] + dx, p[1] + dy) for p in grid_row]
            for grid_row in points
        ]

    shifted = split_grids.apply(lambda grid, off: shift(grid.points, off), offsets)

    merged = []
    common_rows = min(split_grids.left.rows, split_grids.right.rows)
    for row in range(common_rows + 1):
        left_row = shifted.left[row]
        right_row = shifted.right[row]

        # Skip rows that contain None values
        if any(p is None for p in left_row) or any(p is None for p in right_row):
            logger.warning(
                f"Skipping row {row} in from_split due to incomplete grid data"
            )
            continue

        merged.append(list(left_row) + list(right_row))

    if not merged:
        raise ValueError(
            "Cannot create TableGrid from split: no complete rows found in both grids"
        )

    return TableGrid(merged, split_grids.left.cols)

Convert two TableGrid objects into a single grid that is able to segment the original (non-cropped) image

Arguments:
  • split_grids (Split[TableGrid]): a Split of TableGrid objects of the left and right part of the table
  • offsets (Split[tuple[int, int]]): a Split of the offsets in the image where the crop happened
def save(self, path: str | pathlib._local.Path):
846    def save(self, path: str | Path):
847        """
848        Persist the table grid to a JSON file.
849
850        Saves the grid corner points and right_offset (for split tables) to disk,
851        allowing the grid to be reloaded later without re-running detection.
852
853        Args:
854            path: Path to save the JSON file.
855
856        Example:
857            >>> grid = taulu.segment_table("table.png")
858            >>> grid.save("grid.json")
859        """
860        with open(path, "w") as f:
861            json.dump({"points": self.points, "right_offset": self._right_offset}, f)

Persist the table grid to a JSON file.

Saves the grid corner points and right_offset (for split tables) to disk, allowing the grid to be reloaded later without re-running detection.

Arguments:
  • path: Path to save the JSON file.
Example:
>>> grid = taulu.segment_table("table.png")
>>> grid.save("grid.json")
@staticmethod
def from_saved(path: str | pathlib._local.Path) -> TableGrid:
@staticmethod
def from_saved(path: str | Path) -> "TableGrid":
    """
    Load a previously saved TableGrid from a JSON file.

    Args:
        path: Path to the JSON file created by `save()`.

    Returns:
        A TableGrid instance with the saved corner points.

    Example:
        >>> grid = TableGrid.from_saved("grid.json")
        >>> cell = grid.crop_cell(image, (0, 0))
    """
    with open(path, "r") as f:
        data = json.load(f)
    right_offset = data.get("right_offset", None)
    grid_points = [[(p[0], p[1]) for p in grid_row] for grid_row in data["points"]]
    return TableGrid(grid_points, right_offset)

Load a previously saved TableGrid from a JSON file.

Arguments:
  • path: Path to the JSON file created by save().
Returns:

A TableGrid instance with the saved corner points.

Example:
>>> grid = TableGrid.from_saved("grid.json")
>>> cell = grid.crop_cell(image, (0, 0))
def add_left_col(self, width: int):
def add_left_col(self, width: int):
    """Prepend a point column `width` pixels to the left of the current first column."""
    for grid_row in self._points:
        x, y = grid_row[0]
        grid_row.insert(0, (x - width, y))
def add_top_row(self, height: int):
def add_top_row(self, height: int):
    """
    Prepend a point row `height` pixels above the current first row.

    Args:
        height: vertical distance (in pixels) between the new row and the
            current top row
    """
    new_row = [(x, y - height) for (x, y) in self._points[0]]
    # mutate _points directly, consistent with add_left_col (the original
    # inserted through the `points` property while reading `_points`)
    self._points.insert(0, new_row)
def cell(self, point: tuple[float, float]) -> tuple[int, int]:
def cell(self, point: tuple[float, float]) -> tuple[int, int]:
    """
    Get the cell indices (row, col) containing a pixel coordinate.

    Searches through all cells to find which one contains the given point,
    accounting for the non-rectangular (perspective-warped) cell boundaries.

    Args:
        point: Pixel coordinates (x, y) in the original image.

    Returns:
        (row, col) indices of the containing cell, or (-1, -1) if the point
        is outside all cells.

    Example:
        >>> grid = taulu.segment_table("table.png")
        >>> row, col = grid.cell((150, 200))
        >>> if row >= 0:
        ...     print(f"Point is in cell ({row}, {col})")
    """
    n_rows = len(self._points) - 1
    n_cols = len(self.row(0)) - 1
    for r in range(n_rows):
        offset = 0
        for c in range(n_cols):
            # the extra point column of a split table is not a real cell
            if self._right_offset is not None and c == self._right_offset:
                offset = -1
                continue

            quad = [
                self._points[r][c],
                self._points[r][c + 1],
                self._points[r + 1][c + 1],
                self._points[r + 1][c],
            ]
            if self._surrounds(quad, point):
                return (r, c + offset)

    return (-1, -1)

Get the cell indices (row, col) containing a pixel coordinate.

Searches through all cells to find which one contains the given point, accounting for the non-rectangular (perspective-warped) cell boundaries.

Arguments:
  • point: Pixel coordinates (x, y) in the original image.
Returns:

(row, col) indices of the containing cell, or (-1, -1) if the point is outside all cells.

Example:
>>> grid = taulu.segment_table("table.png")
>>> row, col = grid.cell((150, 200))
>>> if row >= 0:
...     print(f"Point is in cell ({row}, {col})")
def cell_polygon( self, cell: tuple[int, int]) -> tuple[typing.Tuple[int, int], typing.Tuple[int, int], typing.Tuple[int, int], typing.Tuple[int, int]]:
def cell_polygon(self, cell: tuple[int, int]) -> tuple[Point, Point, Point, Point]:
    """
    Get the four corner coordinates of a cell.

    Returns the corners in clockwise order starting from top-left,
    suitable for use with OpenCV drawing functions.

    Args:
        cell: Cell indices as (row, col).

    Returns:
        Four corner points as ((x,y), (x,y), (x,y), (x,y)) in order:
        top-left, top-right, bottom-right, bottom-left.

    Raises:
        TauluException: If row or col indices are out of bounds.

    Example:
        >>> lt, rt, rb, lb = grid.cell_polygon((0, 0))
        >>> pts = np.array([lt, rt, rb, lb], dtype=np.int32)
        >>> cv2.polylines(image, [pts], True, (0, 255, 0), 2)
    """
    r, c = cell

    self._check_row_idx(r)
    self._check_col_idx(c)

    # skip the extra point column of a split table
    if self._right_offset is not None and c >= self._right_offset:
        c += 1

    top, bottom = self._points[r], self._points[r + 1]
    return (top[c], top[c + 1], bottom[c + 1], bottom[c])

Get the four corner coordinates of a cell.

Returns the corners in clockwise order starting from top-left, suitable for use with OpenCV drawing functions.

Arguments:
  • cell: Cell indices as (row, col).
Returns:

Four corner points as ((x,y), (x,y), (x,y), (x,y)) in order: top-left, top-right, bottom-right, bottom-left.

Raises:
  • TauluException: If row or col indices are out of bounds.
Example:
>>> lt, rt, rb, lb = grid.cell_polygon((0, 0))
>>> pts = np.array([lt, rt, rb, lb], dtype=np.int32)
>>> cv2.polylines(image, [pts], True, (0, 255, 0), 2)
def region( self, start: tuple[int, int], end: tuple[int, int]) -> tuple[typing.Tuple[int, int], typing.Tuple[int, int], typing.Tuple[int, int], typing.Tuple[int, int]]:
 997    def region(
 998        self, start: tuple[int, int], end: tuple[int, int]
 999    ) -> tuple[Point, Point, Point, Point]:
1000        """
1001        Get the bounding polygon for a rectangular region of cells.
1002
1003        Returns the four corner coordinates that enclose all cells from
1004        start to end (inclusive).
1005
1006        Args:
1007            start: Top-left cell as (row, col).
1008            end: Bottom-right cell as (row, col).
1009
1010        Returns:
1011            Four corner points (lt, rt, rb, lb) enclosing the region,
1012            each as (x, y) pixel coordinates.
1013
1014        Raises:
1015            TauluException: If any row or col indices are out of bounds.
1016
1017        Example:
1018            >>> # Get bounding box for cells (0,0) through (2,3)
1019            >>> lt, rt, rb, lb = grid.region((0, 0), (2, 3))
1020        """
1021        r0, c0 = start
1022        r1, c1 = end
1023
1024        self._check_row_idx(r0)
1025        self._check_row_idx(r1)
1026        self._check_col_idx(c0)
1027        self._check_col_idx(c1)
1028
1029        if self._right_offset is not None and c0 >= self._right_offset:
1030            c0 = c0 + 1
1031
1032        if self._right_offset is not None and c1 >= self._right_offset:
1033            c1 = c1 + 1
1034
1035        lt = self._points[r0][c0]
1036        rt = self._points[r0][c1 + 1]
1037        rb = self._points[r1 + 1][c1 + 1]
1038        lb = self._points[r1 + 1][c0]
1039
1040        return lt, rt, rb, lb

Get the bounding polygon for a rectangular region of cells.

Returns the four corner coordinates that enclose all cells from start to end (inclusive).

Arguments:
  • start: Top-left cell as (row, col).
  • end: Bottom-right cell as (row, col).
Returns:

Four corner points (lt, rt, rb, lb) enclosing the region, each as (x, y) pixel coordinates.

Raises:
  • TauluException: If any row or col indices are out of bounds.
Example:
>>> # Get bounding box for cells (0,0) through (2,3)
>>> lt, rt, rb, lb = grid.region((0, 0), (2, 3))
def visualize_points(self, img: Union[cv2.Mat, numpy.ndarray]):
def visualize_points(self, img: MatLike):
    """
    Draw the detected table points on the image for visual verification
    """
    import colorsys

    total = len(self._points)

    def clr(index, total_steps):
        # hue cycles through the color wheel, one hue per grid row
        hue = index / total_steps
        rgb = colorsys.hsv_to_rgb(hue, 1.0, 1.0)
        return tuple(int(channel * 255) for channel in rgb)

    for i, grid_row in enumerate(self._points):
        for p in grid_row:
            cv.circle(img, p, 4, clr(i, total), -1)

    imu.show(img)

Draw the detected table points on the image for visual verification

def text_regions( self, img: Union[cv2.Mat, numpy.ndarray], row: int, margin_x: int = 10, margin_y: int = -3) -> list[tuple[tuple[int, int], tuple[int, int]]]:
def text_regions(
    self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -3
) -> list[tuple[tuple[int, int], tuple[int, int]]]:
    """
    Split the given row into regions of continuous text.

    Returns:
        A list of spans ((row, start col), (row, end col)).
    """

    def rule_crop(r: int, c: int):
        # thin vertical strip of `img` around the rule left of column c
        self._check_col_idx(c)
        self._check_row_idx(r)

        if self._right_offset is not None and c >= self._right_offset:
            c += 1

        top = self._points[r][c]
        bottom = self._points[r + 1][c]

        x0 = int(min(top[0], bottom[0]))
        x1 = int(max(top[0], bottom[0]))

        return img[
            int(top[1]) - margin_y : int(bottom[1]) + margin_y,
            x0 - margin_x : x1 + margin_x,
        ]

    spans = []
    start = None
    for col in range(self.cols):
        # a rule NOT covered by text closes the running span and opens a new one
        if imu.text_presence_score(rule_crop(row, col)) <= -0.10:
            if start is not None:
                spans.append(((row, start), (row, col - 1)))
            start = col

    if start is not None:
        spans.append(((row, start), (row, self.cols - 1)))

    return spans

Split the given row into regions of continuous text.

Returns list[tuple[tuple[int, int], tuple[int, int]]]: a list of spans ((row, start col), (row, end col))

class HeaderAligner:
 23class HeaderAligner:
 24    """
 25    Aligns table header templates to subject images using feature-based registration.
 26
 27    This class uses ORB (Oriented FAST and Rotated BRIEF) feature detection and
 28    matching to compute a homography transformation that maps points from a header
 29    template image to their corresponding locations in full table images.
 30
 31    ## How it Works
 32
 33    1. **Feature Detection**: Extracts ORB keypoints from both template and subject
 34    2. **Feature Matching**: Finds correspondences using Hamming distance
 35    3. **Filtering**: Keeps top matches and prunes based on spatial consistency
 36    4. **Homography Estimation**: Computes perspective transform using RANSAC
 37
 38    The computed homography can then transform any point from template space to
 39    image space, allowing you to locate table structures based on your annotation.
 40
 41    ## Preprocessing Options
 42
 43    - Set `k` parameter to apply Sauvola thresholding before feature detection.
 44      This can improve matching on documents with variable lighting.
 45    - Set `k=None` to use raw images (just extract blue channel for BGR images)
 46
 47    ## Tuning Guidelines
 48
 49    - **max_features**: Increase if matching fails on complex templates
 50    - **match_fraction**: Decrease if you get many incorrect matches
 51    - **max_dist**: Increase for documents with more warping/distortion
 52    - **scale**: Decrease (<1.0) to speed up on high-resolution images
 53
 54    Args:
 55        template (MatLike | PathLike[str] | str | None): Header template image or path.
 56            This should contain a clear, representative view of the table header.
 57        max_features (int): Maximum ORB features to detect. More features = slower
 58            but potentially more robust matching.
 59        patch_size (int): ORB patch size for feature extraction.
 60        match_fraction (float): Fraction [0, 1] of matches to keep after sorting by
 61            quality. Higher = more matches but potentially more outliers.
 62        scale (float): Image downscaling factor (0, 1] for processing speed.
 63        max_dist (float): Maximum allowed distance (relative to image size) between
 64            matched keypoints. Filters out spatially inconsistent matches.
 65        k (float | None): Sauvola threshold parameter for preprocessing. If None,
 66            no thresholding is applied. Typical range: 0.03-0.15.
 67    """
 68
 69    def __init__(
 70        self,
 71        template: None | MatLike | PathLike[str] | str = None,
 72        max_features: int = 25_000,
 73        patch_size: int = 31,
 74        match_fraction: float = 0.6,
 75        scale: float = 1.0,
 76        max_dist: float = 1.00,
 77        k: float | None = 0.05,
 78    ):
 79        """
 80        Args:
 81            template (MatLike | str): (path of) template image, with the table template clearly visible
 82            max_features (int): maximal number of features that will be extracted by ORB
 83            patch_size (int): for ORB feature extractor
 84            match_fraction (float): best fraction of matches that are kept
 85            scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
 86            max_dist (float): maximum distance (relative to image size) of matched features.
 87                Increase this value if the warping between image and template needs to be more agressive
 88            k (float | None): sauvola thresholding threshold value. If None, no sauvola thresholding is done
 89        """
 90
 91        if type(template) is str or type(template) is PathLike:
 92            value = cv.imread(fspath(template))
 93            template = value
 94
 95        self._k = k
 96        if scale > 1.0:
 97            raise TauluException(
 98                "Scaling up the image for header alignment is useless. Use 0 < scale <= 1.0"
 99            )
100        if scale == 0:
101            raise TauluException("Use 0 < scale <= 1.0")
102
103        self._scale = scale
104        self._template = self._scale_img(cast(MatLike, template))
105        self._template_orig: None | MatLike = None
106        self._preprocess_template()
107        self._max_features = max_features
108        self._patch_size = patch_size
109        self._match_fraction = match_fraction
110        self._max_dist = max_dist
111
112    def _scale_img(self, img: MatLike) -> MatLike:
113        if self._scale == 1.0:
114            return img
115
116        return cv.resize(img, None, fx=self._scale, fy=self._scale)
117
118    def _unscale_img(self, img: MatLike) -> MatLike:
119        if self._scale == 1.0:
120            return img
121
122        return cv.resize(img, None, fx=1 / self._scale, fy=1 / self._scale)
123
124    def _unscale_homography(self, h: np.ndarray) -> np.ndarray:
125        if self._scale == 1.0:
126            return h
127
128        scale_matrix = np.diag([self._scale, self._scale, 1.0])
129        # inv_scale_matrix = np.linalg.inv(scale_matrix)
130        inv_scale_matrix = np.diag([1.0 / self._scale, 1.0 / self._scale, 1.0])
131        # return inv_scale_matrix @ h @ scale_matrix
132        return inv_scale_matrix @ h @ scale_matrix
133
134    @property
135    def template(self):
136        """The template image that subject images are aligned to"""
137        return self._template
138
139    @template.setter
140    def template(self, value: MatLike | str):
141        """Set the template image as a path or an image"""
142
143        if type(value) is str:
144            value = cv.imread(value)
145            self._template = value
146
147        # TODO: check if the image has the right properties (dimensions etc.)
148        self._template = cast(MatLike, value)
149
150        self._preprocess_template()
151
152    def _preprocess_template(self):
153        self._template_orig = cv.cvtColor(self._template, cv.COLOR_BGR2GRAY)
154        if self._k is not None:
155            self._template = imu.sauvola(self._template, self._k)
156            self._template = cv.bitwise_not(self._template)
157        else:
158            _, _, self._template = cv.split(self._template)
159
160    def _preprocess_image(self, img: MatLike):
161        if self._template_orig is None:
162            raise TauluException("process the template first")
163
164        if self._k is not None:
165            img = imu.sauvola(img, self._k)
166            img = cv.bitwise_not(img)
167        else:
168            _, _, img = cv.split(img)
169
170        return img
171
172    @log_calls(level=logging.DEBUG, include_return=True)
173    def _find_transform_of_template_on(
174        self, im: MatLike, visual: bool = False, window: str = WINDOW
175    ):
176        im = self._scale_img(im)
177        # Detect ORB features and compute descriptors.
178        orb = cv.ORB_create(
179            self._max_features,  # type:ignore
180            patchSize=self._patch_size,
181        )
182        keypoints_im, descriptors_im = orb.detectAndCompute(im, None)
183        keypoints_tg, descriptors_tg = orb.detectAndCompute(self._template, None)
184
185        # Match features
186        matcher = cv.BFMatcher(cv.NORM_HAMMING, crossCheck=True)
187        matches = matcher.match(descriptors_im, descriptors_tg)
188
189        # Sort matches by score
190        matches = sorted(matches, key=lambda x: x.distance)
191
192        # Remove not so good matches
193        numGoodMatches = int(len(matches) * self._match_fraction)
194        matches = matches[:numGoodMatches]
195
196        if visual:
197            final_img_filtered = cv.drawMatches(
198                im,
199                keypoints_im,
200                self._template,
201                keypoints_tg,
202                matches[:10],
203                None,  # type:ignore
204                cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS,
205            )
206            imu.show(final_img_filtered, title="matches", window=window)
207
208        # Extract location of good matches
209        points1 = np.zeros((len(matches), 2), dtype=np.float32)
210        points2 = np.zeros((len(matches), 2), dtype=np.float32)
211
212        for i, match in enumerate(matches):
213            points1[i, :] = keypoints_tg[match.trainIdx].pt
214            points2[i, :] = keypoints_im[match.queryIdx].pt
215
216        # Prune reference points based upon distance between
217        # key points. This assumes a fairly good alignment to start with
218        # due to the protocol used (location of the sheets)
219        p1 = pd.DataFrame(data=points1)
220        p2 = pd.DataFrame(data=points2)
221        refdist = abs(p1 - p2)
222
223        mask_x = refdist.loc[:, 0] < (im.shape[0] * self._max_dist)
224        mask_y = refdist.loc[:, 1] < (im.shape[1] * self._max_dist)
225        mask = mask_x & mask_y
226        points1 = points1[mask.to_numpy()]
227        points2 = points2[mask.to_numpy()]
228
229        # Find homography
230        h, _ = cv.findHomography(points1, points2, cv.RANSAC)
231
232        return self._unscale_homography(h)
233
234    def view_alignment(self, img: MatLike, h: NDArray):
235        """
236        Show the alignment of the template on the given image
237        by transforming it using the supplied transformation matrix `h`
238        and visualising both on different channels
239
240        Args:
241            img (MatLike): the image on which the template is transformed
242            h (NDArray): the transformation matrix
243        """
244
245        im = imu.ensure_gray(img)
246        header = imu.ensure_gray(self._unscale_img(self._template))
247        height, width = im.shape
248
249        header_warped = cv.warpPerspective(header, h, (width, height))
250
251        merged = np.full((height, width, 3), 255, dtype=np.uint8)
252
253        merged[..., 1] = im
254        merged[..., 2] = header_warped
255
256        return imu.show(merged)
257
258    @log_calls(level=logging.DEBUG, include_return=True)
259    def align(
260        self, img: MatLike | str, visual: bool = False, window: str = WINDOW
261    ) -> NDArray:
262        """
263        Calculates a homogeneous transformation matrix that maps pixels of
264        the template to the given image
265        """
266
267        logger.info("Aligning header with supplied table image")
268
269        if type(img) is str:
270            img = cv.imread(img)
271        img = cast(MatLike, img)
272
273        img = self._preprocess_image(img)
274
275        h = self._find_transform_of_template_on(img, visual, window)
276
277        if visual:
278            self.view_alignment(img, h)
279
280        return h
281
282    def template_to_img(self, h: NDArray, point: Iterable[int]) -> tuple[int, int]:
283        """
284        Transform the given point (in template-space) using the transformation h
285        (obtained through the `align` method)
286
287        Args:
288            h (NDArray): transformation matrix of shape (3, 3)
289            point (Iterable[int]): the to-be-transformed point, should conform to (x, y)
290        """
291
292        point = np.array([[point[0], point[1], 1]])  # type:ignore
293        transformed = np.dot(h, point.T)  # type:ignore
294
295        transformed /= transformed[2]
296
297        return int(transformed[0][0]), int(transformed[1][0])

Aligns table header templates to subject images using feature-based registration.

This class uses ORB (Oriented FAST and Rotated BRIEF) feature detection and matching to compute a homography transformation that maps points from a header template image to their corresponding locations in full table images.

How it Works

  1. Feature Detection: Extracts ORB keypoints from both template and subject
  2. Feature Matching: Finds correspondences using Hamming distance
  3. Filtering: Keeps top matches and prunes based on spatial consistency
  4. Homography Estimation: Computes perspective transform using RANSAC

The computed homography can then transform any point from template space to image space, allowing you to locate table structures based on your annotation.

Preprocessing Options

  • Set k parameter to apply Sauvola thresholding before feature detection. This can improve matching on documents with variable lighting.
  • Set k=None to use raw images (just extract blue channel for BGR images)

Tuning Guidelines

  • max_features: Increase if matching fails on complex templates
  • match_fraction: Decrease if you get many incorrect matches
  • max_dist: Increase for documents with more warping/distortion
  • scale: Decrease (<1.0) to speed up on high-resolution images
Arguments:
  • template (MatLike | PathLike[str] | str | None): Header template image or path. This should contain a clear, representative view of the table header.
  • max_features (int): Maximum ORB features to detect. More features = slower but potentially more robust matching.
  • patch_size (int): ORB patch size for feature extraction.
  • match_fraction (float): Fraction [0, 1] of matches to keep after sorting by quality. Higher = more matches but potentially more outliers.
  • scale (float): Image downscaling factor (0, 1] for processing speed.
  • max_dist (float): Maximum allowed distance (relative to image size) between matched keypoints. Filters out spatially inconsistent matches.
  • k (float | None): Sauvola threshold parameter for preprocessing. If None, no thresholding is applied. Typical range: 0.03-0.15.
HeaderAligner( template: Union[NoneType, cv2.Mat, numpy.ndarray, os.PathLike[str], str] = None, max_features: int = 25000, patch_size: int = 31, match_fraction: float = 0.6, scale: float = 1.0, max_dist: float = 1.0, k: float | None = 0.05)
 69    def __init__(
 70        self,
 71        template: None | MatLike | PathLike[str] | str = None,
 72        max_features: int = 25_000,
 73        patch_size: int = 31,
 74        match_fraction: float = 0.6,
 75        scale: float = 1.0,
 76        max_dist: float = 1.00,
 77        k: float | None = 0.05,
 78    ):
 79        """
 80        Args:
 81            template (MatLike | str): (path of) template image, with the table template clearly visible
 82            max_features (int): maximal number of features that will be extracted by ORB
 83            patch_size (int): for ORB feature extractor
 84            match_fraction (float): best fraction of matches that are kept
 85            scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
 86            max_dist (float): maximum distance (relative to image size) of matched features.
 87                Increase this value if the warping between image and template needs to be more agressive
 88            k (float | None): sauvola thresholding threshold value. If None, no sauvola thresholding is done
 89        """
 90
 91        if type(template) is str or type(template) is PathLike:
 92            value = cv.imread(fspath(template))
 93            template = value
 94
 95        self._k = k
 96        if scale > 1.0:
 97            raise TauluException(
 98                "Scaling up the image for header alignment is useless. Use 0 < scale <= 1.0"
 99            )
100        if scale == 0:
101            raise TauluException("Use 0 < scale <= 1.0")
102
103        self._scale = scale
104        self._template = self._scale_img(cast(MatLike, template))
105        self._template_orig: None | MatLike = None
106        self._preprocess_template()
107        self._max_features = max_features
108        self._patch_size = patch_size
109        self._match_fraction = match_fraction
110        self._max_dist = max_dist
Arguments:
  • template (MatLike | str): (path of) template image, with the table template clearly visible
  • max_features (int): maximal number of features that will be extracted by ORB
  • patch_size (int): for ORB feature extractor
  • match_fraction (float): best fraction of matches that are kept
  • scale (float): image scale factor to do calculations on (useful for increasing calculation speed mostly)
  • max_dist (float): maximum distance (relative to image size) of matched features. Increase this value if the warping between image and template needs to be more aggressive
  • k (float | None): sauvola thresholding threshold value. If None, no sauvola thresholding is done
template
@property
def template(self):
    """The template image that subject images are aligned to"""
    return self._template

The template image that subject images are aligned to

def view_alignment( self, img: Union[cv2.Mat, numpy.ndarray], h: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[~_ScalarT]]):
def view_alignment(self, img: MatLike, h: NDArray):
    """
    Show the alignment of the template on the given image
    by transforming it using the supplied transformation matrix `h`
    and visualising both on different channels

    Args:
        img (MatLike): the image on which the template is transformed
        h (NDArray): the transformation matrix
    """
    subject = imu.ensure_gray(img)
    header = imu.ensure_gray(self._unscale_img(self._template))
    height, width = subject.shape

    warped = cv.warpPerspective(header, h, (width, height))

    # green channel: subject image; red channel: warped template
    overlay = np.full((height, width, 3), 255, dtype=np.uint8)
    overlay[..., 1] = subject
    overlay[..., 2] = warped

    return imu.show(overlay)

Show the alignment of the template on the given image by transforming it using the supplied transformation matrix h and visualising both on different channels

Arguments:
  • img (MatLike): the image on which the template is transformed
  • h (NDArray): the transformation matrix
@log_calls(level=logging.DEBUG, include_return=True)
def align( self, img: Union[cv2.Mat, numpy.ndarray, str], visual: bool = False, window: str = 'taulu') -> numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[~_ScalarT]]:
258    @log_calls(level=logging.DEBUG, include_return=True)
259    def align(
260        self, img: MatLike | str, visual: bool = False, window: str = WINDOW
261    ) -> NDArray:
262        """
263        Calculates a homogeneous transformation matrix that maps pixels of
264        the template to the given image
265        """
266
267        logger.info("Aligning header with supplied table image")
268
269        if type(img) is str:
270            img = cv.imread(img)
271        img = cast(MatLike, img)
272
273        img = self._preprocess_image(img)
274
275        h = self._find_transform_of_template_on(img, visual, window)
276
277        if visual:
278            self.view_alignment(img, h)
279
280        return h

Calculates a homogeneous transformation matrix that maps pixels of the template to the given image

def template_to_img( self, h: numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[~_ScalarT]], point: Iterable[int]) -> tuple[int, int]:
282    def template_to_img(self, h: NDArray, point: Iterable[int]) -> tuple[int, int]:
283        """
284        Transform the given point (in template-space) using the transformation h
285        (obtained through the `align` method)
286
287        Args:
288            h (NDArray): transformation matrix of shape (3, 3)
289            point (Iterable[int]): the to-be-transformed point, should conform to (x, y)
290        """
291
292        point = np.array([[point[0], point[1], 1]])  # type:ignore
293        transformed = np.dot(h, point.T)  # type:ignore
294
295        transformed /= transformed[2]
296
297        return int(transformed[0][0]), int(transformed[1][0])

Transform the given point (in template-space) using the transformation h (obtained through the align method)

Arguments:
  • h (NDArray): transformation matrix of shape (3, 3)
  • point (Iterable[int]): the to-be-transformed point, should conform to (x, y)
class HeaderTemplate(taulu.TableIndexer):
151class HeaderTemplate(TableIndexer):
152    def __init__(self, rules: Iterable[Iterable[int]]):
153        """
154        A TableTemplate is a collection of rules of a table. This class implements methods
155        for finding cell positions in a table image, given the template the image adheres to.
156
157        Args:
158            rules: 2D array of lines, where each line is represented as [x0, y0, x1, y1]
159        """
160
161        super().__init__()
162        self._rules = [_Rule(*rule) for rule in rules]
163        self._h_rules = sorted(
164            [rule for rule in self._rules if rule._is_horizontal()], key=lambda r: r._y
165        )
166        self._v_rules = sorted(
167            [rule for rule in self._rules if rule._is_vertical()], key=lambda r: r._x
168        )
169
170    @log_calls(level=logging.DEBUG)
171    def save(self, path: PathLike[str]):
172        """
173        Save the HeaderTemplate to the given path, as a json
174        """
175
176        data = {"rules": [r.to_dict() for r in self._rules]}
177
178        with open(path, "w") as f:
179            json.dump(data, f)
180
181    @staticmethod
182    @log_calls(level=logging.DEBUG)
183    def from_saved(path: PathLike[str]) -> "HeaderTemplate":
184        with open(path, "r") as f:
185            data = json.load(f)
186            rules = data["rules"]
187            rules = [[r["x0"], r["y0"], r["x1"], r["y1"]] for r in rules]
188
189            return HeaderTemplate(rules)
190
191    @property
192    def cols(self) -> int:
193        return len(self._v_rules) - 1
194
195    @property
196    def rows(self) -> int:
197        return len(self._h_rules) - 1
198
199    @staticmethod
200    @log_calls(level=logging.DEBUG)
201    def annotate_image(
202        template: MatLike | str, crop: Optional[PathLike[str]] = None, margin: int = 10
203    ) -> "HeaderTemplate":
204        """
205        Utility method that allows users to create a template form a template image.
206
207        The user is asked to click to annotate lines (two clicks per line).
208
209        Args:
210            template: the image on which to annotate the header lines
211            crop (str | None): if str, crop the template image first, then do the annotation.
212                The cropped image will be stored at the supplied path
213            margin (int): margin to add around the cropping of the header
214        """
215
216        if type(template) is str:
217            value = cv.imread(template)
218            template = value
219        template = cast(MatLike, template)
220
221        if crop is not None:
222            cropped = HeaderTemplate._crop(template, margin)
223            cv.imwrite(os.fspath(crop), cropped)
224            template = cropped
225
226        start_point = None
227        lines: list[list[int]] = []
228
229        anno_template = np.copy(template)
230
231        def get_point(event, x, y, flags, params):
232            nonlocal lines, start_point, anno_template
233            _ = flags
234            _ = params
235            if event == cv.EVENT_LBUTTONDOWN:
236                if start_point is not None:
237                    line: list[int] = [start_point[1], start_point[0], x, y]
238
239                    cv.line(  # type:ignore
240                        anno_template,  # type:ignore
241                        (start_point[1], start_point[0]),
242                        (x, y),
243                        (0, 255, 0),
244                        2,
245                        cv.LINE_AA,
246                    )
247                    cv.imshow(constants.WINDOW, anno_template)  # type:ignore
248
249                    lines.append(line)
250                    start_point = None
251                else:
252                    start_point = (y, x)
253            elif event == cv.EVENT_RBUTTONDOWN:
254                start_point = None
255
256                # remove the last annotation
257                lines = lines[:-1]
258
259                anno_template = np.copy(anno_template)
260
261                for line in lines:
262                    cv.line(
263                        template,
264                        (line[0], line[1]),
265                        (line[2], line[3]),
266                        (0, 255, 0),
267                        2,
268                        cv.LINE_AA,
269                    )
270
271                cv.imshow(constants.WINDOW, template)
272
273        print(ANNO_HELP)
274
275        imu.show(anno_template, get_point, title="annotate the header")
276
277        return HeaderTemplate(lines)
278
279    @staticmethod
280    @log_calls(level=logging.DEBUG, include_return=True)
281    def _crop(template: MatLike, margin: int = 10) -> MatLike:
282        """
283        Crop the image to contain only the annotations, such that it can be used as the header image in the taulu workflow.
284        """
285
286        points = []
287        anno_template = np.copy(template)
288
289        def get_point(event, x, y, flags, params):
290            nonlocal points, anno_template
291            _ = flags
292            _ = params
293            if event == cv.EVENT_LBUTTONDOWN:
294                point = (x, y)
295
296                cv.circle(  # type:ignore
297                    anno_template,  # type:ignore
298                    (x, y),
299                    4,
300                    (0, 255, 0),
301                    2,
302                )
303                cv.imshow(constants.WINDOW, anno_template)  # type:ignore
304
305                points.append(point)
306            elif event == cv.EVENT_RBUTTONDOWN:
307                # remove the last annotation
308                points = points[:-1]
309
310                anno_template = np.copy(anno_template)
311
312                for p in points:
313                    cv.circle(
314                        anno_template,
315                        p,
316                        4,
317                        (0, 255, 0),
318                        2,
319                    )
320
321                cv.imshow(constants.WINDOW, anno_template)
322
323        print(CROP_HELP)
324
325        imu.show(anno_template, get_point, title="crop the header")
326
327        assert len(points) == 4, (
328            "you need to annotate the four corners of the table in order to crop it"
329        )
330
331        # crop the image to contain all of the points (just crop rectangularly, x, y, w, h)
332        # Convert points to numpy array
333        points_np = np.array(points)
334
335        # Find bounding box
336        x_min = np.min(points_np[:, 0])
337        y_min = np.min(points_np[:, 1])
338        x_max = np.max(points_np[:, 0])
339        y_max = np.max(points_np[:, 1])
340
341        # Compute width and height
342        width = x_max - x_min
343        height = y_max - y_min
344
345        # Ensure integers and within image boundaries
346        x_min = max(int(x_min), 0)
347        y_min = max(int(y_min), 0)
348        width = int(width)
349        height = int(height)
350
351        # Crop the image
352        cropped = template[
353            y_min - margin : y_min + height + margin,
354            x_min - margin : x_min + width + margin,
355        ]
356
357        return cropped
358
359    @staticmethod
360    def from_vgg_annotation(annotation: str) -> "HeaderTemplate":
361        """
362        Create a TableTemplate from annotations made in [vgg](https://annotate.officialstatistics.org/), using the polylines tool.
363
364        Args:
365            annotation (str): the path of the annotation csv file
366        """
367
368        rules = []
369        with open(annotation, "r") as csvfile:
370            reader = csv.DictReader(csvfile)
371            for row in reader:
372                shape_attributes = json.loads(row["region_shape_attributes"])
373                if shape_attributes["name"] == "polyline":
374                    x_points = shape_attributes["all_points_x"]
375                    y_points = shape_attributes["all_points_y"]
376                    if len(x_points) == 2 and len(y_points) == 2:
377                        rules.append(
378                            [x_points[0], y_points[0], x_points[1], y_points[1]]
379                        )
380
381        return HeaderTemplate(rules)
382
383    def cell_width(self, i: int) -> int:
384        self._check_col_idx(i)
385        return int(self._v_rules[i + 1]._x - self._v_rules[i]._x)
386
387    def cell_widths(self, start: int = 0) -> list[int]:
388        return [self.cell_width(i) for i in range(start, self.cols)]
389
390    def cell_height(self, header_factor: float = 0.8) -> int:
391        return int((self._h_rules[1]._y - self._h_rules[0]._y) * header_factor)
392
393    def cell_heights(self, header_factors: list[float] | float) -> list[int]:
394        if isinstance(header_factors, float):
395            header_factors = [header_factors]
396        header_factors = cast(list, header_factors)
397        return [
398            int((self._h_rules[1]._y - self._h_rules[0]._y) * f) for f in header_factors
399        ]
400
401    def intersection(self, index: tuple[int, int]) -> tuple[float, float]:
402        """
403        Returns the interaction of the index[0]th horizontal rule and the
404        index[1]th vertical rule
405        """
406
407        ints = self._h_rules[index[0]].intersection(self._v_rules[index[1]])
408        assert ints is not None
409        return ints
410
411    def cell(self, point: tuple[float, float]) -> tuple[int, int]:
412        """
413        Get the cell index (row, col) that corresponds with the point (x, y) in the template image
414
415        Args:
416            point (tuple[float, float]): the coordinates in the template image
417
418        Returns:
419            tuple[int, int]: (row, col)
420        """
421
422        x, y = point
423
424        row = -1
425        col = -1
426
427        for i in range(self.rows):
428            y0 = self._h_rules[i]._y_at_x(x)
429            y1 = self._h_rules[i + 1]._y_at_x(x)
430            if min(y0, y1) <= y <= max(y0, y1):
431                row = i
432                break
433
434        for i in range(self.cols):
435            x0 = self._v_rules[i]._x_at_y(y)
436            x1 = self._v_rules[i + 1]._x_at_y(y)
437            if min(x0, x1) <= x <= max(x0, x1):
438                col = i
439                break
440
441        if row == -1 or col == -1:
442            return (-1, -1)
443
444        return (row, col)
445
446    def cell_polygon(
447        self, cell: tuple[int, int]
448    ) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
449        """
450        Return points (x,y) that make up a polygon around the requested cell
451        (top left, top right, bottom right, bottom left)
452        """
453
454        row, col = cell
455
456        self._check_col_idx(col)
457        self._check_row_idx(row)
458
459        top_rule = self._h_rules[row]
460        bottom_rule = self._h_rules[row + 1]
461        left_rule = self._v_rules[col]
462        right_rule = self._v_rules[col + 1]
463
464        # Calculate corner points using intersections
465        top_left = top_rule.intersection(left_rule)
466        top_right = top_rule.intersection(right_rule)
467        bottom_left = bottom_rule.intersection(left_rule)
468        bottom_right = bottom_rule.intersection(right_rule)
469
470        if not all(
471            [
472                point is not None
473                for point in [top_left, top_right, bottom_left, bottom_right]
474            ]
475        ):
476            raise TauluException("the lines around this cell do not intersect")
477
478        return top_left, top_right, bottom_right, bottom_left  # type:ignore
479
480    def region(
481        self, start: tuple[int, int], end: tuple[int, int]
482    ) -> tuple[Point, Point, Point, Point]:
483        self._check_row_idx(start[0])
484        self._check_row_idx(end[0])
485        self._check_col_idx(start[1])
486        self._check_col_idx(end[1])
487
488        # the rules that surround this row
489        top_rule = self._h_rules[start[0]]
490        bottom_rule = self._h_rules[end[0] + 1]
491        left_rule = self._v_rules[start[1]]
492        right_rule = self._v_rules[end[1] + 1]
493
494        # four points that will be the bounding polygon of the result,
495        # which needs to be rectified
496        top_left = top_rule.intersection(left_rule)
497        top_right = top_rule.intersection(right_rule)
498        bottom_left = bottom_rule.intersection(left_rule)
499        bottom_right = bottom_rule.intersection(right_rule)
500
501        if (
502            top_left is None
503            or top_right is None
504            or bottom_left is None
505            or bottom_right is None
506        ):
507            raise TauluException("the lines around this row do not intersect properly")
508
509        def to_point(pnt) -> Point:
510            return (int(pnt[0]), int(pnt[1]))
511
512        return (
513            to_point(top_left),
514            to_point(top_right),
515            to_point(bottom_right),
516            to_point(bottom_left),
517        )
518
519    def text_regions(
520        self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -20
521    ) -> list[tuple[tuple[int, int], tuple[int, int]]]:
522        raise TauluException("text_regions should not be called on a HeaderTemplate")

Subclasses implement methods for going from a pixel in the input image to a table cell index, and cropping an image to the given table cell index.

HeaderTemplate(rules: Iterable[Iterable[int]])
152    def __init__(self, rules: Iterable[Iterable[int]]):
153        """
154        A TableTemplate is a collection of rules of a table. This class implements methods
155        for finding cell positions in a table image, given the template the image adheres to.
156
157        Args:
158            rules: 2D array of lines, where each line is represented as [x0, y0, x1, y1]
159        """
160
161        super().__init__()
162        self._rules = [_Rule(*rule) for rule in rules]
163        self._h_rules = sorted(
164            [rule for rule in self._rules if rule._is_horizontal()], key=lambda r: r._y
165        )
166        self._v_rules = sorted(
167            [rule for rule in self._rules if rule._is_vertical()], key=lambda r: r._x
168        )

A HeaderTemplate is a collection of rules of a table. This class implements methods for finding cell positions in a table image, given the template the image adheres to.

Arguments:
  • rules: 2D array of lines, where each line is represented as [x0, y0, x1, y1]
@log_calls(level=logging.DEBUG)
def save(self, path: os.PathLike[str]):
170    @log_calls(level=logging.DEBUG)
171    def save(self, path: PathLike[str]):
172        """
173        Save the HeaderTemplate to the given path, as a json
174        """
175
176        data = {"rules": [r.to_dict() for r in self._rules]}
177
178        with open(path, "w") as f:
179            json.dump(data, f)

Save the HeaderTemplate to the given path, as a json

@staticmethod
@log_calls(level=logging.DEBUG)
def from_saved(path: os.PathLike[str]) -> HeaderTemplate:
181    @staticmethod
182    @log_calls(level=logging.DEBUG)
183    def from_saved(path: PathLike[str]) -> "HeaderTemplate":
184        with open(path, "r") as f:
185            data = json.load(f)
186            rules = data["rules"]
187            rules = [[r["x0"], r["y0"], r["x1"], r["y1"]] for r in rules]
188
189            return HeaderTemplate(rules)
cols: int
191    @property
192    def cols(self) -> int:
193        return len(self._v_rules) - 1
rows: int
195    @property
196    def rows(self) -> int:
197        return len(self._h_rules) - 1
@staticmethod
@log_calls(level=logging.DEBUG)
def annotate_image( template: Union[cv2.Mat, numpy.ndarray, str], crop: Optional[os.PathLike[str]] = None, margin: int = 10) -> HeaderTemplate:
199    @staticmethod
200    @log_calls(level=logging.DEBUG)
201    def annotate_image(
202        template: MatLike | str, crop: Optional[PathLike[str]] = None, margin: int = 10
203    ) -> "HeaderTemplate":
204        """
205        Utility method that allows users to create a template form a template image.
206
207        The user is asked to click to annotate lines (two clicks per line).
208
209        Args:
210            template: the image on which to annotate the header lines
211            crop (str | None): if str, crop the template image first, then do the annotation.
212                The cropped image will be stored at the supplied path
213            margin (int): margin to add around the cropping of the header
214        """
215
216        if type(template) is str:
217            value = cv.imread(template)
218            template = value
219        template = cast(MatLike, template)
220
221        if crop is not None:
222            cropped = HeaderTemplate._crop(template, margin)
223            cv.imwrite(os.fspath(crop), cropped)
224            template = cropped
225
226        start_point = None
227        lines: list[list[int]] = []
228
229        anno_template = np.copy(template)
230
231        def get_point(event, x, y, flags, params):
232            nonlocal lines, start_point, anno_template
233            _ = flags
234            _ = params
235            if event == cv.EVENT_LBUTTONDOWN:
236                if start_point is not None:
237                    line: list[int] = [start_point[1], start_point[0], x, y]
238
239                    cv.line(  # type:ignore
240                        anno_template,  # type:ignore
241                        (start_point[1], start_point[0]),
242                        (x, y),
243                        (0, 255, 0),
244                        2,
245                        cv.LINE_AA,
246                    )
247                    cv.imshow(constants.WINDOW, anno_template)  # type:ignore
248
249                    lines.append(line)
250                    start_point = None
251                else:
252                    start_point = (y, x)
253            elif event == cv.EVENT_RBUTTONDOWN:
254                start_point = None
255
256                # remove the last annotation
257                lines = lines[:-1]
258
259                anno_template = np.copy(anno_template)
260
261                for line in lines:
262                    cv.line(
263                        template,
264                        (line[0], line[1]),
265                        (line[2], line[3]),
266                        (0, 255, 0),
267                        2,
268                        cv.LINE_AA,
269                    )
270
271                cv.imshow(constants.WINDOW, template)
272
273        print(ANNO_HELP)
274
275        imu.show(anno_template, get_point, title="annotate the header")
276
277        return HeaderTemplate(lines)

Utility method that allows users to create a template from a template image.

The user is asked to click to annotate lines (two clicks per line).

Arguments:
  • template: the image on which to annotate the header lines
  • crop (str | None): if str, crop the template image first, then do the annotation. The cropped image will be stored at the supplied path
  • margin (int): margin to add around the cropping of the header
@staticmethod
def from_vgg_annotation(annotation: str) -> HeaderTemplate:
359    @staticmethod
360    def from_vgg_annotation(annotation: str) -> "HeaderTemplate":
361        """
362        Create a TableTemplate from annotations made in [vgg](https://annotate.officialstatistics.org/), using the polylines tool.
363
364        Args:
365            annotation (str): the path of the annotation csv file
366        """
367
368        rules = []
369        with open(annotation, "r") as csvfile:
370            reader = csv.DictReader(csvfile)
371            for row in reader:
372                shape_attributes = json.loads(row["region_shape_attributes"])
373                if shape_attributes["name"] == "polyline":
374                    x_points = shape_attributes["all_points_x"]
375                    y_points = shape_attributes["all_points_y"]
376                    if len(x_points) == 2 and len(y_points) == 2:
377                        rules.append(
378                            [x_points[0], y_points[0], x_points[1], y_points[1]]
379                        )
380
381        return HeaderTemplate(rules)

Create a TableTemplate from annotations made in vgg, using the polylines tool.

Arguments:
  • annotation (str): the path of the annotation csv file
def cell_width(self, i: int) -> int:
383    def cell_width(self, i: int) -> int:
384        self._check_col_idx(i)
385        return int(self._v_rules[i + 1]._x - self._v_rules[i]._x)
def cell_widths(self, start: int = 0) -> list[int]:
387    def cell_widths(self, start: int = 0) -> list[int]:
388        return [self.cell_width(i) for i in range(start, self.cols)]
def cell_height(self, header_factor: float = 0.8) -> int:
390    def cell_height(self, header_factor: float = 0.8) -> int:
391        return int((self._h_rules[1]._y - self._h_rules[0]._y) * header_factor)
def cell_heights(self, header_factors: list[float] | float) -> list[int]:
393    def cell_heights(self, header_factors: list[float] | float) -> list[int]:
394        if isinstance(header_factors, float):
395            header_factors = [header_factors]
396        header_factors = cast(list, header_factors)
397        return [
398            int((self._h_rules[1]._y - self._h_rules[0]._y) * f) for f in header_factors
399        ]
def intersection(self, index: tuple[int, int]) -> tuple[float, float]:
401    def intersection(self, index: tuple[int, int]) -> tuple[float, float]:
402        """
403        Returns the interaction of the index[0]th horizontal rule and the
404        index[1]th vertical rule
405        """
406
407        ints = self._h_rules[index[0]].intersection(self._v_rules[index[1]])
408        assert ints is not None
409        return ints

Returns the intersection of the index[0]th horizontal rule and the index[1]th vertical rule

def cell(self, point: tuple[float, float]) -> tuple[int, int]:
411    def cell(self, point: tuple[float, float]) -> tuple[int, int]:
412        """
413        Get the cell index (row, col) that corresponds with the point (x, y) in the template image
414
415        Args:
416            point (tuple[float, float]): the coordinates in the template image
417
418        Returns:
419            tuple[int, int]: (row, col)
420        """
421
422        x, y = point
423
424        row = -1
425        col = -1
426
427        for i in range(self.rows):
428            y0 = self._h_rules[i]._y_at_x(x)
429            y1 = self._h_rules[i + 1]._y_at_x(x)
430            if min(y0, y1) <= y <= max(y0, y1):
431                row = i
432                break
433
434        for i in range(self.cols):
435            x0 = self._v_rules[i]._x_at_y(y)
436            x1 = self._v_rules[i + 1]._x_at_y(y)
437            if min(x0, x1) <= x <= max(x0, x1):
438                col = i
439                break
440
441        if row == -1 or col == -1:
442            return (-1, -1)
443
444        return (row, col)

Get the cell index (row, col) that corresponds with the point (x, y) in the template image

Arguments:
  • point (tuple[float, float]): the coordinates in the template image
Returns:

tuple[int, int]: (row, col)

def cell_polygon( self, cell: tuple[int, int]) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
446    def cell_polygon(
447        self, cell: tuple[int, int]
448    ) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
449        """
450        Return points (x,y) that make up a polygon around the requested cell
451        (top left, top right, bottom right, bottom left)
452        """
453
454        row, col = cell
455
456        self._check_col_idx(col)
457        self._check_row_idx(row)
458
459        top_rule = self._h_rules[row]
460        bottom_rule = self._h_rules[row + 1]
461        left_rule = self._v_rules[col]
462        right_rule = self._v_rules[col + 1]
463
464        # Calculate corner points using intersections
465        top_left = top_rule.intersection(left_rule)
466        top_right = top_rule.intersection(right_rule)
467        bottom_left = bottom_rule.intersection(left_rule)
468        bottom_right = bottom_rule.intersection(right_rule)
469
470        if not all(
471            [
472                point is not None
473                for point in [top_left, top_right, bottom_left, bottom_right]
474            ]
475        ):
476            raise TauluException("the lines around this cell do not intersect")
477
478        return top_left, top_right, bottom_right, bottom_left  # type:ignore

Return points (x,y) that make up a polygon around the requested cell (top left, top right, bottom right, bottom left)

def region( self, start: tuple[int, int], end: tuple[int, int]) -> tuple[typing.Tuple[int, int], typing.Tuple[int, int], typing.Tuple[int, int], typing.Tuple[int, int]]:
480    def region(
481        self, start: tuple[int, int], end: tuple[int, int]
482    ) -> tuple[Point, Point, Point, Point]:
483        self._check_row_idx(start[0])
484        self._check_row_idx(end[0])
485        self._check_col_idx(start[1])
486        self._check_col_idx(end[1])
487
488        # the rules that surround this row
489        top_rule = self._h_rules[start[0]]
490        bottom_rule = self._h_rules[end[0] + 1]
491        left_rule = self._v_rules[start[1]]
492        right_rule = self._v_rules[end[1] + 1]
493
494        # four points that will be the bounding polygon of the result,
495        # which needs to be rectified
496        top_left = top_rule.intersection(left_rule)
497        top_right = top_rule.intersection(right_rule)
498        bottom_left = bottom_rule.intersection(left_rule)
499        bottom_right = bottom_rule.intersection(right_rule)
500
501        if (
502            top_left is None
503            or top_right is None
504            or bottom_left is None
505            or bottom_right is None
506        ):
507            raise TauluException("the lines around this row do not intersect properly")
508
509        def to_point(pnt) -> Point:
510            return (int(pnt[0]), int(pnt[1]))
511
512        return (
513            to_point(top_left),
514            to_point(top_right),
515            to_point(bottom_right),
516            to_point(bottom_left),
517        )

Get the bounding box for the rectangular region that goes from start to end

Returns:

4 points: lt, rt, rb, lb, in format (x, y)

def text_regions( self, img: Union[cv2.Mat, numpy.ndarray], row: int, margin_x: int = 10, margin_y: int = -20) -> list[tuple[tuple[int, int], tuple[int, int]]]:
519    def text_regions(
520        self, img: MatLike, row: int, margin_x: int = 10, margin_y: int = -20
521    ) -> list[tuple[tuple[int, int], tuple[int, int]]]:
522        raise TauluException("text_regions should not be called on a HeaderTemplate")

Split the row into regions of continuous text

Returns list[tuple[int, int]]: a list of spans (start col, end col)

class TableIndexer(abc.ABC):
 72class TableIndexer(ABC):
 73    """
 74    Subclasses implement methods for going from a pixel in the input image to a table cell index,
 75    and cropping an image to the given table cell index.
 76    """
 77
 78    def __init__(self):
 79        self._col_offset = 0
 80
 81    @property
 82    def col_offset(self) -> int:
 83        return self._col_offset
 84
 85    @col_offset.setter
 86    def col_offset(self, value: int):
 87        assert value >= 0
 88        self._col_offset = value
 89
 90    @property
 91    @abstractmethod
 92    def cols(self) -> int:
 93        pass
 94
 95    @property
 96    @abstractmethod
 97    def rows(self) -> int:
 98        pass
 99
100    def cells(self) -> Generator[tuple[int, int], None, None]:
101        """
102        Generate all cell indices in row-major order.
103
104        Yields (row, col) tuples for every cell in the table, iterating
105        through each row from left to right, top to bottom.
106
107        Yields:
108            tuple[int, int]: Cell indices as (row, col).
109
110        Example:
111            >>> for row, col in grid.cells():
112            ...     cell_img = grid.crop_cell(image, (row, col))
113            ...     process(cell_img)
114        """
115        for row in range(self.rows):
116            for col in range(self.cols):
117                yield (row, col)
118
119    def _check_row_idx(self, row: int):
120        if row < 0:
121            raise TauluException("row number needs to be positive or zero")
122        if row >= self.rows:
123            raise TauluException(f"row number too high: {row} >= {self.rows}")
124
125    def _check_col_idx(self, col: int):
126        if col < 0:
127            raise TauluException("col number needs to be positive or zero")
128        if col >= self.cols:
129            raise TauluException(f"col number too high: {col} >= {self.cols}")
130
131    @abstractmethod
132    def cell(self, point: tuple[float, float]) -> tuple[int, int]:
133        """
134        Returns the coordinate (row, col) of the cell that contains the given position
135
136        Args:
137            point (tuple[float, float]): a location in the input image
138
139        Returns:
140            tuple[int, int]: the cell index (row, col) that contains the given point
141        """
142        pass
143
144    @abstractmethod
145    def cell_polygon(
146        self, cell: tuple[int, int]
147    ) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
148        """returns the polygon (used in e.g. opencv) that enscribes the cell at the given cell position"""
149        pass
150
151    def _highlight_cell(
152        self,
153        image: MatLike,
154        cell: tuple[int, int],
155        color: tuple[int, int, int] = (0, 0, 255),
156        thickness: int = 2,
157    ):
158        polygon = self.cell_polygon(cell)
159        points = np.int32(list(polygon))  # type:ignore
160        cv.polylines(image, [points], True, color, thickness, cv.LINE_AA)  # type:ignore
161        cv.putText(
162            image,
163            str(cell),
164            (int(polygon[3][0] + 10), int(polygon[3][1] - 10)),
165            cv.FONT_HERSHEY_PLAIN,
166            2.0,
167            (255, 255, 255),
168            2,
169        )
170
171    def highlight_all_cells(
172        self,
173        image: MatLike,
174        color: tuple[int, int, int] = (0, 0, 255),
175        thickness: int = 1,
176    ) -> MatLike:
177        img = np.copy(image)
178
179        for cell in self.cells():
180            self._highlight_cell(img, cell, color, thickness)
181
182        return img
183
184    def select_one_cell(
185        self,
186        image: MatLike,
187        window: str = WINDOW,
188        color: tuple[int, int, int] = (255, 0, 0),
189        thickness: int = 2,
190    ) -> tuple[int, int] | None:
191        clicked = None
192
193        def click_event(event, x, y, flags, params):
194            nonlocal clicked
195
196            img = np.copy(image)
197            _ = flags
198            _ = params
199            if event == cv.EVENT_LBUTTONDOWN:
200                cell = self.cell((x, y))
201                if cell[0] >= 0:
202                    clicked = cell
203                else:
204                    return
205                self._highlight_cell(img, cell, color, thickness)
206                cv.imshow(window, img)
207
208        imu.show(image, click_event=click_event, title="select one cell", window=window)
209
210        return clicked
211
212    def show_cells(
213        self, image: MatLike | os.PathLike[str] | str, window: str = WINDOW
214    ) -> list[tuple[int, int]]:
215        if not isinstance(image, np.ndarray):
216            image = cv.imread(os.fspath(image))
217
218        img = np.copy(image)
219
220        cells = []
221
222        def click_event(event, x, y, flags, params):
223            _ = flags
224            _ = params
225            if event == cv.EVENT_LBUTTONDOWN:
226                cell = self.cell((x, y))
227                if cell[0] >= 0:
228                    cells.append(cell)
229                else:
230                    return
231                self._highlight_cell(img, cell)
232                cv.imshow(window, img)
233
234        imu.show(
235            img,
236            click_event=click_event,
237            title="click to highlight cells",
238            window=window,
239        )
240
241        return cells
242
243    @abstractmethod
244    def region(
245        self,
246        start: tuple[int, int],
247        end: tuple[int, int],
248    ) -> tuple[Point, Point, Point, Point]:
249        """
250        Get the bounding box for the rectangular region that goes from start to end
251
252        Returns:
253            4 points: lt, rt, rb, lb, in format (x, y)
254        """
255        pass
256
257    def crop_region(
258        self,
259        image: MatLike,
260        start: tuple[int, int],
261        end: tuple[int, int],
262        margin: int = 0,
263        margin_top: int | None = None,
264        margin_bottom: int | None = None,
265        margin_left: int | None = None,
266        margin_right: int | None = None,
267        margin_y: int | None = None,
268        margin_x: int | None = None,
269    ) -> MatLike:
270        """
271        Extract a multi-cell region from the image with perspective correction.
272
273        Crops the image to include all cells from start to end (inclusive),
274        applying a perspective transform to produce a rectangular output.
275
276        Args:
277            image: Source image (BGR or grayscale).
278            start: Top-left cell as (row, col).
279            end: Bottom-right cell as (row, col).
280            margin: Uniform margin in pixels (default 0).
281            margin_top: Override top margin.
282            margin_bottom: Override bottom margin.
283            margin_left: Override left margin.
284            margin_right: Override right margin.
285            margin_y: Override vertical margins (top and bottom).
286            margin_x: Override horizontal margins (left and right).
287
288        Returns:
289            Cropped and perspective-corrected image.
290
291        Example:
292            >>> # Extract a 3x2 region starting at cell (1, 0)
293            >>> region_img = grid.crop_region(image, (1, 0), (3, 1))
294        """
295
296        region = self.region(start, end)
297
298        lt, rt, rb, lb = _apply_margin(
299            *region,
300            margin=margin,
301            margin_top=margin_top,
302            margin_bottom=margin_bottom,
303            margin_left=margin_left,
304            margin_right=margin_right,
305            margin_y=margin_y,
306            margin_x=margin_x,
307        )
308
309        # apply margins according to priority:
310        # margin_top > margin_y > margin (etc.)
311
312        w = (rt[0] - lt[0] + rb[0] - lb[0]) / 2
313        h = (rb[1] - rt[1] + lb[1] - lt[1]) / 2
314
315        # crop by doing a perspective transform to the desired quad
316        src_pts = np.array([lt, rt, rb, lb], dtype="float32")
317        dst_pts = np.array([[0, 0], [w, 0], [w, h], [0, h]], dtype="float32")
318        M = cv.getPerspectiveTransform(src_pts, dst_pts)
319        warped = cv.warpPerspective(image, M, (int(w), int(h)))  # type:ignore
320
321        return warped
322
323    @abstractmethod
324    def text_regions(
325        self, img: MatLike, row: int, margin_x: int = 0, margin_y: int = 0
326    ) -> list[tuple[tuple[int, int], tuple[int, int]]]:
327        """
328        Split the row into regions of continuous text
329
330        Returns
331            list[tuple[int, int]]: a list of spans (start col, end col)
332        """
333
334        pass
335
336    def crop_cell(self, image, cell: tuple[int, int], margin: int = 0) -> MatLike:
337        """
338        Extract a single cell from the image with perspective correction.
339
340        Convenience method equivalent to `crop_region(image, cell, cell, margin)`.
341
342        Args:
343            image: Source image (BGR or grayscale).
344            cell: Cell indices as (row, col).
345            margin: Padding in pixels around the cell (default 0).
346
347        Returns:
348            Cropped and perspective-corrected cell image.
349
350        Example:
351            >>> cell_img = grid.crop_cell(image, (0, 0))
352            >>> cv2.imwrite("cell_0_0.png", cell_img)
353        """
354        return self.crop_region(image, cell, cell, margin)

Subclasses implement methods for going from a pixel in the input image to a table cell index, and cropping an image to the given table cell index.

col_offset: int
81    @property
82    def col_offset(self) -> int:
83        return self._col_offset
cols: int
90    @property
91    @abstractmethod
92    def cols(self) -> int:
93        pass
rows: int
95    @property
96    @abstractmethod
97    def rows(self) -> int:
98        pass
def cells(self) -> Generator[tuple[int, int], NoneType, NoneType]:
100    def cells(self) -> Generator[tuple[int, int], None, None]:
101        """
102        Generate all cell indices in row-major order.
103
104        Yields (row, col) tuples for every cell in the table, iterating
105        through each row from left to right, top to bottom.
106
107        Yields:
108            tuple[int, int]: Cell indices as (row, col).
109
110        Example:
111            >>> for row, col in grid.cells():
112            ...     cell_img = grid.crop_cell(image, (row, col))
113            ...     process(cell_img)
114        """
115        for row in range(self.rows):
116            for col in range(self.cols):
117                yield (row, col)

Generate all cell indices in row-major order.

Yields (row, col) tuples for every cell in the table, iterating through each row from left to right, top to bottom.

Yields:

tuple[int, int]: Cell indices as (row, col).

Example:
>>> for row, col in grid.cells():
...     cell_img = grid.crop_cell(image, (row, col))
...     process(cell_img)
@abstractmethod
def cell(self, point: tuple[float, float]) -> tuple[int, int]:
131    @abstractmethod
132    def cell(self, point: tuple[float, float]) -> tuple[int, int]:
133        """
134        Returns the coordinate (row, col) of the cell that contains the given position
135
136        Args:
137            point (tuple[float, float]): a location in the input image
138
139        Returns:
140            tuple[int, int]: the cell index (row, col) that contains the given point
141        """
142        pass

Returns the coordinate (row, col) of the cell that contains the given position

Arguments:
  • point (tuple[float, float]): a location in the input image
Returns:

tuple[int, int]: the cell index (row, col) that contains the given point

@abstractmethod
def cell_polygon( self, cell: tuple[int, int]) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
144    @abstractmethod
145    def cell_polygon(
146        self, cell: tuple[int, int]
147    ) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]:
148        """returns the polygon (used in e.g. opencv) that inscribes the cell at the given cell position"""
149        pass

returns the polygon (used in e.g. opencv) that inscribes the cell at the given cell position

def highlight_all_cells( self, image: Union[cv2.Mat, numpy.ndarray], color: tuple[int, int, int] = (0, 0, 255), thickness: int = 1) -> Union[cv2.Mat, numpy.ndarray]:
171    def highlight_all_cells(
172        self,
173        image: MatLike,
174        color: tuple[int, int, int] = (0, 0, 255),
175        thickness: int = 1,
176    ) -> MatLike:
177        img = np.copy(image)
178
179        for cell in self.cells():
180            self._highlight_cell(img, cell, color, thickness)
181
182        return img
def select_one_cell( self, image: Union[cv2.Mat, numpy.ndarray], window: str = 'taulu', color: tuple[int, int, int] = (255, 0, 0), thickness: int = 2) -> tuple[int, int] | None:
184    def select_one_cell(
185        self,
186        image: MatLike,
187        window: str = WINDOW,
188        color: tuple[int, int, int] = (255, 0, 0),
189        thickness: int = 2,
190    ) -> tuple[int, int] | None:
191        clicked = None
192
193        def click_event(event, x, y, flags, params):
194            nonlocal clicked
195
196            img = np.copy(image)
197            _ = flags
198            _ = params
199            if event == cv.EVENT_LBUTTONDOWN:
200                cell = self.cell((x, y))
201                if cell[0] >= 0:
202                    clicked = cell
203                else:
204                    return
205                self._highlight_cell(img, cell, color, thickness)
206                cv.imshow(window, img)
207
208        imu.show(image, click_event=click_event, title="select one cell", window=window)
209
210        return clicked
def show_cells( self, image: Union[cv2.Mat, numpy.ndarray, os.PathLike[str], str], window: str = 'taulu') -> list[tuple[int, int]]:
212    def show_cells(
213        self, image: MatLike | os.PathLike[str] | str, window: str = WINDOW
214    ) -> list[tuple[int, int]]:
215        if not isinstance(image, np.ndarray):
216            image = cv.imread(os.fspath(image))
217
218        img = np.copy(image)
219
220        cells = []
221
222        def click_event(event, x, y, flags, params):
223            _ = flags
224            _ = params
225            if event == cv.EVENT_LBUTTONDOWN:
226                cell = self.cell((x, y))
227                if cell[0] >= 0:
228                    cells.append(cell)
229                else:
230                    return
231                self._highlight_cell(img, cell)
232                cv.imshow(window, img)
233
234        imu.show(
235            img,
236            click_event=click_event,
237            title="click to highlight cells",
238            window=window,
239        )
240
241        return cells
@abstractmethod
def region( self, start: tuple[int, int], end: tuple[int, int]) -> tuple[typing.Tuple[int, int], typing.Tuple[int, int], typing.Tuple[int, int], typing.Tuple[int, int]]:
243    @abstractmethod
244    def region(
245        self,
246        start: tuple[int, int],
247        end: tuple[int, int],
248    ) -> tuple[Point, Point, Point, Point]:
249        """
250        Get the bounding box for the rectangular region that goes from start to end
251
252        Returns:
253            4 points: lt, rt, rb, lb, in format (x, y)
254        """
255        pass

Get the bounding box for the rectangular region that goes from start to end

Returns:

4 points: lt, rt, rb, lb, in format (x, y)

def crop_region( self, image: Union[cv2.Mat, numpy.ndarray], start: tuple[int, int], end: tuple[int, int], margin: int = 0, margin_top: int | None = None, margin_bottom: int | None = None, margin_left: int | None = None, margin_right: int | None = None, margin_y: int | None = None, margin_x: int | None = None) -> Union[cv2.Mat, numpy.ndarray]:
257    def crop_region(
258        self,
259        image: MatLike,
260        start: tuple[int, int],
261        end: tuple[int, int],
262        margin: int = 0,
263        margin_top: int | None = None,
264        margin_bottom: int | None = None,
265        margin_left: int | None = None,
266        margin_right: int | None = None,
267        margin_y: int | None = None,
268        margin_x: int | None = None,
269    ) -> MatLike:
270        """
271        Extract a multi-cell region from the image with perspective correction.
272
273        Crops the image to include all cells from start to end (inclusive),
274        applying a perspective transform to produce a rectangular output.
275
276        Args:
277            image: Source image (BGR or grayscale).
278            start: Top-left cell as (row, col).
279            end: Bottom-right cell as (row, col).
280            margin: Uniform margin in pixels (default 0).
281            margin_top: Override top margin.
282            margin_bottom: Override bottom margin.
283            margin_left: Override left margin.
284            margin_right: Override right margin.
285            margin_y: Override vertical margins (top and bottom).
286            margin_x: Override horizontal margins (left and right).
287
288        Returns:
289            Cropped and perspective-corrected image.
290
291        Example:
292            >>> # Extract a 3x2 region starting at cell (1, 0)
293            >>> region_img = grid.crop_region(image, (1, 0), (3, 1))
294        """
295
296        region = self.region(start, end)
297
298        lt, rt, rb, lb = _apply_margin(
299            *region,
300            margin=margin,
301            margin_top=margin_top,
302            margin_bottom=margin_bottom,
303            margin_left=margin_left,
304            margin_right=margin_right,
305            margin_y=margin_y,
306            margin_x=margin_x,
307        )
308
309        # apply margins according to priority:
310        # margin_top > margin_y > margin (etc.)
311
312        w = (rt[0] - lt[0] + rb[0] - lb[0]) / 2
313        h = (rb[1] - rt[1] + lb[1] - lt[1]) / 2
314
315        # crop by doing a perspective transform to the desired quad
316        src_pts = np.array([lt, rt, rb, lb], dtype="float32")
317        dst_pts = np.array([[0, 0], [w, 0], [w, h], [0, h]], dtype="float32")
318        M = cv.getPerspectiveTransform(src_pts, dst_pts)
319        warped = cv.warpPerspective(image, M, (int(w), int(h)))  # type:ignore
320
321        return warped

Extract a multi-cell region from the image with perspective correction.

Crops the image to include all cells from start to end (inclusive), applying a perspective transform to produce a rectangular output.

Arguments:
  • image: Source image (BGR or grayscale).
  • start: Top-left cell as (row, col).
  • end: Bottom-right cell as (row, col).
  • margin: Uniform margin in pixels (default 0).
  • margin_top: Override top margin.
  • margin_bottom: Override bottom margin.
  • margin_left: Override left margin.
  • margin_right: Override right margin.
  • margin_y: Override vertical margins (top and bottom).
  • margin_x: Override horizontal margins (left and right).
Returns:

Cropped and perspective-corrected image.

Example:
>>> # Extract a 3x2 region starting at cell (1, 0)
>>> region_img = grid.crop_region(image, (1, 0), (3, 1))
@abstractmethod
def text_regions( self, img: Union[cv2.Mat, numpy.ndarray], row: int, margin_x: int = 0, margin_y: int = 0) -> list[tuple[tuple[int, int], tuple[int, int]]]:
323    @abstractmethod
324    def text_regions(
325        self, img: MatLike, row: int, margin_x: int = 0, margin_y: int = 0
326    ) -> list[tuple[tuple[int, int], tuple[int, int]]]:
327        """
328        Split the row into regions of continuous text
329
330        Returns
331            list[tuple[int, int]]: a list of spans (start col, end col)
332        """
333
334        pass

Split the row into regions of continuous text

Returns: a list of spans (start col, end col), as list[tuple[int, int]]

def crop_cell( self, image, cell: tuple[int, int], margin: int = 0) -> Union[cv2.Mat, numpy.ndarray]:
336    def crop_cell(self, image, cell: tuple[int, int], margin: int = 0) -> MatLike:
337        """
338        Extract a single cell from the image with perspective correction.
339
340        Convenience method equivalent to `crop_region(image, cell, cell, margin)`.
341
342        Args:
343            image: Source image (BGR or grayscale).
344            cell: Cell indices as (row, col).
345            margin: Padding in pixels around the cell (default 0).
346
347        Returns:
348            Cropped and perspective-corrected cell image.
349
350        Example:
351            >>> cell_img = grid.crop_cell(image, (0, 0))
352            >>> cv2.imwrite("cell_0_0.png", cell_img)
353        """
354        return self.crop_region(image, cell, cell, margin)

Extract a single cell from the image with perspective correction.

Convenience method equivalent to crop_region(image, cell, cell, margin).

Arguments:
  • image: Source image (BGR or grayscale).
  • cell: Cell indices as (row, col).
  • margin: Padding in pixels around the cell (default 0).
Returns:

Cropped and perspective-corrected cell image.

Example:
>>> cell_img = grid.crop_cell(image, (0, 0))
>>> cv2.imwrite("cell_0_0.png", cell_img)
class Split(typing.Generic[~T]):
 15class Split(Generic[T]):
 16    """
 17    Container for paired left/right data with convenient manipulation methods.
 18
 19    The Split class is designed for working with table images that span two pages
 20    or have distinct left and right sections. It allows you to:
 21    - Store related data for both sides
 22    - Apply functions to both sides simultaneously
 23    - Access attributes/methods of contained objects transparently
 24
 25    Examples:
 26        >>> # Create a split with different parameters for each side
 27        >>> thresholds = Split(0.25, 0.30)
 28        >>>
 29        >>> # Apply a function to both sides
 30        >>> images = Split(left_img, right_img)
 31        >>> processed = images.apply(lambda img: cv2.blur(img, (5, 5)))
 32        >>>
 33        >>> # Use with different parameters per side
 34        >>> results = images.apply(
 35        ...     lambda img, k: sauvola_threshold(img, k),
 36        ...     k=thresholds  # k.left used for left img, k.right for right
 37        ... )
 38        >>>
 39        >>> # Access methods of contained objects directly
 40        >>> templates = Split(template_left, template_right)
 41        >>> widths = templates.cell_widths(0)  # Calls on both templates
 42
 43    Type Parameters:
 44        T: The type of objects stored in left and right
 45    """
 46
 47    def __init__(self, left: T | None = None, right: T | None = None):
 48        """
 49        Initialize a Split container.
 50
 51        Args:
 52            left: Data for the left side
 53            right: Data for the right side
 54
 55        Note:
 56            Both can initially be None. Use the `append` method or set
 57            properties directly to populate.
 58        """
 59        self._left = left
 60        self._right = right
 61
 62    @property
 63    def left(self) -> T:
 64        assert self._left is not None
 65        return self._left
 66
 67    @left.setter
 68    def left(self, value: T):
 69        self._left = value
 70
 71    @property
 72    def right(self) -> T:
 73        assert self._right is not None
 74        return self._right
 75
 76    @right.setter
 77    def right(self, value: T):
 78        self._right = value
 79
 80    def append(self, value: T):
 81        if self._left is None:
 82            self._left = value
 83        else:
 84            self._right = value
 85
 86    def __repr__(self) -> str:
 87        return f"left: {self._left}, right: {self._right}"
 88
 89    def __iter__(self):
 90        assert self._left is not None
 91        assert self._right is not None
 92        return iter((self._left, self._right))
 93
 94    def __getitem__(self, index: bool) -> T:
 95        assert self._left is not None
 96        assert self._right is not None
 97        if int(index) == 0:
 98            return self._left
 99        else:
100            return self._right
101
102    def apply(
103        self,
104        funcs: "Split[Callable[[T, *Any], V]] | Callable[[T, *Any], V]",
105        *args,
106        **kwargs,
107    ) -> "Split[V]":
108        if not isinstance(funcs, Split):
109            funcs = Split(funcs, funcs)
110
111        def get_arg(side: str, arg):
112            if isinstance(arg, Split):
113                return getattr(arg, side)
114            return arg
115
116        def call(side: str):
117            func = getattr(funcs, side)
118            target = getattr(self, side)
119
120            side_args = [get_arg(side, arg) for arg in args]
121            side_kwargs = {k: get_arg(side, v) for k, v in kwargs.items()}
122
123            return func(target, *side_args, **side_kwargs)
124
125        return Split(call("left"), call("right"))
126
127    def __getattr__(self, attr_name: str):
128        if attr_name in self.__dict__:
129            return getattr(self, attr_name)
130
131        def wrapper(*args, **kwargs):
132            return self.apply(
133                Split(
134                    getattr(self.left.__class__, attr_name),
135                    getattr(self.right.__class__, attr_name),
136                ),
137                *args,
138                **kwargs,
139            )
140
141        return wrapper

Container for paired left/right data with convenient manipulation methods.

The Split class is designed for working with table images that span two pages or have distinct left and right sections. It allows you to:

  • Store related data for both sides
  • Apply functions to both sides simultaneously
  • Access attributes/methods of contained objects transparently
Examples:
>>> # Create a split with different parameters for each side
>>> thresholds = Split(0.25, 0.30)
>>>
>>> # Apply a function to both sides
>>> images = Split(left_img, right_img)
>>> processed = images.apply(lambda img: cv2.blur(img, (5, 5)))
>>>
>>> # Use with different parameters per side
>>> results = images.apply(
...     lambda img, k: sauvola_threshold(img, k),
...     k=thresholds  # k.left used for left img, k.right for right
... )
>>>
>>> # Access methods of contained objects directly
>>> templates = Split(template_left, template_right)
>>> widths = templates.cell_widths(0)  # Calls on both templates
Type Parameters:

T: The type of objects stored in left and right

Split(left: Optional[~T] = None, right: Optional[~T] = None)
47    def __init__(self, left: T | None = None, right: T | None = None):
48        """
49        Initialize a Split container.
50
51        Args:
52            left: Data for the left side
53            right: Data for the right side
54
55        Note:
56            Both can initially be None. Use the `append` method or set
57            properties directly to populate.
58        """
59        self._left = left
60        self._right = right

Initialize a Split container.

Arguments:
  • left: Data for the left side
  • right: Data for the right side
Note:

Both can initially be None. Use the append method or set properties directly to populate.

left: ~T
62    @property
63    def left(self) -> T:
64        assert self._left is not None
65        return self._left
right: ~T
71    @property
72    def right(self) -> T:
73        assert self._right is not None
74        return self._right
def append(self, value: ~T):
80    def append(self, value: T):
81        if self._left is None:
82            self._left = value
83        else:
84            self._right = value
def apply( self, funcs: 'Split[Callable[[T, *Any], V]] | Callable[[T, *Any], V]', *args, **kwargs) -> Split[~V]:
102    def apply(
103        self,
104        funcs: "Split[Callable[[T, *Any], V]] | Callable[[T, *Any], V]",
105        *args,
106        **kwargs,
107    ) -> "Split[V]":
108        if not isinstance(funcs, Split):
109            funcs = Split(funcs, funcs)
110
111        def get_arg(side: str, arg):
112            if isinstance(arg, Split):
113                return getattr(arg, side)
114            return arg
115
116        def call(side: str):
117            func = getattr(funcs, side)
118            target = getattr(self, side)
119
120            side_args = [get_arg(side, arg) for arg in args]
121            side_kwargs = {k: get_arg(side, v) for k, v in kwargs.items()}
122
123            return func(target, *side_args, **side_kwargs)
124
125        return Split(call("left"), call("right"))
class Taulu:
 41class Taulu:
 42    """
 43    High-level API for table segmentation from images.
 44
 45    Taulu orchestrates header alignment, grid detection, and table segmentation
 46    into a single workflow.
 47
 48    Workflow:
 49        1. Create annotated header images via `Taulu.annotate()`
 50        2. Initialize Taulu with header(s) and parameters
 51        3. Call `segment_table()` to get a `TableGrid` with cell boundaries
 52
 53    For two-page tables, use `Split[T]` to provide different parameters for
 54    left and right sides.
 55
 56    Example:
 57        >>> from taulu import Taulu
 58        >>> Taulu.annotate("table_image.png", "header.png")
 59        >>> taulu = Taulu("header.png")
 60        >>> grid = taulu.segment_table("table_page_01.png")
 61        >>> cell_image = grid.crop_cell(cv2.imread("table_page_01.png"), (0, 0))
 62    """
 63
 64    def __init__(
 65        self,
 66        header_image_path: Splittable[PathLike[str]] | Splittable[str],
 67        cell_height_factor: Splittable[float] | Splittable[list[float]] = [1.0],
 68        header_anno_path: Splittable[PathLike[str]] | Splittable[str] | None = None,
 69        sauvola_k: Splittable[float] = 0.25,
 70        search_region: Splittable[int] = 60,
 71        distance_penalty: Splittable[float] = 0.4,
 72        cross_width: Splittable[int] = 10,
 73        morph_size: Splittable[int] = 4,
 74        kernel_size: Splittable[int] = 41,
 75        processing_scale: Splittable[float] = 1.0,
 76        skip_astar_threshold: Splittable[float] = 0.2,
 77        min_rows: Splittable[int] = 5,
 78        look_distance: Splittable[int] = 3,
 79        grow_threshold: Splittable[float] = 0.3,
 80        smooth_grid: bool = False,
 81        cuts: Splittable[int] = 3,
 82        cut_fraction: Splittable[float] = 0.5,
 83    ):
 84        """
 85        Args:
 86            header_image_path: Path to header template image(s). Use `Split` for two-page tables.
 87            cell_height_factor: Row height relative to header (e.g., 0.8 for 80%). Default: [1.0]
 88            header_anno_path: Explicit annotation JSON path. Default: inferred from image path.
 89            sauvola_k: Binarization threshold (0.0-1.0). Higher = less noise. Default: 0.25
 90            search_region: Corner search area in pixels. Default: 60
 91            distance_penalty: Position penalty weight [0, 1]. Default: 0.4
 92            cross_width: Cross-kernel width matching line thickness. Default: 10
 93            morph_size: Morphological dilation size. Default: 4
 94            kernel_size: Cross-kernel size (odd). Default: 41
 95            processing_scale: Image downscale factor (0, 1]. Default: 1.0
 96            skip_astar_threshold: Confidence to skip A* pathfinding. Default: 0.2
 97            min_rows: Minimum rows before completion. Default: 5
 98            look_distance: Rows to examine for extrapolation. Default: 3
 99            grow_threshold: Corner acceptance confidence [0, 1]. Default: 0.3
100            smooth_grid: Apply grid smoothing after detection. Default: False
101            cuts: Number of grid cuts during growing. Default: 3
102            cut_fraction: Fraction of points to delete per cut. Default: 0.5
103        """
104        self._processing_scale = processing_scale
105        self._cell_height_factor = cell_height_factor
106        self._smooth = smooth_grid
107
108        if isinstance(header_image_path, Split) or isinstance(header_anno_path, Split):
109            header = Split(Path(header_image_path.left), Path(header_image_path.right))
110
111            if not exists(header.left.with_suffix(".png")) or not exists(
112                header.right.with_suffix(".png")
113            ):
114                raise TauluException(
115                    "The header images you provided do not exist (or they aren't .png files)"
116                )
117
118            if header_anno_path is None:
119                if not exists(header.left.with_suffix(".json")) or not exists(
120                    header.right.with_suffix(".json")
121                ):
122                    raise TauluException(
123                        "You need to annotate the headers of your table first\n\nsee the Taulu.annotate method"
124                    )
125
126                template_left = HeaderTemplate.from_saved(
127                    header.left.with_suffix(".json")
128                )
129                template_right = HeaderTemplate.from_saved(
130                    header.right.with_suffix(".json")
131                )
132
133            else:
134                if not exists(header_anno_path.left) or not exists(
135                    header_anno_path.right
136                ):
137                    raise TauluException(
138                        "The header annotation files you provided do not exist (or they aren't .json files)"
139                    )
140
141                template_left = HeaderTemplate.from_saved(header_anno_path.left)
142                template_right = HeaderTemplate.from_saved(header_anno_path.right)
143
144            self._header = Split(
145                cv2.imread(os.fspath(header.left)), cv2.imread(os.fspath(header.right))
146            )
147
148            self._aligner = Split(
149                HeaderAligner(
150                    self._header.left, scale=get_param(self._processing_scale, "left")
151                ),
152                HeaderAligner(
153                    self._header.right, scale=get_param(self._processing_scale, "right")
154                ),
155            )
156
157            self._template = Split(template_left, template_right)
158
159            self._cell_heights = Split(
160                self._template.left.cell_heights(get_param(cell_height_factor, "left")),
161                self._template.right.cell_heights(
162                    get_param(cell_height_factor, "right")
163                ),
164            )
165
166            # Create GridDetector for left and right with potentially different parameters
167            self._grid_detector = Split(
168                GridDetector(
169                    kernel_size=get_param(kernel_size, "left"),
170                    cross_width=get_param(cross_width, "left"),
171                    morph_size=get_param(morph_size, "left"),
172                    search_region=get_param(search_region, "left"),
173                    sauvola_k=get_param(sauvola_k, "left"),
174                    distance_penalty=get_param(distance_penalty, "left"),
175                    scale=get_param(self._processing_scale, "left"),
176                    skip_astar_threshold=get_param(skip_astar_threshold, "left"),
177                    min_rows=get_param(min_rows, "left"),
178                    look_distance=get_param(look_distance, "left"),
179                    grow_threshold=get_param(grow_threshold, "left"),
180                    cuts=get_param(cuts, "left"),
181                    cut_fraction=get_param(cut_fraction, "left"),
182                ),
183                GridDetector(
184                    kernel_size=get_param(kernel_size, "right"),
185                    cross_width=get_param(cross_width, "right"),
186                    morph_size=get_param(morph_size, "right"),
187                    search_region=get_param(search_region, "right"),
188                    sauvola_k=get_param(sauvola_k, "right"),
189                    distance_penalty=get_param(distance_penalty, "right"),
190                    scale=get_param(self._processing_scale, "right"),
191                    skip_astar_threshold=get_param(skip_astar_threshold, "right"),
192                    min_rows=get_param(min_rows, "right"),
193                    look_distance=get_param(look_distance, "right"),
194                    grow_threshold=get_param(grow_threshold, "right"),
195                    cuts=get_param(cuts, "right"),
196                    cut_fraction=get_param(cut_fraction, "right"),
197                ),
198            )
199
200        else:
201            header_image_path = Path(header_image_path)
202            self._header = cv2.imread(os.fspath(header_image_path))
203            self._aligner = HeaderAligner(self._header)
204            self._template = HeaderTemplate.from_saved(
205                header_image_path.with_suffix(".json")
206            )
207
208            # For single header, parameters should not be Split objects
209            if any(
210                isinstance(param, Split)
211                for param in [
212                    sauvola_k,
213                    search_region,
214                    distance_penalty,
215                    cross_width,
216                    morph_size,
217                    kernel_size,
218                    processing_scale,
219                    min_rows,
220                    look_distance,
221                    grow_threshold,
222                    cell_height_factor,
223                    cuts,
224                    cut_fraction,
225                ]
226            ):
227                raise TauluException(
228                    "Split parameters can only be used with split headers (tuple header_path)"
229                )
230
231            self._cell_heights = self._template.cell_heights(self._cell_height_factor)
232
233            self._grid_detector = GridDetector(
234                kernel_size=kernel_size,  # ty: ignore
235                cross_width=cross_width,  # ty: ignore
236                morph_size=morph_size,  # ty: ignore
237                search_region=search_region,  # ty: ignore
238                sauvola_k=sauvola_k,  # ty: ignore
239                distance_penalty=distance_penalty,  # ty: ignore
240                scale=self._processing_scale,  # ty: ignore
241                skip_astar_threshold=skip_astar_threshold,  # ty: ignore
242                min_rows=min_rows,  # ty: ignore
243                look_distance=look_distance,  # ty: ignore
244                grow_threshold=grow_threshold,  # ty: ignore
245                cuts=cuts,
246                cut_fraction=cut_fraction,
247            )
248
249    @staticmethod
250    def annotate(image_path: PathLike[str] | str, output_path: PathLike[str] | str):
251        """
252        Interactive tool to create header annotations for table segmentation.
253
254        This method guides you through a two-step annotation process:
255
256        1. **Crop the header**: Click four corners to define the header region
257        2. **Annotate lines**: Click pairs of points to define each vertical and
258           horizontal line in the header
259
260        The annotations are saved as:
261        - A cropped header image (.png) at `output_path`
262        - A JSON file (.json) containing line coordinates
263
264        ## Annotation Guidelines
265
266        **Which lines to annotate:**
267        - All vertical lines that extend into the table body (column separators)
268        - The top horizontal line of the header
269        - The bottom horizontal line of the header (top of data rows)
270
271        **Order doesn't matter** - annotate lines in any order that's convenient.
272
273        **To annotate a line:**
274        1. Click once at one endpoint
275        2. Click again at the other endpoint
276        3. A green line appears showing your annotation
277
278        **To undo:**
279        - Right-click anywhere to remove the last line you drew
280
281        **When finished:**
282        - Press 'n' to save and exit
283        - Press 'q' to quit without saving
284
285        Args:
286            image_path (PathLike[str] | str): Path to a table image containing
287                a clear view of the header. This can be a full table image.
288            output_path (PathLike[str] | str): Where to save the cropped header
289                image. The annotation JSON will be saved with the same name but
290                .json extension.
291
292        Raises:
293            TauluException: If image_path doesn't exist or output_path is a directory
294
295        Examples:
296            Annotate a single header:
297
298            >>> from taulu import Taulu
299            >>> Taulu.annotate("scan_page_01.png", "header.png")
300            # Interactive window opens
301            # After annotation: creates header.png and header.json
302
303            Annotate left and right headers for a split table:
304
305            >>> Taulu.annotate("scan_page_01.png", "header_left.png")
306            >>> Taulu.annotate("scan_page_01.png", "header_right.png")
307            # Creates header_left.{png,json} and header_right.{png,json}
308
309        Notes:
310            - The header image doesn't need to be perfectly cropped initially -
311              the tool will help you crop it precisely
312            - Annotation accuracy is important: misaligned lines will cause
313              segmentation errors
314            - You can re-run this method to update annotations if needed
315        """
316
317        if not exists(image_path):
318            raise TauluException(f"Image path {image_path} does not exist")
319
320        if os.path.isdir(output_path):
321            raise TauluException("Output path should be a file")
322
323        output_path = Path(output_path)
324
325        template = HeaderTemplate.annotate_image(
326            os.fspath(image_path), crop=output_path.with_suffix(".png")
327        )
328
329        template.save(output_path.with_suffix(".json"))
330
331    def segment_table(
332        self,
333        image: MatLike | PathLike[str] | str,
334        filtered: Optional[MatLike | PathLike[str] | str] = None,
335        debug_view: bool = False,
336    ) -> TableGrid:
337        """
338        Segment a table image into a grid of cells.
339
340        Orchestrates header alignment, grid detection, corner growing, and
341        extrapolation to produce a complete grid structure.
342
343        Args:
344            image: Table image to segment (file path or numpy array).
345            filtered: Optional pre-filtered binary image for corner detection.
346                If provided, binarization parameters are ignored.
347            debug_view: Show intermediate processing steps. Press 'n' to advance,
348                'q' to quit. Default: False
349
350        Returns:
351            TableGrid: Grid structure with methods for cell access (`crop_cell`,
352                `cell_polygon`), visualization (`show_cells`), and persistence
353                (`save`, `from_saved`).
354
355        Raises:
356            TauluException: If image cannot be loaded or grid detection fails.
357        """
358
359        if not isinstance(image, MatLike):
360            image = cast(str | PathLike[str], image)
361            image = cv2.imread(os.fspath(image))
362
363        now = perf_counter()
364        h = self._aligner.align(image, visual=debug_view)
365        align_time = perf_counter() - now
366        logger.info(f"Header alignment took {align_time:.2f} seconds")
367
368        # find the starting point for the table grid algorithm
369
370        def make_top_row(template: HeaderTemplate, aligner: HeaderAligner, h: NDArray):
371            top_row = []
372            for x in range(template.cols + 1):
373                on_template = template.intersection((1, x))
374                on_template = (int(on_template[0]), int(on_template[1]))
375
376                on_img = aligner.template_to_img(h, on_template)
377
378                top_row.append(on_img)
379
380            return top_row
381
382        if isinstance(self._aligner, Split):
383            top_row = Split(
384                make_top_row(self._template.left, self._aligner.left, h.left),  # ty:ignore
385                make_top_row(self._template.right, self._aligner.right, h.right),  # ty:ignore
386            )
387        else:
388            top_row = make_top_row(self._template, self._aligner, h)  # ty:ignore
389
390        now = perf_counter()
391        table = self._grid_detector.find_table_points(
392            image,  # ty:ignore
393            top_row,  # ty:ignore
394            self._template.cell_widths(0),
395            self._cell_heights,  # ty:ignore
396            visual=debug_view,
397            filtered=filtered,  # ty:ignore
398            smooth=self._smooth,
399        )
400        grid_time = perf_counter() - now
401        logger.info(f"Grid detection took {grid_time:.2f} seconds")
402
403        if isinstance(table, Split):
404            table = TableGrid.from_split(table, (0, 0))  # ty: ignore
405
406        return table

High-level API for table segmentation from images.

Taulu orchestrates header alignment, grid detection, and table segmentation into a single workflow.

Workflow:
  1. Create annotated header images via Taulu.annotate()
  2. Initialize Taulu with header(s) and parameters
  3. Call segment_table() to get a TableGrid with cell boundaries

For two-page tables, use Split[T] to provide different parameters for left and right sides.

Example:
>>> from taulu import Taulu
>>> Taulu.annotate("table_image.png", "header.png")
>>> taulu = Taulu("header.png")
>>> grid = taulu.segment_table("table_page_01.png")
>>> cell_image = grid.crop_cell(cv2.imread("table_page_01.png"), (0, 0))
Taulu( header_image_path: Splittable[os.PathLike[str]] | Splittable[str], cell_height_factor: Splittable[float] | Splittable[list[float]] = [1.0], header_anno_path: Splittable[os.PathLike[str]] | Splittable[str] | None = None, sauvola_k: Splittable[float] = 0.25, search_region: Splittable[int] = 60, distance_penalty: Splittable[float] = 0.4, cross_width: Splittable[int] = 10, morph_size: Splittable[int] = 4, kernel_size: Splittable[int] = 41, processing_scale: Splittable[float] = 1.0, skip_astar_threshold: Splittable[float] = 0.2, min_rows: Splittable[int] = 5, look_distance: Splittable[int] = 3, grow_threshold: Splittable[float] = 0.3, smooth_grid: bool = False, cuts: Splittable[int] = 3, cut_fraction: Splittable[float] = 0.5)
 64    def __init__(
 65        self,
 66        header_image_path: Splittable[PathLike[str]] | Splittable[str],
 67        cell_height_factor: Splittable[float] | Splittable[list[float]] = [1.0],
 68        header_anno_path: Splittable[PathLike[str]] | Splittable[str] | None = None,
 69        sauvola_k: Splittable[float] = 0.25,
 70        search_region: Splittable[int] = 60,
 71        distance_penalty: Splittable[float] = 0.4,
 72        cross_width: Splittable[int] = 10,
 73        morph_size: Splittable[int] = 4,
 74        kernel_size: Splittable[int] = 41,
 75        processing_scale: Splittable[float] = 1.0,
 76        skip_astar_threshold: Splittable[float] = 0.2,
 77        min_rows: Splittable[int] = 5,
 78        look_distance: Splittable[int] = 3,
 79        grow_threshold: Splittable[float] = 0.3,
 80        smooth_grid: bool = False,
 81        cuts: Splittable[int] = 3,
 82        cut_fraction: Splittable[float] = 0.5,
 83    ):
 84        """
 85        Args:
 86            header_image_path: Path to header template image(s). Use `Split` for two-page tables.
 87            cell_height_factor: Row height relative to header (e.g., 0.8 for 80%). Default: [1.0]
 88            header_anno_path: Explicit annotation JSON path. Default: inferred from image path.
 89            sauvola_k: Binarization threshold (0.0-1.0). Higher = less noise. Default: 0.25
 90            search_region: Corner search area in pixels. Default: 60
 91            distance_penalty: Position penalty weight [0, 1]. Default: 0.4
 92            cross_width: Cross-kernel width matching line thickness. Default: 10
 93            morph_size: Morphological dilation size. Default: 4
 94            kernel_size: Cross-kernel size (odd). Default: 41
 95            processing_scale: Image downscale factor (0, 1]. Default: 1.0
 96            skip_astar_threshold: Confidence to skip A* pathfinding. Default: 0.2
 97            min_rows: Minimum rows before completion. Default: 5
 98            look_distance: Rows to examine for extrapolation. Default: 3
 99            grow_threshold: Corner acceptance confidence [0, 1]. Default: 0.3
100            smooth_grid: Apply grid smoothing after detection. Default: False
101            cuts: Number of grid cuts during growing. Default: 3
102            cut_fraction: Fraction of points to delete per cut. Default: 0.5
103        """
104        self._processing_scale = processing_scale
105        self._cell_height_factor = cell_height_factor
106        self._smooth = smooth_grid
107
108        if isinstance(header_image_path, Split) or isinstance(header_anno_path, Split):
109            header = Split(Path(header_image_path.left), Path(header_image_path.right))
110
111            if not exists(header.left.with_suffix(".png")) or not exists(
112                header.right.with_suffix(".png")
113            ):
114                raise TauluException(
115                    "The header images you provided do not exist (or they aren't .png files)"
116                )
117
118            if header_anno_path is None:
119                if not exists(header.left.with_suffix(".json")) or not exists(
120                    header.right.with_suffix(".json")
121                ):
122                    raise TauluException(
123                        "You need to annotate the headers of your table first\n\nsee the Taulu.annotate method"
124                    )
125
126                template_left = HeaderTemplate.from_saved(
127                    header.left.with_suffix(".json")
128                )
129                template_right = HeaderTemplate.from_saved(
130                    header.right.with_suffix(".json")
131                )
132
133            else:
134                if not exists(header_anno_path.left) or not exists(
135                    header_anno_path.right
136                ):
137                    raise TauluException(
138                        "The header annotation files you provided do not exist (or they aren't .json files)"
139                    )
140
141                template_left = HeaderTemplate.from_saved(header_anno_path.left)
142                template_right = HeaderTemplate.from_saved(header_anno_path.right)
143
144            self._header = Split(
145                cv2.imread(os.fspath(header.left)), cv2.imread(os.fspath(header.right))
146            )
147
148            self._aligner = Split(
149                HeaderAligner(
150                    self._header.left, scale=get_param(self._processing_scale, "left")
151                ),
152                HeaderAligner(
153                    self._header.right, scale=get_param(self._processing_scale, "right")
154                ),
155            )
156
157            self._template = Split(template_left, template_right)
158
159            self._cell_heights = Split(
160                self._template.left.cell_heights(get_param(cell_height_factor, "left")),
161                self._template.right.cell_heights(
162                    get_param(cell_height_factor, "right")
163                ),
164            )
165
166            # Create GridDetector for left and right with potentially different parameters
167            self._grid_detector = Split(
168                GridDetector(
169                    kernel_size=get_param(kernel_size, "left"),
170                    cross_width=get_param(cross_width, "left"),
171                    morph_size=get_param(morph_size, "left"),
172                    search_region=get_param(search_region, "left"),
173                    sauvola_k=get_param(sauvola_k, "left"),
174                    distance_penalty=get_param(distance_penalty, "left"),
175                    scale=get_param(self._processing_scale, "left"),
176                    skip_astar_threshold=get_param(skip_astar_threshold, "left"),
177                    min_rows=get_param(min_rows, "left"),
178                    look_distance=get_param(look_distance, "left"),
179                    grow_threshold=get_param(grow_threshold, "left"),
180                    cuts=get_param(cuts, "left"),
181                    cut_fraction=get_param(cut_fraction, "left"),
182                ),
183                GridDetector(
184                    kernel_size=get_param(kernel_size, "right"),
185                    cross_width=get_param(cross_width, "right"),
186                    morph_size=get_param(morph_size, "right"),
187                    search_region=get_param(search_region, "right"),
188                    sauvola_k=get_param(sauvola_k, "right"),
189                    distance_penalty=get_param(distance_penalty, "right"),
190                    scale=get_param(self._processing_scale, "right"),
191                    skip_astar_threshold=get_param(skip_astar_threshold, "right"),
192                    min_rows=get_param(min_rows, "right"),
193                    look_distance=get_param(look_distance, "right"),
194                    grow_threshold=get_param(grow_threshold, "right"),
195                    cuts=get_param(cuts, "right"),
196                    cut_fraction=get_param(cut_fraction, "right"),
197                ),
198            )
199
200        else:
201            header_image_path = Path(header_image_path)
202            self._header = cv2.imread(os.fspath(header_image_path))
203            self._aligner = HeaderAligner(self._header)
204            self._template = HeaderTemplate.from_saved(
205                header_image_path.with_suffix(".json")
206            )
207
208            # For single header, parameters should not be Split objects
209            if any(
210                isinstance(param, Split)
211                for param in [
212                    sauvola_k,
213                    search_region,
214                    distance_penalty,
215                    cross_width,
216                    morph_size,
217                    kernel_size,
218                    processing_scale,
219                    min_rows,
220                    look_distance,
221                    grow_threshold,
222                    cell_height_factor,
223                    cuts,
224                    cut_fraction,
225                ]
226            ):
227                raise TauluException(
228                    "Split parameters can only be used with split headers (tuple header_path)"
229                )
230
231            self._cell_heights = self._template.cell_heights(self._cell_height_factor)
232
233            self._grid_detector = GridDetector(
234                kernel_size=kernel_size,  # ty: ignore
235                cross_width=cross_width,  # ty: ignore
236                morph_size=morph_size,  # ty: ignore
237                search_region=search_region,  # ty: ignore
238                sauvola_k=sauvola_k,  # ty: ignore
239                distance_penalty=distance_penalty,  # ty: ignore
240                scale=self._processing_scale,  # ty: ignore
241                skip_astar_threshold=skip_astar_threshold,  # ty: ignore
242                min_rows=min_rows,  # ty: ignore
243                look_distance=look_distance,  # ty: ignore
244                grow_threshold=grow_threshold,  # ty: ignore
245                cuts=cuts,
246                cut_fraction=cut_fraction,
247            )
Arguments:
  • header_image_path: Path to header template image(s). Use Split for two-page tables.
  • cell_height_factor: Row height relative to header (e.g., 0.8 for 80%). Default: [1.0]
  • header_anno_path: Explicit annotation JSON path. Default: inferred from image path.
  • sauvola_k: Binarization threshold (0.0-1.0). Higher = less noise. Default: 0.25
  • search_region: Corner search area in pixels. Default: 60
  • distance_penalty: Position penalty weight [0, 1]. Default: 0.4
  • cross_width: Cross-kernel width matching line thickness. Default: 10
  • morph_size: Morphological dilation size. Default: 4
  • kernel_size: Cross-kernel size (odd). Default: 41
  • processing_scale: Image downscale factor (0, 1]. Default: 1.0
  • skip_astar_threshold: Confidence to skip A* pathfinding. Default: 0.2
  • min_rows: Minimum rows before completion. Default: 5
  • look_distance: Rows to examine for extrapolation. Default: 3
  • grow_threshold: Corner acceptance confidence [0, 1]. Default: 0.3
  • smooth_grid: Apply grid smoothing after detection. Default: False
  • cuts: Number of grid cuts during growing. Default: 3
  • cut_fraction: Fraction of points to delete per cut. Default: 0.5
@staticmethod
def annotate( image_path: os.PathLike[str] | str, output_path: os.PathLike[str] | str):
249    @staticmethod
250    def annotate(image_path: PathLike[str] | str, output_path: PathLike[str] | str):
251        """
252        Interactive tool to create header annotations for table segmentation.
253
254        This method guides you through a two-step annotation process:
255
256        1. **Crop the header**: Click four corners to define the header region
257        2. **Annotate lines**: Click pairs of points to define each vertical and
258           horizontal line in the header
259
260        The annotations are saved as:
261        - A cropped header image (.png) at `output_path`
262        - A JSON file (.json) containing line coordinates
263
264        ## Annotation Guidelines
265
266        **Which lines to annotate:**
267        - All vertical lines that extend into the table body (column separators)
268        - The top horizontal line of the header
269        - The bottom horizontal line of the header (top of data rows)
270
271        **Order doesn't matter** - annotate lines in any order that's convenient.
272
273        **To annotate a line:**
274        1. Click once at one endpoint
275        2. Click again at the other endpoint
276        3. A green line appears showing your annotation
277
278        **To undo:**
279        - Right-click anywhere to remove the last line you drew
280
281        **When finished:**
282        - Press 'n' to save and exit
283        - Press 'q' to quit without saving
284
285        Args:
286            image_path (PathLike[str] | str): Path to a table image containing
287                a clear view of the header. This can be a full table image.
288            output_path (PathLike[str] | str): Where to save the cropped header
289                image. The annotation JSON will be saved with the same name but
290                .json extension.
291
292        Raises:
293            TauluException: If image_path doesn't exist or output_path is a directory
294
295        Examples:
296            Annotate a single header:
297
298            >>> from taulu import Taulu
299            >>> Taulu.annotate("scan_page_01.png", "header.png")
300            # Interactive window opens
301            # After annotation: creates header.png and header.json
302
303            Annotate left and right headers for a split table:
304
305            >>> Taulu.annotate("scan_page_01.png", "header_left.png")
306            >>> Taulu.annotate("scan_page_01.png", "header_right.png")
307            # Creates header_left.{png,json} and header_right.{png,json}
308
309        Notes:
310            - The header image doesn't need to be perfectly cropped initially -
311              the tool will help you crop it precisely
312            - Annotation accuracy is important: misaligned lines will cause
313              segmentation errors
314            - You can re-run this method to update annotations if needed
315        """
316
317        if not exists(image_path):
318            raise TauluException(f"Image path {image_path} does not exist")
319
320        if os.path.isdir(output_path):
321            raise TauluException("Output path should be a file")
322
323        output_path = Path(output_path)
324
325        template = HeaderTemplate.annotate_image(
326            os.fspath(image_path), crop=output_path.with_suffix(".png")
327        )
328
329        template.save(output_path.with_suffix(".json"))

Interactive tool to create header annotations for table segmentation.

This method guides you through a two-step annotation process:

  1. Crop the header: Click four corners to define the header region
  2. Annotate lines: Click pairs of points to define each vertical and horizontal line in the header

The annotations are saved as:

  • A cropped header image (.png) at output_path
  • A JSON file (.json) containing line coordinates

Annotation Guidelines

Which lines to annotate:

  • All vertical lines that extend into the table body (column separators)
  • The top horizontal line of the header
  • The bottom horizontal line of the header (top of data rows)

Order doesn't matter - annotate lines in any order that's convenient.

To annotate a line:

  1. Click once at one endpoint
  2. Click again at the other endpoint
  3. A green line appears showing your annotation

To undo:

  • Right-click anywhere to remove the last line you drew

When finished:

  • Press 'n' to save and exit
  • Press 'q' to quit without saving
Arguments:
  • image_path (PathLike[str] | str): Path to a table image containing a clear view of the header. This can be a full table image.
  • output_path (PathLike[str] | str): Where to save the cropped header image. The annotation JSON will be saved with the same name but .json extension.
Raises:
  • TauluException: If image_path doesn't exist or output_path is a directory
Examples:

Annotate a single header:

>>> from taulu import Taulu
>>> Taulu.annotate("scan_page_01.png", "header.png")
# Interactive window opens

After annotation: creates header.png and header.json

Annotate left and right headers for a split table:

>>> Taulu.annotate("scan_page_01.png", "header_left.png")
>>> Taulu.annotate("scan_page_01.png", "header_right.png")
# Creates header_left.{png,json} and header_right.{png,json}
Notes:
  • The header image doesn't need to be perfectly cropped initially - the tool will help you crop it precisely
  • Annotation accuracy is important: misaligned lines will cause segmentation errors
  • You can re-run this method to update annotations if needed
def segment_table( self, image: Union[cv2.Mat, numpy.ndarray, os.PathLike[str], str], filtered: Union[cv2.Mat, numpy.ndarray, os.PathLike[str], str, NoneType] = None, debug_view: bool = False) -> TableGrid:
331    def segment_table(
332        self,
333        image: MatLike | PathLike[str] | str,
334        filtered: Optional[MatLike | PathLike[str] | str] = None,
335        debug_view: bool = False,
336    ) -> TableGrid:
337        """
338        Segment a table image into a grid of cells.
339
340        Orchestrates header alignment, grid detection, corner growing, and
341        extrapolation to produce a complete grid structure.
342
343        Args:
344            image: Table image to segment (file path or numpy array).
345            filtered: Optional pre-filtered binary image for corner detection.
346                If provided, binarization parameters are ignored.
347            debug_view: Show intermediate processing steps. Press 'n' to advance,
348                'q' to quit. Default: False
349
350        Returns:
351            TableGrid: Grid structure with methods for cell access (`crop_cell`,
352                `cell_polygon`), visualization (`show_cells`), and persistence
353                (`save`, `from_saved`).
354
355        Raises:
356            TauluException: If image cannot be loaded or grid detection fails.
357        """
358
359        if not isinstance(image, MatLike):  # accept either an in-memory image or a path
360            image = cast(str | PathLike[str], image)
361            image = cv2.imread(os.fspath(image))  # load from disk (OpenCV BGR order)
362
363        now = perf_counter()
364        h = self._aligner.align(image, visual=debug_view)  # homography mapping template -> image
365        align_time = perf_counter() - now
366        logger.info(f"Header alignment took {align_time:.2f} seconds")
367
368        # find the starting point for the table grid algorithm: the row of
369        # header/body intersection points, mapped into image coordinates
370        def make_top_row(template: HeaderTemplate, aligner: HeaderAligner, h: NDArray):
371            top_row = []
372            for x in range(template.cols + 1):  # one point per column separator, inclusive of both edges
373                on_template = template.intersection((1, x))
374                on_template = (int(on_template[0]), int(on_template[1]))  # integer pixel coords
375
376                on_img = aligner.template_to_img(h, on_template)
377
378                top_row.append(on_img)
379
380            return top_row
381
382        if isinstance(self._aligner, Split):  # split tables carry separate left/right headers
383            top_row = Split(
384                make_top_row(self._template.left, self._aligner.left, h.left),  # ty:ignore
385                make_top_row(self._template.right, self._aligner.right, h.right),  # ty:ignore
386            )
387        else:
388            top_row = make_top_row(self._template, self._aligner, h)  # ty:ignore
389
390        now = perf_counter()
391        table = self._grid_detector.find_table_points(
392            image,  # ty:ignore
393            top_row,  # ty:ignore
394            self._template.cell_widths(0),
395            self._cell_heights,  # ty:ignore
396            visual=debug_view,
397            filtered=filtered,  # ty:ignore
398            smooth=self._smooth,
399        )
400        grid_time = perf_counter() - now
401        logger.info(f"Grid detection took {grid_time:.2f} seconds")
402
403        if isinstance(table, Split):  # merge left/right halves into a single grid
404            table = TableGrid.from_split(table, (0, 0))  # ty: ignore
405
406        return table

Segment a table image into a grid of cells.

Orchestrates header alignment, grid detection, corner growing, and extrapolation to produce a complete grid structure.

Arguments:
  • image: Table image to segment (file path or numpy array).
  • filtered: Optional pre-filtered binary image for corner detection. If provided, binarization parameters are ignored.
  • debug_view: Show intermediate processing steps. Press 'n' to advance, 'q' to quit. Default: False
Returns:

TableGrid: Grid structure with methods for cell access (crop_cell, cell_polygon), visualization (show_cells), and persistence (save, from_saved).

Raises:
  • TauluException: If image cannot be loaded or grid detection fails.