Skip to content

API Reference

This page is generated from Python docstrings.

Projector

cameralib.projector.Projector

A projector that performs camera coordinate operations on ODX datasets

Parameters:

Name Type Description Default
project_path str

Path to ODX project

required
z_sample_window int

Size of the window to use when sampling elevation values

1
z_sample_strategy str

Strategy to use when sampling elevation values. Can be one of: ['minimum', 'maximum', 'average', 'median']

'median'
z_sample_target str

Elevation raster to use for sampling elevation. One of: ['dsm', 'dtm']

'dsm'
z_fill_nodata bool

Whether to fill nodata cells with nearest neighbor cell values. This gives a wider coverage for queries, but increases the initialization time.

True
raycast_resolution_multiplier float

Value that affects the ray sampling resolution. Lower values can lead to slightly more precise results, but increase processing time.

0.7071
dem_path str | None

Manually set a path to a valid GeoTIFF DEM for sampling Z values instead of using the default.

None
Source code in cameralib/projector.py
class Projector:
    """A projector to perform camera coordinates operations on ODX datasets

    Args:
        project_path (str): Path to ODX project
        z_sample_window (int): Size of the window to use when sampling elevation values
        z_sample_strategy (str): Strategy to use when sampling elevation values. Can be one of: ['minimum', 'maximum', 'average', 'median']
        z_sample_target (str): Elevation raster to use for sampling elevation. One of: ['dsm', 'dtm']
        z_fill_nodata (bool): Whether to fill nodata cells with nearest neighbor cell values. This gives a wider coverage for queries, but increases the initialization time.
        raycast_resolution_multiplier (float): Value that affects the ray sampling resolution. Lower values can lead to slightly more precise results, but increase processing time.
        dem_path (str | None): Manually set a path to a valid GeoTIFF DEM for sampling Z values instead of using the default.
    """
    def __init__(self, project_path: str, z_sample_window: int = 1, z_sample_strategy: str = 'median', z_sample_target: str = 'dsm', z_fill_nodata: bool = True, raycast_resolution_multiplier: float = 0.7071, dem_path: str | None = None) -> None:
        if not os.path.isdir(project_path):
            raise IOError(f"{project_path} is not a valid path to an ODX project")

        self.project_path = project_path
        self.z_sample_window = z_sample_window
        self.z_sample_strategy = z_sample_strategy
        self.z_fill_nodata = z_fill_nodata
        self.raycast_resolution_multiplier = raycast_resolution_multiplier

        if self.z_sample_window % 2 == 0 or self.z_sample_window <= 0:
            raise InvalidArgError("z_sample_window must be an odd number > 0")

        self.dsm_path = os.path.abspath(os.path.join(project_path, "odm_dem", "dsm.tif"))
        self.dtm_path = os.path.abspath(os.path.join(project_path, "odm_dem", "dtm.tif"))

        if dem_path is not None:
            self.dem_path = dem_path
        else:
            if z_sample_target == 'dsm':
                self.dem_path = self.dsm_path
            elif z_sample_target == 'dtm':
                self.dem_path = self.dtm_path
            else:
                raise InvalidArgError(f"Invalid z_sample_target {z_sample_target}")

        if not os.path.isfile(self.dem_path):
            raise InvalidArgError(f"{self.dem_path} does not exist. A surface model is required.")
        with rasterio.open(self.dem_path, "r") as r:
            self.dem_nodata = r.nodata

        self.shots_path = os.path.abspath(os.path.join(project_path, "odm_report", "shots.geojson"))
        self.cameras_path = os.path.abspath(os.path.join(project_path, "cameras.json"))

        self.shots, self.shots_map = load_shots(self.shots_path)
        self.cameras = load_cameras(self.cameras_path)

        self.raster = None
        self.dem_data = None
        self.min_z = None

    def _read_dem(self) -> None:
        if self.raster is None:
            self.raster = rasterio.open(self.dem_path, 'r')
            self.dem_data = self.raster.read(1)
            valid_mask = self.dem_data!=self.raster.nodata
            self.min_z = self.dem_data[valid_mask].min()
            if self.z_fill_nodata:
                indices = ndimage.distance_transform_edt(~valid_mask, 
                                                    return_distances=False, 
                                                    return_indices=True)
                self.dem_data = self.dem_data[tuple(indices)]

    def __del__(self) -> None:
        if self.raster is not None:
            self.raster.close()
            self.raster = None

    def cam2world(self, image: str, coordinates: list[tuple[float, float]] | np.ndarray, normalized: bool = False) -> list[tuple[float, float, float] | None]:
        """Project 2D pixel coordinates in camera space to geographic coordinates

        Args:
            image (str): Image filename.
            coordinates (list[tuple[float, float]] | numpy.ndarray): Pixel coordinates as (x, y) pairs.
            normalized (bool): Whether the input coordinates are normalized to [0..1].

        Returns:
            Geographic coordinates (list[tuple[float, float, float] | None]): Geographic coordinates as (lat, lon, elevation) for each coordinate pair. Returns None for rays that do not intersect the DEM.
        """
        if not image in self.shots_map:
            raise InvalidArgError(f"Image {image} not found in {self.shots_path}")

        s = self.shots[self.shots_map[image]]
        cam_id = s['cam_id'].replace("v2 ", "")
        cam = self.cameras[cam_id]

        self._read_dem()

        r = s['rotation']
        img_w = s['width']
        img_h = s['height']
        coordinates = np.asarray(coordinates, dtype=float)
        if normalized:
            coordinates *= np.array([img_w, img_h])

        t = s['translation'].reshape(3, 1)
        resolution_step = abs(self.raster.transform[0]) * self.raycast_resolution_multiplier

        rays_cam = cam.pixel_bearing_many(np.array(coordinates)).T
        rays_world = np.matmul(np.linalg.inv(r), rays_cam).T
        results = []

        for ray_world in rays_world:
            ray_world = ray_world.reshape((3, 1))

            if float(ray_world[2]) > 0:
                logger.warning(f"Ray from {image} pointing up, cannot raycast")
                continue

            step = 0 # meters
            prev_pt = None
            result = None

            while True:
                ray_pt = (ray_world * step + t).ravel()
                step += resolution_step

                # No hits
                if ray_pt[2] < self.min_z:
                    break

                y, x = self.raster.index(ray_pt[0], ray_pt[1], op=round)

                if x >= 0 and x < self.dem_data.shape[1] and y >= 0 and y < self.dem_data.shape[0]:
                    pix_z = raster_sample_z(self.dem_data, self.raster.nodata, y, x, window=self.z_sample_window, strategy=self.z_sample_strategy)

                    if pix_z == self.raster.nodata:
                        continue

                    if prev_pt is None:
                        prev_pt = ray_pt

                    if ray_pt[2] <= pix_z:
                        # Hit
                        midpoint = (prev_pt + ray_pt) / 2.0
                        lat, lon = get_latlon(self.raster, midpoint[0], midpoint[1])
                        result = (lat,lon,pix_z)
                        break

                    prev_pt = ray_pt

            results.append(result)

        return results


    def cam2geoJSON(self, image: str, coordinates: list[tuple[float, float]] | np.ndarray, properties: dict[str, Any] | None = None, normalized: bool = False) -> dict[str, Any]:
        """Project 2D pixel coordinates in camera space to geographic coordinates and output the result
        as GeoJSON. A single coordinate results in a Point, two coordinates into a LineString and more than two into a Polygon.

        Args:
            image (str): Image filename.
            coordinates (list[tuple[float, float]] | numpy.ndarray): Pixel coordinates as (x, y) pairs.
            properties (dict[str, Any] | None): Optional GeoJSON feature properties.
            normalized (bool): Whether the input coordinates are normalized to [0..1].

        Returns:
            dict[str, Any]: GeoJSON FeatureCollection.
        """
        if properties is None:
            properties = {}

        results = self.cam2world(image, coordinates, normalized)

        if 'image' not in properties:
            properties['image'] = image

        if len(results) == 1:
            geom = 'Point'
            lat,lon,z = results[0]
            coords = [lon,lat,z]
        elif len(results) == 2:
            geom = 'LineString'
            coords = list([lon,lat,z] for lat,lon,z in results)
        else:
            geom = 'Polygon'
            coords = [list([lon,lat,z] for lat,lon,z in results)]
            coords[0].append(coords[0][0])

        j = {
            'type': 'FeatureCollection',
            'features':[{
                'type': 'Feature',
                'properties': properties,
                'geometry': {
                    'coordinates': coords,
                    'type': geom
                }
            }]
        }

        return j


    def world2cams(self, longitude: float, latitude: float, normalized: bool = False) -> list[dict[str, str | float]]:
        """Find which cameras in the reconstruction see a particular location.

        Args:
            longitude (float): Longitude
            latitude (float): Latitude
            normalized (bool): Whether to normalize pixel coordinates by the image dimension. By default pixel coordinates are in range [0..image width], [0..image height].

        Returns:
            Cameras (list[dict[str, str | float]]): A list of camera dictionaries containing filename, x, and y keys.
        """
        self._read_dem()
        Xa, Ya, Za = get_utm_xyz(self.raster, self.dem_data, self.dem_nodata, longitude, latitude, 
                                    z_sample_window=self.z_sample_window,
                                    z_sample_strategy=self.z_sample_strategy)
        if Za == self.dem_nodata:
            return []

        images = []
        for s in self.shots:
            r = s['rotation']
            a1 = r[0][0]
            b1 = r[0][1]
            c1 = r[0][2]
            a2 = r[1][0]
            b2 = r[1][1]
            c2 = r[1][2]
            a3 = r[2][0]
            b3 = r[2][1]
            c3 = r[2][2]

            cam_id = s['cam_id'].replace("v2 ", "")
            focal = s['focal']
            img_w = s['width']
            img_h = s['height']
            Xs, Ys, Zs = s['translation']

            half_img_w = (img_w - 1) / 2.0
            half_img_h = (img_h - 1) / 2.0
            f = focal * max(img_w, img_h)

            dx = (Xa - Xs)
            dy = (Ya - Ys)
            dz = (Za - Zs)

            den = a3 * dx + b3 * dy + c3 * dz
            x = half_img_w - (f * (a1 * dx + b1 * dy + c1 * dz) / den)
            y = half_img_h - (f * (a2 * dx + b2 * dy + c2 * dz) / den)

            if x >= 0 and y >= 0 and x <= img_w - 1 and y <= img_h - 1:
                valid = True # assumed
                result = {
                    'filename': s['filename']
                }
                if cam_id is not None and cam_id in self.cameras:
                    cam = self.cameras[cam_id]

                    # Back-undistort to find exact UV coordinates

                    xi = img_w - 1 - int(round(x))
                    yi = img_h - 1 - int(round(y))
                    xu, yu = map_pixels(cam.undistorted(), cam, np.array([[xi, yi]])).ravel()

                    valid = xu >= 0 and xu <= img_w and yu >= 0 and yu <= img_h

                    result['x'] = float(xu)
                    result['y'] = float(yu)
                    if normalized:
                        result['x'] /= img_w
                        result['y'] /= img_h

                if valid:
                    images.append(result)

        return images

cam2world(image, coordinates, normalized=False)

Project 2D pixel coordinates in camera space to geographic coordinates

Parameters:

Name Type Description Default
image str

Image filename.

required
coordinates list[tuple[float, float]] | ndarray

Pixel coordinates as (x, y) pairs.

required
normalized bool

Whether the input coordinates are normalized to [0..1].

False

Returns:

Type Description
list[tuple[float, float, float] | None]

Geographic coordinates as (lat, lon, elevation) for each coordinate pair. Returns None for rays that do not intersect the DEM.

Source code in cameralib/projector.py
def cam2world(self, image: str, coordinates: list[tuple[float, float]] | np.ndarray, normalized: bool = False) -> list[tuple[float, float, float] | None]:
    """Project 2D pixel coordinates in camera space to geographic coordinates

    Args:
        image (str): Image filename.
        coordinates (list[tuple[float, float]] | numpy.ndarray): Pixel coordinates as (x, y) pairs.
        normalized (bool): Whether the input coordinates are normalized to [0..1].

    Returns:
        Geographic coordinates (list[tuple[float, float, float] | None]): Geographic coordinates as (lat, lon, elevation) for each coordinate pair. Returns None for rays that do not intersect the DEM.
    """
    if not image in self.shots_map:
        raise InvalidArgError(f"Image {image} not found in {self.shots_path}")

    s = self.shots[self.shots_map[image]]
    cam_id = s['cam_id'].replace("v2 ", "")
    cam = self.cameras[cam_id]

    self._read_dem()

    r = s['rotation']
    img_w = s['width']
    img_h = s['height']
    coordinates = np.asarray(coordinates, dtype=float)
    if normalized:
        coordinates *= np.array([img_w, img_h])

    t = s['translation'].reshape(3, 1)
    resolution_step = abs(self.raster.transform[0]) * self.raycast_resolution_multiplier

    rays_cam = cam.pixel_bearing_many(np.array(coordinates)).T
    rays_world = np.matmul(np.linalg.inv(r), rays_cam).T
    results = []

    for ray_world in rays_world:
        ray_world = ray_world.reshape((3, 1))

        if float(ray_world[2]) > 0:
            logger.warning(f"Ray from {image} pointing up, cannot raycast")
            continue

        step = 0 # meters
        prev_pt = None
        result = None

        while True:
            ray_pt = (ray_world * step + t).ravel()
            step += resolution_step

            # No hits
            if ray_pt[2] < self.min_z:
                break

            y, x = self.raster.index(ray_pt[0], ray_pt[1], op=round)

            if x >= 0 and x < self.dem_data.shape[1] and y >= 0 and y < self.dem_data.shape[0]:
                pix_z = raster_sample_z(self.dem_data, self.raster.nodata, y, x, window=self.z_sample_window, strategy=self.z_sample_strategy)

                if pix_z == self.raster.nodata:
                    continue

                if prev_pt is None:
                    prev_pt = ray_pt

                if ray_pt[2] <= pix_z:
                    # Hit
                    midpoint = (prev_pt + ray_pt) / 2.0
                    lat, lon = get_latlon(self.raster, midpoint[0], midpoint[1])
                    result = (lat,lon,pix_z)
                    break

                prev_pt = ray_pt

        results.append(result)

    return results

cam2geoJSON(image, coordinates, properties=None, normalized=False)

Project 2D pixel coordinates in camera space to geographic coordinates and output the result as GeoJSON. A single coordinate results in a Point, two coordinates into a LineString and more than two into a Polygon.

Parameters:

Name Type Description Default
image str

Image filename.

required
coordinates list[tuple[float, float]] | ndarray

Pixel coordinates as (x, y) pairs.

required
properties dict[str, Any] | None

Optional GeoJSON feature properties.

None
normalized bool

Whether the input coordinates are normalized to [0..1].

False

Returns:

Type Description
dict[str, Any]

GeoJSON FeatureCollection.

Source code in cameralib/projector.py
def cam2geoJSON(self, image: str, coordinates: list[tuple[float, float]] | np.ndarray, properties: dict[str, Any] | None = None, normalized: bool = False) -> dict[str, Any]:
    """Project 2D pixel coordinates in camera space to geographic coordinates and output the result
    as GeoJSON. A single coordinate results in a Point, two coordinates into a LineString and more than two into a Polygon.

    Args:
        image (str): Image filename.
        coordinates (list[tuple[float, float]] | numpy.ndarray): Pixel coordinates as (x, y) pairs.
        properties (dict[str, Any] | None): Optional GeoJSON feature properties.
        normalized (bool): Whether the input coordinates are normalized to [0..1].

    Returns:
        dict[str, Any]: GeoJSON FeatureCollection.
    """
    if properties is None:
        properties = {}

    results = self.cam2world(image, coordinates, normalized)

    if 'image' not in properties:
        properties['image'] = image

    if len(results) == 1:
        geom = 'Point'
        lat,lon,z = results[0]
        coords = [lon,lat,z]
    elif len(results) == 2:
        geom = 'LineString'
        coords = list([lon,lat,z] for lat,lon,z in results)
    else:
        geom = 'Polygon'
        coords = [list([lon,lat,z] for lat,lon,z in results)]
        coords[0].append(coords[0][0])

    j = {
        'type': 'FeatureCollection',
        'features':[{
            'type': 'Feature',
            'properties': properties,
            'geometry': {
                'coordinates': coords,
                'type': geom
            }
        }]
    }

    return j

world2cams(longitude, latitude, normalized=False)

Find which cameras in the reconstruction see a particular location.

Parameters:

Name Type Description Default
longitude float

Longitude

required
latitude float

Latitude

required
normalized bool

Whether to normalize pixel coordinates by the image dimension. By default pixel coordinates are in range [0..image width], [0..image height].

False

Returns:

Name Type Description
Cameras list[dict[str, str | float]]

A list of camera dictionaries containing filename, x, and y keys.

Source code in cameralib/projector.py
def world2cams(self, longitude: float, latitude: float, normalized: bool = False) -> list[dict[str, str | float]]:
    """Find which cameras in the reconstruction see a particular location.

    Args:
        longitude (float): Longitude
        latitude (float): Latitude
        normalized (bool): Whether to normalize pixel coordinates by the image dimension. By default pixel coordinates are in range [0..image width], [0..image height].

    Returns:
        Cameras (list[dict[str, str | float]]): A list of camera dictionaries containing filename, x, and y keys. The x and y keys are only present when the camera model of the shot is known.
    """
    self._read_dem()
    Xa, Ya, Za = get_utm_xyz(self.raster, self.dem_data, self.dem_nodata, longitude, latitude,
                             z_sample_window=self.z_sample_window,
                             z_sample_strategy=self.z_sample_strategy)
    if Za == self.dem_nodata:
        return []

    images = []
    for s in self.shots:
        r = s['rotation']
        cam_id = s['cam_id'].replace("v2 ", "")
        focal = s['focal']
        img_w = s['width']
        img_h = s['height']
        Xs, Ys, Zs = s['translation']

        dx = Xa - Xs
        dy = Ya - Ys
        dz = Za - Zs

        # Project the world point into the (undistorted) image using the
        # rows of the rotation matrix.
        # NOTE(review): the sign of den is not checked, so a point behind
        # the camera may also project inside the frame - confirm intent.
        den = r[2][0] * dx + r[2][1] * dy + r[2][2] * dz
        if den == 0:
            # No finite projection exists for this shot.
            continue

        f = focal * max(img_w, img_h)
        x = (img_w - 1) / 2.0 - (f * (r[0][0] * dx + r[0][1] * dy + r[0][2] * dz) / den)
        y = (img_h - 1) / 2.0 - (f * (r[1][0] * dx + r[1][1] * dy + r[1][2] * dz) / den)

        if not (0 <= x <= img_w - 1 and 0 <= y <= img_h - 1):
            continue

        valid = True  # assumed when no camera model is available
        result = {
            'filename': s['filename']
        }
        if cam_id in self.cameras:
            cam = self.cameras[cam_id]

            # Back-undistort to find exact UV coordinates
            xi = img_w - 1 - int(round(x))
            yi = img_h - 1 - int(round(y))
            xu, yu = map_pixels(cam.undistorted(), cam, np.array([[xi, yi]])).ravel()

            valid = 0 <= xu <= img_w and 0 <= yu <= img_h

            result['x'] = float(xu)
            result['y'] = float(yu)
            if normalized:
                result['x'] /= img_w
                result['y'] /= img_h

        if valid:
            images.append(result)

    return images

Utilities

cameralib.utils

read_xanylabeling_annotations(labels_dir)

Read the annotation files in a directory generated with X-AnyLabeling (https://github.com/CVHub520/X-AnyLabeling)

Parameters:

Name Type Description Default
labels_dir str

Path to a directory containing X-AnyLabeling labels.

required

Returns:

Name Type Description
Annotations list[dict[str, Any]]

A list of annotations with image, coordinates, properties, and normalized fields.

Source code in cameralib/utils.py
def read_xanylabeling_annotations(labels_dir: str) -> list[dict[str, Any]]:
    """Read the annotation files in a directory generated with X-AnyLabeling (https://github.com/CVHub520/X-AnyLabeling)

    Every file in labels_dir whose name ends in ".json" (case-insensitive) is read once,
    in sorted filename order.

    Args:
        labels_dir (str): Path to a directory containing X-AnyLabeling labels.

    Returns:
        Annotations (list[dict[str, Any]]): A list of annotations with image, coordinates, properties, and normalized fields.

    Raises:
        KeyError: If a JSON file lacks the 'imagePath' or 'shapes' keys, or a shape lacks 'points'.
    """
    # A single wildcard listing filtered by lower-cased suffix returns each
    # file exactly once. Separate "*.json" and "*.JSON" globs return the same
    # file twice where glob matching is case-insensitive (Windows).
    listing = glob.glob(os.path.join(glob.escape(labels_dir), "*"))
    files = sorted(p for p in listing if p.lower().endswith(".json"))
    annotations = []

    for fi in files:
        with open(fi, 'r', encoding='utf-8') as f:
            j = json.load(f)

        # imagePath may hold Windows separators; normalise them so basename
        # gives the bare filename on every platform.
        image = os.path.basename(j['imagePath'].replace("\\", "/"))
        annotations.extend({
            'image': image,
            'coordinates': s['points'],
            'properties': {
                'label': s.get('label')
            },
            'normalized': False,
        } for s in j['shapes'])

    return annotations

read_yolov7_annotations(labels_dir, image_suffix='.JPG')

Read an annotation directory in YOLOv7 format

Parameters:

Name Type Description Default
labels_dir str

Path to a directory containing YOLOv7 labels.

required
image_suffix str

Extension of the target images.

'.JPG'

Returns:

Name Type Description
Annotations list[dict[str, Any]]

A list of annotations with image, coordinates, properties, and normalized fields.

Source code in cameralib/utils.py
def read_yolov7_annotations(labels_dir: str, image_suffix: str = '.JPG') -> list[dict[str, Any]]:
    """Read an annotation directory in YOLOv7 format

    Every file in labels_dir whose name ends in ".txt" (case-insensitive) is read once,
    in sorted filename order. Each non-blank line must hold five whitespace-separated
    numbers: label, x_center, y_center, width, height. Lines that do not are logged and skipped.

    Args:
        labels_dir (str): Path to a directory containing YOLOv7 labels.
        image_suffix (str): Extension of the target images.

    Returns:
        Annotations (list[dict[str, Any]]): A list of annotations with image, coordinates, properties, and normalized fields. Coordinates are the four bounding box corners.

    Raises:
        ValueError: If image_suffix is not a valid suffix (for example, it lacks the leading dot).
    """
    # A single wildcard listing filtered by lower-cased suffix returns each
    # file exactly once. Separate "*.txt" and "*.TXT" globs return the same
    # file twice where glob matching is case-insensitive (Windows).
    listing = glob.glob(os.path.join(glob.escape(labels_dir), "*"))
    files = sorted(p for p in listing if p.lower().endswith(".txt"))
    annotations = []

    for fi in files:
        # Computed outside the per-line try block so an invalid image_suffix
        # is reported as such instead of as a line parsing failure.
        image = Path(fi).with_suffix(image_suffix).name

        with open(fi, 'r') as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line:
                    continue

                # split() without arguments tolerates tabs and repeated or
                # trailing whitespace.
                parts = line.split()
                if len(parts) != 5:
                    logger.warning(f"Cannot parse line {line} ({fi})")
                    continue

                try:
                    label, x_center, y_center, width, height = [float(p) for p in parts]
                except ValueError:
                    logger.warning(f"Cannot parse values in {line} ({fi})")
                    continue

                xmin = x_center - width / 2.0
                ymin = y_center - height / 2.0
                xmax = xmin + width
                ymax = ymin + height

                annotations.append({
                    'image': image,
                    'coordinates': [
                        [xmin, ymin],
                        [xmax, ymin],
                        [xmax, ymax],
                        [xmin, ymax]
                    ],
                    'properties': {
                        'label': label
                    },
                    'normalized': True,
                })

    return annotations