Source code for cameratransform.projection

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# projection.py

# Copyright (c) 2017-2021, Richard Gerum
#
# This file is part of the cameratransform package.
#
# cameratransform is free software: you can redistribute it and/or modify
# it under the terms of the MIT licence.
#
# cameratransform is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# MIT License for more details.
#
# You should have received a copy of the license
# along with cameratransform. If not, see <https://opensource.org/licenses/MIT>

import json
import numpy as np
from .parameter_set import ParameterSet, ClassWithParameterSet, Parameter, TYPE_INTRINSIC


[docs]class CameraProjection(ClassWithParameterSet): """ Defines a camera projection. The necessary parameters are: focalllength_x_px, focalllength_y_px, center_x_px, center_y_px, image_width_px, image_height_px. Depending on the information available different initialisation routines can be used. .. note:: This is the base class for projections. it the should not be instantiated. Available projections are :py:class:`RectilinearProjection`, :py:class:`CylindricalProjection`, or :py:class:`EquirectangularProjection`. Examples -------- This section provides some examples how the projections can be initialized. >>> import cameratransform as ct **Image Dimensions**: The image dimensions can be provided as two values: >>> projection = ct.RectilinearProjection(focallength_px=3863.64, image_width_px=4608, image_height_px=3456) or as a tuple: >>> projection = ct.RectilinearProjection(focallength_px=3863.64, image=(4608, 3456)) or by providing a numpy array of an example image: >>> import matplotlib.pyplot as plt >>> im = plt.imread("test.jpg") >>> projection = ct.RectilinearProjection(focallength_px=3863.64, image=im) **Focal Length**: The focal length can be provided in mm, when also a sensor size is provided: >>> projection = ct.RectilinearProjection(focallength_mm=14, sensor=(17.3, 9.731), image=(4608, 3456)) or directly in pixels without the sensor size: >>> projection = ct.RectilinearProjection(focallength_px=3863.64, image=(4608, 3456)) or as a tuple to give different focal lengths in x and y direction, if the pixels on the sensor are not square: >>> projection = ct.RectilinearProjection(focallength_px=(3772, 3774), image=(4608, 3456)) or the focal length is given by providing a field of view angle: >>> projection = ct.RectilinearProjection(view_x_deg=61.617, image=(4608, 3456)) >>> projection = ct.RectilinearProjection(view_y_deg=48.192, image=(4608, 3456)) **Central Point**: If the position of the optical axis or center of the image is not provided, it is assumed to be in the middle of the image. But it can be specifided, as two values or a tuple: >>> projection = ct.RectilinearProjection(focallength_px=3863.64, center=(2304, 1728), image=(4608, 3456)) >>> projection = ct.RectilinearProjection(focallength_px=3863.64, center_x_px=2304, center_y_px=1728, image=(4608, 3456)) """ def __init__(self, focallength_px=None, focallength_x_px=None, focallength_y_px=None, center_x_px=None, center_y_px=None, center=None, focallength_mm=None, image_width_px=None, image_height_px=None, sensor_width_mm=None, sensor_height_mm=None, image=None, sensor=None, view_x_deg=None, view_y_deg=None): # split image in width and height if image is not None: try: image_height_px, image_width_px = image.shape[:2] except AttributeError: image_width_px, image_height_px = image if center is not None: center_x_px, center_y_px = center if center_x_px is None and image_width_px is not None: center_x_px = image_width_px / 2 if center_y_px is None and image_height_px is not None: center_y_px = image_height_px / 2 # split sensor in width and height if sensor is not None: sensor_width_mm, sensor_height_mm = sensor if sensor_height_mm is None and sensor_width_mm is not None: sensor_height_mm = image_height_px / image_width_px * sensor_width_mm elif sensor_width_mm is None and sensor_height_mm is not None: sensor_width_mm = image_width_px / image_height_px * sensor_height_mm # get the focalllength focallength_x_px = focallength_x_px focallength_y_px = focallength_y_px if focallength_px is not None: try: focallength_x_px, focallength_y_px = focallength_px except TypeError: focallength_x_px = focallength_px focallength_y_px = focallength_px elif focallength_mm is not None and sensor_width_mm is not None: focallength_px = focallength_mm / sensor_width_mm * image_width_px focallength_x_px = focallength_px focallength_y_px = focallength_px self.parameters = ParameterSet( # the intrinsic parameters focallength_x_px=Parameter(focallength_x_px, default=3600, type=TYPE_INTRINSIC), # the focal length in px focallength_y_px=Parameter(focallength_y_px, default=3600, type=TYPE_INTRINSIC), # the focal length in px center_x_px=Parameter(center_x_px, default=0, type=TYPE_INTRINSIC), # the focal length in mm center_y_px=Parameter(center_y_px, default=0, type=TYPE_INTRINSIC), # the focal length in mm image_height_px=Parameter(image_height_px, default=3456, type=TYPE_INTRINSIC), # the image height in px image_width_px=Parameter(image_width_px, default=4608, type=TYPE_INTRINSIC), # the image width in px sensor_height_mm=Parameter(sensor_height_mm, default=13.0, type=TYPE_INTRINSIC), # the sensor height in mm sensor_width_mm=Parameter(sensor_width_mm, default=17.3, type=TYPE_INTRINSIC), # the sensor width in mm ) # add parameter focallength_px that sets x and y simultaneously fx = self.parameters.parameters["focallength_x_px"] fy = self.parameters.parameters["focallength_y_px"] f = Parameter(focallength_x_px, default=3600, type=TYPE_INTRINSIC) def callback(): fx.value = f.value if fx.callback is not None: fx.callback() fy.value = f.value if fy.callback is not None: fy.callback() f.callback = callback self.parameters.parameters["focallength_px"] = f if view_x_deg is not None or view_y_deg is not None: if sensor_width_mm is None: if view_x_deg is not None: self.sensor_width_mm = self.imageFromFOV(view_x=view_x_deg) self.sensor_height_mm = self.image_height_px / self.image_width_px * self.sensor_width_mm elif view_y_deg is not None: self.sensor_height_mm = self.imageFromFOV(view_y=view_y_deg) self.sensor_width_mm = self.image_width_px / self.image_height_px * self.sensor_height_mm if focallength_mm is not None: self.focallength_px = focallength_mm / self.sensor_width_mm * self.image_width_px self.focallength_x_px = focallength_px self.focallength_y_px = focallength_px else: self.focallength_x_px = self.focallengthFromFOV(view_x_deg, view_y_deg) self.focallength_y_px = self.focallengthFromFOV(view_x_deg, view_y_deg) def __str__(self): string = "" string += " intrinsic (%s):\n" % type(self).__name__ #string += " f:\t\t%.1f mm\n sensor:\t%.2f×%.2f mm\n image:\t%d×%d px\n" % ( # self.parameters.focallength_mm, self.parameters.sensor_width_mm, self.parameters.sensor_height_mm, # self.parameters.image_width_px, self.parameters.image_height_px) string += " f:\t\t%.1f px\n sensor:\t%.2f×%.2f mm\n image:\t%d×%d px\n" % ( self.parameters.focallength_x_px, self.parameters.sensor_width_mm, self.parameters.sensor_height_mm, self.parameters.image_width_px, self.parameters.image_height_px) return string def save(self, filename): keys = self.parameters.parameters.keys() export_dict = {key: getattr(self, key) for key in keys if key != "focallength_px"} with open(filename, "w") as fp: fp.write(json.dumps(export_dict)) def load(self, filename): with open(filename, "r") as fp: variables = json.loads(fp.read()) for key in variables: setattr(self, key, variables[key])
[docs] def imageFromCamera(self, points): # pragma: no cover """ Convert points (Nx3) from the **camera** coordinate system to the **image** coordinate system. Parameters ---------- points : ndarray the points in **camera** coordinates to transform, dimensions (3), (Nx3) Returns ------- points : ndarray the points in the **image** coordinate system, dimensions (2), (Nx2) Examples -------- >>> import cameratransform as ct >>> proj = ct.RectilinearProjection(focallength_px=3729, image=(4608, 2592)) transform a single point from the **camera** coordinates to the image: >>> proj.imageFromCamera([-0.09, -0.27, -1.00]) [2639.61 2302.83] or multiple points in one go: >>> proj.imageFromCamera([[-0.09, -0.27, -1.00], [-0.18, -0.24, -1.00]]) [[2639.61 2302.83] [2975.22 2190.96]] """ # to be overloaded by the child class. return None
[docs] def getRay(self, points, normed=False): # pragma: no cover """ As the transformation from the **image** coordinate system to the **camera** coordinate system is not unique, **image** points can only be uniquely mapped to a ray in **camera** coordinates. Parameters ---------- points : ndarray the points in **image** coordinates for which to get the ray, dimensions (2), (Nx2) Returns ------- rays : ndarray the rays in the **camera** coordinate system, dimensions (3), (Nx3) Examples -------- >>> import cameratransform as ct >>> proj = ct.RectilinearProjection(focallength_px=3729, image=(4608, 2592)) get the ray of a point in the image: >>> proj.getRay([1968, 2291]) [0.09 -0.27 -1.00] or the rays of multiple points in the image: >>> proj.getRay([[1968, 2291], [1650, 2189]]) [[0.09 -0.27 -1.00] [0.18 -0.24 -1.00]] """ # to be overloaded by the child class. return None
[docs] def getFieldOfView(self): # pragma: no cover """ The field of view of the projection in x (width, horizontal) and y (height, vertical) direction. Returns ------- view_x_deg : float the horizontal field of view in degree. view_y_deg : float the vertical field of view in degree. """ # to be overloaded by the child class. return 0, 0
[docs] def focallengthFromFOV(self, view_x=None, view_y=None): # pragma: no cover """ The focal length (in x or y direction) based on the given field of view. Parameters ---------- view_x : float the field of view in x direction in degrees. If not given only view_y is processed. view_y : float the field of view in y direction in degrees. If not given only view_y is processed. Returns ------- focallength_px : float the focal length in pixels. """ # to be overloaded by the child class. return 0
[docs] def imageFromFOV(self, view_x=None, view_y=None): # pragma: no cover """ The image width or height in pixel based on the given field of view. Parameters ---------- view_x : float the field of view in x direction in degrees. If not given only view_y is processed. view_y : float the field of view in y direction in degrees. If not given only view_y is processed. Returns ------- width/height : float the width or height in pixels. """ # to be overloaded by the child class. return 0
[docs]class RectilinearProjection(CameraProjection): r""" This projection is the standard "pin-hole", or frame camera model, which is the most common projection for single images. The angles :math:`\pm 180°` are projected to :math:`\pm \infty`. Therefore, the maximal possible field of view in this projection would be 180° for an infinitely large image. **Projection**: .. math:: x_\mathrm{im} &= f_x \cdot \frac{x}{z} + c_x\\ y_\mathrm{im} &= f_y \cdot \frac{y}{z} + c_y **Rays**: .. math:: \vec{r} = \begin{pmatrix} (x_\mathrm{im} - c_x)/f_x\\ (y_\mathrm{im} - c_y)/f_y\\ 1\\ \end{pmatrix} **Matrix**: The rectilinear projection can also be represented in matrix notation: .. math:: C_{\mathrm{intr.}} &= \begin{pmatrix} f_x & 0 & c_x \\ 0 & f_y & c_y \\ 0 & 0 & 1 \\ \end{pmatrix}\\ """ def getRay(self, points, normed=False): # ensure that the points are provided as an array points = np.array(points) # set z=focallenth and solve the other equations for x and y ray = np.array([-(points[..., 0] - self.center_x_px) / self.focallength_x_px, (points[..., 1] - self.center_y_px) / self.focallength_y_px, np.ones(points[..., 1].shape)]).T # norm the ray if desired if normed: ray /= np.linalg.norm(ray, axis=-1)[..., None] # return the ray return -ray def imageFromCamera(self, points, hide_backpoints=True): """ x y x_im = f_x * --- + offset_x y_im = f_y * --- + offset_y z z """ points = np.array(points) # set small z distances to 0 points[np.abs(points[..., 2]) < 1e-10] = 0 # transform the points transformed_points = np.array([-points[..., 0] * self.focallength_x_px / points[..., 2] + self.center_x_px, points[..., 1] * self.focallength_y_px / points[..., 2] + self.center_y_px]).T if hide_backpoints: transformed_points[points[..., 2] > 0] = np.nan return transformed_points def getFieldOfView(self): return np.rad2deg(2 * np.arctan(self.image_width_px / (2 * self.focallength_x_px))), \ np.rad2deg(2 * np.arctan(self.image_height_px / (2 * self.focallength_y_px))) def focallengthFromFOV(self, view_x=None, view_y=None): if view_x is not None: return self.image_width_px / (2 * np.tan(np.deg2rad(view_x) / 2)) else: return self.image_height_px / (2 * np.tan(np.deg2rad(view_y) / 2)) def imageFromFOV(self, view_x=None, view_y=None): if view_x is not None: # image_width_px return self.focallength_x_px * (2 * np.tan(np.deg2rad(view_x) / 2)) else: # image_height_px return self.focallength_y_px * (2 * np.tan(np.deg2rad(view_y) / 2))
[docs]class CylindricalProjection(CameraProjection): r""" This projection is a common projection used for panoranic images. This projection is often used for wide panoramic images, as it can cover the full 360° range in the x-direction. The poles cannot be represented in this projection, as they would be projected to :math:`y = \pm\infty`. **Projection**: .. math:: x_\mathrm{im} &= f_x \cdot \arctan{\left(\frac{x}{z}\right)} + c_x\\ y_\mathrm{im} &= f_y \cdot \frac{y}{\sqrt{x^2+z^2}} + c_y **Rays**: .. math:: \vec{r} = \begin{pmatrix} \sin\left(\frac{x_\mathrm{im} - c_x}{f_x}\right)\\ \frac{y_\mathrm{im} - c_y}{f_y}\\ \cos\left(\frac{x_\mathrm{im} - c_x}{f_x}\right) \end{pmatrix} """ def getRay(self, points, normed=False): # ensure that the points are provided as an array points = np.array(points) # set r=1 and solve the other equations for x and y r = 1 alpha = (points[..., 0] - self.center_x_px) / self.focallength_x_px x = -np.sin(alpha) * r z = np.cos(alpha) * r y = r * (points[..., 1] - self.center_y_px) / self.focallength_y_px # compose the ray ray = np.array([x, y, z]).T # norm the ray if desired if normed: ray /= np.linalg.norm(ray, axis=-1)[..., None] # return the rey return -ray def imageFromCamera(self, points, hide_backpoints=True): """ ( x ) y x_im = f_x * arctan(---) + offset_x y_im = f_y * --------------- + offset_y ( z ) sqrt(x**2+z**2) """ # ensure that the points are provided as an array points = np.array(points) # set small z distances to 0 points[np.abs(points[..., 2]) < 1e-10] = 0 # transform the points transformed_points = np.array( [-self.focallength_x_px * np.arctan2(-points[..., 0], -points[..., 2]) + self.center_x_px, -self.focallength_y_px * points[..., 1] / np.linalg.norm(points[..., [0, 2]], axis=-1) + self.center_y_px]).T # ensure that points' x values are also nan when the y values are nan transformed_points[np.isnan(transformed_points[..., 1])] = np.nan # return the points return transformed_points def getFieldOfView(self): return np.rad2deg(self.image_width_px / self.focallength_x_px), \ np.rad2deg(2 * np.arctan(self.image_height_px / (2 * self.focallength_y_px))) def focallengthFromFOV(self, view_x=None, view_y=None): if view_x is not None: return self.image_width_px / np.deg2rad(view_x) else: return self.image_height_px / (2 * np.tan(np.deg2rad(view_y) / 2)) def imageFromFOV(self, view_x=None, view_y=None): if view_x is not None: # image_width_px return self.focallength_x_px * np.deg2rad(view_x) else: # image_height_px return self.focallength_y_px * (2 * np.tan(np.deg2rad(view_y) / 2))
[docs]class EquirectangularProjection(CameraProjection): r""" This projection is a common projection used for panoranic images. The projection can cover the full range of angles in both x and y direction. **Projection**: .. math:: x_\mathrm{im} &= f_x \cdot \arctan{\left(\frac{x}{z}\right)} + c_x\\ y_\mathrm{im} &= f_y \cdot \arctan{\left(\frac{y}{\sqrt{x^2+z^2}}\right)} + c_y **Rays**: .. math:: \vec{r} = \begin{pmatrix} \sin\left(\frac{x_\mathrm{im} - c_x}{f_x}\right)\\ \tan\left(\frac{y_\mathrm{im} - c_y}{f_y}\right)\\ \cos\left(\frac{x_\mathrm{im} - c_x}{f_x}\right) \end{pmatrix} """ def getRay(self, points, normed=False): # ensure that the points are provided as an array points = np.array(points) # set r=1 and solve the other equations for x and y r = 1 alpha = (points[..., 0] - self.center_x_px) / self.focallength_x_px x = -np.sin(alpha) * r z = np.cos(alpha) * r y = r * np.tan((points[..., 1] - self.center_y_px) / self.focallength_y_px) # compose the ray ray = np.array([x, y, z]).T # norm the ray if desired if normed: ray /= np.linalg.norm(ray, axis=-1)[..., None] # return the rey return -ray def imageFromCamera(self, points, hide_backpoints=True): """ ( x ) ( y ) x_im = f_x * arctan(---) + offset_x y_im = f_y * arctan(---------------) + offset_y ( z ) (sqrt(x**2+z**2)) """ # ensure that the points are provided as an array points = np.array(points) # set small z distances to 0 points[np.abs(points[..., 2]) < 1e-10] = 0 # transform the points transformed_points = np.array([-self.focallength_x_px * np.arctan(points[..., 0] / points[..., 2]) + self.center_x_px, -self.focallength_y_px * np.arctan(points[..., 1] / np.sqrt( points[..., 0] ** 2 + points[..., 2] ** 2)) + self.center_y_px]).T # return the points return transformed_points def getFieldOfView(self): return np.rad2deg(self.image_width_px / self.focallength_x_px),\ np.rad2deg(self.image_height_px / self.focallength_y_px) def focallengthFromFOV(self, view_x=None, view_y=None): if view_x is not None: return self.image_width_px / np.deg2rad(view_x) else: return self.image_height_px / np.deg2rad(view_y) def imageFromFOV(self, view_x=None, view_y=None): if view_x is not None: # image_width_mm return self.focallength_x_px * np.deg2rad(view_x) else: # image_height_mm return self.focallength_y_px * np.deg2rad(view_y)