#!/usr/bin/env python
# -*- coding: utf-8 -*-
# projection.py
# Copyright (c) 2017-2021, Richard Gerum
#
# This file is part of the cameratransform package.
#
# cameratransform is free software: you can redistribute it and/or modify
# it under the terms of the MIT licence.
#
# cameratransform is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# MIT License for more details.
#
# You should have received a copy of the license
# along with cameratransform. If not, see <https://opensource.org/licenses/MIT>
import json
import numpy as np
from .parameter_set import ParameterSet, ClassWithParameterSet, Parameter, TYPE_INTRINSIC
[docs]class CameraProjection(ClassWithParameterSet):
"""
Defines a camera projection. The necessary parameters are:
focalllength_x_px, focalllength_y_px, center_x_px, center_y_px, image_width_px, image_height_px. Depending on the
information available different initialisation routines can be used.
.. note::
This is the base class for projections. it the should not be instantiated. Available projections are
:py:class:`RectilinearProjection`, :py:class:`CylindricalProjection`, or :py:class:`EquirectangularProjection`.
Examples
--------
This section provides some examples how the projections can be initialized.
>>> import cameratransform as ct
**Image Dimensions**:
The image dimensions can be provided as two values:
>>> projection = ct.RectilinearProjection(focallength_px=3863.64, image_width_px=4608, image_height_px=3456)
or as a tuple:
>>> projection = ct.RectilinearProjection(focallength_px=3863.64, image=(4608, 3456))
or by providing a numpy array of an example image:
>>> import matplotlib.pyplot as plt
>>> im = plt.imread("test.jpg")
>>> projection = ct.RectilinearProjection(focallength_px=3863.64, image=im)
**Focal Length**:
The focal length can be provided in mm, when also a sensor size is provided:
>>> projection = ct.RectilinearProjection(focallength_mm=14, sensor=(17.3, 9.731), image=(4608, 3456))
or directly in pixels without the sensor size:
>>> projection = ct.RectilinearProjection(focallength_px=3863.64, image=(4608, 3456))
or as a tuple to give different focal lengths in x and y direction, if the pixels on the sensor are not square:
>>> projection = ct.RectilinearProjection(focallength_px=(3772, 3774), image=(4608, 3456))
or the focal length is given by providing a field of view angle:
>>> projection = ct.RectilinearProjection(view_x_deg=61.617, image=(4608, 3456))
>>> projection = ct.RectilinearProjection(view_y_deg=48.192, image=(4608, 3456))
**Central Point**:
If the position of the optical axis or center of the image is not provided, it is assumed to be in the middle of the
image. But it can be specifided, as two values or a tuple:
>>> projection = ct.RectilinearProjection(focallength_px=3863.64, center=(2304, 1728), image=(4608, 3456))
>>> projection = ct.RectilinearProjection(focallength_px=3863.64, center_x_px=2304, center_y_px=1728, image=(4608, 3456))
"""
def __init__(self, focallength_px=None, focallength_x_px=None, focallength_y_px=None, center_x_px=None, center_y_px=None, center=None, focallength_mm=None, image_width_px=None, image_height_px=None,
sensor_width_mm=None, sensor_height_mm=None, image=None, sensor=None, view_x_deg=None, view_y_deg=None):
# split image in width and height
if image is not None:
try:
image_height_px, image_width_px = image.shape[:2]
except AttributeError:
image_width_px, image_height_px = image
if center is not None:
center_x_px, center_y_px = center
if center_x_px is None and image_width_px is not None:
center_x_px = image_width_px / 2
if center_y_px is None and image_height_px is not None:
center_y_px = image_height_px / 2
# split sensor in width and height
if sensor is not None:
sensor_width_mm, sensor_height_mm = sensor
if sensor_height_mm is None and sensor_width_mm is not None:
sensor_height_mm = image_height_px / image_width_px * sensor_width_mm
elif sensor_width_mm is None and sensor_height_mm is not None:
sensor_width_mm = image_width_px / image_height_px * sensor_height_mm
# get the focalllength
focallength_x_px = focallength_x_px
focallength_y_px = focallength_y_px
if focallength_px is not None:
try:
focallength_x_px, focallength_y_px = focallength_px
except TypeError:
focallength_x_px = focallength_px
focallength_y_px = focallength_px
elif focallength_mm is not None and sensor_width_mm is not None:
focallength_px = focallength_mm / sensor_width_mm * image_width_px
focallength_x_px = focallength_px
focallength_y_px = focallength_px
self.parameters = ParameterSet(
# the intrinsic parameters
focallength_x_px=Parameter(focallength_x_px, default=3600, type=TYPE_INTRINSIC), # the focal length in px
focallength_y_px=Parameter(focallength_y_px, default=3600, type=TYPE_INTRINSIC), # the focal length in px
center_x_px=Parameter(center_x_px, default=0, type=TYPE_INTRINSIC), # the focal length in mm
center_y_px=Parameter(center_y_px, default=0, type=TYPE_INTRINSIC), # the focal length in mm
image_height_px=Parameter(image_height_px, default=3456, type=TYPE_INTRINSIC), # the image height in px
image_width_px=Parameter(image_width_px, default=4608, type=TYPE_INTRINSIC), # the image width in px
sensor_height_mm=Parameter(sensor_height_mm, default=13.0, type=TYPE_INTRINSIC), # the sensor height in mm
sensor_width_mm=Parameter(sensor_width_mm, default=17.3, type=TYPE_INTRINSIC), # the sensor width in mm
)
# add parameter focallength_px that sets x and y simultaneously
fx = self.parameters.parameters["focallength_x_px"]
fy = self.parameters.parameters["focallength_y_px"]
f = Parameter(focallength_x_px, default=3600, type=TYPE_INTRINSIC)
def callback():
fx.value = f.value
if fx.callback is not None:
fx.callback()
fy.value = f.value
if fy.callback is not None:
fy.callback()
f.callback = callback
self.parameters.parameters["focallength_px"] = f
if view_x_deg is not None or view_y_deg is not None:
if sensor_width_mm is None:
if view_x_deg is not None:
self.sensor_width_mm = self.imageFromFOV(view_x=view_x_deg)
self.sensor_height_mm = self.image_height_px / self.image_width_px * self.sensor_width_mm
elif view_y_deg is not None:
self.sensor_height_mm = self.imageFromFOV(view_y=view_y_deg)
self.sensor_width_mm = self.image_width_px / self.image_height_px * self.sensor_height_mm
if focallength_mm is not None:
self.focallength_px = focallength_mm / self.sensor_width_mm * self.image_width_px
self.focallength_x_px = focallength_px
self.focallength_y_px = focallength_px
else:
self.focallength_x_px = self.focallengthFromFOV(view_x_deg, view_y_deg)
self.focallength_y_px = self.focallengthFromFOV(view_x_deg, view_y_deg)
def __str__(self):
string = ""
string += " intrinsic (%s):\n" % type(self).__name__
#string += " f:\t\t%.1f mm\n sensor:\t%.2f×%.2f mm\n image:\t%d×%d px\n" % (
# self.parameters.focallength_mm, self.parameters.sensor_width_mm, self.parameters.sensor_height_mm,
# self.parameters.image_width_px, self.parameters.image_height_px)
string += " f:\t\t%.1f px\n sensor:\t%.2f×%.2f mm\n image:\t%d×%d px\n" % (
self.parameters.focallength_x_px, self.parameters.sensor_width_mm, self.parameters.sensor_height_mm,
self.parameters.image_width_px, self.parameters.image_height_px)
return string
def save(self, filename):
keys = self.parameters.parameters.keys()
export_dict = {key: getattr(self, key) for key in keys if key != "focallength_px"}
with open(filename, "w") as fp:
fp.write(json.dumps(export_dict))
def load(self, filename):
with open(filename, "r") as fp:
variables = json.loads(fp.read())
for key in variables:
setattr(self, key, variables[key])
[docs] def imageFromCamera(self, points): # pragma: no cover
"""
Convert points (Nx3) from the **camera** coordinate system to the **image** coordinate system.
Parameters
----------
points : ndarray
the points in **camera** coordinates to transform, dimensions (3), (Nx3)
Returns
-------
points : ndarray
the points in the **image** coordinate system, dimensions (2), (Nx2)
Examples
--------
>>> import cameratransform as ct
>>> proj = ct.RectilinearProjection(focallength_px=3729, image=(4608, 2592))
transform a single point from the **camera** coordinates to the image:
>>> proj.imageFromCamera([-0.09, -0.27, -1.00])
[2639.61 2302.83]
or multiple points in one go:
>>> proj.imageFromCamera([[-0.09, -0.27, -1.00], [-0.18, -0.24, -1.00]])
[[2639.61 2302.83]
[2975.22 2190.96]]
"""
# to be overloaded by the child class.
return None
[docs] def getRay(self, points, normed=False): # pragma: no cover
"""
As the transformation from the **image** coordinate system to the **camera** coordinate system is not unique,
**image** points can only be uniquely mapped to a ray in **camera** coordinates.
Parameters
----------
points : ndarray
the points in **image** coordinates for which to get the ray, dimensions (2), (Nx2)
Returns
-------
rays : ndarray
the rays in the **camera** coordinate system, dimensions (3), (Nx3)
Examples
--------
>>> import cameratransform as ct
>>> proj = ct.RectilinearProjection(focallength_px=3729, image=(4608, 2592))
get the ray of a point in the image:
>>> proj.getRay([1968, 2291])
[0.09 -0.27 -1.00]
or the rays of multiple points in the image:
>>> proj.getRay([[1968, 2291], [1650, 2189]])
[[0.09 -0.27 -1.00]
[0.18 -0.24 -1.00]]
"""
# to be overloaded by the child class.
return None
[docs] def getFieldOfView(self): # pragma: no cover
"""
The field of view of the projection in x (width, horizontal) and y (height, vertical) direction.
Returns
-------
view_x_deg : float
the horizontal field of view in degree.
view_y_deg : float
the vertical field of view in degree.
"""
# to be overloaded by the child class.
return 0, 0
[docs] def focallengthFromFOV(self, view_x=None, view_y=None): # pragma: no cover
"""
The focal length (in x or y direction) based on the given field of view.
Parameters
----------
view_x : float
the field of view in x direction in degrees. If not given only view_y is processed.
view_y : float
the field of view in y direction in degrees. If not given only view_y is processed.
Returns
-------
focallength_px : float
the focal length in pixels.
"""
# to be overloaded by the child class.
return 0
[docs] def imageFromFOV(self, view_x=None, view_y=None): # pragma: no cover
"""
The image width or height in pixel based on the given field of view.
Parameters
----------
view_x : float
the field of view in x direction in degrees. If not given only view_y is processed.
view_y : float
the field of view in y direction in degrees. If not given only view_y is processed.
Returns
-------
width/height : float
the width or height in pixels.
"""
# to be overloaded by the child class.
return 0
[docs]class RectilinearProjection(CameraProjection):
r"""
This projection is the standard "pin-hole", or frame camera model, which is the most common projection for single images. The angles
:math:`\pm 180°` are projected to :math:`\pm \infty`. Therefore, the maximal possible field of view in this projection
would be 180° for an infinitely large image.
**Projection**:
.. math::
x_\mathrm{im} &= f_x \cdot \frac{x}{z} + c_x\\
y_\mathrm{im} &= f_y \cdot \frac{y}{z} + c_y
**Rays**:
.. math::
\vec{r} = \begin{pmatrix}
(x_\mathrm{im} - c_x)/f_x\\
(y_\mathrm{im} - c_y)/f_y\\
1\\
\end{pmatrix}
**Matrix**:
The rectilinear projection can also be represented in matrix notation:
.. math::
C_{\mathrm{intr.}} &=
\begin{pmatrix}
f_x & 0 & c_x \\
0 & f_y & c_y \\
0 & 0 & 1 \\
\end{pmatrix}\\
"""
def getRay(self, points, normed=False):
# ensure that the points are provided as an array
points = np.array(points)
# set z=focallenth and solve the other equations for x and y
ray = np.array([-(points[..., 0] - self.center_x_px) / self.focallength_x_px,
(points[..., 1] - self.center_y_px) / self.focallength_y_px,
np.ones(points[..., 1].shape)]).T
# norm the ray if desired
if normed:
ray /= np.linalg.norm(ray, axis=-1)[..., None]
# return the ray
return -ray
def imageFromCamera(self, points, hide_backpoints=True):
"""
x y
x_im = f_x * --- + offset_x y_im = f_y * --- + offset_y
z z
"""
points = np.array(points)
# set small z distances to 0
points[np.abs(points[..., 2]) < 1e-10] = 0
# transform the points
transformed_points = np.array([-points[..., 0] * self.focallength_x_px / points[..., 2] + self.center_x_px,
points[..., 1] * self.focallength_y_px / points[..., 2] + self.center_y_px]).T
if hide_backpoints:
transformed_points[points[..., 2] > 0] = np.nan
return transformed_points
def getFieldOfView(self):
return np.rad2deg(2 * np.arctan(self.image_width_px / (2 * self.focallength_x_px))), \
np.rad2deg(2 * np.arctan(self.image_height_px / (2 * self.focallength_y_px)))
def focallengthFromFOV(self, view_x=None, view_y=None):
if view_x is not None:
return self.image_width_px / (2 * np.tan(np.deg2rad(view_x) / 2))
else:
return self.image_height_px / (2 * np.tan(np.deg2rad(view_y) / 2))
def imageFromFOV(self, view_x=None, view_y=None):
if view_x is not None:
# image_width_px
return self.focallength_x_px * (2 * np.tan(np.deg2rad(view_x) / 2))
else:
# image_height_px
return self.focallength_y_px * (2 * np.tan(np.deg2rad(view_y) / 2))
[docs]class CylindricalProjection(CameraProjection):
r"""
This projection is a common projection used for panoranic images. This projection is often used
for wide panoramic images, as it can cover the full 360° range in the x-direction. The poles cannot
be represented in this projection, as they would be projected to :math:`y = \pm\infty`.
**Projection**:
.. math::
x_\mathrm{im} &= f_x \cdot \arctan{\left(\frac{x}{z}\right)} + c_x\\
y_\mathrm{im} &= f_y \cdot \frac{y}{\sqrt{x^2+z^2}} + c_y
**Rays**:
.. math::
\vec{r} = \begin{pmatrix}
\sin\left(\frac{x_\mathrm{im} - c_x}{f_x}\right)\\
\frac{y_\mathrm{im} - c_y}{f_y}\\
\cos\left(\frac{x_\mathrm{im} - c_x}{f_x}\right)
\end{pmatrix}
"""
def getRay(self, points, normed=False):
# ensure that the points are provided as an array
points = np.array(points)
# set r=1 and solve the other equations for x and y
r = 1
alpha = (points[..., 0] - self.center_x_px) / self.focallength_x_px
x = -np.sin(alpha) * r
z = np.cos(alpha) * r
y = r * (points[..., 1] - self.center_y_px) / self.focallength_y_px
# compose the ray
ray = np.array([x, y, z]).T
# norm the ray if desired
if normed:
ray /= np.linalg.norm(ray, axis=-1)[..., None]
# return the rey
return -ray
def imageFromCamera(self, points, hide_backpoints=True):
"""
( x ) y
x_im = f_x * arctan(---) + offset_x y_im = f_y * --------------- + offset_y
( z ) sqrt(x**2+z**2)
"""
# ensure that the points are provided as an array
points = np.array(points)
# set small z distances to 0
points[np.abs(points[..., 2]) < 1e-10] = 0
# transform the points
transformed_points = np.array(
[-self.focallength_x_px * np.arctan2(-points[..., 0], -points[..., 2]) + self.center_x_px,
-self.focallength_y_px * points[..., 1] / np.linalg.norm(points[..., [0, 2]], axis=-1) + self.center_y_px]).T
# ensure that points' x values are also nan when the y values are nan
transformed_points[np.isnan(transformed_points[..., 1])] = np.nan
# return the points
return transformed_points
def getFieldOfView(self):
return np.rad2deg(self.image_width_px / self.focallength_x_px), \
np.rad2deg(2 * np.arctan(self.image_height_px / (2 * self.focallength_y_px)))
def focallengthFromFOV(self, view_x=None, view_y=None):
if view_x is not None:
return self.image_width_px / np.deg2rad(view_x)
else:
return self.image_height_px / (2 * np.tan(np.deg2rad(view_y) / 2))
def imageFromFOV(self, view_x=None, view_y=None):
if view_x is not None:
# image_width_px
return self.focallength_x_px * np.deg2rad(view_x)
else:
# image_height_px
return self.focallength_y_px * (2 * np.tan(np.deg2rad(view_y) / 2))
[docs]class EquirectangularProjection(CameraProjection):
r"""
This projection is a common projection used for panoranic images. The projection can cover the
full range of angles in both x and y direction.
**Projection**:
.. math::
x_\mathrm{im} &= f_x \cdot \arctan{\left(\frac{x}{z}\right)} + c_x\\
y_\mathrm{im} &= f_y \cdot \arctan{\left(\frac{y}{\sqrt{x^2+z^2}}\right)} + c_y
**Rays**:
.. math::
\vec{r} = \begin{pmatrix}
\sin\left(\frac{x_\mathrm{im} - c_x}{f_x}\right)\\
\tan\left(\frac{y_\mathrm{im} - c_y}{f_y}\right)\\
\cos\left(\frac{x_\mathrm{im} - c_x}{f_x}\right)
\end{pmatrix}
"""
def getRay(self, points, normed=False):
# ensure that the points are provided as an array
points = np.array(points)
# set r=1 and solve the other equations for x and y
r = 1
alpha = (points[..., 0] - self.center_x_px) / self.focallength_x_px
x = -np.sin(alpha) * r
z = np.cos(alpha) * r
y = r * np.tan((points[..., 1] - self.center_y_px) / self.focallength_y_px)
# compose the ray
ray = np.array([x, y, z]).T
# norm the ray if desired
if normed:
ray /= np.linalg.norm(ray, axis=-1)[..., None]
# return the rey
return -ray
def imageFromCamera(self, points, hide_backpoints=True):
"""
( x ) ( y )
x_im = f_x * arctan(---) + offset_x y_im = f_y * arctan(---------------) + offset_y
( z ) (sqrt(x**2+z**2))
"""
# ensure that the points are provided as an array
points = np.array(points)
# set small z distances to 0
points[np.abs(points[..., 2]) < 1e-10] = 0
# transform the points
transformed_points = np.array([-self.focallength_x_px * np.arctan(points[..., 0] / points[..., 2]) + self.center_x_px,
-self.focallength_y_px * np.arctan(points[..., 1] / np.sqrt(
points[..., 0] ** 2 + points[..., 2] ** 2)) + self.center_y_px]).T
# return the points
return transformed_points
def getFieldOfView(self):
return np.rad2deg(self.image_width_px / self.focallength_x_px),\
np.rad2deg(self.image_height_px / self.focallength_y_px)
def focallengthFromFOV(self, view_x=None, view_y=None):
if view_x is not None:
return self.image_width_px / np.deg2rad(view_x)
else:
return self.image_height_px / np.deg2rad(view_y)
def imageFromFOV(self, view_x=None, view_y=None):
if view_x is not None:
# image_width_mm
return self.focallength_x_px * np.deg2rad(view_x)
else:
# image_height_mm
return self.focallength_y_px * np.deg2rad(view_y)