Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/uniform point distribution #618

Open
wants to merge 13 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions extensions/example/Jamfile.v2
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ project boost-geometry-examples-extensions
;

build-project gis ;
build-project random ;
16 changes: 16 additions & 0 deletions extensions/example/random/Jamfile.v2
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Boost.Geometry (aka GGL, Generic Geometry Library)
#
# Copyright (c) 2007-2012 Barend Gehrels, Amsterdam, the Netherlands.
# Copyright (c) 2008-2012 Bruno Lalande, Paris, France.
# Copyright (c) 2009-2012 Mateusz Loskot, London, UK.

# Use, modification and distribution is subject to the Boost Software License,
# Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)


project boost-geometry-example-extensions-random
: # requirements
;

exe random_example : random_example.cpp ;
88 changes: 88 additions & 0 deletions extensions/example/random/random_example.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// Boost.Geometry (aka GGL, Generic Geometry Library)

// Copyright (c) 2019 Tinko Bartels, Berlin, Germany.

// Contributed and/or modified by Tinko Bartels,
// as part of Google Summer of Code 2019 program.

// Use, modification and distribution is subject to the Boost Software License,
// Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

// Random Example - showing random sampling in various geometries

#include <fstream>
#include <random>
#include <cmath>

#include <boost/geometry.hpp>
#include <boost/geometry/extensions/random/uniform_point_distribution.hpp>

const int samples = 100;

template<typename Distribution, typename Generator, typename Mapper>
void draw_samples(Distribution& d, Generator& g, Mapper& m) {
for (int i = 0 ; i < samples ; ++i)
{
m.map(d(g), "fill-opacity:1;fill:rgb(255,0,0);stroke:rgb(255,0,0);stroke-width:1", 1);
}
}

int main()
{
namespace bg = boost::geometry;
typedef bg::model::point<double, 2, bg::cs::cartesian> point;
typedef bg::model::segment<point> segment;
typedef bg::model::box<point> box;
typedef bg::model::multi_point<point> multi_point;
typedef bg::model::polygon<point> polygon;

std::default_random_engine generator(1);
polygon poly;
boost::geometry::read_wkt("POLYGON((16 21,17.1226 17.5451,20.7553 17.5451,"
"17.8164 15.4098,18.9389 11.9549,16 14.0902,13.0611 11.9549,"
"14.1836 15.4098,11.2447 17.5451,14.8774 17.5451,16 21))",
poly);
box b(point(0, 0), point(10, 10));
segment s(point(11, 0), point(21, 10));
multi_point mp;
for (double y = 11.0 ; y <= 21.0 ; y += 0.5)
{
for(double x = 0.0 ; x <= 10.0 ; x += 0.5)
{
bg::append(mp, point(x, y));
}
}
bg::random::uniform_point_distribution<box> box_dist(b);
bg::random::uniform_point_distribution<multi_point> mp_dist(mp);
bg::random::uniform_point_distribution<segment> seg_dist(s);
bg::random::uniform_point_distribution<polygon> poly_dist(poly);
std::ofstream svg("random.svg");
bg::svg_mapper<point> mapper(svg, 720, 720);
mapper.add(poly);
mapper.add(b);
mapper.add(mp);
mapper.add(s);
mapper.add(box(point(0, 22), point(22, 35)));
mapper.map(b, "fill-opacity:0.3;fill:rgb(51,51,153);stroke:rgb(51,51,153);stroke-width:2");
mapper.map(s, "fill-opacity:0.3;fill:rgb(51,51,153);stroke:rgb(51,51,153);stroke-width:2");
mapper.map(mp, "fill-opacity:0.3;fill:rgb(51,51,153);stroke:rgb(51,51,153);stroke-width:2", 2);
mapper.map(poly, "fill-opacity:0.3;fill:rgb(51,51,153);stroke:rgb(51,51,153);stroke-width:2");
draw_samples(box_dist, generator, mapper);
draw_samples(mp_dist, generator, mapper);
draw_samples(seg_dist, generator, mapper);
draw_samples(poly_dist, generator, mapper);

typedef bg::model::point<double, 2, bg::cs::spherical_equatorial<bg::degree>> point_spherical;
typedef bg::model::box<point_spherical> box_spherical;
box_spherical sb(point_spherical(0, 0), point_spherical(90, 90));
bg::random::uniform_point_distribution<box_spherical> sb_dist(sb);
for (int i = 0 ; i < 10*samples ; ++i)
{
point_spherical p = sb_dist(generator);
double x = bg::get<0>(p) / 90 * 22;
double y = 32 - bg::get<1>(p) / 90 * 10;
mapper.map(point(x,y), "fill-opacity:1;fill:rgb(255,0,0);stroke:rgb(255,0,0);stroke-width:1", 1);
}
return 0;
}
2 changes: 1 addition & 1 deletion extensions/test/Jamfile.v2
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@ build-project algorithms ;
build-project gis ;
build-project iterators ;
build-project nsphere ;

build-project random ;
11 changes: 11 additions & 0 deletions extensions/test/random/Jamfile.v2
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Boost.Geometry (aka GGL, Generic Geometry Library)
#
# Use, modification and distribution is subject to the Boost Software License,
# Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)

test-suite boost-geometry-extensions-random
:
[ run random.cpp ]
;

149 changes: 149 additions & 0 deletions extensions/test/random/random.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
// Boost.Geometry (aka GGL, Generic Geometry Library)
// Unit Test

// Copyright (c) 2019 Tinko Bartels, Berlin, Germany.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you can ack gsoc (see other PR)


// Contributed and/or modified by Tinko Bartels,
// as part of Google Summer of Code 2019 program.

// Use, modification and distribution is subject to the Boost Software License,
// Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

#include <random>
#include <vector>
#include <sstream>

#include <geometry_test_common.hpp>

#include <boost/geometry.hpp>
#include <boost/geometry/extensions/random/uniform_point_distribution.hpp>

typedef bg::model::point<double, 2, bg::cs::cartesian> point2d_cart;
typedef bg::model::point<double, 3, bg::cs::cartesian> point3d_cart;
typedef bg::model::point<double, 2, bg::cs::geographic<bg::degree>> point2d_geog;

void test_geographic()
{
//We check whether the generated points lie roughly along
//the great circle segment (<2 km distance on a sphere with
//the radius of earth) and are distributed uniformly with respect
//to great circle arc length.
typedef bg::model::linestring<point2d_geog> linestring;
linestring ls {{ 0.0, 0.0 }, { 45.0, 45.0 }, { 60.0, 60.0 }};
bg::random::uniform_point_distribution<linestring> l_dist(ls);
std::mt19937 generator(0);
int sample_count = 2000;
int count_below_45 = 0;
for (int i = 0 ; i < sample_count ; ++i)
{
point2d_geog sample = l_dist(generator);
BOOST_CHECK( bg::distance(sample, ls) < 2000 );
if(bg::get<0>(sample) < 45.0) count_below_45++;
}
double length_ratio = bg::distance(ls[0], ls[1]) /
( bg::distance(ls[0], ls[1]) + bg::distance(ls[1], ls[2]) );
double sample_ratio = ((double) count_below_45) / sample_count;
bool in_range = sample_ratio * 0.95 < length_ratio
&& sample_ratio * 1.05 > length_ratio;
BOOST_CHECK( in_range );

//We check whether the generated points lie in the spherical box
//(which is actually a triangle in this case) and whether the latitude
//is distributed as expected for uniform spherical distribution, using
//known area ratios of spherical caps.
typedef bg::model::box<point2d_geog> box;
box b {{ 0.0, 0.0 }, { 90.0, 90.0 }};
bg::random::uniform_point_distribution<box> b_dist(b);
int under_60 = 0;
for (int i = 0 ; i < sample_count ; ++i)
{
point2d_geog sample = b_dist(generator);
BOOST_CHECK( bg::within(sample, b) );
if(bg::get<1>(sample) < 60.0) ++under_60;
}
BOOST_CHECK_GT(under_60, 0.5 * 0.95 * sample_count);
BOOST_CHECK_LT(under_60, 0.5 * 1.05 * sample_count);
}

void test_polygon()
{
//This test will test uniform sampling in polygon, which also checks
//uniform sampling in boxes. We check whether two equal distributions
//(copied using operator<< and operator>>) generate the same sequence
//of points and whether those points are uniformly distributed with
//respect to cartesian area.
typedef bg::model::polygon<point2d_cart> polygon;
polygon poly;
bg::read_wkt(
"POLYGON((16 21,17.1226 17.5451,20.7553 17.5451, 17.8164 15.4098,"
"18.9389 11.9549,16 14.0902,13.0611 11.9549, 14.1836 15.4098,"
"11.2447 17.5451,14.8774 17.5451,16 21))",
poly);
bg::random::uniform_point_distribution<polygon> poly_dist(poly);
bg::random::uniform_point_distribution<polygon> poly_dist2;
BOOST_CHECK( !(poly_dist == poly_dist2) );
std::stringstream ss;
ss << poly_dist;
ss >> poly_dist2;
BOOST_CHECK( poly_dist == poly_dist2 );
std::mt19937 generator(0), generator2(0);
for (int i = 0 ; i < 100 ; ++i)
{
point2d_cart sample1 = poly_dist(generator);
BOOST_CHECK( bg::equals(sample1, poly_dist2(generator2)) );
BOOST_CHECK( bg::within(sample1, poly) );
}
std::vector<point2d_cart> randoms;
const int uniformity_test_samples = 2000;
for (int i = 0 ; i < uniformity_test_samples ; ++i)
{
randoms.push_back(poly_dist(generator));
}
typedef bg::model::box<point2d_cart> box;
box env, lhalf;
bg::envelope(poly, env);
bg::set<bg::min_corner, 0>(lhalf, bg::get<bg::min_corner, 0>(env));
bg::set<bg::min_corner, 1>(lhalf, bg::get<bg::min_corner, 1>(env));
bg::set<bg::max_corner, 0>(lhalf, bg::get<bg::max_corner, 0>(env));
bg::set<bg::max_corner, 1>(lhalf,
(bg::get<bg::max_corner, 1>(env) + bg::get<bg::min_corner, 1>(env)) / 2);
std::vector<polygon> lower;
bg::intersection(lhalf, poly, lower);
double area_ratio = bg::area(lower[0])/bg::area(poly);
int in_lower = 0;
for (int i = 0 ; i < uniformity_test_samples ; ++i)
{
if(bg::within(randoms[i], lhalf))
++in_lower;
}
double sample_ratio = ((double) in_lower ) / uniformity_test_samples;
BOOST_CHECK_GT( sample_ratio * 1.05, area_ratio );
BOOST_CHECK_LT( sample_ratio * 0.95, area_ratio );
}

void test_multipoint()
{
typedef bg::model::multi_point<point3d_cart> multipoint;
multipoint mp {{0.0, 0.0, 0.0}, {1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}};
int first = 0;
bg::random::uniform_point_distribution<multipoint> mp_dist(mp);
std::mt19937 generator(0);
int sample_count = 1000;
for (int i = 0 ; i < sample_count ; ++i)
{
point3d_cart sample = mp_dist(generator);
BOOST_CHECK( bg::within(sample, mp) );
if(bg::equals(sample, mp[0])) ++first;
}
BOOST_CHECK_GT(first * 1.05, sample_count / 3);
BOOST_CHECK_LT(first * 0.95, sample_count / 3);
}

int test_main(int, char* [])
{
test_polygon();
test_geographic();
test_multipoint();
return 0;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Boost.Geometry (aka GGL, Generic Geometry Library)

// Copyright (c) 2019 Tinko Bartels, Berlin, Germany.

// Contributed and/or modified by Tinko Bartels,
// as part of Google Summer of Code 2019 program.

// Use, modification and distribution is subject to the Boost Software License,
// Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

#ifndef BOOST_GEOMETRY_EXTENSIONS_RANDOM_STRATEGIES_AGNOSTIC_UNIFORM_CONVEX_HULL_REJECTION_HPP
#define BOOST_GEOMETRY_EXTENSIONS_RANDOM_STRATEGIES_AGNOSTIC_UNIFORM_CONVEX_HULL_REJECTION_HPP

#include <random>
#include <vector>

#include <boost/geometry/algorithms/equals.hpp>
#include <boost/geometry/algorithms/transform.hpp>
#include <boost/geometry/algorithms/within.hpp>
#include <boost/geometry/geometries/polygon.hpp>
#include <boost/geometry/algorithms/convex_hull.hpp>

#include <boost/geometry/extensions/random/strategies/agnostic/uniform_convex_polygon_sampler.hpp>

namespace boost { namespace geometry
{

namespace strategy { namespace uniform_point_distribution {

// The following strategy is suitable for geometries for which
// a Triangle sampling strategy can be provided
// a SideStrategy that compues the triangle area can be provided.
template
<
typename Point,
typename DomainGeometry,
typename TriangleStrategy,
typename SideStrategy //Actually, we need a triangle area strategy here.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could existing area strategy be used here?

>
struct uniform_convex_hull_rejection
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there any reason to have rejection sampling with convex polygons that are not envelopes (boxes)? Since it is more difficult to generate random points in polygon than boxes.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They have different performance characteristics.

Rejection sampling with envelope:
cost of initialization: obtain the envelope
cost per sample: (sample from the envelope (constant) + evaluate within-predicate (linear, I guess) ) * ( area of envelope / area of geometry)

Rejection sampling with convex hull:
cost of initialization: obtain the convex hull
cost per sample: (sample from the convex hull (logarithmic in convex hull size) + evaluate within-predicate (linear, I guess) ) * (area of convex hull / area of geometry)

Now, (area of convex / area of geometry) is usually smaller and at most equal to ( area of envelope / area of geometry) and the other factor should be dominated by the work to check for within. I guess the computation of the convex hull is more expensive than the computation of the envelope. So I'd expect rejection sampling with the convex hull to be slower if few samples are requested but faster if many samples are requested.

I did some benchmarks and was able to confirm that, if you are interested I can create a gist with the code and the results.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering why do we need rejection sampling at all? Are there any benefits of this solution vs. forcing the user to reject samples? AFAIU with this strategy it's the same as randomizing points using non-rejection sampling strategy and then manually rejecting samples if they are not within a polygon or a convex hull of a polygon. Or am I missing something?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess that depends on the interface one wants to provide. E.g. consider uniform_real_distribution. It takes a domain from the user, even though it could just provide random numbers in [0,1], since mapping that to [a,b] is trivial, but for some reason they decided to add those constructor parameters.

My intended design for uniform_point_distribution was such that a user of this facility could just pass some geometry to the constructor and get the expected results. I could remove both rejection strategies and then the uniform_point_distribution would not work with arbitrary polygons. The user would have to call envelope or and use within or, if he wants more performance for a higher number of samples, use convex_polygon. As a related anecdote, until near the end of GSoC I was unaware of the existence of line_interpolate, so user users might also not know all facilities.

Another point is that the advantage of having an interface where the constructor of uniform_point_distribution takes polygons directly may speed up code that uses it that way, once a faster algorithm for sampling in polygons has been implemented. For example, if Boost.Geometry some day gets an algorithm for the triangulation of non-convex polygons then that would allow a strategy that is faster than the rejection-based strategies.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tinko92 if you have the code and results already, making them public in a gist will benefit all of us. There are also (at least) two more parameters that play a role in trade-offs between those two rejection samplers, the ratio of area(envelope)/area(polygon) and the size of the hull. But I agree that since there should be a better way of sampling (via non-convex triangulations) spending more time in comparisons between those two could be redundant.

Copy link
Collaborator Author

@tinko92 tinko92 Oct 21, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tinko92 Right, it makes sense. The interface allows to pass a geometry and internally various algorithms can be used during randomization. Whether it is rejection sampling or not doesn't really matter.

{
private:
typedef typename point_type<DomainGeometry>::type domain_point_type;
typedef boost::geometry::model::ring<domain_point_type> ring;
ring m_hull;
uniform_convex_polygon_sampler<Point, ring, TriangleStrategy, SideStrategy>
m_strategy;
public:
uniform_convex_hull_rejection(DomainGeometry const& d) : m_strategy(m_hull)
{
boost::geometry::convex_hull(d, m_hull);
m_strategy =
uniform_convex_polygon_sampler
<
Point,
ring,
TriangleStrategy,
SideStrategy
>(m_hull);
}
bool equals(DomainGeometry const& l_domain,
DomainGeometry const& r_domain,
uniform_convex_hull_rejection const& r_strategy) const
{
return boost::geometry::equals(l_domain, r_domain)
&& m_strategy.equals(l_domain, r_domain, r_strategy.m_strategy);
}
template<typename Generator>
Point apply(Generator& g, DomainGeometry const& d)
{
Point p;
do{
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The intersection of hull with d must be non-empty otherwise you end up in infinite loop. More interestingly, if dim(d) < 2 e.g. d is a linestring, or point, you end up in a long (if not endless) loop because the probability for hitting the line or point by sampling in an intersecting polygon is very small.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By the implementation of uniform_point_distribution which is the user of all those strategies, hull will always be the convex hull of d, hence their intersection must be non-empty if d has a non-empty interior. I can see that this is problematic since the strategy in itself is an exposed interface that could be misused by passing different geometries to constructor and apply-call. I will defer that issue since it may be rendered moot by future refactoring because of the question of what belongs into strategies.

If d (as an areal geometry) has an empty interior (area is zero), I consider it to be invalid input since I am sampling from the interior with the areal types. I understand that this will need to be documented.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, please put (at least) a comment here.

p = m_strategy.apply(g, m_hull);
}while( !boost::geometry::within(p, d) );
return p;
}
void reset(DomainGeometry const&) {};
};

}} // namespace strategy::uniform_point_distribution

}} // namespace boost::geometry

#endif // BOOST_GEOMETRY_EXTENSIONS_RANDOM_STRATEGIES_AGNOSTIC_UNIFORM_CONVEX_HULL_REJECTION_HPP
Loading