doc/multi_newton_work.cpp.xml - cppad (upstream/2015.00.00.7)

Tree @upstream/2015.00.00.7 (Download .tar.gz)

multi_newton_work.cpp.xml @upstream/2015.00.00.7 — raw · history · blame

<?xml version='1.0'?>
<html xmlns='http://www.w3.org/1999/xhtml'
      xmlns:math='http://www.w3.org/1998/Math/MathML'
>
<head>
<title>Multi-threading Newton Method Utility Routines</title>
<meta http-equiv='Content-Type' content='text/html' charset='utf-8'/>
<meta name="description" id="description" content="Multi-threading Newton Method Utility Routines"/>
<meta name="keywords" id="keywords" content=" multi_newton_work multi-threading newton method utility routines multi_newton_setup multi_newton_worker multi_newton_combine setup multi_newton worker combine syntax purpose fun num_sub xlow xup epsilon max_itr num_threads xout source "/>
<style type='text/css'>
body { color : black }
body { background-color : white }
A:link { color : blue }
A:visited { color : purple }
A:active { color : purple }
</style>
<script type='text/javascript' language='JavaScript' src='_multi_newton_work.cpp_xml.js'>
</script>
</head>
<body>
<table><tr>
<td>
<a href="http://www.coin-or.org/CppAD/" target="_top"><img border="0" src="_image.gif"/></a>
</td>
<td><a href="multi_newton_time.cpp.xml" target="_top">Prev</a>
</td><td><a href="team_thread.hpp.xml" target="_top">Next</a>
</td><td>
<select onchange='choose_across0(this)'>
<option>Index-&gt;</option>
<option>contents</option>
<option>reference</option>
<option>index</option>
<option>search</option>
<option>external</option>
</select>
</td>
<td>
<select onchange='choose_up0(this)'>
<option>Up-&gt;</option>
<option>CppAD</option>
<option>multi_thread</option>
<option>thread_test.cpp</option>
<option>multi_newton.cpp</option>
<option>multi_newton_work.cpp</option>
</select>
</td>
<td>
<select onchange='choose_down3(this)'>
<option>multi_thread-&gt;</option>
<option>parallel_ad</option>
<option>thread_test.cpp</option>
</select>
</td>
<td>
<select onchange='choose_down2(this)'>
<option>thread_test.cpp-&gt;</option>
<option>a11c_openmp.cpp</option>
<option>a11c_bthread.cpp</option>
<option>a11c_pthread.cpp</option>
<option>simple_ad_openmp.cpp</option>
<option>simple_ad_bthread.cpp</option>
<option>simple_ad_pthread.cpp</option>
<option>team_example.cpp</option>
<option>harmonic.cpp</option>
<option>multi_newton.cpp</option>
<option>team_thread.hpp</option>
</select>
</td>
<td>
<select onchange='choose_down1(this)'>
<option>multi_newton.cpp-&gt;</option>
<option>multi_newton_time.cpp</option>
<option>multi_newton_work.cpp</option>
</select>
</td>
<td>multi_newton_work.cpp</td>
<td>
<select onchange='choose_current0(this)'>
<option>Headings-&gt;</option>
<option>Syntax</option>
<option>Purpose</option>
<option>multi_newton_setup</option>
<option>---..fun</option>
<option>---..num_sub</option>
<option>xlow</option>
<option>---..xup</option>
<option>---..epsilon</option>
<option>max_itr</option>
<option>---..num_threads</option>
<option>multi_newton_worker</option>
<option>multi_newton_combine</option>
<option>---..xout</option>
<option>Source</option>
</select>
</td>
</tr></table><br/>




<center><b><big><big>Multi-threading Newton Method Utility Routines</big></big></b></center>
<br/>
<b><big><a name="Syntax" id="Syntax">Syntax</a></big></b>

<br/>

<code><i><font color="black"><span style='white-space: nowrap'>ok</span></font></i><font color="blue"><span style='white-space: nowrap'>&#xA0;=&#xA0;multi_newton_setup(<br/>
&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>fun</span></font></i><font color="blue"><span style='white-space: nowrap'>,&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>num_sub</span></font></i><font color="blue"><span style='white-space: nowrap'>,&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>xlow</span></font></i><font color="blue"><span style='white-space: nowrap'>,&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>xup</span></font></i><font color="blue"><span style='white-space: nowrap'>,&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>epsilon</span></font></i><font color="blue"><span style='white-space: nowrap'>,&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>max_itr</span></font></i><font color="blue"><span style='white-space: nowrap'>,&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>num_threads</span></font></i><font color="blue"><span style='white-space: nowrap'><br/>
)</span></font></code>


<code><font color="blue"><span style='white-space: nowrap'>multi_newton_worker()<br/>
</span></font></code>

<code><i><font color="black"><span style='white-space: nowrap'>ok</span></font></i><font color="blue"><span style='white-space: nowrap'>&#xA0;=&#xA0;multi_newton_combine(</span></font><i><font color="black"><span style='white-space: nowrap'>xout</span></font></i><font color="blue"><span style='white-space: nowrap'>)<br/>
</span></font></code>
<br/>
<b><big><a name="Purpose" id="Purpose">Purpose</a></big></b>
<br/>
These routines aid in the computation of multiple zeros of a function

<math xmlns="http://www.w3.org/1998/Math/MathML" display="inline"><mrow>
<mi mathvariant='italic'>f</mi>
<mo stretchy="false">:</mo>
<mo stretchy="false">[</mo>
<mi mathvariant='italic'>a</mi>
<mo stretchy="false">,</mo>
<mi mathvariant='italic'>b</mi>
<mo stretchy="false">]</mo>
<mo stretchy="false">&#x02192;</mo>
<mrow><mstyle mathvariant='bold'><mi mathvariant='bold'>R</mi>
</mstyle></mrow>
</mrow></math>

.

<br/>
<br/>
<b><big><a name="multi_newton_setup" id="multi_newton_setup">multi_newton_setup</a></big></b>
<br/>
Calling this function splits up the computation of the zeros
into different sub-intervals for each thread.

<br/>
<br/>
<b><a name="multi_newton_setup.fun" id="multi_newton_setup.fun">fun</a></b>
<br/>
The argument 
<code><i><font color="black"><span style='white-space: nowrap'>fun</span></font></i></code>
 has prototype

<code><font color="blue"><span style='white-space: nowrap'><br/>
&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;void&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>fun</span></font></i><font color="blue"><span style='white-space: nowrap'>&#xA0;(double&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>x</span></font></i><font color="blue"><span style='white-space: nowrap'>,&#xA0;double&amp;&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>f</span></font></i><font color="blue"><span style='white-space: nowrap'>,&#xA0;double&amp;&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>df</span></font></i><font color="blue"><span style='white-space: nowrap'>)<br/>
</span></font></code>
This argument must evaluate the function 
<math xmlns="http://www.w3.org/1998/Math/MathML" display="inline"><mrow>
<mi mathvariant='italic'>f</mi>
<mo stretchy="false">(</mo>
<mi mathvariant='italic'>x</mi>
<mo stretchy="false">)</mo>
</mrow></math>

,
and its derivative 
<math xmlns="http://www.w3.org/1998/Math/MathML" display="inline"><mrow>
<msup><mi mathvariant='italic'>f</mi>
<mrow><mo stretchy="false">(</mo>
<mn>1</mn>
<mo stretchy="false">)</mo>
</mrow>
</msup>
<mo stretchy="false">(</mo>
<mi mathvariant='italic'>x</mi>
<mo stretchy="false">)</mo>
</mrow></math>

, 
using the syntax

<code><font color="blue"><span style='white-space: nowrap'><br/>
&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>fun</span></font></i><font color="blue"><span style='white-space: nowrap'>(</span></font><i><font color="black"><span style='white-space: nowrap'>x</span></font></i><font color="blue"><span style='white-space: nowrap'>,&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>f</span></font></i><font color="blue"><span style='white-space: nowrap'>,&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>df</span></font></i><font color="blue"><span style='white-space: nowrap'>)<br/>
</span></font></code>
where the arguments have the prototypes

<code><font color="blue"><span style='white-space: nowrap'><br/>
&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;double&#xA0;&#xA0;&#xA0;&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>x</span></font></i><font color="blue"><span style='white-space: nowrap'>&#xA0;<br/>
&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;double&amp;&#xA0;&#xA0;&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>f</span></font></i><font color="blue"><span style='white-space: nowrap'><br/>
&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;double&amp;&#xA0;&#xA0;&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>df</span></font></i><font color="blue"><span style='white-space: nowrap'><br/>
</span></font></code>
.
The input values of 
<code><i><font color="black"><span style='white-space: nowrap'>f</span></font></i></code>
 and 
<code><i><font color="black"><span style='white-space: nowrap'>df</span></font></i></code>
 do not matter.
Upon return they are 
<math xmlns="http://www.w3.org/1998/Math/MathML" display="inline"><mrow>
<mi mathvariant='italic'>f</mi>
<mo stretchy="false">(</mo>
<mi mathvariant='italic'>x</mi>
<mo stretchy="false">)</mo>
</mrow></math>

 and 
<math xmlns="http://www.w3.org/1998/Math/MathML" display="inline"><mrow>
<msup><mi mathvariant='italic'>f</mi>
<mrow><mo stretchy="false">(</mo>
<mn>1</mn>
<mo stretchy="false">)</mo>
</mrow>
</msup>
<mo stretchy="false">(</mo>
<mi mathvariant='italic'>x</mi>
<mo stretchy="false">)</mo>
</mrow></math>

 respectively.

<br/>
<br/>
<b><a name="multi_newton_setup.num_sub" id="multi_newton_setup.num_sub">num_sub</a></b>
<br/>
The argument 
<code><i><font color="black"><span style='white-space: nowrap'>num_sub</span></font></i></code>
 has prototype

<code><font color="blue"><span style='white-space: nowrap'><br/>
&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;size_t&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>num_sub</span></font></i><font color="blue"><span style='white-space: nowrap'><br/>
</span></font></code>
It specifies the number of sub-intervals; i.e., 
<math xmlns="http://www.w3.org/1998/Math/MathML" display="inline"><mrow>
<mi mathvariant='italic'>n</mi>
</mrow></math>

 
to split the calculation into.

<br/>
<br/>
<b><big><a name="xlow" id="xlow">xlow</a></big></b>
<br/>
The argument 
<code><i><font color="black"><span style='white-space: nowrap'>xlow</span></font></i></code>
 has prototype

<code><font color="blue"><span style='white-space: nowrap'><br/>
&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;double&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>xlow</span></font></i><font color="blue"><span style='white-space: nowrap'><br/>
</span></font></code>
It specifies the lower limit for the entire search; i.e., 
<math xmlns="http://www.w3.org/1998/Math/MathML" display="inline"><mrow>
<mi mathvariant='italic'>a</mi>
</mrow></math>

.

<br/>
<br/>
<b><a name="xlow.xup" id="xlow.xup">xup</a></b>
<br/>
The argument 
<code><i><font color="black"><span style='white-space: nowrap'>xup</span></font></i></code>
 has prototype

<code><font color="blue"><span style='white-space: nowrap'><br/>
&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;double&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>xup</span></font></i><font color="blue"><span style='white-space: nowrap'><br/>
</span></font></code>
It specifies the upper limit for the entire search; i.e., 
<math xmlns="http://www.w3.org/1998/Math/MathML" display="inline"><mrow>
<mi mathvariant='italic'>b</mi>
</mrow></math>

.

<br/>
<br/>
<b><a name="xlow.epsilon" id="xlow.epsilon">epsilon</a></b>
<br/>
The argument 
<code><i><font color="black"><span style='white-space: nowrap'>epsilon</span></font></i></code>
 has prototype

<code><font color="blue"><span style='white-space: nowrap'><br/>
&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;double&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>epsilon</span></font></i><font color="blue"><span style='white-space: nowrap'><br/>
</span></font></code>
It specifies the convergence criterion for Newton's method in terms
of how small the function value must be; i.e., 
<math xmlns="http://www.w3.org/1998/Math/MathML" display="inline"><mrow>
<mo stretchy="false">|</mo>
<mi mathvariant='italic'>f</mi>
<mo stretchy="false">(</mo>
<mi mathvariant='italic'>x</mi>
<mo stretchy="false">)</mo>
<mo stretchy="false">|</mo>
<mo stretchy="false">&#x02264;</mo>
<mi mathvariant='normal'>&#x003B5;</mi>
</mrow></math>

.

<br/>
<br/>
<b><big><a name="max_itr" id="max_itr">max_itr</a></big></b>
<br/>
The argument 
<code><i><font color="black"><span style='white-space: nowrap'>max_itr</span></font></i></code>
 has prototype

<code><font color="blue"><span style='white-space: nowrap'><br/>
&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;size_t&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>max_itr</span></font></i><font color="blue"><span style='white-space: nowrap'><br/>
</span></font></code>
It specifies the maximum number of iterations of Newton's method to try
before giving up on convergence.

<br/>
<br/>
<b><a name="max_itr.num_threads" id="max_itr.num_threads">num_threads</a></b>
<br/>
This argument has prototype

<code><font color="blue"><span style='white-space: nowrap'><br/>
&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;size_t&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>num_threads</span></font></i><font color="blue"><span style='white-space: nowrap'><br/>
</span></font></code>
It specifies the number of threads that are available for this test.
If it is zero, the test is run without multi-threading.

<br/>
<br/>
<b><big><a name="multi_newton_worker" id="multi_newton_worker">multi_newton_worker</a></big></b>
<br/>
Calling this function does the computation for one thread.
Following a call to <code><font color="blue">multi_newton_setup</font></code>,
this function should be called by each of the 
<code><i><font color="black"><span style='white-space: nowrap'>num_threads</span></font></i></code>
 threads. 

<br/>
<br/>
<b><big><a name="multi_newton_combine" id="multi_newton_combine">multi_newton_combine</a></big></b>
<br/>
After the 
<code><i><font color="black"><span style='white-space: nowrap'>num_threads</span></font></i></code>
 threads have completed their
calls to <code><font color="blue">multi_newton_worker</font></code>,
this function call will combine the results and return the final 
set of approximate zeros for 
<math xmlns="http://www.w3.org/1998/Math/MathML" display="inline"><mrow>
<mi mathvariant='italic'>f</mi>
<mo stretchy="false">(</mo>
<mi mathvariant='italic'>x</mi>
<mo stretchy="false">)</mo>
</mrow></math>

.

<br/>
<br/>
<b><a name="multi_newton_combine.xout" id="multi_newton_combine.xout">xout</a></b>
<br/>
The argument 
<code><i><font color="black"><span style='white-space: nowrap'>xout</span></font></i></code>
 has the prototype

<code><font color="blue"><span style='white-space: nowrap'><br/>
&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;CppAD::vector&lt;double&gt;&amp;&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>xout</span></font></i><font color="blue"><span style='white-space: nowrap'><br/>
</span></font></code>
The input size and value of the elements of 
<code><i><font color="black"><span style='white-space: nowrap'>xout</span></font></i></code>
 do not matter.
Upon return from <code><font color="blue">multi_newton_combine</font></code>,
the size of 
<code><i><font color="black"><span style='white-space: nowrap'>xout</span></font></i></code>
 is less than or equal to 
<math xmlns="http://www.w3.org/1998/Math/MathML" display="inline"><mrow>
<mi mathvariant='italic'>n</mi>
</mrow></math>

 and

<code><font color="blue"><span style='white-space: nowrap'><br/>
&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;|&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>f</span></font></i><font color="blue"><span style='white-space: nowrap'>(&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>xout</span></font></i><font color="blue"><span style='white-space: nowrap'>[</span></font><i><font color="black"><span style='white-space: nowrap'>i</span></font></i><font color="blue"><span style='white-space: nowrap'>]&#xA0;)&#xA0;|&#xA0;&lt;=&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>epsilon</span></font></i><font color="blue"><span style='white-space: nowrap'><br/>
</span></font></code>
for each valid index 
<code><i><font color="black"><span style='white-space: nowrap'>i</span></font></i></code>
. 
In addition, the elements of 
<code><i><font color="black"><span style='white-space: nowrap'>xout</span></font></i></code>
 are in ascending order and

<code><font color="blue"><span style='white-space: nowrap'><br/>
&#xA0;&#xA0;&#xA0;&#xA0;&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>xout</span></font></i><font color="blue"><span style='white-space: nowrap'>[i+1]&#xA0;-&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>xout</span></font></i><font color="blue"><span style='white-space: nowrap'>[</span></font><i><font color="black"><span style='white-space: nowrap'>i</span></font></i><font color="blue"><span style='white-space: nowrap'>]&#xA0;&gt;=&#xA0;&#xA0;0.5&#xA0;*&#xA0;(</span></font><i><font color="black"><span style='white-space: nowrap'>xup</span></font></i><font color="blue"><span style='white-space: nowrap'>&#xA0;-&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>xlow</span></font></i><font color="blue"><span style='white-space: nowrap'>)&#xA0;/&#xA0;</span></font><i><font color="black"><span style='white-space: nowrap'>num_sub</span></font></i><font color="blue"><span style='white-space: nowrap'><br/>
</span></font></code>
<br/>
<b><big><a name="Source" id="Source">Source</a></big></b>

<code><font color="blue">
<br/>
<pre style='display:inline'> 
# include &lt;cppad/cppad.hpp&gt;
# include &quot;multi_newton_work.hpp&quot;

# define USE_THREAD_ALLOC_FOR_WORK_ALL 1

namespace {
	// File-local state shared by multi_newton_setup, multi_newton_worker,
	// and multi_newton_combine.
	using CppAD::thread_alloc;

	// This vector template class frees all memory when resized to zero.
	// In addition, its memory allocation works well during multi-threading.
	using CppAD::vector;

	// number of threads in previous call to multi_newton_setup
	// (the routines in this file treat zero as one thread)
	size_t num_threads_ = 0;
	// convergence criteria in previous call to multi_newton_setup
	double epsilon_ = 0.;
	// maximum number of iterations in previous call to multi_newton_setup
	size_t max_itr_ = 0;
	// length for all sub-intervals
	double sub_length_ = 0.;
	// function we are finding zeros of in previous call to multi_newton_setup
	void (*fun_)(double x, double&amp; f, double&amp; df) = 0;

	// structure with information for one thread
	typedef struct {
		// number of sub intervals (worker input)
		size_t num_sub;
		// beginning of interval (worker input)
		double xlow;
		// end of interval (worker input)
		double xup; 
		// vector of zero candidates (worker output)
		// after call to multi_newton_setup:   x.size() == 0
		// after call to multi_newton_worker:  x.size() is number of zeros
		// after call to multi_newton_combine: x.size() == 0
		vector&lt;double&gt; x;  
		// false if an error occurs, true otherwise (worker output)
		bool   ok;
	} work_one_t;
	// vector with information for all threads
	// after call to multi_newton_setup:   work_all_.size() is the number
	//                                     of threads (at least one)
	// after call to multi_newton_combine: work_all_.size() == 0
	// (use pointers instead of values to avoid false sharing)
	vector&lt;work_one_t*&gt; work_all_;
}
// -----------------------------------------------------------------------
// do the work for one thread
//
// Reads the inputs that multi_newton_setup stored in the work_all_ slot
// for the current thread (num_sub, xlow, xup), runs Newton's method starting
// at the mid-point of each of this thread's sub-intervals, and appends each
// accepted zero candidate to the slot's vector x. The success flag is
// stored in the slot's ok field.
void multi_newton_worker(void)
{	using CppAD::vector;

	// Split [xlow, xup] into num_sub intervals and
	// look for one zero in each sub-interval.
	size_t thread_num    = thread_alloc::thread_num();
	size_t num_threads   = std::max(num_threads_, size_t(1));
	bool   ok            = thread_num &lt; num_threads;

	// If there is no work slot for this thread (multi_newton_setup was not
	// called, or it was called for fewer threads), indexing work_all_ would
	// be out of range. There is also no slot in which to record the failure,
	// so the only safe action is to return without doing any work.
	if( thread_num &gt;= work_all_.size() )
		return;

	size_t num_sub       = work_all_[thread_num]-&gt;num_sub;
	double xlow          = work_all_[thread_num]-&gt;xlow;
	double xup           = work_all_[thread_num]-&gt;xup;
	vector&lt;double&gt;&amp; x    = work_all_[thread_num]-&gt;x;

	// check arguments
	ok &amp;= max_itr_ &gt; 0;
	ok &amp;= num_sub &gt; 0;
	ok &amp;= xlow &lt; xup;
	ok &amp;= x.size() == 0;

	// check for special case where there is nothing for this thread to do
	// NOTE(review): ok is already false here because of the num_sub check
	// above, so a thread with no sub-intervals reports failure; confirm
	// that this is the intended result.
	if( num_sub == 0 )
	{	work_all_[thread_num]-&gt;ok = ok;
		return;
	}

	// check for a zero on each sub-interval
	size_t i;
	double xlast = xlow - 2 * sub_length_; // over sub_length_ away from xlow
	double flast = 2 * epsilon_;           // any value &gt; epsilon_ would do
	for(i = 0; i &lt; num_sub; i++)
	{
		// note that when i == 0, xlow_i == xlow (exactly)
		double xlow_i = xlow + i * sub_length_;

		// note that when i == num_sub - 1, xup_i = xup (exactly)
		double xup_i  = xup  - (num_sub - i - 1) * sub_length_;

		// initial point for Newton iterations
		double xcur = (xup_i + xlow_i) / 2.;

		// Newton iterations
		// (the loop body runs at least once, so fcur and dfcur are always
		// set by fun_ before they are used)
		bool more_itr = true;
		size_t itr    = 0;
		double fcur, dfcur;
		while( more_itr )
		{	fun_(xcur, fcur, dfcur);

			// check end of iterations
			if( fabs(fcur) &lt;= epsilon_ )
				more_itr = false;
			// at the lower bound and the Newton step would decrease xcur
			if( (xcur == xlow_i ) &amp;&amp; (fcur * dfcur &gt; 0.) )
				more_itr = false; 
			// at the upper bound and the Newton step would increase xcur
			if( (xcur == xup_i)   &amp;&amp; (fcur * dfcur &lt; 0.) )
				more_itr = false; 

			// next Newton iterate
			if( more_itr )
			{	xcur = xcur - fcur / dfcur;
				// keep in bounds
				xcur = std::max(xcur, xlow_i);
				xcur = std::min(xcur, xup_i);

				more_itr = ++itr &lt; max_itr_;
			}
		}
		if( fabs( fcur ) &lt;= epsilon_ )
		{	// check for case where xcur is lower bound for this 
			// sub-interval and upper bound for previous sub-interval
			if( fabs(xcur - xlast) &gt;= sub_length_ )
			{	x.push_back( xcur );
				xlast = xcur;
				flast = fcur;
			} 
			// too close to the previous zero: keep only the better one
			// (this branch cannot be taken before the first push_back
			// because xlast starts out 2 * sub_length_ below xlow)
			else if( fabs(fcur) &lt; fabs(flast) )
			{	x[ x.size() - 1] = xcur;
				xlast            = xcur;
				flast            = fcur;
			}
		}
	}
	work_all_[thread_num]-&gt;ok = ok;
}
// -----------------------------------------------------------------------
// Partition the search interval among the threads and store each thread's
// share of the work in work_all_.
//
// Returns false if the thread count does not match thread_alloc, if
// work_all_ was not empty on entry, if a newly created candidate vector is
// not empty, or if the sub-intervals handed out do not add up to num_sub.
bool multi_newton_setup(
	void (fun)(double x, double&amp; f, double&amp; df) ,
	size_t num_sub                              , 
	double xlow                                 ,
	double xup                                  ,
	double epsilon                              ,
	size_t max_itr                              ,
	size_t num_threads                          )
{
	// remember the requested thread count; a request for zero threads
	// is handled as a single thread from here on
	num_threads_ = num_threads;
	num_threads  = std::max(num_threads_, size_t(1));

	// the thread_alloc thread count must agree and there must be
	// no work left over from an earlier setup
	bool ok = ( num_threads == thread_alloc::num_threads() );
	ok      = ok &amp;&amp; ( work_all_.size() == 0 );

	// settings that are shared by every thread
	fun_     = fun;
	epsilon_ = epsilon;
	max_itr_ = max_itr;

	// width of a single sub-interval
	sub_length_ = (xup - xlow) / double(num_sub);

	// one work slot per thread
	work_all_.resize(num_threads);

	// every thread gets quotient sub-intervals and the first
	// remainder threads get one extra
	const size_t quotient  = num_sub / num_threads;
	const size_t remainder = num_sub % num_threads;

	// number of sub-intervals handed out so far
	size_t assigned = 0;
	for(size_t thread_num = 0; thread_num &lt; num_threads; ++thread_num)
	{
# if  USE_THREAD_ALLOC_FOR_WORK_ALL
		// a separate raw allocation for each slot to avoid false sharing
		size_t min_bytes = sizeof(work_one_t);
		size_t cap_bytes;
		void*  v_ptr     = thread_alloc::get_memory(min_bytes, cap_bytes);
		work_one_t* work = static_cast&lt;work_one_t*&gt;(v_ptr);
		work_all_[thread_num] = work;

		// thread_alloc hands back raw memory without running any
		// constructor, so the CppAD::vector member is constructed in place
		new( &amp;(work-&gt;x) ) vector&lt;double&gt;();
# else
		work_one_t* work = new work_one_t;
		work_all_[thread_num] = work;
# endif

		// number of sub-intervals for this thread
		const size_t num_sub_thread =
			(thread_num &lt; remainder) ? quotient + 1 : quotient;

		// lower end; equal to xlow when thread_num == 0
		const double xlow_thread = xlow + assigned * sub_length_;

		// upper end; the last thread is given xup exactly
		assigned += num_sub_thread;
		double xup_thread = xlow + assigned * sub_length_;
		if( thread_num + 1 == num_threads )
			xup_thread = xup;

		// inputs for the worker that runs as this thread
		work-&gt;num_sub = num_sub_thread;
		work-&gt;xlow    = xlow_thread;
		work-&gt;xup     = xup_thread;

		// the candidate vector must start out empty
		if( work-&gt;x.size() != 0 )
			ok = false;

		// in case the worker for this thread never gets called
		work-&gt;ok = false;
	}

	// every sub-interval must have been given to some thread
	return ok &amp;&amp; (assigned == num_sub);
}
// -----------------------------------------------------------------------
// get the result of the work 
//
// Merges the zero candidates found by each thread into xout, in thread
// order. A candidate that is less than sub_length_ away from the previously
// accepted one is a duplicate; of the two, the one with the smaller
// absolute function value is kept. The per-thread work storage is then
// freed and work_all_ is cleared.
//
// Returns true if every worker reported ok and, for each thread other than
// thread zero, thread_alloc reports no memory in use after its work storage
// is freed. Returns false (with xout empty) if work_all_ does not have a
// slot for every thread.
bool multi_newton_combine(CppAD::vector&lt;double&gt;&amp; xout)
{	// number of threads in the calculation
	size_t num_threads  = std::max(num_threads_, size_t(1));

	// remove duplicates and points that are not solutions
	xout.resize(0);

	// Without one slot per thread (e.g., multi_newton_setup was not called)
	// there is nothing to combine and indexing work_all_ below would be
	// out of range.
	if( work_all_.size() &lt; num_threads )
		return false;

	bool   ok = true;
	size_t thread_num;

	// most recently accepted zero; only used once xout is non-empty
	double xlast = 0.;
	for(thread_num = 0; thread_num &lt; num_threads; thread_num++)
	{	vector&lt;double&gt;&amp; x = work_all_[thread_num]-&gt;x;

		size_t i;
		for(i = 0; i &lt; x.size(); i++)
		{	// The first candidate is always accepted. Comparing it with a
			// fixed starting value such as - sub_length_ would classify it
			// as a duplicate when the search interval contains points near
			// that value, and the replacement below would then index an
			// empty xout.
			//
			// Otherwise check for case where this point is lower limit for
			// this thread and upper limit for previous thread
			if( xout.size() == 0 || fabs(x[i] - xlast) &gt;= sub_length_ )  
			{	xout.push_back( x[i] );
				xlast = x[i];
			}
			else
			{	// duplicate: keep whichever has the smaller function value
				double fcur, flast, df;
				fun_(x[i],   fcur, df);
				fun_(xlast, flast, df);
				if( fabs(fcur) &lt; fabs(flast) )
				{	xout[ xout.size() - 1] = x[i];
					xlast                  = x[i];
				}
			}
		}
		ok &amp;= work_all_[thread_num]-&gt;ok;
	}

	// go down so free memory for other threads before memory for master
	thread_num = num_threads;
	while(thread_num--)
	{
# if USE_THREAD_ALLOC_FOR_WORK_ALL
		// call the destructor for the CppAD::vector member
		work_all_[thread_num]-&gt;x.~vector&lt;double&gt;();
		// delete the raw memory allocation 
		void* v_ptr = static_cast&lt;void*&gt;( work_all_[thread_num] );
		thread_alloc::return_memory( v_ptr );
# else
		delete work_all_[thread_num];
# endif
		// Note that xout corresponds to memory that is inuse by master
		// (so we can only check that the other threads have freed all
		// their memory). 
		if( thread_num &gt; 0 )
		{	// check that there is no longer any memory inuse by this thread
			ok &amp;= thread_alloc::inuse(thread_num) == 0;
			// return all memory being held for future use by this thread
			thread_alloc::free_available(thread_num);
		}
	}
	// now we are done with the work_all_ vector so free its memory
	// (because it is a static variable)
	work_all_.clear();

	return ok;
}
</pre>

</font></code>



<hr/>Input File: multi_thread/multi_newton_work.cpp

</body>
</html>