Codebase list bitseq / HEAD lowess.cpp
HEAD

Tree @HEAD (Download .tar.gz)

lowess.cpp @HEADraw · history · blame

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
/*
 *  c++ implementation of Lowess weighted regression by 
 *  Peter Glaus http://www.cs.man.ac.uk/~glausp/
 *
 *
 *  Based on fortran code by Cleveland downloaded from:
 *  http://netlib.org/go/lowess.f
 *  original author:
* wsc@research.bell-labs.com Mon Dec 30 16:55 EST 1985
* W. S. Cleveland
* Bell Laboratories
* Murray Hill NJ 07974
 *  
 *  See original documentation below the code for details.
 * 
 */
#include<algorithm>
#include<cmath>
#include<fstream>

using namespace std;

#include "lowess.h"
#include "common.h"

void lowess(const vector<double> &x, const vector<double> &y, double f, long nsteps, vector<double> &ys){//{{{
   vector<double> rw,res;
   lowess(x,y,f,nsteps,0.,ys,rw,res);
}//}}}
void lowess(const vector<double> &x, const vector<double> &y, double f, long nsteps, double delta, vector<double> &ys, vector<double> &rw, vector<double>&res){ //{{{
   long n=(long)x.size();
   bool ok=false;
   long nleft,nright, i, j, iter, last, m1, m2, ns;
   double cut, cmad, r, d1, d2, c1, c9, alpha, denom;
   if((n==0)||((long)y.size()!=n)) return;
   ys.resize(n);
   rw.resize(n);
   res.resize(n);
   if(n==1){
      ys[0]=y[0];
      return;
   }
   // ns - at least 2, at most n
   ns = max(min((long)(f*n),n),(long)2);
   for(iter=0;iter<nsteps+1; iter++){
      // robustnes iterations
      nleft = 0;
      nright = ns-1;
      // index of last estimated point
      last = -1;
      // index of current point
      i=0;
      do{
         while(nright<n-1){
            // move <nleft,nright> right, while radius decreases
            d1 = x[i]-x[nleft];
            d2 = x[nright+1] - x[i];
            if(d1<=d2)break;
            nleft++;
            nright++;
         }
         // fit value at x[i]
         lowest(x,y,x[i],ys[i],nleft,nright,res,iter>0,rw,ok);
         if(!ok) ys[i]=y[i];
         if(last<i-1){
            // interpolate skipped points
            if(last<0){
               warning("Lowess: out of range.\n");
            }
            denom = x[i] - x[last];
            for(j=last+1;j<i;j++){
               alpha = (x[j]-x[last])/denom;
               ys[j] = alpha * ys[i] + (1.0-alpha)*ys[last];
            }
         }
         last = i;
         cut = x[last]+delta;
         for(i=last+1;i<n;i++){
            if(x[i]>cut)break;
            if(x[i]==x[last]){
               ys[i]=ys[last];
               last=i;
            }
         }
         i=max(last+1,i-1);
      }while(last<n-1);
      for(i=0;i<n;i++)
         res[i] = y[i]-ys[i];
      if(iter==nsteps)break ;
      for(i=0;i<n;i++)
         rw[i]=abs(res[i]);
      sort(rw.begin(),rw.end());
      m1 = n/2+1;
      m2 = n-m1;
      m1 --;
      cmad = 3.0 *(rw[m1]+rw[m2]);
      c9 = .999*cmad;
      c1 = .001*cmad;
      for(i=0;i<n;i++){
         r = abs(res[i]);
         if(r<=c1) rw[i]=1;
         else if(r>c9) rw[i]=0;
         else rw[i] = (1.0-(r/cmad)*(r/cmad))*(1.0-(r/cmad)*(r/cmad));
      }
   }
}//}}}

void lowest(const vector<double> &x, const vector<double> &y, double xs, double &ys, long nleft, long nright, vector<double> &w, bool userw,  vector<double> &rw, bool &ok){//{{{
   long n = (long)x.size();
   long nrt, j;
   double a, b, c, h, r, h1, h9, range;
   range = x[n-1]-x[0];
   h = max(xs-x[nleft],x[nright]-xs);
   h9 = 0.999*h;
   h1 = 0.001*h;
   // sum of weights
   a = 0; 
   for(j=nleft;j<n;j++){
      // compute weights (pick up all ties on right)
      w[j]=0.;
      r = abs(x[j]-xs);
      if(r<=h9){
         // small enough for non-zero weight
         if(r>h1) w[j] = (1.0-(r/h)*(r/h)*(r/h))*(1.0-(r/h)*(r/h)*(r/h))*(1.0-(r/h)*(r/h)*(r/h));
         else w[j] = 1.;
         if(userw) w[j] *= rw[j];
         a += w[j];
      }else if(x[j]>xs) break; // get out at first zero wt on right
   }
   nrt = j-1;
   // rightmost pt (may be greater than nright because of ties)
   if(a<=0.) ok = false;
   else{
      // weighted least squares
      ok = true;
      // normalize weights
      for(j=nleft;j<=nrt;j++)
         w[j] /= a;
      if(h>0.){
         // use linear fit
         a = 0.;
         for(j=nleft;j<=nrt;j++)
            a += w[j]*x[j]; // weighted centre of values
         b = xs-a;
         c = 0;
         for(j=nleft;j<=nrt;j++)
            c += w[j]*(x[j]-a)*(x[j]-a);
         if(sqrt(c)>0.001*range){
            // points are spread enough to compute slope
            b /= c;
            for(j=nleft;j<=nrt;j++)
               w[j] *= (1.0+b*(x[j]-a));
         }
      }
      ys = 0;
      for(j=nleft;j<=nrt;j++)
         ys += w[j]*y[j];
   }
}//}}}

/* {{{ Documentation
* wsc@research.bell-labs.com Mon Dec 30 16:55 EST 1985
* W. S. Cleveland
* Bell Laboratories
* Murray Hill NJ 07974
* 
* outline of this file:
*    lines 1-72   introduction
*        73-177   documentation for lowess
*       178-238   ratfor version of lowess
*       239-301   documentation for lowest
*       302-350   ratfor version of lowest
*       351-end   test driver and fortran version of lowess and lowest
* 
*   a multivariate version is available by "send dloess from a"
* 
*              COMPUTER PROGRAMS FOR LOCALLY WEIGHTED REGRESSION
* 
*             This package consists  of  two  FORTRAN  programs  for
*        smoothing    scatterplots   by   robust   locally   weighted
*        regression, or lowess.   The  principal  routine  is  LOWESS
*        which   computes   the  smoothed  values  using  the  method
*        described in The Elements of Graphing Data, by William S.
*        Cleveland    (Wadsworth,    555 Morego   Street,   Monterey,
*        California 93940).
* 
*             LOWESS calls a support routine, LOWEST, the code for
*        which is included. LOWESS also calls a routine  SORT,  which
*        the user must provide.
* 
*             To reduce the computations, LOWESS  requires  that  the
*        arrays  X  and  Y,  which  are  the  horizontal and vertical
*        coordinates, respectively, of the scatterplot, be such  that
*        X  is  sorted  from  smallest  to  largest.   The  user must
*        therefore use another sort routine which will sort X  and  Y
*        according  to X.
*             To summarize the scatterplot, YS,  the  fitted  values,
*        should  be  plotted  against X.   No  graphics  routines are
*        available in the package and must be supplied by the user.
* 
*             The FORTRAN code for the routines LOWESS and LOWEST has
*        been   generated   from   higher   level   RATFOR   programs
*        (B. W. Kernighan, ``RATFOR:  A Preprocessor for  a  Rational
*        Fortran,''  Software Practice and Experience, Vol. 5 (1975),
*        which are also included.
* 
*             The following are data and output from LOWESS that  can
*        be  used  to check your implementation of the routines.  The
*        notation (10)v means 10 values of v.
* 
* 
* 
* 
*        X values:
*          1  2  3  4  5  (10)6  8  10  12  14  50
* 
*        Y values:
*           18  2  15  6  10  4  16  11  7  3  14  17  20  12  9  13  1  8  5  19
* 
* 
*        YS values with F = .25, NSTEPS = 0, DELTA = 0.0
*         13.659  11.145  8.701  9.722  10.000  (10)11.300  13.000  6.440  5.596
*           5.456  18.998
* 
*        YS values with F = .25, NSTEPS = 0 ,  DELTA = 3.0
*          13.659  12.347  11.034  9.722  10.511  (10)11.300  13.000  6.440  5.596
*            5.456  18.998
* 
*        YS values with F = .25, NSTEPS = 2, DELTA = 0.0
*          14.811  12.115  8.984  9.676  10.000  (10)11.346  13.000  6.734  5.744
*            5.415  18.998
* 
* 
* 
* 
*                                   LOWESS
* 
* 
* 
*        Calling sequence
* 
*        CALL LOWESS(X,Y,N,F,NSTEPS,DELTA,YS,RW,RES)
* 
*        Purpose
* 
*        LOWESS computes the smooth of a scatterplot of Y  against  X
*        using  robust  locally  weighted regression.  Fitted values,
*        YS, are computed at each of the  values  of  the  horizontal
*        axis in X.
* 
*        Argument description
* 
*              X = Input; abscissas of the points on the
*                  scatterplot; the values in X must be ordered
*                  from smallest to largest.
*              Y = Input; ordinates of the points on the
*                  scatterplot.
*              N = Input; dimension of X,Y,YS,RW, and RES.
*              F = Input; specifies the amount of smoothing; F is
*                  the fraction of points used to compute each
*                  fitted value; as F increases the smoothed values
*                  become smoother; choosing F in the range .2 to
*                  .8 usually results in a good fit; if you have no
*                  idea which value to use, try F = .5.
*         NSTEPS = Input; the number of iterations in the robust
*                  fit; if NSTEPS = 0, the nonrobust fit is
*                  returned; setting NSTEPS equal to 2 should serve
*                  most purposes.
*          DELTA = input; nonnegative parameter which may be used
*                  to save computations; if N is less than 100, set
*                  DELTA equal to 0.0; if N is greater than 100 you
*                  should find out how DELTA works by reading the
*                  additional instructions section.
*             YS = Output; fitted values; YS(I) is the fitted value
*                  at X(I); to summarize the scatterplot, YS(I)
*                  should be plotted against X(I).
*             RW = Output; robustness weights; RW(I) is the weight
*                  given to the point (X(I),Y(I)); if NSTEPS = 0,
*                  RW is not used.
*            RES = Output; residuals; RES(I) = Y(I)-YS(I).
* 
* 
*        Other programs called
* 
*               LOWEST
*               SSORT
* 
*        Additional instructions
* 
*        DELTA can be used to save computations.   Very  roughly  the
*        algorithm  is  this:   on the initial fit and on each of the
*        NSTEPS iterations locally weighted regression fitted  values
*        are computed at points in X which are spaced, roughly, DELTA
*        apart; then the fitted values at the  remaining  points  are
*        computed  using  linear  interpolation.   The  first locally
*        weighted regression (l.w.r.) computation is carried  out  at
*        X(1)  and  the  last  is  carried  out at X(N).  Suppose the
*        l.w.r. computation is carried out at  X(I).   If  X(I+1)  is
*        greater  than  or  equal  to  X(I)+DELTA,  the  next  l.w.r.
*        computation is carried out at X(I+1).   If  X(I+1)  is  less
*        than X(I)+DELTA, the next l.w.r.  computation is carried out
*        at the largest X(J) which is greater than or equal  to  X(I)
*        but  is not greater than X(I)+DELTA.  Then the fitted values
*        for X(K) between X(I)  and  X(J),  if  there  are  any,  are
*        computed  by  linear  interpolation  of the fitted values at
*        X(I) and X(J).  If N is less than 100 then DELTA can be  set
*        to  0.0  since  the  computation time will not be too great.
*        For larger N it is typically not necessary to carry out  the
*        l.w.r.  computation for all points, so that much computation
*        time can be saved by taking DELTA to be  greater  than  0.0.
*        If  DELTA =  Range  (X)/k  then,  if  the  values  in X were
*        uniformly  scattered  over  the  range,  the   full   l.w.r.
*        computation  would be carried out at approximately k points.
*        Taking k to be 50 often works well.
* 
*        Method
* 
*        The fitted values are computed by using the nearest neighbor
*        routine  and  robust locally weighted regression of degree 1
*        with the tricube weight function.  A few additional features
*        have  been  added.  Suppose r is FN truncated to an integer.
*        Let  h  be  the  distance  to  the  r-th  nearest   neighbor
*        from X(I).   All  points within h of X(I) are used.  Thus if
*        the r-th nearest neighbor is exactly the  same  distance  as
*        other  points,  more  than r points can possibly be used for
*        the smooth at  X(I).   There  are  two  cases  where  robust
*        locally  weighted regression of degree 0 is actually used at
*        X(I).  One case occurs when  h  is  0.0.   The  second  case
*        occurs  when  the  weighted  standard error of the X(I) with
*        respect to the weights w(j) is  less  than  .001  times  the
*        range  of the X(I), where w(j) is the weight assigned to the
*        j-th point of X (the tricube  weight  times  the  robustness
*        weight)  divided by the sum of all of the weights.  Finally,
*        if the w(j) are all zero for the smooth at X(I), the  fitted
*        value is taken to be Y(I).
* 
* 
* 
* 
*  subroutine lowess(x,y,n,f,nsteps,delta,ys,rw,res)
*  real x(n),y(n),ys(n),rw(n),res(n)
*  logical ok
*  if (n<2){ ys(1) = y(1); return }
*  ns = max0(min0(ifix(f*float(n)),n),2)  # at least two, at most n points
*  for(iter=1; iter<=nsteps+1; iter=iter+1){      # robustness iterations
*         nleft = 1; nright = ns
*         last = 0        # index of prev estimated point
*         i = 1   # index of current point
*         repeat{
*                 while(nright<n){
*  # move nleft, nright to right if radius decreases
*                         d1 = x(i)-x(nleft)
*                         d2 = x(nright+1)-x(i)
*  # if d1<=d2 with x(nright+1)==x(nright), lowest fixes
*                         if (d1<=d2) break
*  # radius will not decrease by move right
*                         nleft = nleft+1
*                         nright = nright+1
*                         }
*                 call lowest(x,y,n,x(i),ys(i),nleft,nright,res,iter>1,rw,ok)
*  # fitted value at x(i)
*                 if (!ok) ys(i) = y(i)
*  # all weights zero - copy over value (all rw==0)
*                 if (last<i-1) { # skipped points -- interpolate
*                         denom = x(i)-x(last)    # non-zero - proof?
*                         for(j=last+1; j<i; j=j+1){
*                                 alpha = (x(j)-x(last))/denom
*                                 ys(j) = alpha*ys(i)+(1.0-alpha)*ys(last)
*                                 }
*                         }
*                 last = i        # last point actually estimated
*                 cut = x(last)+delta     # x coord of close points
*                 for(i=last+1; i<=n; i=i+1){     # find close points
*                         if (x(i)>cut) break     # i one beyond last pt within cut
*                         if(x(i)==x(last)){      # exact match in x
*                                 ys(i) = ys(last)
*                                 last = i
*                                 }
*                         }
*                 i=max0(last+1,i-1)
*  # back 1 point so interpolation within delta, but always go forward
*                 } until(last>=n)
*         do i = 1,n      # residuals
*                 res(i) = y(i)-ys(i)
*         if (iter>nsteps) break  # compute robustness weights except last time
*         do i = 1,n
*                 rw(i) = abs(res(i))
*         call sort(rw,n)
*         m1 = 1+n/2; m2 = n-m1+1
*         cmad = 3.0*(rw(m1)+rw(m2))      # 6 median abs resid
*         c9 = .999*cmad; c1 = .001*cmad
*         do i = 1,n {
*                 r = abs(res(i))
*                 if(r<=c1) rw(i)=1.      # near 0, avoid underflow
*                 else if(r>c9) rw(i)=0.  # near 1, avoid underflow
*                 else rw(i) = (1.0-(r/cmad)**2)**2
*                 }
*         }
*  return
*  end
* 
* 
* 
* 
*                                   LOWEST
* 
* 
* 
*        Calling sequence
* 
*        CALL LOWEST(X,Y,N,XS,YS,NLEFT,NRIGHT,W,USERW,RW,OK)
* 
*        Purpose
* 
*        LOWEST is a support routine for LOWESS and  ordinarily  will
*        not  be  called  by  the  user.   The  fitted  value, YS, is
*        computed  at  the  value,  XS,  of  the   horizontal   axis.
*        Robustness  weights,  RW,  can  be employed in computing the
*        fit.
* 
*        Argument description
* 
* 
*              X = Input; abscissas of the points on the
*                  scatterplot; the values in X must be ordered
*                  from smallest to largest.
*              Y = Input; ordinates of the points on the
*                  scatterplot.
*              N = Input; dimension of X,Y,W, and RW.
*             XS = Input; value of the horizontal axis at which the
*                  smooth is computed.
*             YS = Output; fitted value at XS.
*          NLEFT = Input; index of the first point which should be
*                  considered in computing the fitted value.
*         NRIGHT = Input; index of the last point which should be
*                  considered in computing the fitted value.
*              W = Output; W(I) is the weight for Y(I) used in the
*                  expression for YS, which is the sum from
*                  I = NLEFT to NRIGHT of W(I)*Y(I); W(I) is
*                  defined only at locations NLEFT to NRIGHT.
*          USERW = Input; logical variable; if USERW is .TRUE., a
*                  robust fit is carried out using the weights in
*                  RW; if USERW is .FALSE., the values in RW are
*                  not used.
*             RW = Input; robustness weights.
*             OK = Output; logical variable; if the weights for the
*                  smooth are all 0.0, the fitted value, YS, is not
*                  computed and OK is set equal to .FALSE.; if the
*                  fitted value is computed OK is set equal to
* 
* 
*        Method
* 
*        The smooth at XS is computed using (robust) locally weighted
*        regression of degree 1.  The tricube weight function is used
*        with h equal to the maximum of XS-X(NLEFT) and X(NRIGHT)-XS.
*        Two  cases  where  the  program  reverts to locally weighted
*        regression of degree 0 are described  in  the  documentation
*        for LOWESS.
* 
* 
* 
* 
*  subroutine lowest(x,y,n,xs,ys,nleft,nright,w,userw,rw,ok)
*  real x(n),y(n),w(n),rw(n)
*  logical userw,ok
*  range = x(n)-x(1)
*  h = amax1(xs-x(nleft),x(nright)-xs)
*  h9 = .999*h
*  h1 = .001*h
*  a = 0.0        # sum of weights
*  for(j=nleft; j<=n; j=j+1){     # compute weights (pick up all ties on right)
*         w(j)=0.
*         r = abs(x(j)-xs)
*         if (r<=h9) {    # small enough for non-zero weight
*                 if (r>h1) w(j) = (1.0-(r/h)**3)**3
*                 else      w(j) = 1.
*                 if (userw) w(j) = rw(j)*w(j)
*                 a = a+w(j)
*                 }
*         else if(x(j)>xs)break   # get out at first zero wt on right
*         }
*  nrt=j-1        # rightmost pt (may be greater than nright because of ties)
*  if (a<=0.0) ok = FALSE
*  else { # weighted least squares
*         ok = TRUE
*         do j = nleft,nrt
*                 w(j) = w(j)/a   # make sum of w(j) == 1
*         if (h>0.) {     # use linear fit
*                 a = 0.0
*                 do j = nleft,nrt
*                         a = a+w(j)*x(j) # weighted center of x values
*                 b = xs-a
*                 c = 0.0
*                 do j = nleft,nrt
*                         c = c+w(j)*(x(j)-a)**2
*                 if(sqrt(c)>.001*range) {
*  # points are spread out enough to compute slope
*                         b = b/c
*                         do j = nleft,nrt
*                                 w(j) = w(j)*(1.0+b*(x(j)-a))
*                         }
*                 }
*         ys = 0.0
*         do j = nleft,nrt
*                 ys = ys+w(j)*y(j)
*         }
*  return
*  end
* 
}}}*/