[16] | 1 | /******************************************************************** |
---|
| 2 | * * |
---|
| 3 | * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. * |
---|
| 4 | * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * |
---|
| 5 | * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * |
---|
| 6 | * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * |
---|
| 7 | * * |
---|
| 8 | * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 * |
---|
| 9 | * by the Xiph.Org Foundation http://www.xiph.org/ * |
---|
| 10 | * * |
---|
| 11 | ******************************************************************** |
---|
| 12 | |
---|
| 13 | function: build a VQ codebook and the encoding decision 'tree' |
---|
| 14 | last mod: $Id: vqsplit.c 13293 2007-07-24 00:09:47Z xiphmont $ |
---|
| 15 | |
---|
| 16 | ********************************************************************/ |
---|
| 17 | |
---|
| 18 | /* This code is *not* part of libvorbis. It is used to generate |
---|
| 19 | trained codebooks offline and then spit the results into a |
---|
| 20 | pregenerated codebook that is compiled into libvorbis. It is an |
---|
| 21 | expensive (but good) algorithm. Run it on big iron. */ |
---|
| 22 | |
---|
| 23 | /* There are so many optimizations to explore in *both* stages that |
---|
| 24 | considering the undertaking is almost withering. For now, we brute |
---|
| 25 | force it all */ |
---|
| 26 | |
---|
| 27 | #include <stdlib.h> |
---|
| 28 | #include <stdio.h> |
---|
| 29 | #include <math.h> |
---|
| 30 | #include <string.h> |
---|
| 31 | #include <sys/time.h> |
---|
| 32 | |
---|
| 33 | #include "vqgen.h" |
---|
| 34 | #include "vqsplit.h" |
---|
| 35 | #include "bookutil.h" |
---|
| 36 | |
---|
| 37 | /* Codebook generation happens in two steps: |
---|
| 38 | |
---|
| 39 | 1) Train the codebook with data collected from the encoder: We use |
---|
| 40 | one of a few error metrics (which represent the distance between a |
---|
| 41 | given data point and a candidate point in the training set) to |
---|
| 42 | divide the training set up into cells representing roughly equal |
---|
| 43 | probability of occurring. |
---|
| 44 | |
---|
| 45 | 2) Generate the codebook and auxiliary data from the trained data set |
---|
| 46 | */ |
---|
| 47 | |
---|
| 48 | /* Building a codebook from trained set ********************************** |
---|
| 49 | |
---|
| 50 | The codebook in raw form is technically finished once it's trained. |
---|
| 51 | However, we want to finalize the representative codebook values for |
---|
| 52 | each entry and generate auxiliary information to optimize encoding. |
---|
| 53 | We generate the auxiliary coding tree using collected data, |
---|
| 54 | probably the same data as in the original training */ |
---|
| 55 | |
---|
| 56 | /* At each recursion, the data set is split in half. Cells with data |
---|
| 57 | points on side A go into set A, same with set B. The sets may |
---|
| 58 | overlap. If the cell overlaps the deviding line only very slightly |
---|
| 59 | (provided parameter), we may choose to ignore the overlap in order |
---|
| 60 | to pare the tree down */ |
---|
| 61 | |
---|
| 62 | long *isortvals; |
---|
| 63 | int iascsort(const void *a,const void *b){ |
---|
| 64 | long av=isortvals[*((long *)a)]; |
---|
| 65 | long bv=isortvals[*((long *)b)]; |
---|
| 66 | return(av-bv); |
---|
| 67 | } |
---|
| 68 | |
---|
| 69 | static float _Ndist(int el,float *a, float *b){ |
---|
| 70 | int i; |
---|
| 71 | float acc=0.f; |
---|
| 72 | for(i=0;i<el;i++){ |
---|
| 73 | float val=(a[i]-b[i]); |
---|
| 74 | acc+=val*val; |
---|
| 75 | } |
---|
| 76 | return sqrt(acc); |
---|
| 77 | } |
---|
| 78 | |
---|
| 79 | #define _Npoint(i) (pointlist+dim*(i)) |
---|
| 80 | #define _Nnow(i) (entrylist+dim*(i)) |
---|
| 81 | |
---|
| 82 | |
---|
| 83 | /* goes through the split, but just counts it and returns a metric*/ |
---|
| 84 | int vqsp_count(float *entrylist,float *pointlist,int dim, |
---|
| 85 | long *membership,long *reventry, |
---|
| 86 | long *entryindex,long entries, |
---|
| 87 | long *pointindex,long points,int splitp, |
---|
| 88 | long *entryA,long *entryB, |
---|
| 89 | long besti,long bestj, |
---|
| 90 | long *entriesA,long *entriesB,long *entriesC){ |
---|
| 91 | long i,j; |
---|
| 92 | long A=0,B=0,C=0; |
---|
| 93 | long pointsA=0; |
---|
| 94 | long pointsB=0; |
---|
| 95 | long *temppointsA=NULL; |
---|
| 96 | long *temppointsB=NULL; |
---|
| 97 | |
---|
| 98 | if(splitp){ |
---|
| 99 | temppointsA=_ogg_malloc(points*sizeof(long)); |
---|
| 100 | temppointsB=_ogg_malloc(points*sizeof(long)); |
---|
| 101 | } |
---|
| 102 | |
---|
| 103 | memset(entryA,0,sizeof(long)*entries); |
---|
| 104 | memset(entryB,0,sizeof(long)*entries); |
---|
| 105 | |
---|
| 106 | /* Do the points belonging to this cell occur on sideA, sideB or |
---|
| 107 | both? */ |
---|
| 108 | |
---|
| 109 | for(i=0;i<points;i++){ |
---|
| 110 | float *ppt=_Npoint(pointindex[i]); |
---|
| 111 | long firstentry=membership[pointindex[i]]; |
---|
| 112 | |
---|
| 113 | if(firstentry==besti){ |
---|
| 114 | entryA[reventry[firstentry]]=1; |
---|
| 115 | if(splitp)temppointsA[pointsA++]=pointindex[i]; |
---|
| 116 | continue; |
---|
| 117 | } |
---|
| 118 | if(firstentry==bestj){ |
---|
| 119 | entryB[reventry[firstentry]]=1; |
---|
| 120 | if(splitp)temppointsB[pointsB++]=pointindex[i]; |
---|
| 121 | continue; |
---|
| 122 | } |
---|
| 123 | { |
---|
| 124 | float distA=_Ndist(dim,ppt,_Nnow(besti)); |
---|
| 125 | float distB=_Ndist(dim,ppt,_Nnow(bestj)); |
---|
| 126 | if(distA<distB){ |
---|
| 127 | entryA[reventry[firstentry]]=1; |
---|
| 128 | if(splitp)temppointsA[pointsA++]=pointindex[i]; |
---|
| 129 | }else{ |
---|
| 130 | entryB[reventry[firstentry]]=1; |
---|
| 131 | if(splitp)temppointsB[pointsB++]=pointindex[i]; |
---|
| 132 | } |
---|
| 133 | } |
---|
| 134 | } |
---|
| 135 | |
---|
| 136 | /* The entry splitting isn't total, so that storage has to be |
---|
| 137 | allocated for recursion. Reuse the entryA/entryB vectors */ |
---|
| 138 | /* keep the entries in ascending order (relative to the original |
---|
| 139 | list); we rely on that stability when ordering p/q choice */ |
---|
| 140 | for(j=0;j<entries;j++){ |
---|
| 141 | if(entryA[j] && entryB[j])C++; |
---|
| 142 | if(entryA[j])entryA[A++]=entryindex[j]; |
---|
| 143 | if(entryB[j])entryB[B++]=entryindex[j]; |
---|
| 144 | } |
---|
| 145 | *entriesA=A; |
---|
| 146 | *entriesB=B; |
---|
| 147 | *entriesC=C; |
---|
| 148 | if(splitp){ |
---|
| 149 | memcpy(pointindex,temppointsA,sizeof(long)*pointsA); |
---|
| 150 | memcpy(pointindex+pointsA,temppointsB,sizeof(long)*pointsB); |
---|
| 151 | free(temppointsA); |
---|
| 152 | free(temppointsB); |
---|
| 153 | } |
---|
| 154 | return(pointsA); |
---|
| 155 | } |
---|
| 156 | |
---|
| 157 | int lp_split(float *pointlist,long totalpoints, |
---|
| 158 | codebook *b, |
---|
| 159 | long *entryindex,long entries, |
---|
| 160 | long *pointindex,long points, |
---|
| 161 | long *membership,long *reventry, |
---|
| 162 | long depth, long *pointsofar){ |
---|
| 163 | |
---|
| 164 | encode_aux_nearestmatch *t=b->c->nearest_tree; |
---|
| 165 | |
---|
| 166 | /* The encoder, regardless of book, will be using a straight |
---|
| 167 | euclidian distance-to-point metric to determine closest point. |
---|
| 168 | Thus we split the cells using the same (we've already trained the |
---|
| 169 | codebook set spacing and distribution using special metrics and |
---|
| 170 | even a midpoint division won't disturb the basic properties) */ |
---|
| 171 | |
---|
| 172 | int dim=b->dim; |
---|
| 173 | float *entrylist=b->valuelist; |
---|
| 174 | long ret; |
---|
| 175 | long *entryA=_ogg_calloc(entries,sizeof(long)); |
---|
| 176 | long *entryB=_ogg_calloc(entries,sizeof(long)); |
---|
| 177 | long entriesA=0; |
---|
| 178 | long entriesB=0; |
---|
| 179 | long entriesC=0; |
---|
| 180 | long pointsA=0; |
---|
| 181 | long i,j,k; |
---|
| 182 | |
---|
| 183 | long besti=-1; |
---|
| 184 | long bestj=-1; |
---|
| 185 | |
---|
| 186 | char spinbuf[80]; |
---|
| 187 | sprintf(spinbuf,"splitting [%ld left]... ",totalpoints-*pointsofar); |
---|
| 188 | |
---|
| 189 | /* one reverse index needed */ |
---|
| 190 | for(i=0;i<b->entries;i++)reventry[i]=-1; |
---|
| 191 | for(i=0;i<entries;i++)reventry[entryindex[i]]=i; |
---|
| 192 | |
---|
| 193 | /* We need to find the dividing hyperplane. find the median of each |
---|
| 194 | axis as the centerpoint and the normal facing farthest point */ |
---|
| 195 | |
---|
| 196 | /* more than one way to do this part. For small sets, we can brute |
---|
| 197 | force it. */ |
---|
| 198 | |
---|
| 199 | if(entries<8 || (float)points*entries*entries<16.f*1024*1024){ |
---|
| 200 | /* try every pair possibility */ |
---|
| 201 | float best=0; |
---|
| 202 | float this; |
---|
| 203 | for(i=0;i<entries-1;i++){ |
---|
| 204 | for(j=i+1;j<entries;j++){ |
---|
| 205 | spinnit(spinbuf,entries-i); |
---|
| 206 | vqsp_count(b->valuelist,pointlist,dim, |
---|
| 207 | membership,reventry, |
---|
| 208 | entryindex,entries, |
---|
| 209 | pointindex,points,0, |
---|
| 210 | entryA,entryB, |
---|
| 211 | entryindex[i],entryindex[j], |
---|
| 212 | &entriesA,&entriesB,&entriesC); |
---|
| 213 | this=(entriesA-entriesC)*(entriesB-entriesC); |
---|
| 214 | |
---|
| 215 | /* when choosing best, we also want some form of stability to |
---|
| 216 | make sure more branches are pared later; secondary |
---|
| 217 | weighting isn;t needed as the entry lists are in ascending |
---|
| 218 | order, and we always try p/q in the same sequence */ |
---|
| 219 | |
---|
| 220 | if( (besti==-1) || |
---|
| 221 | (this>best) ){ |
---|
| 222 | |
---|
| 223 | best=this; |
---|
| 224 | besti=entryindex[i]; |
---|
| 225 | bestj=entryindex[j]; |
---|
| 226 | |
---|
| 227 | } |
---|
| 228 | } |
---|
| 229 | } |
---|
| 230 | }else{ |
---|
| 231 | float *p=alloca(dim*sizeof(float)); |
---|
| 232 | float *q=alloca(dim*sizeof(float)); |
---|
| 233 | float best=0.f; |
---|
| 234 | |
---|
| 235 | /* try COG/normal and furthest pairs */ |
---|
| 236 | /* meanpoint */ |
---|
| 237 | /* eventually, we want to select the closest entry and figure n/c |
---|
| 238 | from p/q (because storing n/c is too large */ |
---|
| 239 | for(k=0;k<dim;k++){ |
---|
| 240 | spinnit(spinbuf,entries); |
---|
| 241 | |
---|
| 242 | p[k]=0.f; |
---|
| 243 | for(j=0;j<entries;j++) |
---|
| 244 | p[k]+=b->valuelist[entryindex[j]*dim+k]; |
---|
| 245 | p[k]/=entries; |
---|
| 246 | |
---|
| 247 | } |
---|
| 248 | |
---|
| 249 | /* we go through the entries one by one, looking for the entry on |
---|
| 250 | the other side closest to the point of reflection through the |
---|
| 251 | center */ |
---|
| 252 | |
---|
| 253 | for(i=0;i<entries;i++){ |
---|
| 254 | float *ppi=_Nnow(entryindex[i]); |
---|
| 255 | float ref_best=0.f; |
---|
| 256 | float ref_j=-1; |
---|
| 257 | float this; |
---|
| 258 | spinnit(spinbuf,entries-i); |
---|
| 259 | |
---|
| 260 | for(k=0;k<dim;k++) |
---|
| 261 | q[k]=2*p[k]-ppi[k]; |
---|
| 262 | |
---|
| 263 | for(j=0;j<entries;j++){ |
---|
| 264 | if(j!=i){ |
---|
| 265 | float this=_Ndist(dim,q,_Nnow(entryindex[j])); |
---|
| 266 | if(ref_j==-1 || this<=ref_best){ /* <=, not <; very important */ |
---|
| 267 | ref_best=this; |
---|
| 268 | ref_j=entryindex[j]; |
---|
| 269 | } |
---|
| 270 | } |
---|
| 271 | } |
---|
| 272 | |
---|
| 273 | vqsp_count(b->valuelist,pointlist,dim, |
---|
| 274 | membership,reventry, |
---|
| 275 | entryindex,entries, |
---|
| 276 | pointindex,points,0, |
---|
| 277 | entryA,entryB, |
---|
| 278 | entryindex[i],ref_j, |
---|
| 279 | &entriesA,&entriesB,&entriesC); |
---|
| 280 | this=(entriesA-entriesC)*(entriesB-entriesC); |
---|
| 281 | |
---|
| 282 | /* when choosing best, we also want some form of stability to |
---|
| 283 | make sure more branches are pared later; secondary |
---|
| 284 | weighting isn;t needed as the entry lists are in ascending |
---|
| 285 | order, and we always try p/q in the same sequence */ |
---|
| 286 | |
---|
| 287 | if( (besti==-1) || |
---|
| 288 | (this>best) ){ |
---|
| 289 | |
---|
| 290 | best=this; |
---|
| 291 | besti=entryindex[i]; |
---|
| 292 | bestj=ref_j; |
---|
| 293 | |
---|
| 294 | } |
---|
| 295 | } |
---|
| 296 | if(besti>bestj){ |
---|
| 297 | long temp=besti; |
---|
| 298 | besti=bestj; |
---|
| 299 | bestj=temp; |
---|
| 300 | } |
---|
| 301 | |
---|
| 302 | } |
---|
| 303 | |
---|
| 304 | /* find cells enclosing points */ |
---|
| 305 | /* count A/B points */ |
---|
| 306 | |
---|
| 307 | pointsA=vqsp_count(b->valuelist,pointlist,dim, |
---|
| 308 | membership,reventry, |
---|
| 309 | entryindex,entries, |
---|
| 310 | pointindex,points,1, |
---|
| 311 | entryA,entryB, |
---|
| 312 | besti,bestj, |
---|
| 313 | &entriesA,&entriesB,&entriesC); |
---|
| 314 | |
---|
| 315 | /* fprintf(stderr,"split: total=%ld depth=%ld set A=%ld:%ld:%ld=B\n", |
---|
| 316 | entries,depth,entriesA-entriesC,entriesC,entriesB-entriesC);*/ |
---|
| 317 | { |
---|
| 318 | long thisaux=t->aux++; |
---|
| 319 | if(t->aux>=t->alloc){ |
---|
| 320 | t->alloc*=2; |
---|
| 321 | t->ptr0=_ogg_realloc(t->ptr0,sizeof(long)*t->alloc); |
---|
| 322 | t->ptr1=_ogg_realloc(t->ptr1,sizeof(long)*t->alloc); |
---|
| 323 | t->p=_ogg_realloc(t->p,sizeof(long)*t->alloc); |
---|
| 324 | t->q=_ogg_realloc(t->q,sizeof(long)*t->alloc); |
---|
| 325 | } |
---|
| 326 | |
---|
| 327 | t->p[thisaux]=besti; |
---|
| 328 | t->q[thisaux]=bestj; |
---|
| 329 | |
---|
| 330 | if(entriesA==1){ |
---|
| 331 | ret=1; |
---|
| 332 | t->ptr0[thisaux]=entryA[0]; |
---|
| 333 | *pointsofar+=pointsA; |
---|
| 334 | }else{ |
---|
| 335 | t->ptr0[thisaux]= -t->aux; |
---|
| 336 | ret=lp_split(pointlist,totalpoints,b,entryA,entriesA,pointindex,pointsA, |
---|
| 337 | membership,reventry,depth+1,pointsofar); |
---|
| 338 | } |
---|
| 339 | if(entriesB==1){ |
---|
| 340 | ret++; |
---|
| 341 | t->ptr1[thisaux]=entryB[0]; |
---|
| 342 | *pointsofar+=points-pointsA; |
---|
| 343 | }else{ |
---|
| 344 | t->ptr1[thisaux]= -t->aux; |
---|
| 345 | ret+=lp_split(pointlist,totalpoints,b,entryB,entriesB,pointindex+pointsA, |
---|
| 346 | points-pointsA,membership,reventry, |
---|
| 347 | depth+1,pointsofar); |
---|
| 348 | } |
---|
| 349 | } |
---|
| 350 | free(entryA); |
---|
| 351 | free(entryB); |
---|
| 352 | return(ret); |
---|
| 353 | } |
---|
| 354 | |
---|
| 355 | static int _node_eq(encode_aux_nearestmatch *v, long a, long b){ |
---|
| 356 | long Aptr0=v->ptr0[a]; |
---|
| 357 | long Aptr1=v->ptr1[a]; |
---|
| 358 | long Bptr0=v->ptr0[b]; |
---|
| 359 | long Bptr1=v->ptr1[b]; |
---|
| 360 | |
---|
| 361 | /* the possibility of choosing the same p and q, but switched, can;t |
---|
| 362 | happen because we always look for the best p/q in the same search |
---|
| 363 | order and the search is stable */ |
---|
| 364 | |
---|
| 365 | if(Aptr0==Bptr0 && Aptr1==Bptr1) |
---|
| 366 | return(1); |
---|
| 367 | |
---|
| 368 | return(0); |
---|
| 369 | } |
---|
| 370 | |
---|
| 371 | void vqsp_book(vqgen *v, codebook *b, long *quantlist){ |
---|
| 372 | long i,j; |
---|
| 373 | static_codebook *c=(static_codebook *)b->c; |
---|
| 374 | encode_aux_nearestmatch *t; |
---|
| 375 | |
---|
| 376 | memset(b,0,sizeof(codebook)); |
---|
| 377 | memset(c,0,sizeof(static_codebook)); |
---|
| 378 | b->c=c; |
---|
| 379 | t=c->nearest_tree=_ogg_calloc(1,sizeof(encode_aux_nearestmatch)); |
---|
| 380 | c->maptype=2; |
---|
| 381 | |
---|
| 382 | /* make sure there are no duplicate entries and that every |
---|
| 383 | entry has points */ |
---|
| 384 | |
---|
| 385 | for(i=0;i<v->entries;){ |
---|
| 386 | /* duplicate? if so, eliminate */ |
---|
| 387 | for(j=0;j<i;j++){ |
---|
| 388 | if(_Ndist(v->elements,_now(v,i),_now(v,j))==0.f){ |
---|
| 389 | fprintf(stderr,"found a duplicate entry! removing...\n"); |
---|
| 390 | v->entries--; |
---|
| 391 | memcpy(_now(v,i),_now(v,v->entries),sizeof(float)*v->elements); |
---|
| 392 | memcpy(quantlist+i*v->elements,quantlist+v->entries*v->elements, |
---|
| 393 | sizeof(long)*v->elements); |
---|
| 394 | break; |
---|
| 395 | } |
---|
| 396 | } |
---|
| 397 | if(j==i)i++; |
---|
| 398 | } |
---|
| 399 | |
---|
| 400 | { |
---|
| 401 | v->assigned=_ogg_calloc(v->entries,sizeof(long)); |
---|
| 402 | for(i=0;i<v->points;i++){ |
---|
| 403 | float *ppt=_point(v,i); |
---|
| 404 | float firstmetric=_Ndist(v->elements,_now(v,0),ppt); |
---|
| 405 | long firstentry=0; |
---|
| 406 | |
---|
| 407 | if(!(i&0xff))spinnit("checking... ",v->points-i); |
---|
| 408 | |
---|
| 409 | for(j=0;j<v->entries;j++){ |
---|
| 410 | float thismetric=_Ndist(v->elements,_now(v,j),ppt); |
---|
| 411 | if(thismetric<firstmetric){ |
---|
| 412 | firstmetric=thismetric; |
---|
| 413 | firstentry=j; |
---|
| 414 | } |
---|
| 415 | } |
---|
| 416 | |
---|
| 417 | v->assigned[firstentry]++; |
---|
| 418 | } |
---|
| 419 | |
---|
| 420 | for(j=0;j<v->entries;){ |
---|
| 421 | if(v->assigned[j]==0){ |
---|
| 422 | fprintf(stderr,"found an unused entry! removing...\n"); |
---|
| 423 | v->entries--; |
---|
| 424 | memcpy(_now(v,j),_now(v,v->entries),sizeof(float)*v->elements); |
---|
| 425 | v->assigned[j]=v->assigned[v->elements]; |
---|
| 426 | memcpy(quantlist+j*v->elements,quantlist+v->entries*v->elements, |
---|
| 427 | sizeof(long)*v->elements); |
---|
| 428 | continue; |
---|
| 429 | } |
---|
| 430 | j++; |
---|
| 431 | } |
---|
| 432 | } |
---|
| 433 | |
---|
| 434 | fprintf(stderr,"Building a book with %ld unique entries...\n",v->entries); |
---|
| 435 | |
---|
| 436 | { |
---|
| 437 | long *entryindex=_ogg_malloc(v->entries*sizeof(long *)); |
---|
| 438 | long *pointindex=_ogg_malloc(v->points*sizeof(long)); |
---|
| 439 | long *membership=_ogg_malloc(v->points*sizeof(long)); |
---|
| 440 | long *reventry=_ogg_malloc(v->entries*sizeof(long)); |
---|
| 441 | long pointssofar=0; |
---|
| 442 | |
---|
| 443 | for(i=0;i<v->entries;i++)entryindex[i]=i; |
---|
| 444 | for(i=0;i<v->points;i++)pointindex[i]=i; |
---|
| 445 | |
---|
| 446 | t->alloc=4096; |
---|
| 447 | t->ptr0=_ogg_malloc(sizeof(long)*t->alloc); |
---|
| 448 | t->ptr1=_ogg_malloc(sizeof(long)*t->alloc); |
---|
| 449 | t->p=_ogg_malloc(sizeof(long)*t->alloc); |
---|
| 450 | t->q=_ogg_malloc(sizeof(long)*t->alloc); |
---|
| 451 | t->aux=0; |
---|
| 452 | c->dim=v->elements; |
---|
| 453 | c->entries=v->entries; |
---|
| 454 | c->lengthlist=_ogg_calloc(c->entries,sizeof(long)); |
---|
| 455 | b->valuelist=v->entrylist; /* temporary; replaced later */ |
---|
| 456 | b->dim=c->dim; |
---|
| 457 | b->entries=c->entries; |
---|
| 458 | |
---|
| 459 | for(i=0;i<v->points;i++)membership[i]=-1; |
---|
| 460 | for(i=0;i<v->points;i++){ |
---|
| 461 | float *ppt=_point(v,i); |
---|
| 462 | long firstentry=0; |
---|
| 463 | float firstmetric=_Ndist(v->elements,_now(v,0),ppt); |
---|
| 464 | |
---|
| 465 | if(!(i&0xff))spinnit("assigning... ",v->points-i); |
---|
| 466 | |
---|
| 467 | for(j=1;j<v->entries;j++){ |
---|
| 468 | if(v->assigned[j]!=-1){ |
---|
| 469 | float thismetric=_Ndist(v->elements,_now(v,j),ppt); |
---|
| 470 | if(thismetric<=firstmetric){ |
---|
| 471 | firstmetric=thismetric; |
---|
| 472 | firstentry=j; |
---|
| 473 | } |
---|
| 474 | } |
---|
| 475 | } |
---|
| 476 | |
---|
| 477 | membership[i]=firstentry; |
---|
| 478 | } |
---|
| 479 | |
---|
| 480 | fprintf(stderr,"Leaves added: %d \n", |
---|
| 481 | lp_split(v->pointlist,v->points, |
---|
| 482 | b,entryindex,v->entries, |
---|
| 483 | pointindex,v->points, |
---|
| 484 | membership,reventry, |
---|
| 485 | 0,&pointssofar)); |
---|
| 486 | |
---|
| 487 | free(pointindex); |
---|
| 488 | free(membership); |
---|
| 489 | free(reventry); |
---|
| 490 | |
---|
| 491 | fprintf(stderr,"Paring/rerouting redundant branches... "); |
---|
| 492 | |
---|
| 493 | /* The tree is likely big and redundant. Pare and reroute branches */ |
---|
| 494 | { |
---|
| 495 | int changedflag=1; |
---|
| 496 | |
---|
| 497 | while(changedflag){ |
---|
| 498 | changedflag=0; |
---|
| 499 | |
---|
| 500 | /* span the tree node by node; list unique decision nodes and |
---|
| 501 | short circuit redundant branches */ |
---|
| 502 | |
---|
| 503 | for(i=0;i<t->aux;){ |
---|
| 504 | int k; |
---|
| 505 | |
---|
| 506 | /* check list of unique decisions */ |
---|
| 507 | for(j=0;j<i;j++) |
---|
| 508 | if(_node_eq(t,i,j))break; |
---|
| 509 | |
---|
| 510 | if(j<i){ |
---|
| 511 | /* a redundant entry; find all higher nodes referencing it and |
---|
| 512 | short circuit them to the previously noted unique entry */ |
---|
| 513 | changedflag=1; |
---|
| 514 | for(k=0;k<t->aux;k++){ |
---|
| 515 | if(t->ptr0[k]==-i)t->ptr0[k]=-j; |
---|
| 516 | if(t->ptr1[k]==-i)t->ptr1[k]=-j; |
---|
| 517 | } |
---|
| 518 | |
---|
| 519 | /* Now, we need to fill in the hole from this redundant |
---|
| 520 | entry in the listing. Insert the last entry in the list. |
---|
| 521 | Fix the forward pointers to that last entry */ |
---|
| 522 | t->aux--; |
---|
| 523 | t->ptr0[i]=t->ptr0[t->aux]; |
---|
| 524 | t->ptr1[i]=t->ptr1[t->aux]; |
---|
| 525 | t->p[i]=t->p[t->aux]; |
---|
| 526 | t->q[i]=t->q[t->aux]; |
---|
| 527 | for(k=0;k<t->aux;k++){ |
---|
| 528 | if(t->ptr0[k]==-t->aux)t->ptr0[k]=-i; |
---|
| 529 | if(t->ptr1[k]==-t->aux)t->ptr1[k]=-i; |
---|
| 530 | } |
---|
| 531 | /* hole plugged */ |
---|
| 532 | |
---|
| 533 | }else |
---|
| 534 | i++; |
---|
| 535 | } |
---|
| 536 | |
---|
| 537 | fprintf(stderr,"\rParing/rerouting redundant branches... " |
---|
| 538 | "%ld remaining ",t->aux); |
---|
| 539 | } |
---|
| 540 | fprintf(stderr,"\n"); |
---|
| 541 | } |
---|
| 542 | } |
---|
| 543 | |
---|
| 544 | /* run all training points through the decision tree to get a final |
---|
| 545 | probability count */ |
---|
| 546 | { |
---|
| 547 | long *probability=_ogg_malloc(c->entries*sizeof(long)); |
---|
| 548 | for(i=0;i<c->entries;i++)probability[i]=1; /* trivial guard */ |
---|
| 549 | b->dim=c->dim; |
---|
| 550 | |
---|
| 551 | /* sigh. A necessary hack */ |
---|
| 552 | for(i=0;i<t->aux;i++)t->p[i]*=c->dim; |
---|
| 553 | for(i=0;i<t->aux;i++)t->q[i]*=c->dim; |
---|
| 554 | |
---|
| 555 | for(i=0;i<v->points;i++){ |
---|
| 556 | /* we use the linear matcher regardless becuase the trainer |
---|
| 557 | doesn't convert log to linear */ |
---|
| 558 | int ret=_best(b,v->pointlist+i*v->elements,1); |
---|
| 559 | probability[ret]++; |
---|
| 560 | if(!(i&0xff))spinnit("counting hits... ",v->points-i); |
---|
| 561 | } |
---|
| 562 | for(i=0;i<t->aux;i++)t->p[i]/=c->dim; |
---|
| 563 | for(i=0;i<t->aux;i++)t->q[i]/=c->dim; |
---|
| 564 | |
---|
| 565 | build_tree_from_lengths(c->entries,probability,c->lengthlist); |
---|
| 566 | |
---|
| 567 | free(probability); |
---|
| 568 | } |
---|
| 569 | |
---|
| 570 | /* Sort the entries by codeword length, short to long (eases |
---|
| 571 | assignment and packing to do it now) */ |
---|
| 572 | { |
---|
| 573 | long *wordlen=c->lengthlist; |
---|
| 574 | long *index=_ogg_malloc(c->entries*sizeof(long)); |
---|
| 575 | long *revindex=_ogg_malloc(c->entries*sizeof(long)); |
---|
| 576 | int k; |
---|
| 577 | for(i=0;i<c->entries;i++)index[i]=i; |
---|
| 578 | isortvals=c->lengthlist; |
---|
| 579 | qsort(index,c->entries,sizeof(long),iascsort); |
---|
| 580 | |
---|
| 581 | /* rearrange storage; ptr0/1 first as it needs a reverse index */ |
---|
| 582 | /* n and c stay unchanged */ |
---|
| 583 | for(i=0;i<c->entries;i++)revindex[index[i]]=i; |
---|
| 584 | for(i=0;i<t->aux;i++){ |
---|
| 585 | if(!(i&0x3f))spinnit("sorting... ",t->aux-i); |
---|
| 586 | |
---|
| 587 | if(t->ptr0[i]>=0)t->ptr0[i]=revindex[t->ptr0[i]]; |
---|
| 588 | if(t->ptr1[i]>=0)t->ptr1[i]=revindex[t->ptr1[i]]; |
---|
| 589 | t->p[i]=revindex[t->p[i]]; |
---|
| 590 | t->q[i]=revindex[t->q[i]]; |
---|
| 591 | } |
---|
| 592 | free(revindex); |
---|
| 593 | |
---|
| 594 | /* map lengthlist and vallist with index */ |
---|
| 595 | c->lengthlist=_ogg_calloc(c->entries,sizeof(long)); |
---|
| 596 | b->valuelist=_ogg_malloc(sizeof(float)*c->entries*c->dim); |
---|
| 597 | c->quantlist=_ogg_malloc(sizeof(long)*c->entries*c->dim); |
---|
| 598 | for(i=0;i<c->entries;i++){ |
---|
| 599 | long e=index[i]; |
---|
| 600 | for(k=0;k<c->dim;k++){ |
---|
| 601 | b->valuelist[i*c->dim+k]=v->entrylist[e*c->dim+k]; |
---|
| 602 | c->quantlist[i*c->dim+k]=quantlist[e*c->dim+k]; |
---|
| 603 | } |
---|
| 604 | c->lengthlist[i]=wordlen[e]; |
---|
| 605 | } |
---|
| 606 | |
---|
| 607 | free(wordlen); |
---|
| 608 | } |
---|
| 609 | |
---|
| 610 | fprintf(stderr,"Done. \n\n"); |
---|
| 611 | } |
---|
| 612 | |
---|