1 | /******************************************************************** |
---|
2 | * * |
---|
3 | * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. * |
---|
4 | * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * |
---|
5 | * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * |
---|
6 | * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * |
---|
7 | * * |
---|
8 | * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 * |
---|
9 | * by the Xiph.Org Foundation http://www.xiph.org/ * |
---|
10 | * * |
---|
11 | ******************************************************************** |
---|
12 | |
---|
13 | function: utility for finding the distribution in a data set |
---|
14 | last mod: $Id: distribution.c 13293 2007-07-24 00:09:47Z xiphmont $ |
---|
15 | |
---|
16 | ********************************************************************/ |
---|
17 | |
---|
18 | #include <stdlib.h> |
---|
19 | #include <stdio.h> |
---|
20 | #include <math.h> |
---|
21 | #include <string.h> |
---|
22 | #include <errno.h> |
---|
23 | #include "bookutil.h" |
---|
24 | |
---|
25 | /* command line: |
---|
26 | distribution data.vqd [bins]   (or: distribution book.vqh) |
---|
27 | */ |
---|
28 | |
---|
/* qsort() comparator for an array of pointers-to-long: orders the
   pointers by the value each one points at, ascending.

   a, b: addresses of two array elements (each element is a long *).
   Returns <0, 0 or >0 when *a's target is less than, equal to or
   greater than *b's target.

   The values are compared rather than subtracted: the difference of two
   longs can overflow, and narrowing it to int can flip its sign or turn
   a non-zero difference into 0, which would hand qsort() an inconsistent
   ordering. */
int ascend(const void *a,const void *b){
  long lhs=**(const long *const *)a;
  long rhs=**(const long *const *)b;
  return (lhs>rhs)-(lhs<rhs);
}
---|
32 | |
---|
33 | int main(int argc,char *argv[]){ |
---|
34 | FILE *in; |
---|
35 | long lines=0; |
---|
36 | float min; |
---|
37 | float max; |
---|
38 | long bins=-1; |
---|
39 | int flag=0; |
---|
40 | long *countarray; |
---|
41 | long total=0; |
---|
42 | char *line; |
---|
43 | |
---|
44 | if(argv[1]==NULL){ |
---|
45 | fprintf(stderr,"Usage: distribution {data.vqd [bins]| book.vqh} \n\n"); |
---|
46 | exit(1); |
---|
47 | } |
---|
48 | if(argv[2]!=NULL) |
---|
49 | bins=atoi(argv[2])-1; |
---|
50 | |
---|
51 | in=fopen(argv[1],"r"); |
---|
52 | if(!in){ |
---|
53 | fprintf(stderr,"Could not open input file %s\n",argv[1]); |
---|
54 | exit(1); |
---|
55 | } |
---|
56 | |
---|
57 | if(strrchr(argv[1],'.') && strcmp(strrchr(argv[1],'.'),".vqh")==0){ |
---|
58 | /* load/decode a book */ |
---|
59 | |
---|
60 | codebook *b=codebook_load(argv[1]); |
---|
61 | static_codebook *c=(static_codebook *)(b->c); |
---|
62 | float delta; |
---|
63 | int i; |
---|
64 | fclose(in); |
---|
65 | |
---|
66 | switch(c->maptype){ |
---|
67 | case 0: |
---|
68 | printf("entropy codebook only; no mappings\n"); |
---|
69 | exit(0); |
---|
70 | break; |
---|
71 | case 1: |
---|
72 | bins=_book_maptype1_quantvals(c); |
---|
73 | break; |
---|
74 | case 2: |
---|
75 | bins=c->entries*c->dim; |
---|
76 | break; |
---|
77 | } |
---|
78 | |
---|
79 | max=min=_float32_unpack(c->q_min); |
---|
80 | delta=_float32_unpack(c->q_delta); |
---|
81 | |
---|
82 | for(i=0;i<bins;i++){ |
---|
83 | float val=c->quantlist[i]*delta+min; |
---|
84 | if(val>max)max=val; |
---|
85 | } |
---|
86 | |
---|
87 | printf("Minimum scalar value: %f\n",min); |
---|
88 | printf("Maximum scalar value: %f\n",max); |
---|
89 | |
---|
90 | switch(c->maptype){ |
---|
91 | case 1: |
---|
92 | { |
---|
93 | /* lattice codebook. dump it. */ |
---|
94 | int j,k; |
---|
95 | long maxcount=0; |
---|
96 | long **sort=calloc(bins,sizeof(long *)); |
---|
97 | long base=c->lengthlist[0]; |
---|
98 | countarray=calloc(bins,sizeof(long)); |
---|
99 | |
---|
100 | for(i=0;i<bins;i++)sort[i]=c->quantlist+i; |
---|
101 | qsort(sort,bins,sizeof(long *),ascend); |
---|
102 | |
---|
103 | for(i=0;i<b->entries;i++) |
---|
104 | if(c->lengthlist[i]>base)base=c->lengthlist[i]; |
---|
105 | |
---|
106 | /* dump a full, correlated count */ |
---|
107 | for(j=0;j<b->entries;j++){ |
---|
108 | if(c->lengthlist[j]){ |
---|
109 | int indexdiv=1; |
---|
110 | printf("%4d: ",j); |
---|
111 | for(k=0;k<b->dim;k++){ |
---|
112 | int index= (j/indexdiv)%bins; |
---|
113 | printf("%+3.1f,", c->quantlist[index]*_float32_unpack(c->q_delta)+ |
---|
114 | _float32_unpack(c->q_min)); |
---|
115 | indexdiv*=bins; |
---|
116 | } |
---|
117 | printf("\t|"); |
---|
118 | for(k=0;k<base-c->lengthlist[j];k++)printf("*"); |
---|
119 | printf("\n"); |
---|
120 | } |
---|
121 | } |
---|
122 | |
---|
123 | /* do a rough count */ |
---|
124 | for(j=0;j<b->entries;j++){ |
---|
125 | int indexdiv=1; |
---|
126 | for(k=0;k<b->dim;k++){ |
---|
127 | if(c->lengthlist[j]){ |
---|
128 | int index= (j/indexdiv)%bins; |
---|
129 | countarray[index]+=(1<<(base-c->lengthlist[j])); |
---|
130 | indexdiv*=bins; |
---|
131 | } |
---|
132 | } |
---|
133 | } |
---|
134 | |
---|
135 | /* dump the count */ |
---|
136 | |
---|
137 | { |
---|
138 | long maxcount=0,i,j; |
---|
139 | for(i=0;i<bins;i++) |
---|
140 | if(countarray[i]>maxcount)maxcount=countarray[i]; |
---|
141 | |
---|
142 | for(i=0;i<bins;i++){ |
---|
143 | int ptr=sort[i]-c->quantlist; |
---|
144 | int stars=rint(50./maxcount*countarray[ptr]); |
---|
145 | printf("%+08f (%8ld) |",c->quantlist[ptr]*delta+min,countarray[ptr]); |
---|
146 | for(j=0;j<stars;j++)printf("*"); |
---|
147 | printf("\n"); |
---|
148 | } |
---|
149 | } |
---|
150 | } |
---|
151 | break; |
---|
152 | case 2: |
---|
153 | { |
---|
154 | /* trained, full mapping codebook. */ |
---|
155 | printf("Can't do probability dump of a trained [type 2] codebook (yet)\n"); |
---|
156 | } |
---|
157 | break; |
---|
158 | } |
---|
159 | }else{ |
---|
160 | /* load/count a data file */ |
---|
161 | |
---|
162 | /* do it the simple way; two pass. */ |
---|
163 | line=setup_line(in); |
---|
164 | while(line){ |
---|
165 | float code; |
---|
166 | char buf[80]; |
---|
167 | lines++; |
---|
168 | |
---|
169 | sprintf(buf,"getting min/max (%.2f::%.2f). lines...",min,max); |
---|
170 | if(!(lines&0xff))spinnit(buf,lines); |
---|
171 | |
---|
172 | while(!flag && sscanf(line,"%f",&code)==1){ |
---|
173 | line=strchr(line,','); |
---|
174 | min=max=code; |
---|
175 | flag=1; |
---|
176 | } |
---|
177 | |
---|
178 | while(line && sscanf(line,"%f",&code)==1){ |
---|
179 | line=strchr(line,','); |
---|
180 | if(line)line++; |
---|
181 | if(code<min)min=code; |
---|
182 | if(code>max)max=code; |
---|
183 | } |
---|
184 | |
---|
185 | line=setup_line(in); |
---|
186 | } |
---|
187 | |
---|
188 | if(bins<1){ |
---|
189 | if((int)(max-min)==min-max){ |
---|
190 | bins=max-min; |
---|
191 | }else{ |
---|
192 | bins=25; |
---|
193 | } |
---|
194 | } |
---|
195 | |
---|
196 | printf("\r \r"); |
---|
197 | printf("Minimum scalar value: %f\n",min); |
---|
198 | printf("Maximum scalar value: %f\n",max); |
---|
199 | |
---|
200 | if(argv[2]){ |
---|
201 | |
---|
202 | printf("\n counting hits into %ld bins...\n",bins+1); |
---|
203 | countarray=calloc(bins+1,sizeof(long)); |
---|
204 | |
---|
205 | rewind(in); |
---|
206 | line=setup_line(in); |
---|
207 | while(line){ |
---|
208 | float code; |
---|
209 | lines--; |
---|
210 | if(!(lines&0xff))spinnit("counting distribution. lines so far...",lines); |
---|
211 | |
---|
212 | while(line && sscanf(line,"%f",&code)==1){ |
---|
213 | line=strchr(line,','); |
---|
214 | if(line)line++; |
---|
215 | |
---|
216 | code-=min; |
---|
217 | code/=(max-min); |
---|
218 | code*=bins; |
---|
219 | countarray[(int)rint(code)]++; |
---|
220 | total++; |
---|
221 | } |
---|
222 | |
---|
223 | line=setup_line(in); |
---|
224 | } |
---|
225 | |
---|
226 | /* make a pretty graph */ |
---|
227 | { |
---|
228 | long maxcount=0,i,j; |
---|
229 | for(i=0;i<bins+1;i++) |
---|
230 | if(countarray[i]>maxcount)maxcount=countarray[i]; |
---|
231 | |
---|
232 | printf("\r \r"); |
---|
233 | printf("Total scalars: %ld\n",total); |
---|
234 | for(i=0;i<bins+1;i++){ |
---|
235 | int stars=rint(50./maxcount*countarray[i]); |
---|
236 | printf("%08f (%8ld) |",(max-min)/bins*i+min,countarray[i]); |
---|
237 | for(j=0;j<stars;j++)printf("*"); |
---|
238 | printf("\n"); |
---|
239 | } |
---|
240 | } |
---|
241 | } |
---|
242 | |
---|
243 | fclose(in); |
---|
244 | |
---|
245 | } |
---|
246 | printf("\nDone.\n"); |
---|
247 | exit(0); |
---|
248 | } |
---|