1 | /* ============================================================= |
2 | * SmallSQL : a free Java DBMS library for the Java(tm) platform |
3 | * ============================================================= |
4 | * |
5 | * (C) Copyright 2004-2006, by Volker Berlin. |
6 | * |
7 | * Project Info: http://www.smallsql.de/ |
8 | * |
9 | * This library is free software; you can redistribute it and/or modify it |
10 | * under the terms of the GNU Lesser General Public License as published by |
11 | * the Free Software Foundation; either version 2.1 of the License, or |
12 | * (at your option) any later version. |
13 | * |
14 | * This library is distributed in the hope that it will be useful, but |
15 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
16 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
17 | * License for more details. |
18 | * |
19 | * You should have received a copy of the GNU Lesser General Public |
20 | * License along with this library; if not, write to the Free Software |
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, |
22 | * USA. |
23 | * |
24 | * [Java is a trademark or registered trademark of Sun Microsystems, Inc. |
25 | * in the United States and other countries.] |
26 | * |
27 | * --------------- |
28 | * Index.java |
29 | * --------------- |
30 | * Author: Volker Berlin |
31 | * |
32 | */ |
33 | package smallsql.database; |
34 | |
35 | import java.sql.SQLException; |
36 | import java.util.ArrayList; |
37 | |
38 | |
39 | /** |
40 | * To index data there need to solv the follow problems |
41 | * - change the values that need to save in the index to a value with sort order that is compatible |
42 | * with the index algorithmus. |
43 | * - multiple column index need to support. There should no identical save with combinations of values. |
44 | * - The data type for column should be const. |
45 | * - the data need to save fast. |
46 | * - the size of the index should be small (also with a small count of values) |
47 | * - It should use for unique index and nor unique. The unique index can save only one rowOffset. |
48 | * The non unique can save multiple rowOffsets in a LongTreeList. |
49 | * - Problem ORDER BY with Joins? There are more as one rowOffset per row. |
50 | * |
51 | * |
52 | * Algorithmus: |
53 | * - convert the values that the binary order is equals to the value order. We need to handle |
54 | * sign, floating numbers, case insensitive, different binary length (MutableNumeric). |
55 | * - create a 256 byte large mask for the first byte. |
56 | * - create a 256 byte large status mask |
57 | * - create a 256 large Object array |
58 | * |
59 | * |
60 | * @author Volker Berlin |
61 | * |
62 | */ |
63 | final class Index { |
64 | |
65 | /*static public void main(String args[]) throws Exception{ |
66 | Index index = new Index(false); |
67 | Expressions expressions = new Expressions(); |
68 | ExpressionValue value = new ExpressionValue(); |
69 | expressions.add(value); |
70 | value.set( "150", SQLTokenizer.VARCHAR); |
71 | index.addValues(1, expressions); |
72 | value.set( "15", SQLTokenizer.VARCHAR); |
73 | index.addValues(2, expressions); |
74 | print(index,expressions); |
75 | } |
76 | |
77 | static void print(Index index, Expressions expressions){ |
78 | index.reset(expressions); |
79 | long l; |
80 | while((l= index.getRowOffset(expressions, true)) >=0){ |
81 | System.out.println(l); |
82 | } |
83 | System.out.println("============================"); |
84 | }*/ |
85 | |
86 | private final IndexNode rootPage; |
87 | |
88 | /** |
89 | * Create a Index. A Index is like a sorted list. |
90 | * @param unique true if there are no duplicated values allow. |
91 | */ |
92 | Index(boolean unique){ |
93 | rootPage = new IndexNode(unique, (char)-1); |
94 | } |
95 | |
96 | |
97 | IndexScrollStatus createScrollStatus(Expressions expressions){ |
98 | return new IndexScrollStatus(rootPage, expressions); |
99 | } |
100 | |
101 | /** |
102 | * Returns a Long (unigue) or a LongTreeList with rowOffsets. If the value in expressions |
103 | * does not exist then it return a null. |
104 | * @param expressions The value that are search in the Index. |
105 | */ |
106 | final Object findRows( Expressions expressions, ArrayList nodeList ) throws Exception{ |
107 | IndexNode page = rootPage; |
108 | int count = expressions.size(); |
109 | for(int i=0; i<count; i++){ |
110 | page = findRows( page, expressions.get(i), nodeList); |
111 | if(page == null) |
112 | return null; |
113 | if(i+1 == count) |
114 | return page.getValue(); |
115 | else |
116 | page = (IndexNode)page.getValue(); |
117 | } |
118 | throw new Error(); |
119 | } |
120 | |
121 | |
122 | /** |
123 | * Returns a Long (unigue) or a LongTreeList with rowOffsets. If the value in expressions |
124 | * does not exist then it return a null. |
125 | * @param expressions The value that are search in the Index. |
126 | */ |
127 | final Object findRows( Expression[] expressions, ArrayList nodeList ) throws Exception{ |
128 | IndexNode page = rootPage; |
129 | int count = expressions.length; |
130 | for(int i=0; i<count; i++){ |
131 | page = findRows( page, expressions[i], nodeList); |
132 | if(page == null) |
133 | return null; |
134 | if(i+1 == count) |
135 | return page.getValue(); |
136 | else |
137 | page = (IndexNode)page.getValue(); |
138 | } |
139 | throw new Error(); |
140 | } |
141 | |
142 | |
143 | /** Return the last IndexNode for the expression */ |
144 | final private IndexNode findRows(IndexNode page, Expression expr, ArrayList nodeList) throws Exception{ |
145 | if(expr.isNull()){ |
146 | page = findNull(page); |
147 | }else{ |
148 | switch(expr.getDataType()){ |
149 | case SQLTokenizer.REAL: |
150 | page = find( page, floatToBinarySortOrder( expr.getFloat()), 2, nodeList ); |
151 | break; |
152 | case SQLTokenizer.DOUBLE: |
153 | case SQLTokenizer.FLOAT: |
154 | page = find( page, doubleToBinarySortOrder( expr.getDouble()), 4, nodeList ); |
155 | break; |
156 | case SQLTokenizer.TINYINT: |
157 | page = find( page, expr.getInt(), 1, nodeList ); |
158 | break; |
159 | case SQLTokenizer.SMALLINT: |
160 | page = find( page, shortToBinarySortOrder( expr.getInt()), 1, nodeList ); |
161 | break; |
162 | case SQLTokenizer.INT: |
163 | page = find( page, intToBinarySortOrder( expr.getInt()), 2, nodeList ); |
164 | break; |
165 | case SQLTokenizer.BIGINT: |
166 | case SQLTokenizer.DATE: |
167 | case SQLTokenizer.TIME: |
168 | case SQLTokenizer.TIMESTAMP: |
169 | case SQLTokenizer.SMALLDATETIME: |
170 | case SQLTokenizer.MONEY: |
171 | case SQLTokenizer.SMALLMONEY: |
172 | page = find( page, longToBinarySortOrder( expr.getLong()), 4, nodeList ); |
173 | break; |
174 | case SQLTokenizer.VARCHAR: |
175 | case SQLTokenizer.NVARCHAR: |
176 | case SQLTokenizer.LONGVARCHAR: |
177 | case SQLTokenizer.LONGNVARCHAR: |
178 | case SQLTokenizer.CLOB: |
179 | page = find( page, stringToBinarySortOrder( expr.getString(), false ), nodeList ); |
180 | break; |
181 | case SQLTokenizer.NCHAR: |
182 | case SQLTokenizer.CHAR: |
183 | page = find( page, stringToBinarySortOrder( expr.getString(), true ), nodeList ); |
184 | break; |
185 | case SQLTokenizer.VARBINARY: |
186 | case SQLTokenizer.BINARY: |
187 | case SQLTokenizer.LONGVARBINARY: |
188 | case SQLTokenizer.BLOB: |
189 | case SQLTokenizer.UNIQUEIDENTIFIER: |
190 | page = find( page, bytesToBinarySortOrder( expr.getBytes()), nodeList ); |
191 | break; |
192 | case SQLTokenizer.BIT: |
193 | case SQLTokenizer.BOOLEAN: |
194 | page = find( page, expr.getBoolean() ? 2 : 1, 1, nodeList ); |
195 | break; |
196 | case SQLTokenizer.NUMERIC: |
197 | case SQLTokenizer.DECIMAL: |
198 | page = find( page, numericToBinarySortOrder( expr.getNumeric() ), nodeList ); |
199 | break; |
200 | default: |
201 | //TODO weitere Datentypen |
202 | throw new Error(String.valueOf(expr.getDataType())); |
203 | } |
204 | } |
205 | return page; |
206 | } |
207 | |
208 | |
209 | /** |
210 | * Add a value to the index. |
211 | * @param rowOffset Is the value that is save in the index. It is typical a row number or a rowOffset. |
212 | * @param expressions This is the list of ORDER BY columns and descript the position in the index. |
213 | */ |
214 | final void addValues( long rowOffset, Expressions expressions ) throws Exception{ |
215 | IndexNode page = this.rootPage; |
216 | int count = expressions.size(); |
217 | for(int i=0; i<count; i++){ |
218 | Expression expr = expressions.get(i); |
219 | boolean isLastValues = (i == count-1); |
220 | if(expr.isNull()){ |
221 | page = addNull(page, rowOffset, isLastValues); |
222 | }else{ |
223 | switch(expr.getDataType()){ |
224 | case SQLTokenizer.REAL: |
225 | page = add( page, rowOffset, floatToBinarySortOrder( expr.getFloat()), isLastValues, 2 ); |
226 | break; |
227 | case SQLTokenizer.DOUBLE: |
228 | case SQLTokenizer.FLOAT: |
229 | page = add( page, rowOffset, doubleToBinarySortOrder( expr.getDouble()), isLastValues, 4 ); |
230 | break; |
231 | case SQLTokenizer.TINYINT: |
232 | page = add( page, rowOffset, expr.getInt(), isLastValues, 1 ); |
233 | break; |
234 | case SQLTokenizer.SMALLINT: |
235 | page = add( page, rowOffset, shortToBinarySortOrder( expr.getInt()), isLastValues, 1 ); |
236 | break; |
237 | case SQLTokenizer.INT: |
238 | page = add( page, rowOffset, intToBinarySortOrder( expr.getInt()), isLastValues, 2 ); |
239 | break; |
240 | case SQLTokenizer.BIGINT: |
241 | case SQLTokenizer.DATE: |
242 | case SQLTokenizer.TIME: |
243 | case SQLTokenizer.TIMESTAMP: |
244 | case SQLTokenizer.SMALLDATETIME: |
245 | case SQLTokenizer.MONEY: |
246 | case SQLTokenizer.SMALLMONEY: |
247 | page = add( page, rowOffset, longToBinarySortOrder( expr.getLong()), isLastValues, 4 ); |
248 | break; |
249 | case SQLTokenizer.VARCHAR: |
250 | case SQLTokenizer.NVARCHAR: |
251 | case SQLTokenizer.LONGVARCHAR: |
252 | case SQLTokenizer.LONGNVARCHAR: |
253 | page = add( page, rowOffset, stringToBinarySortOrder( expr.getString(), false ), isLastValues ); |
254 | break; |
255 | case SQLTokenizer.NCHAR: |
256 | case SQLTokenizer.CHAR: |
257 | page = add( page, rowOffset, stringToBinarySortOrder( expr.getString(), true ), isLastValues ); |
258 | break; |
259 | case SQLTokenizer.VARBINARY: |
260 | case SQLTokenizer.BINARY: |
261 | case SQLTokenizer.LONGVARBINARY: |
262 | case SQLTokenizer.BLOB: |
263 | case SQLTokenizer.UNIQUEIDENTIFIER: |
264 | page = add( page, rowOffset, bytesToBinarySortOrder( expr.getBytes()), isLastValues ); |
265 | break; |
266 | case SQLTokenizer.BIT: |
267 | case SQLTokenizer.BOOLEAN: |
268 | page = add( page, rowOffset, expr.getBoolean() ? 2 : 1, isLastValues, 1 ); |
269 | break; |
270 | case SQLTokenizer.NUMERIC: |
271 | case SQLTokenizer.DECIMAL: |
272 | page = add( page, rowOffset, numericToBinarySortOrder( expr.getNumeric()), isLastValues ); |
273 | break; |
274 | default: |
275 | //TODO weitere Datentypen |
276 | throw new Error(String.valueOf(expr.getDataType())); |
277 | } |
278 | } |
279 | } |
280 | } |
281 | |
282 | |
283 | final void removeValue( long rowOffset, Expressions expressions ) throws Exception{ |
284 | ArrayList nodeList = new ArrayList(); |
285 | Object obj = findRows(expressions, nodeList); |
286 | if(!rootPage.getUnique()){ |
287 | LongTreeList list = (LongTreeList)obj; |
288 | list.remove(rowOffset); |
289 | if(list.getSize() > 0) return; |
290 | } |
291 | IndexNode node = (IndexNode)nodeList.get(nodeList.size()-1);; |
292 | node.clearValue(); |
293 | for(int i = nodeList.size()-2; i >= 0; i--){ |
294 | if(!node.isEmpty()) |
295 | break; |
296 | IndexNode parent = (IndexNode)nodeList.get(i); |
297 | parent.removeNode( node.getDigit() ); |
298 | node = parent; |
299 | } |
300 | } |
301 | |
302 | |
303 | final private IndexNode findNull(IndexNode page) throws SQLException{ |
304 | return page.getChildNode( (char)0 ); |
305 | } |
306 | |
307 | |
308 | final private IndexNode addNull(IndexNode page, long rowOffset, boolean isLastValue) throws SQLException{ |
309 | if(isLastValue){ |
310 | page.addNode( (char)0, rowOffset ); |
311 | return null; |
312 | }else |
313 | return page.addRoot((char)0); |
314 | } |
315 | |
316 | |
317 | final private IndexNode find(IndexNode node, long key, int digitCount, ArrayList nodeList) throws SQLException{ |
318 | for(int i=digitCount-1; i>=0; i--){ |
319 | char digit = (char)(key >> (i<<4)); |
320 | node = node.getChildNode(digit); |
321 | |
322 | if(node == null) return null; |
323 | if(nodeList != null) nodeList.add(node); |
324 | |
325 | if(equals(node.getRemainderValue(), key, i)){ |
326 | return node; |
327 | } |
328 | } |
329 | return node; |
330 | } |
331 | |
332 | |
333 | /** |
334 | * The key has a binary sort order. This means the most significate byte is in the high byte. |
335 | * @param digitCount The count of 16Bit digits. |
336 | */ |
337 | final private IndexNode add(IndexNode node, long rowOffset, long key, boolean isLastValue, int digitCount) throws SQLException{ |
338 | for(int i=digitCount-1; i>=0; i--){ |
339 | char digit = (char)(key >> (i<<4)); |
340 | if(i == 0){ |
341 | if(isLastValue){ |
342 | node.addNode( digit, rowOffset ); |
343 | return null; |
344 | } |
345 | return node.addRoot(digit); |
346 | } |
347 | node = node.addNode(digit); |
348 | if(node.isEmpty()){ |
349 | if(isLastValue){ |
350 | node.addRemainderKey( rowOffset, key, i ); |
351 | return null; |
352 | } |
353 | return node.addRootValue( key, i); |
354 | }else |
355 | if(equals(node.getRemainderValue(), key, i)){ |
356 | if(isLastValue){ |
357 | node.saveValue( rowOffset); |
358 | return null; |
359 | } |
360 | return node.addRoot(); |
361 | } |
362 | } |
363 | throw new Error(); |
364 | } |
365 | |
366 | |
367 | final private IndexNode find(IndexNode node, char[] key, ArrayList nodeList) throws SQLException{ |
368 | int length = key.length; |
369 | int i=-1; |
370 | while(true){ |
371 | // the first digit include 0-null; 1-empty; 2 another value |
372 | char digit = (i<0) ? (length == 0 ? (char)1 : 2) |
373 | : (key[i]); |
374 | node = node.getChildNode(digit); |
375 | |
376 | if(node == null) return null; |
377 | if(nodeList != null) nodeList.add(node); |
378 | if(++i == length){ |
379 | return node; |
380 | } |
381 | |
382 | if(equals(node.getRemainderValue(), key, i)){ |
383 | return node; |
384 | } |
385 | } |
386 | } |
387 | |
388 | |
389 | /** |
390 | * Add a byte array to the Index. |
391 | */ |
392 | final private IndexNode add(IndexNode node, long rowOffset, char[] key, boolean isLast) throws SQLException{ |
393 | int length = key.length; |
394 | int i=-1; |
395 | while(true){ |
396 | // the first digit include 0-null; 1-empty; 2 another value |
397 | char digit = (i<0) ? (length == 0 ? (char)1 : 2) |
398 | : (key[i]); |
399 | if(++i == length){ |
400 | if(isLast){ |
401 | node.addNode( digit, rowOffset ); |
402 | return null; |
403 | } |
404 | return node.addRoot(digit); |
405 | } |
406 | node = node.addNode(digit); |
407 | if(node.isEmpty()){ |
408 | if(isLast){ |
409 | node.addRemainderKey( rowOffset, key, i ); |
410 | return null; |
411 | } |
412 | return node.addRootValue( key, i ); |
413 | }else |
414 | if(equals(node.getRemainderValue(), key, i)){ |
415 | if(isLast){ |
416 | node.saveValue(rowOffset); |
417 | return null; |
418 | } |
419 | return node.addRoot(); |
420 | } |
421 | } |
422 | } |
423 | |
424 | |
425 | /** |
426 | * Remove all enties |
427 | */ |
428 | final void clear(){ |
429 | rootPage.clear(); |
430 | } |
431 | /*================================================================ |
432 | * Normalize functions |
433 | * convert the value to a binary with identical sort order |
434 | * like the orginal values. |
435 | ================================================================*/ |
436 | |
437 | |
438 | final static private int floatToBinarySortOrder(float value){ |
439 | int intValue = Float.floatToIntBits(value); |
440 | return (intValue<0) ? |
441 | ~intValue : |
442 | intValue ^ 0x80000000; |
443 | } |
444 | |
445 | final static private long doubleToBinarySortOrder(double value){ |
446 | long intValue = Double.doubleToLongBits(value); |
447 | return (intValue<0) ? |
448 | ~intValue : |
449 | intValue ^ 0x8000000000000000L; |
450 | } |
451 | |
452 | final static private int shortToBinarySortOrder(int value){ |
453 | return value ^ 0x8000; |
454 | } |
455 | |
456 | final static private int intToBinarySortOrder(int value){ |
457 | return value ^ 0x80000000; |
458 | } |
459 | |
460 | final static private long longToBinarySortOrder(long value){ |
461 | return value ^ 0x8000000000000000L; |
462 | } |
463 | |
464 | |
465 | final static private char[] stringToBinarySortOrder(String value, boolean needTrim){ |
466 | int length = value.length(); |
467 | if(needTrim){ |
468 | while(length > 0 && value.charAt(length-1) == ' ') length--; |
469 | } |
470 | char[] puffer = new char[length]; |
471 | for(int i=0; i<length; i++){ |
472 | puffer[i] = Character.toLowerCase(Character.toUpperCase( value.charAt(i) )); |
473 | } |
474 | return puffer; |
475 | } |
476 | |
477 | |
478 | final static private char[] bytesToBinarySortOrder(byte[] value){ |
479 | int length = value.length; |
480 | char[] puffer = new char[length]; |
481 | for(int i=0; i<length; i++){ |
482 | puffer[i] = (char)(value[i] & 0xFF); |
483 | } |
484 | return puffer; |
485 | } |
486 | |
487 | |
488 | final static private char[] numericToBinarySortOrder(MutableNumeric numeric){ |
489 | int[] value = numeric.value; |
490 | int count = 1; |
491 | int i; |
492 | for(i=0; i<value.length; i++){ |
493 | if(value[i] != 0){ |
494 | count = 2*(value.length - i)+1; |
495 | break; |
496 | } |
497 | } |
498 | char[] puffer = new char[count]; |
499 | puffer[0] = (char)count; |
500 | for(int c=1; c<count;){ |
501 | puffer[c++] = (char)(value[i] >> 16); |
502 | puffer[c++] = (char)value[i++]; |
503 | } |
504 | return puffer; |
505 | } |
506 | |
507 | |
508 | /*================================================================ |
509 | * |
510 | * Functions for reading the index. |
511 | * |
512 | ================================================================*/ |
513 | |
514 | |
515 | |
516 | private final boolean equals(char[] src1, char[] src2, int offset2){ |
517 | if(src1 == null) return false; |
518 | int length = src1.length; |
519 | if(length != src2.length - offset2) return false; |
520 | for(int i=0; i<length; i++){ |
521 | if(src1[i] != src2[i+offset2]) return false; |
522 | } |
523 | return true; |
524 | } |
525 | |
526 | |
527 | private final boolean equals(char[] src1, long src2, int charCount){ |
528 | if(src1 == null) return false; |
529 | int length = src1.length; |
530 | if(length != charCount) return false; |
531 | for(int i=0, d = charCount-1; i<length; i++){ |
532 | if(src1[i] != (char)((src2 >> (d-- << 4)))) return false; |
533 | } |
534 | return true; |
535 | } |
536 | } |