package joins;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Random;

/**
 * An in-memory list of integers that can be equi-joined with another list,
 * either by nested loops or by a hash join, plus a small timing harness.
 *
 * <p>Note that this class is named {@code Iterator} but is unrelated to
 * {@link java.util.Iterator}; it does not implement any iteration protocol.
 */
public class Iterator {
    /** Number of items; set once at construction from the requested size or the list size. */
    int size;

    /** The items themselves. The list is stored by reference, not copied. */
    ArrayList<Integer> data;

    /** Source of random values used by the generating constructor. */
    Random r = new Random();

    /**
     * Builds an iterator holding {@code size} random integers, each computed as
     * {@code (int) (r.nextDouble() * range)}.
     *
     * @param range multiplier applied to a random double in [0, 1) before truncation
     * @param size  number of values to generate
     */
    Iterator(int range, int size) {
        this.size = size;
        // Presize to avoid repeated growth for large inputs; a negative size yields an empty list.
        this.data = new ArrayList<>(Math.max(size, 0));
        for (int i = 0; i < size; i++) {
            data.add((int) (r.nextDouble() * range));
        }
    }

    /**
     * Wraps an existing list. The list is not copied, so later changes to it
     * are visible through this object (while {@link #getSize()} stays fixed).
     *
     * @param data the values to expose; must not be {@code null}
     */
    Iterator(ArrayList<Integer> data) {
        this.data = data;
        this.size = data.size();
    }

    /** @return the number of items recorded at construction time */
    public int getSize() {
        return size;
    }

    /**
     * @param i zero-based position
     * @return the value stored at position {@code i}
     */
    public int getItem(int i) {
        return data.get(i);
    }

    /** @return the backing list itself (not a copy) */
    public ArrayList<Integer> getData() {
        return data;
    }

    /**
     * Equi-joins this iterator with {@code other} by comparing every pair of items.
     *
     * @param other the right-hand input
     * @return a new iterator with one copy of the value per matching pair,
     *         ordered by this iterator's positions
     */
    public Iterator nestedLoopsJoin(Iterator other) {
        ArrayList<Integer> result = new ArrayList<>();
        int otherSize = other.getSize();
        for (int i = 0; i < size; i++) {
            // Unbox once per outer item so the inner loop compares primitives.
            int left = data.get(i);
            for (int j = 0; j < otherSize; j++) {
                if (left == other.getItem(j)) {
                    result.add(left);
                }
            }
        }
        return new Iterator(result);
    }

    /** Times a single nested-loops join and prints the elapsed time and result count. */
    static void run1NestedLoopsJoin(Iterator it1, Iterator it2) {
        long start = System.currentTimeMillis();
        Iterator resJoin = it1.nestedLoopsJoin(it2);
        long stop = System.currentTimeMillis();
        printTiming(start, stop, resJoin);
    }

    /** Times {@code (it1 join it2) join it3} using nested loops and prints the outcome. */
    static void run2NestedLoopsJoins(Iterator it1, Iterator it2, Iterator it3) {
        long start = System.currentTimeMillis();
        Iterator resJoin = it1.nestedLoopsJoin(it2).nestedLoopsJoin(it3);
        long stop = System.currentTimeMillis();
        printTiming(start, stop, resJoin);
    }

    /**
     * Equi-joins this iterator with {@code other} using a hash table.
     *
     * <p>The hash table (value to occurrence count) is built on the input that is
     * not larger; the strictly larger input is then scanned and each of its items
     * is emitted once per occurrence in the hashed input. When both inputs have
     * the same size, {@code other} is the scanned side.
     *
     * @param other the other join input
     * @return a new iterator with one copy of the value per matching pair,
     *         ordered by the scanned input's positions
     */
    public Iterator hashJoin(Iterator other) {
        Iterator inner;
        Iterator outer;
        if (this.size > other.getSize()) {
            outer = this;
            inner = other;
        } else {
            outer = other;
            inner = this;
        }

        HashMap<Integer, Integer> hashTable = new HashMap<>();
        fillHashTable(hashTable, inner);

        ArrayList<Integer> result = new ArrayList<>();
        int outerSize = outer.getSize();
        for (int i = 0; i < outerSize; i++) {
            int item = outer.getItem(i);
            Integer matches = hashTable.get(item);
            if (matches != null) {
                for (int j = 0; j < matches; j++) {
                    result.add(item);
                }
            }
        }
        return new Iterator(result);
    }

    /**
     * Adds, for every item of {@code iter}, one occurrence to that item's counter
     * in {@code hashTable}.
     */
    private void fillHashTable(HashMap<Integer, Integer> hashTable, Iterator iter) {
        int n = iter.getSize();
        for (int i = 0; i < n; i++) {
            hashTable.merge(iter.getItem(i), 1, Integer::sum);
        }
    }

    /** Times a single hash join and prints the elapsed time and result count. */
    static void run1HashJoin(Iterator it1, Iterator it2) {
        long start = System.currentTimeMillis();
        Iterator resJoin = it1.hashJoin(it2);
        long stop = System.currentTimeMillis();
        printTiming(start, stop, resJoin);
    }

    /** Times {@code (it1 join it2) join it3} using hash joins and prints the outcome. */
    static void run2HashJoins(Iterator it1, Iterator it2, Iterator it3) {
        long start = System.currentTimeMillis();
        Iterator resJoin = it1.hashJoin(it2).hashJoin(it3);
        long stop = System.currentTimeMillis();
        printTiming(start, stop, resJoin);
    }

    /** Prints the elapsed milliseconds and result size in the format shared by all run helpers. */
    private static void printTiming(long start, long stop, Iterator result) {
        System.out.println((stop - start) + " ms, " + result.getSize() + " results");
    }

    /**
     * Generates three random inputs and prints timings for several nested-loops
     * and hash join plans over them.
     */
    public static void main(String[] argv) throws IOException {
        Iterator it1 = new Iterator(4000, 1000000);
        Iterator it2 = new Iterator(50, 1000);
        Iterator it3 = new Iterator(50, 1000);

        System.out.println("\nNestedLoops(it1, it2)");
        run1NestedLoopsJoin(it1, it2);
        System.out.println("\nNestedLoops(NestedLoops(it1, it2), it3): ");
        run2NestedLoopsJoins(it1, it2, it3);
        System.out.println("\nNestedLoops(NestedLoops(it3, it2), it1): ");
        run2NestedLoopsJoins(it3, it2, it1);

        System.out.println("\nHashJoin(it1, it2)");
        run1HashJoin(it1, it2);
        System.out.println("\nHashJoin(HashJoin(it1, it2), it3): ");
        run2HashJoins(it1, it2, it3);

        // The plan was to also dump these inputs into files and let Postgres do the
        // join, for comparison, but that has not been coded yet.
        // it1.dump("/Users/ioanamanolescu/data/it1.csv");
        // it2.dump("/Users/ioanamanolescu/data/it2.csv");
        // it3.dump("/Users/ioanamanolescu/data/it3.csv");
    }

    /**
     * Writes the items to {@code fileName}, one decimal value per line, each line
     * terminated by {@code '\n'}.
     *
     * @param fileName path of the file to create or overwrite
     * @throws IOException if the file cannot be opened or written
     */
    public void dump(String fileName) throws IOException {
        // try-with-resources closes the stream even when a write fails.
        try (OutputStream os = new BufferedOutputStream(new FileOutputStream(new File(fileName)))) {
            for (Integer value : data) {
                // OutputStream.write(int) would emit only the low-order byte, so write the digits as text.
                os.write(value.toString().getBytes(StandardCharsets.US_ASCII));
                os.write('\n');
            }
        }
    }
}