001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018
019package org.apache.commons.net.nntp;
020
021/**
022 * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
023 * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
024 * For his Java implementation, see <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
025 *
026 * @author rwinston <rwinston@checkfree.com>
027 *
028 */
029
030import java.util.HashMap;
031import java.util.Iterator;
032import java.util.List;
033
034public class Threader {
035    private ThreadContainer root;
036    private HashMap<String,ThreadContainer> idTable;
037    private int bogusIdCount = 0;
038
039    /**
040     * The client passes in a list of Threadable objects, and
041     * the Threader constructs a connected 'graph' of messages
042     * @param messages list of messages to thread
043     * @return null if messages == null or root.child == null
044     * @since 2.2
045     */
046    public Threadable thread(List<? extends Threadable> messages) {
047        return thread((Iterable<? extends Threadable>)messages);
048    }
049
050    /**
051     * The client passes in a list of Iterable objects, and
052     * the Threader constructs a connected 'graph' of messages
053     * @param messages iterable of messages to thread
054     * @return null if messages == null or root.child == null
055     * @since 3.0
056     */
057    public Threadable thread(Iterable<? extends Threadable> messages) {
058        if (messages == null) {
059            return null;
060        }
061
062        idTable = new HashMap<String,ThreadContainer>();
063
064        // walk through each Threadable element
065        for (Threadable t : messages) {
066            if (!t.isDummy()) {
067                buildContainer(t);
068            }
069        }
070
071        root = findRootSet();
072        idTable.clear();
073        idTable = null;
074
075        pruneEmptyContainers(root);
076
077        root.reverseChildren();
078        gatherSubjects();
079
080        if (root.next != null) {
081            throw new RuntimeException("root node has a next:" + root);
082        }
083
084        for (ThreadContainer r = root.child; r != null; r = r.next) {
085            if (r.threadable == null) {
086                r.threadable = r.child.threadable.makeDummy();
087            }
088        }
089
090        Threadable result = (root.child == null ? null : root.child.threadable);
091        root.flush();
092        root = null;
093
094        return result;
095    }
096
097    /**
098     *
099     * @param threadable
100     */
101    private void buildContainer(Threadable threadable) {
102        String id = threadable.messageThreadId();
103        ThreadContainer container = idTable.get(id);
104
105        // A ThreadContainer exists for this id already. This should be a forward reference, but may
106        // be a duplicate id, in which case we will need to generate a bogus placeholder id
107        if (container != null) {
108            if (container.threadable != null) { // oops! duplicate ids...
109                id = "<Bogus-id:" + (bogusIdCount++) + ">";
110                container = null;
111            } else {
112                // The container just contained a forward reference to this message, so let's
113                // fill in the threadable field of the container with this message
114                container.threadable = threadable;
115            }
116        }
117
118        // No container exists for that message Id. Create one and insert it into the hash table.
119        if (container == null) {
120            container = new ThreadContainer();
121            container.threadable = threadable;
122            idTable.put(id, container);
123        }
124
125        // Iterate through all of the references and create ThreadContainers for any references that
126        // don't have them.
127        ThreadContainer parentRef = null;
128        {
129            String[] references = threadable.messageThreadReferences();
130            for (int i = 0; i < references.length; ++i) {
131                String refString = references[i];
132                ThreadContainer ref = idTable.get(refString);
133
134                // if this id doesnt have a container, create one
135                if (ref == null) {
136                    ref = new ThreadContainer();
137                    idTable.put(refString, ref);
138                }
139
140                // Link references together in the order they appear in the References: header,
141                // IF they dont have a have a parent already &&
142                // IF it will not cause a circular reference
143                if ((parentRef != null)
144                    && (ref.parent == null)
145                    && (parentRef != ref)
146                    && !(ref.findChild(parentRef))) {
147                    // Link ref into the parent's child list
148                    ref.parent = parentRef;
149                    ref.next = parentRef.child;
150                    parentRef.child = ref;
151                }
152                parentRef = ref;
153            }
154        }
155
156        // parentRef is now set to the container of the last element in the references field. make that
157        // be the parent of this container, unless doing so causes a circular reference
158        if (parentRef != null
159            && (parentRef == container || container.findChild(parentRef)))
160        {
161            parentRef = null;
162        }
163
164        // if it has a parent already, its because we saw this message in a References: field, and presumed
165        // a parent based on the other entries in that field. Now that we have the actual message, we can
166        // throw away the old parent and use this new one
167        if (container.parent != null) {
168            ThreadContainer rest, prev;
169
170            for (prev = null, rest = container.parent.child;
171                rest != null;
172                prev = rest, rest = rest.next) {
173                if (rest == container) {
174                    break;
175                }
176            }
177
178            if (rest == null) {
179                throw new RuntimeException(
180                    "Didnt find "
181                        + container
182                        + " in parent"
183                        + container.parent);
184            }
185
186            // Unlink this container from the parent's child list
187            if (prev == null) {
188                container.parent.child = container.next;
189            } else {
190                prev.next = container.next;
191            }
192
193            container.next = null;
194            container.parent = null;
195        }
196
197        // If we have a parent, link container into the parents child list
198        if (parentRef != null) {
199            container.parent = parentRef;
200            container.next = parentRef.child;
201            parentRef.child = container;
202        }
203    }
204
205    /**
206     * Find the root set of all existing ThreadContainers
207     * @return root the ThreadContainer representing the root node
208     */
209    private ThreadContainer findRootSet() {
210        ThreadContainer root = new ThreadContainer();
211        Iterator<String> iter = idTable.keySet().iterator();
212
213        while (iter.hasNext()) {
214            Object key = iter.next();
215            ThreadContainer c = idTable.get(key);
216            if (c.parent == null) {
217                if (c.next != null) {
218                    throw new RuntimeException(
219                            "c.next is " + c.next.toString());
220                }
221                c.next = root.child;
222                root.child = c;
223            }
224        }
225        return root;
226    }
227
228    /**
229     * Delete any empty or dummy ThreadContainers
230     * @param parent
231     */
232    private void pruneEmptyContainers(ThreadContainer parent) {
233        ThreadContainer container, prev, next;
234        for (prev = null, container = parent.child, next = container.next;
235            container != null;
236            prev = container,
237                container = next,
238                next = (container == null ? null : container.next)) {
239
240            // Is it empty and without any children? If so,delete it
241            if (container.threadable == null && container.child == null) {
242                if (prev == null) {
243                    parent.child = container.next;
244                } else {
245                    prev.next = container.next;
246                }
247
248                // Set container to prev so that prev keeps its same value the next time through the loop
249                container = prev;
250            }
251
252            // Else if empty, with kids, and (not at root or only one kid)
253            else if (
254                container.threadable == null
255                    && container.child != null
256                    && (container.parent != null
257                        || container.child.next == null)) {
258                // We have an invalid/expired message with kids. Promote the kids to this level.
259                ThreadContainer tail;
260                ThreadContainer kids = container.child;
261
262                // Remove this container and replace with 'kids'.
263                if (prev == null) {
264                    parent.child = kids;
265                } else {
266                    prev.next = kids;
267                }
268
269                // Make each child's parent be this level's parent -> i.e. promote the children. Make the last child's next point to this container's next
270                // i.e. splice kids into the list in place of container
271                for (tail = kids; tail.next != null; tail = tail.next) {
272                    tail.parent = container.parent;
273                }
274
275                tail.parent = container.parent;
276                tail.next = container.next;
277
278                // next currently points to the item after the inserted items in the chain - reset that so we process the newly
279                // promoted items next time round
280                next = kids;
281
282                // Set container to prev so that prev keeps its same value the next time through the loop
283                container = prev;
284            } else if (container.child != null) {
285                // A real message , with kids
286                // Iterate over the children
287                pruneEmptyContainers(container);
288            }
289        }
290    }
291
292    /**
293     *  If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers.
294     */
295    private void gatherSubjects() {
296
297        int count = 0;
298
299        for (ThreadContainer c = root.child; c != null; c = c.next) {
300            count++;
301        }
302
303        // TODO verify this will avoid rehashing
304        HashMap<String, ThreadContainer> subjectTable = new HashMap<String, ThreadContainer>((int) (count * 1.2), (float) 0.9);
305        count = 0;
306
307        for (ThreadContainer c = root.child; c != null; c = c.next) {
308            Threadable threadable = c.threadable;
309
310            // No threadable? If so, it is a dummy node in the root set.
311            // Only root set members may be dummies, and they alway have at least 2 kids
312            // Take the first kid as representative of the subject
313            if (threadable == null) {
314                threadable = c.child.threadable;
315            }
316
317            String subj = threadable.simplifiedSubject();
318
319            if (subj == null || subj == "") {
320                continue;
321            }
322
323            ThreadContainer old = subjectTable.get(subj);
324
325            // Add this container to the table iff:
326            // - There exists no container with this subject
327            // - or this is a dummy container and the old one is not - the dummy one is
328            // more interesting as a root, so put it in the table instead
329            // - The container in the table has a "Re:" version of this subject, and
330            // this container has a non-"Re:" version of this subject. The non-"Re:" version
331            // is the more interesting of the two.
332            if (old == null
333                || (c.threadable == null && old.threadable != null)
334                || (old.threadable != null
335                    && old.threadable.subjectIsReply()
336                    && c.threadable != null
337                    && !c.threadable.subjectIsReply())) {
338                subjectTable.put(subj, c);
339                count++;
340            }
341        }
342
343        // If the table is empty, we're done
344        if (count == 0) {
345            return;
346        }
347
348        // subjectTable is now populated with one entry for each subject which occurs in the
349        // root set. Iterate over the root set, and gather together the difference.
350        ThreadContainer prev, c, rest;
351        for (prev = null, c = root.child, rest = c.next;
352            c != null;
353            prev = c, c = rest, rest = (rest == null ? null : rest.next)) {
354            Threadable threadable = c.threadable;
355
356            // is it a dummy node?
357            if (threadable == null) {
358                threadable = c.child.threadable;
359            }
360
361            String subj = threadable.simplifiedSubject();
362
363            // Dont thread together all subjectless messages
364            if (subj == null || subj == "") {
365                continue;
366            }
367
368            ThreadContainer old = subjectTable.get(subj);
369
370            if (old == c) { // That's us
371                continue;
372            }
373
374            // We have now found another container in the root set with the same subject
375            // Remove the "second" message from the root set
376            if (prev == null) {
377                root.child = c.next;
378            } else {
379                prev.next = c.next;
380            }
381            c.next = null;
382
383            if (old.threadable == null && c.threadable == null) {
384                // both dummies - merge them
385                ThreadContainer tail;
386                for (tail = old.child;
387                    tail != null && tail.next != null;
388                    tail = tail.next) {
389                    // do nothing
390                }
391
392                if (tail != null) { // protect against possible NPE
393                    tail.next = c.child;
394                }
395
396                for (tail = c.child; tail != null; tail = tail.next) {
397                    tail.parent = old;
398                }
399
400                c.child = null;
401            } else if (
402                old.threadable == null
403                    || (c.threadable != null
404                        && c.threadable.subjectIsReply()
405                        && !old.threadable.subjectIsReply())) {
406                // Else if old is empty, or c has "Re:" and old does not  ==> make this message a child of old
407                c.parent = old;
408                c.next = old.child;
409                old.child = c;
410            } else {
411                // else make the old and new messages be children of a new dummy container.
412                // We create a new container object for old.msg and empty the old container
413                ThreadContainer newc = new ThreadContainer();
414                newc.threadable = old.threadable;
415                newc.child = old.child;
416
417                for (ThreadContainer tail = newc.child;
418                    tail != null;
419                    tail = tail.next) 
420                {
421                    tail.parent = newc;
422                }
423
424                old.threadable = null;
425                old.child = null;
426
427                c.parent = old;
428                newc.parent = old;
429
430                // Old is now a dummy- give it 2 kids , c and newc
431                old.child = c;
432                c.next = newc;
433            }
434            // We've done a merge, so keep the same prev
435            c = prev;
436        }
437
438        subjectTable.clear();
439        subjectTable = null;
440
441    }
442
443
444    // DEPRECATED METHODS - for API compatibility only - DO NOT USE
445
446    /**
447     * The client passes in an array of Threadable objects, and
448     * the Threader constructs a connected 'graph' of messages
449     * @param messages array of messages to thread
450     * @return null if messages == null or root.child == null
451     * @deprecated (2.2) prefer {@link #thread(List)}
452     */
453    @Deprecated
454    public Threadable thread(Threadable[] messages) {
455        return thread(java.util.Arrays.asList(messages));
456    }
457
458}