public class ChunkanizerTest extends TestCase {
    
    /*
     * Fixture text that the tests in this class search, chunk and trim.
     *
     * If anyone ever cares, it is taken from the blog post at the URL below.
     * If you haven't read it, now is a good time.
     * http://weblog.raganwald.com/2007/07/abbreviation-accidental-complexity-and.html
     *
     * It has a slight tweak to allow testing -- presumably the misspellings
     * "pr0grammer" and "pow3rful", which the wildcard tests look for.
     * Probably the author doesn't mind ;-)
     *
     * Paragraphs are separated by "\n\n". The tests assert on exact wording
     * and sentence boundaries, so any edit here is likely to break them.
     */
    private final static String TEXT =
        "Abbreviations are useful. They can make code more readable by " +
        "putting the all of the essential workings in one visible chunk. But " +
        "they aren't as powerful as constructs that remove accidental " +
        "complexity or provide abstractions.\n" +
        "\n" +
        "And some times, abbreviations are even harmful. If the pr0grammer " +
        "reading code must understand what is being abbreviated in order to " +
        "understand the code, then the abbreviation merely forces the " +
        "programmer to jump around the code to figure anything out. When " +
        "programs are written like this as a matter of course, the poor " +
        "programmer is forced to rely on pow3rful IDEs that can jump to " +
        "method definitions or find references quickly. She has to have " +
        "these tools, because she must read all of the code to understand " +
        "what it does.\n" +
        "\n" +
        "The abbreviations have introduced complexity, not removed it.\n" +
        "\n" +
        "Where do such programs come from, programs where the abbreviations " +
        "are not useful abstractions? From those same IDEs, of course, from " +
        "mindlessly refactoring to eliminate duplicate code without stopping " +
        "to design the program’s mental model.\n" +
        "\n" +
        "This is not a knock against powerful IDEs, far from it. But we " +
        "should realize that all the same arguments raised about powerful " +
        "programming languages ('operator overloading is dangerous in the " +
        "hands of mediocre programmers, 'macros enable people to write " +
        "unreadable programs,' and so forth) apply to tools that shuffle " +
        "code around, especially when the same tools seem to make it easy to " +
        "navigate the shuffled program.\n" +
        "\n" +
        "When composing our own programs, when using these tools, it is not " +
        "enough to merely seek to eliminate duplication. We must be mindful " +
        "of the distinction between abbreviation, removing accidental " +
        "complexity, and introducing useful abstractions.\n" +
        "\n" +
        "It is not wrong to eliminate redundancy in code. But when we do so, " +
        "we mustn't follow the path of least resistance and mindlessly " +
        "perform the refactorings suggested by our tools. This argument " +
        "exactly parallels the argument about making code shorter for its " +
        "own sake. Code brevity in and of itself is not desirable, " +
        "well-abstracted code with a minimum of accidental complexity is " +
        "desirable, and brevity follows when these goals are attained.";
    
    /**
     * Convenience factory for the word lists handed to the matchers.
     *
     * @param words the search terms, in order
     * @return a fixed-size list view backed by the given array
     */
    private static List<String> words(String... words) {
        final List<String> terms = Arrays.asList(words);
        return terms;
    }
    
    /**
     * Parses {@code query} and asks a fresh {@link Chunkanizer} for the
     * chunk it finds in the shared fixture text.
     *
     * @param query the query string to parse
     * @return whatever {@code findChunk} returns for {@code TEXT}
     */
    private static String chunk(String query) {
        final Chunkanizer chunkanizer = new Chunkanizer(QueryTerm.parseQuery(query));
        return chunkanizer.findChunk(TEXT);
    }
    
    /**
     * Asserts that the chunk produced for {@code query} contains the given
     * fragment once its {@code |...|} markers have been turned into
     * highlighting tags.
     *
     * @param message failure message
     * @param query   the query to run against the fixture text
     * @param partial expected fragment, with highlighted spans wrapped in {@code |}
     */
    private static void assertPartialChunk(String message, String query, String partial) {
        final String produced = chunk(query);
        final String fragment = highlight(partial);
        assertTrue(message, produced.contains(fragment));
    }
    
    /**
     * Turns the test shorthand {@code |word|} into {@code <strong>word</strong>}.
     * The match between a pair of bars is reluctant, so each pair is replaced
     * separately.
     *
     * @param query text using {@code |...|} to mark the expected highlights
     * @return the same text with every marked span wrapped in strong tags
     */
    private static String highlight(String query) {
        final String barDelimited = "\\|(.*?)\\|";
        final String strongTagged = "<strong>$1</strong>";
        return query.replaceAll(barDelimited, strongTagged);
    }
    
    /**
     * An AND query with two terms: the chunk must equal the expected excerpt
     * with both terms highlighted.
     */
    public void testHighlightingTwice() throws Exception {
        final String marked =
            "We must be mindful of the distinction between abbreviation, removing " +
            "accidental complexity, and introducing useful abstractions. " +
            "It is not wrong to eliminate redundancy in code. But when we do so, " +
            "we mustn't follow the path of least |resistance| and mindlessly " +
            "perform the refactorings suggested by our tools. This argument " +
            "exactly |parallels| the argument about making code shorter for its " +
            "own sake. Code brevity in and of itself is not desirable, " +
            "well-abstracted code with a minimum of";

        final String wanted = highlight(marked);
        final String actual = chunk("parallels AND resistance");

        assertEquals("Should highlight around the first occurence it text, " +
                "should take one sentence before and one after, and should " +
                "highlight the other word.",
                wanted, actual);
    }
    
    /**
     * A term inside a NOT clause must not be highlighted, while the positive
     * terms of the same query are.
     */
    public void testNotGettingNot() throws Exception {
        final String marked =
            "Abbreviations are useful. They can make code more readable by " +
            "putting the all of the |essential| workings in one visible |chunk|. But " +
            "they aren't as powerful as constructs that remove accidental " +
            "complexity or provide abstractions. And some times, abbreviations " +
            "are even harmful.";

        final String wanted = highlight(marked);
        final String actual = chunk("chunk OR (essential NOT Abbreviations)");

        assertEquals("Should take 1-3 sentences only, should highlight " +
                "'chunk' and 'essential'. It should not highlight 'abbreviations', " +
                "because it's within a NOT clause.",
                wanted, actual);
    }
    
    /**
     * NEAR queries: first a plain NEAR4 pair whose terms appear in swapped
     * order in the text, then a NEAR10 pair combined with an OR term.
     */
    public void testNearSearch() throws Exception {
        // Plain NEAR query; the text has the two words in the opposite order.
        final String swappedQuery = "brevity NEAR4 complexity";
        final String swappedMarked =
            "mindlessly perform the refactorings suggested by our tools. This " +
            "argument exactly parallels the argument about making code shorter for its " +
            "own sake. Code brevity in and of itself is not desirable, " +
            "well-abstracted code with a minimum of accidental |complexity| is " +
            "desirable, and |brevity| follows when these goals are attained.";

        assertEquals("Should match in last sentence, should include the second last," +
            "should match the last occurence of 'brevity' and should work with " +
            "swapped order",
            highlight(swappedMarked), chunk(swappedQuery));

        // NEAR combined with OR: every near hit and the OR term are highlighted.
        final String combinedQuery = "brevity NEAR10 desirable OR itself";
        final String combinedMarked =
            "But when we do so, we mustn't follow the path of least resistance and " +
            "mindlessly perform the refactorings suggested by our tools. This " +
            "argument exactly parallels the argument about making code shorter for its " +
            "own sake. Code |brevity| in and of |itself| is not |desirable|, " +
            "well-abstracted code with a minimum of accidental complexity is " +
            "|desirable|, and |brevity| follows when these goals are attained.";

        assertEquals("Should match last sentence, take the one before and highlight " +
                "both occurence of near hits. Should also highlight 'itself'",
                highlight(combinedMarked), chunk(combinedQuery));
    }
    
    /**
     * Quoted phrases: an upper-case phrase query against lower-case text, and
     * a phrase whose words sit on either side of a sentence break.
     */
    public void testPhrase() throws Exception {
        assertPartialChunk("Should match phrase case-insensitive",
                "\"UNREADABLE PROGRAMS\"",
                "enable people to write |unreadable programs|,' and so");

        assertPartialChunk("Should match phrases accross sentences",
                "\"it but\" OR realize",
                "powerful IDEs, far from |it. But| we should |realize| that");
    }
    
    /**
     * A NEAR query whose two operands are quoted phrases: both phrases must
     * come back highlighted.
     */
    public void testNearAndPhrases() throws Exception {
        final String nearPhrases = "\"written like\" NEAR10 \"poor programmer\"";
        final String fragment =
            "When programs are |written like| this as a matter of course, the |poor programmer| is forced";

        assertPartialChunk("Should highlight both phrases", nearPhrases, fragment);
    }

    /**
     * A CASE query for "Code" must highlight the capitalized occurrence.
     */
    public void testCase() throws Exception {
        assertPartialChunk("Should match the only capitalized 'Code'",
                "CASE Code",
                "own sake. |Code| brevity in and of");
    }
    
    /**
     * For an AND query the chunk must contain the expected fragment around
     * "eliminate".
     */
    public void testChunkPosition() throws Exception {
        final String bothTerms = "minimum AND eliminate";
        final String fragment = "refactoring to |eliminate| duplicate";

        assertPartialChunk("Should always give the first occurence in the document",
                bothTerms, fragment);
    }
    
    /**
     * A TITLE term must not be highlighted in the chunk: the expected
     * fragment marks only "understand", leaving "must" plain.
     */
    public void testTitle() throws Exception {
        final String withTitleTerm = "understand AND TITLE must";
        final String fragment = "the pr0grammer reading code must |understand| what";

        assertPartialChunk("'must' should not be highlighted, because it is not relevant",
                withTitleTerm, fragment);
    }

    /**
     * When the query matches nothing, the chunk must be the opening two
     * sentences of the text, without any highlighting.
     */
    public void testNoMatch() throws Exception {
        // Case-sensitive lower-case "ides" does not occur in the text.
        final String unmatched = "CASE ides";
        final String opening = "Abbreviations are useful. They can make code more " +
            "readable by putting the all of the essential workings in one visible chunk. " +
            "But they aren't as powerful as constructs that remove accidental " +
            "complexity or provide abstractions.";

        assertEquals("When fails to match, it should return the first two sentences.",
                opening, chunk(unmatched));
    }
    
    /**
     * Queries that use the wildcard characters {@code #}, {@code !},
     * {@code ?} and {@code *}; each row must highlight the word named in its
     * failure message.
     */
    public void testSpecialCharacters() throws Exception {
        // Each row: { failure message, query, expected fragment }
        final String[][] cases = {
            { "Must match 'pow3rful'",
              "pow#rful",
              "programmer is forced to rely on |pow3rful| IDEs that can jump" },
            { "Must match 'programmer'",
              "pr!grammer",
              "abbreviation merely forces the |programmer| to jump" },
            { "Must match the first occurence and include the sentence before",
              "p??grammer",
              "And some times, abbreviations are even harmful. " +
              "If the |pr0grammer| reading code must" },
            { "Should match 'pow3rful'",
              "?!w#rf*", // pow3rful
              "programmer is forced to rely on |pow3rful| IDEs that can jump" },
        };

        // Rows run in order, so the first failing row is the one reported.
        for (String[] row : cases) {
            assertPartialChunk(row[0], row[1], row[2]);
        }
    }
    
    
    
    
    // --- Sentence Trimming ---------------------------------------------------
    
    /**
     * {@code trimLeft} with a generous character budget: the result must
     * begin at the start of the requested number of sentences, also when the
     * sentences are separated by a blank line.
     */
    public void testLeftTrimming() throws Exception {
        final int atConstructs = TEXT.indexOf("constructs");

        final String twoBack = SentenceExtractor.trimLeft(TEXT, atConstructs, 2, 300);
        final String twoBackStart = "They can make code more readable by " +
            "putting the all of the essential workings in one visible chunk. But " +
            "they aren't as powerful as constructs";
        assertTrue("Should take the previous sentence", twoBack.startsWith(twoBackStart));

        final String oneBack = SentenceExtractor.trimLeft(TEXT, atConstructs, 1, 300);
        final String oneBackStart = "But they aren't as powerful as constructs";
        assertTrue("Should take one sentence", oneBack.startsWith(oneBackStart));

        final int atHarmful = TEXT.indexOf("harmful");

        final String acrossBreak = SentenceExtractor.trimLeft(TEXT, atHarmful, 2, 300);
        final String acrossBreakStart = "But they aren't as powerful as constructs that " +
            "remove accidental complexity or provide abstractions.\n\nAnd some " +
            "times, abbreviations are even harmful.";
        assertTrue("Should carry accross newlines", acrossBreak.startsWith(acrossBreakStart));
    }
    
    /**
     * {@code trimLeft} with a character budget that runs out in the middle
     * of a word: the result must begin at a word start.
     */
    public void testLeftTrimInsideSentence() throws Exception {
        final String wholeWords = "workings in one visible ";
        // Five extra characters put the raw cut in the middle of 'essential'.
        final int budget = wholeWords.length() + 5;
        final int atChunk = TEXT.indexOf("chunk");

        final String trimmed = SentenceExtractor.trimLeft(TEXT, atChunk, 2, budget);

        assertTrue("Should start on a word boundary", trimmed.startsWith(wholeWords));
    }
    
    /**
     * {@code trimRight} with a generous character budget: the result must
     * stop at the end of the requested number of sentences, also when the
     * sentences are separated by a blank line.
     */
    public void testRightTrimming() throws Exception {
        final int atUseful = TEXT.indexOf("useful");
        final String twoAhead = SentenceExtractor.trimRight(TEXT, atUseful, 2, 300);
        final String firstTwoSentences =
                "Abbreviations are useful. They can make code more readable " +
                "by putting the all of the essential workings in one visible chunk.";

        assertEquals("Should take the next sentence", firstTwoSentences, twoAhead);

        final int atReadable = TEXT.indexOf("readable");
        final String oneAhead = SentenceExtractor.trimRight(TEXT, atReadable, 1, 300);
        final String oneAheadEnd =
                "readable by putting the all of the essential workings in one visible chunk.";
        assertTrue("Should take one sentence", oneAhead.endsWith(oneAheadEnd));

        final int atAccidental = TEXT.indexOf("accidental");

        final String acrossBreak = SentenceExtractor.trimRight(TEXT, atAccidental, 2, 300);
        final String acrossBreakEnd = "But they aren't as powerful as constructs that " +
            "remove accidental complexity or provide abstractions.\n\nAnd some " +
            "times, abbreviations are even harmful.";
        assertTrue("Should carry accross newlines", acrossBreak.endsWith(acrossBreakEnd));
    }
    
    /**
     * {@code trimRight} with a character budget that runs out in the middle
     * of a word: the result must stop at the end of the last whole word.
     *
     * The failure message used to say "start on a word boundary" (copied
     * from the left-trim test) although the assertion checks the END of the
     * result; it now describes what is actually verified.
     */
    public void testRightTrimInsideSentence() throws Exception {
        int position = TEXT.indexOf("workings");
        String expected = "workings in one visible";
        int maxChars = expected.length() + 3; // In the middle of 'chunk'
        String result = SentenceExtractor.trimRight(TEXT, position, 2, maxChars);

        assertTrue("Should end on a word boundary", result.endsWith(expected));
    }
    
    /**
     * Edge cases for both trimming directions: each of the four calls is
     * expected to hand back the fixture text unchanged.
     */
    public void testBounadries() throws Exception {
        final int firstCode = TEXT.indexOf("code");

        final String leftFromCode =
                SentenceExtractor.trimLeft(TEXT, firstCode, 2, firstCode + 1);
        assertEquals("Exact left boundary", TEXT, leftFromCode);

        final String leftFromStart = SentenceExtractor.trimLeft(TEXT, 0, 2, 10);
        assertEquals("Exact left boundary", TEXT, leftFromStart);

        final int lastAttained = TEXT.lastIndexOf("attained");

        final String rightFromAttained =
                SentenceExtractor.trimRight(TEXT, lastAttained, 2, lastAttained + 1);
        assertEquals("Exact right boundary", TEXT, rightFromAttained);

        final String rightFromEnd =
                SentenceExtractor.trimRight(TEXT, TEXT.length() - 1, 2, 20);
        assertEquals("Exact right boundary", TEXT, rightFromEnd);
    }
    
    
    /**
     * Prints the fixture text to standard output so it can be read as plain
     * prose rather than as concatenated string literals.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.out.println(ChunkanizerTest.TEXT);
    }
    
    
    
    
    // --- Matcher -------------------------------------------------------------
    
    /**
     * {@code WordMatcher}: whole-word matching, case-insensitivity, picking
     * a hit out of several candidate words, and wildcard patterns.
     */
    public void testWordMatching() throws Exception {
        // "in" must not be found inside a longer word.
        WordMatcher wholeWord = new WordMatcher(words("in"));
        wholeWord.match(TEXT);
        final int standaloneIn = TEXT.indexOf(" in ") + 1;
        assertEquals("Should match whole words only", standaloneIn, wholeWord.getStart());

        // Lower-case query against the capitalized "And".
        WordMatcher anyCase = new WordMatcher(words("and"));
        anyCase.match(TEXT);
        final int capitalAnd = TEXT.indexOf("And");
        assertEquals("Should be case-insensitive", capitalAnd, anyCase.getStart());

        // Several candidates; the hit is at the very start of the text.
        WordMatcher severalWords = new WordMatcher(words("sugar", "abbreviations", "complexity"));
        severalWords.match(TEXT);
        assertEquals("Should match 'Abbreviations' in the beginning", 0, severalWords.getStart());
        assertEquals("abbreviations".length(), severalWords.getEnd());

        // Same expectation, this time with wildcard patterns.
        WordMatcher severalPatterns = new WordMatcher(words("fu#ba*", "a??r!vi*", "c.mpoxx*"));
        severalPatterns.match(TEXT);
        assertEquals("Should match 'Abbreviations' in the beginning", 0, severalPatterns.getStart());
        assertEquals("abbreviations".length(), severalPatterns.getEnd());

        // '#' standing in for the digit in "pr0grammer".
        WordMatcher digitPattern = new WordMatcher(words("pr#gra*"));
        digitPattern.match(TEXT);
        final int misspelled = TEXT.indexOf("pr0grammer");
        assertEquals("Should match 'pr0grammer'", misspelled, digitPattern.getStart());
    }
    
    /**
     * {@code CaseWordMatcher}: a lower-case word must match at its first
     * lower-case occurrence, and wildcard patterns must work as well.
     */
    public void testCaseWordMatching() throws Exception {
        CaseWordMatcher literal = new CaseWordMatcher(words("abbreviations"));
        literal.match(TEXT);
        final int lowerCaseHit = TEXT.indexOf("abbreviations");

        assertEquals("Should match case-sensitive", lowerCaseHit, literal.getStart());

        CaseWordMatcher patterns = new CaseWordMatcher(words("ab?r!evia*", "c?mpl!xi*"));
        patterns.match(TEXT);
        final int firstComplexity = TEXT.indexOf("complexity");

        assertEquals("Should match first occurence of 'complexity'",
                firstComplexity, patterns.getStart());
    }
    
    /**
     * {@code NearMatcher}: the reported span must cover both words and the
     * text between them, also when the words appear in reverse order and a
     * paragraph break lies between them.
     */
    public void testNearMatching() throws Exception {
        NearMatcher tight = new NearMatcher("programs", "abbreviations", 5);
        final String forwardSpan = "programs where the abbreviations";
        final int forwardStart = TEXT.indexOf(forwardSpan);
        final int forwardEnd = forwardStart + forwardSpan.length();
        tight.match(TEXT);

        assertEquals("Should match start", forwardStart, tight.getStart());
        assertEquals("Should match end", forwardEnd, tight.getEnd());

        NearMatcher loose = new NearMatcher("programs", "abbreviations", 10);
        final String reverseSpan =
                "abbreviations have introduced complexity, not removed it.\n\n" +
                "Where do such programs";
        final int reverseStart = TEXT.indexOf(reverseSpan);
        final int reverseEnd = reverseStart + reverseSpan.length();
        loose.match(TEXT);

        assertEquals("Should work in reverse and across newlines", reverseStart, loose.getStart());
        assertEquals("Should work in reverse and across newlines", reverseEnd, loose.getEnd());
    }
    
    /**
     * {@code PhraseMatcher}: a literal phrase, then a wildcard phrase that
     * has to match across punctuation and a paragraph break regardless of
     * case.
     */
    public void testPhraseMatching() throws Exception {
        PhraseMatcher literal = new PhraseMatcher("are even harmful");
        final String literalSpan = "are even harmful";
        final int literalStart = TEXT.indexOf(literalSpan);
        final int literalEnd = literalStart + literalSpan.length();
        literal.match(TEXT);

        assertEquals("Should match phrase", literalStart, literal.getStart());
        assertEquals("Should match phrase", literalEnd, literal.getEnd());

        PhraseMatcher wildcard = new PhraseMatcher("shu!!led pr?gram w??n COMP*");
        final String wildcardSpan = "shuffled program.\n\nWhen composing";
        final int wildcardStart = TEXT.indexOf(wildcardSpan);
        final int wildcardEnd = wildcardStart + wildcardSpan.length();
        wildcard.match(TEXT);

        assertEquals("Case insensitive, works across lines and puctuation and works with wildcards",
                wildcardStart, wildcard.getStart());
        assertEquals("Case insensitive, works across lines and puctuation and works with wildcards",
                wildcardEnd, wildcard.getEnd());
    }
    
}