#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

/* OVERVIEW:
 * Phylogeny reads a file containing a list of species names and genomes.
 * It then arranges the species into a phylogeny tree that minimizes
 * the number of mutations required to produce the tree.
 */

#include "Species.h"
#include "SpeciesSet.h"
#include "SpeciesTree.h"

static unsigned mutationDistance(const char *left,const char *right) {
/* EFFECTS: Returns the number of point mutations required to turn 
 *          the left genome into the right genome.
 *          Each mutation changes the value of one base.  
 *          (We are not allowing insertions, deletions, etc. for now.)
 */
    unsigned mutations;
    unsigned size;
    unsigned i;

    if (strlen(left) > strlen(right)) {
        mutations = (unsigned) (strlen(left)-strlen(right));
        size = (unsigned) strlen(right);
    } else {
        mutations = (unsigned) (strlen(right)-strlen(left));
        size = (unsigned) strlen(left);
    }

    for (i=0; i < size; i++)
        if (left[i] != right[i])
            mutations++;

    return (unsigned)mutations;
}

static unsigned parsimonyValue (SpeciesTree tree) {
    /* EFFECTS: Returns the parsimony value of tree. */

    unsigned score = 0;
    SpeciesTree left = SpeciesTree_getLeft(tree);
    SpeciesTree right = SpeciesTree_getRight(tree);
    Species root = SpeciesTree_getRoot(tree);

    if (!SpeciesTree_isNull (left)) {
        score = score + mutationDistance (Species_getGenome(root),
                                          Species_getGenome(SpeciesTree_getRoot(left)))
                      + parsimonyValue (left);
    }

    if (!SpeciesTree_isNull (right)) {
        score = score + mutationDistance (Species_getGenome(root),
                                          Species_getGenome(SpeciesTree_getRoot(right)))
                      + parsimonyValue (right);
    }

    return score;
}

/* Prototype for chooseRoot, defined below.
 * This is needed because search_rooted_trees and chooseRoot
 * are mutually recursive.
 * Note: if you need to add an annotation to chooseRoot, you must
 * change its definition in both places.
 */

static /*@only@*/ SpeciesTree chooseRoot (SpeciesSet set, Species grandparent);

static /*@only@*/ SpeciesTree findBestTreeRoot (SpeciesSet children,
                                 /*@dependent@*/ Species root) {
/* REQUIRES: root != NULL
 * EFFECTS:  Returns the best-scoring tree with the specified <root>
 *           and <children>.
 */
    unsigned best_score = (unsigned)-1;
    /*@only@*/ SpeciesTree best_tree = SpeciesTree_NULL;
    unsigned size = SpeciesSet_size(children);
    unsigned n_splits = (1 << size);
    unsigned i;

    /* Iterate over each of the n_splits ways of splitting the children
     * into two sets.
     */
    for (i=0; i < n_splits; i++) {
        unsigned score;
        SpeciesSet left = SpeciesSet_new ();
        SpeciesSet right = SpeciesSet_new ();
        SpeciesTree tree = SpeciesTree_new (root);

        /* Split the children into two sets */
        SpeciesSet_split(children,i,left,right);

        /* Arrange each subset into a subtree */
        SpeciesTree_setLeft (tree, chooseRoot (left, root));
        SpeciesTree_setRight (tree, chooseRoot (right, root));

        score = parsimonyValue (tree);

        if (score < best_score) {
            /* We must release the old best_tree before we assign
             * something else to it (and lose the reference */
            SpeciesTree_free(best_tree);
            best_score = score;
            best_tree = tree;
            tree = SpeciesTree_NULL;
        } else {
          /* Otherwise, we must free the temporary tree */
          SpeciesTree_free(tree);
          tree = SpeciesTree_NULL;
        }

        SpeciesSet_free(left);
        SpeciesSet_free(right);
    }

    if (!SpeciesTree_isEmpty(best_tree)) {
        return best_tree;
    } else {
        printf("BUG: best_tree is NULL\n");
        exit(EXIT_FAILURE);
    }
}

/*@only@*/ SpeciesTree chooseRoot (SpeciesSet set, Species grandparent) {
    /* REQUIRES: grandparent != NULL
    ** EFFECTS:  Determines which Species in <set> would make the best root
    **         under <grandparent>, and returns a tree with that root and all
    **         other elements of <set> as its children.
    */
    
    unsigned best_score = (unsigned)-1;
    /*@only@*/ SpeciesTree best_tree = NULL;
    
    if (SpeciesSet_isEmpty (set)) {
        return best_tree;
    }
    
    /* Iterate over each potential root of the tree */
    SpeciesSet_forall(set,root) {
        unsigned score;
        SpeciesTree tree;
        SpeciesSet children;
        
        /* Create a copy of the set of nodes, and remove the root */
        children = SpeciesSet_copy (set);
        SpeciesSet_remove (children, root);
        
        /* Find the best subtree with that root */
        tree = findBestTreeRoot (children, root);
        
        /* Compute the score of this subtree, and the mutationDistance
        * between this subtree's root and the node it branches off of 
        */
        score = parsimonyValue (tree) +
          mutationDistance (Species_getGenome(root), Species_getGenome(grandparent));
        
        if (score < best_score) {
            /* We must release the old best_tree before we assign
            * something else to it (and lose the reference */
            SpeciesTree_free(best_tree);
            best_score = score;
            best_tree = tree;
            tree = SpeciesTree_NULL;
        } else {
            /* Otherwise, we must free the temporary tree */
            SpeciesTree_free(tree);
            tree = SpeciesTree_NULL;
        }

        SpeciesSet_free(children);
    } end_SpeciesSet_forall;
    
    return best_tree;
}

# define MAX_LINE_LENGTH 1024

int main(int argc, char *argv[]) {
    if (argc != 2) {
        printf("Usage: phylogeny <file>\n");
        exit (EXIT_FAILURE);
    } else {
        /* Read species names and genomes from file */
        FILE *f;
        char *name;
        char *genome;
        char line[MAX_LINE_LENGTH];
        SpeciesSet set = SpeciesSet_new();
        Species root;
        bool gotroot = FALSE;
        
        f = fopen(argv[1], "rt");

        if (f == NULL) {
            fprintf(stderr,"%s: file not found\n",argv[1]);
            exit(EXIT_FAILURE);
        }
            
        while (fgets (line, MAX_LINE_LENGTH, f)) {
            char *genomestart;
            
            int index = 0;
            
            while (line[index] != '\0' && !isspace (line[index])) {
                index++;
            }
            
            if (line[index] == '\0' || index == 0) {
                fprintf (stderr, "Badly formatted line: %s\n", line);
                exit (EXIT_FAILURE);
            }

            name = (char *) malloc (sizeof (*name) * (index + 1));

            if (name == NULL) {
                fprintf (stderr, "Out of memory reading file.\n");
                exit (EXIT_FAILURE);
            }

            strncpy (name, line, (size_t) index);
            name[index] = '\0';
                
            if (SpeciesSet_contains (set,name) 
                  || (gotroot 
                      && /*@-usedef@*/  /* Splint reports a possibly undefined use of root here, but we know
                                           it is defined because of gotroot. */
                         (strcmp (Species_getName (root), name) == 0)
                         /*@=usedef@*/
                )) {
                fprintf (stderr, "Duplicate species in file: %s\n",name);
                exit (EXIT_FAILURE);
            } 

            /*
            ** Skip spaces
            */
            
            while (line[index] != '\0' && isspace (line[index])) {
                index++;
            }
            
            genomestart = &line[index];
            index = 0;
            
            while (genomestart[index] != '\0' && !isspace (genomestart[index])) {
                index++;
            }

            if (index == 0) {
                fprintf (stderr, "Badly formatted line (no genome): %s\n", line);
                exit (EXIT_FAILURE);
            }
            
            genome = (char *) malloc (sizeof (*genome) * (index + 1));

            if (genome == NULL) {
                fprintf (stderr, "Out of memory reading genome.\n");
                exit (EXIT_FAILURE);
            }

            strncpy (genome, genomestart, (size_t) index);
            genome[index] = '\0';
                
            if (!gotroot) {
                root = Species_new (name, genome);
                gotroot = TRUE;
            } else {
                Species snew = Species_new (name, genome);
                char *str = Species_toString(snew);
                fprintf (stderr, "Species: %s\n", str);
                free(str);
                Species_markOwned (snew);
                SpeciesSet_insert (set, snew);
            }
        }
        
        /* Find best tree with specified root, and print it */
        if (!gotroot) {
            printf("No root species (empty file)\n");
            exit(EXIT_FAILURE);
        } else {
            SpeciesTree tree;
            char *st;
            /* We know root is defined because of gotroot, but Splint can't determine this. */
            tree = findBestTreeRoot (set, /*@-usedef@*/ root /*@=usedef@*/);
            st = SpeciesTree_toString (tree);

            printf ("Best tree: \n%s\n", st);
            free (st);

            printf("Parsimony value: %u\n", parsimonyValue (tree));
            SpeciesTree_free(tree);
        }
        SpeciesSet_free(set);
        Species_free( /*@-usedef@*/ root /*@=usedef@*/ );
    }
    
    return EXIT_SUCCESS;
}