#!/usr/bin/perl
#############################################################################
#
#
#############################################################################
#############################################################################
#############################################################################
# Written for CIDR by Peter Askovich, 2015 ##################################
# Program takes list of genes as input, gets sequences from #################
# promoter2014 database and calculates clover scores for selected ###########
# promoters (based on Jaspar DB)    #########################################
# peter.askovich.cidresearch.org ##########################################
#############################################################################
#############################################################################

###############################################################################
#VARIABLE AND SUBROUTINE DECLARATIONS------------------------------------------
###############################################################################
use strict;
#$|++;
require Exporter;

use DBI;
use CGI qw(:standard);
use lib "modules";
use LWP;
# custom modules ###########
use errorHandling qw(
   printError
   $debugInfo
);
############################
#VARIABLE DECLARATIONS---------------------------------------------------------
my $testing = 0; # if true, extra information is printed
my %formValues;
my $debugInfo;
my $searchFormFile = 'files/searchForm.html';
my $resultTemplateFile = 'files/resultTemplate.html';
my $dbh;
my $tmpFastaFile;   #filename for the tmpSequenceFile (to be used for clover search)
my $tmpFilteredMatrixFile; #filename for the tmpMatrixFile (to be used for clover search)
my $tmpCloverResultFile; #filename for the output file from the clover program
#my $transfacMatrixFile = "files/TRANSFAC2010_forCloverWithNumbers.txt";
#my $transfacMatrixInfoFile  = "files/matrix.dat";
my $jasparMatrixFile = "files/jasparPfmVertebrates.forClover.2014.12.10.txt";
#my $useDB; # 1 if we are using TRANSFAC; 2 if we are using JASPAR
#my @transfacMotifList; #list of transfac motifs for filtering passed by the form to the program
my @motifList; #list of jaspar motifs for filtering passed by the form to the program
my $topMessage; #collects information throughout the program on important params and displays it on top of the results page
my $totalSequencesUsed; #total number of sequeces retrieved from the DB for searching
my %motifPubmedLink;
my %fullMotifInfo;
#my %fullMotifInfoJ;
my %motifTF;
#SUBROUTINE DECLARATIONS-------------------------------------------------------
sub connectDB ($);
sub disconnectDB ();
sub openHTMLTemplate (@);
sub closeHTMLTemplate ();
sub showMainSearchPage ();
sub getSequences ();
sub filterMotifs ();
sub runClover ();
sub loadMotifInfo ();
sub parseAndPrintCloverResults ();
sub commify ($);
###############################################################################
###############################################################################
###############################################################################
#PROGRAM STARTS HERE-----------------------------------------------------------
###############################################################################
#print "Content-type:text/html\n\n";


### Read the POST buffer then and populate $formValues
my $buffer;
read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'});
my @pairs = split(/&/, $buffer);
foreach my $pair (@pairs) {
   my ($name, $value) = split(/=/, $pair);
   $value =~ tr/+/ /;
   $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
   $formValues{$name} = $value;
   $debugInfo .= "NAME:$name VALUE:$value<br>\n";
   push @motifList, $value if $name eq "motif";
}
## save the ENV if in debug
if ($testing) {
   foreach my $key (keys %ENV) {
      $debugInfo .=  "Key:\.$key\. Value:\.$ENV{$key}\.<br>\n";
   }
}
################################################################################
################################################################################

#no matter what, we need this
print "Content-type:text/html\n\n";

my $action = $formValues{'action'};
### MAIN BRANCH POINT @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
### since this is if - elsif only one of the following can be executed at the time
#if ($action eq 'searchPeptides') { # if user posts login form
#   searchPeptides();
#}
if ($action eq 'getMotifPrediction') {
   #get sequences first, write time to tmpFile
   getSequences();

   #parse motif file down to motifs from the query, write it to tmpFile
   filterMotifs();

   #run clover search using parameters fromt he form, write output to a tempFile
   runClover();

   #TODOTODO
   #parse clover results and show them on the screen
   loadMotifInfo();

   #TODOTODO
   #parse clover results and show them on the screen
   parseAndPrintCloverResults();

}
else { # if no action was specified, show main page
   showMainSearchPage();
}
### END MAIN BRANCH POINT @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@


################################################################################
################################################################################

print "Debug Info: $debugInfo\n" if $testing;
print "Have a very nice day :-)\n" if $testing;
###############################################################################
#PROGRAM ENDS HERE-------------------------------------------------------------
###############################################################################
###############################################################################
###############################################################################

###SUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBS
###SUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBS
###SUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBS
###SUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBS
###SUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBS
###SUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBS
###SUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBS
###SUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBS
###SUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBS
###SUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBSSUBS


### showMainSearchPage() ######################################################
### Prints main search page ###################################################
### First it gets information from the database for few fields and ############
### creates a form containing this info so that users can choose ##############
### some basic search parameters ##############################################
sub showMainSearchPage() {
   my $jasparForm = "         <select name=\"motif\" size=\"12\" id=\"42\" multiple=\"multiple\" style=\"width: 170px;\">\n";

   #get JASPAR motif lists
   open (JASPARMATRIXFILE, "$jasparMatrixFile")
      or printError ("Error opening search form file($jasparMatrixFile)", $debugInfo, $testing);
   while(my $tmpJLine = <JASPARMATRIXFILE>) {
     $tmpJLine =~ s/$1//g if m/(\r\n?|\n\r?)/; #instead of chomp
     if($tmpJLine =~ m/^>(.*)/) {
        $jasparForm .= "            <option value=\"$1\">$1</option>\n"
     };
   }
   close JASPARMATRIXFILE;
   $jasparForm .= "         </select><br>\n";

   # open the saarchForm.html file
   open (SEARCHFORMFILE, "$searchFormFile")
           or printError ("Error opening search form file($searchFormFile)", $debugInfo, $testing);
   while(<SEARCHFORMFILE>) {
     my $tmpLine = $_;
     $tmpLine =~ s/$1//g if m/(\r\n?|\n\r?)/; #instead of chomp
     #print "<pre>TMPLINE:$tmpLine</pre><br>\n";
      if($tmpLine eq "<!--INSERT JASPAR LIST//-->") {
         print "$jasparForm\n";
      }
      else {
         print "$tmpLine\n";
      }
   }
   close SEARCHFORMFILE
           or printError ("Error closing search form file ($searchFormFile)", $debugInfo, $testing);


}
### END of displayMainSearchPage() ############################################

### folterMotifs()  ###########################################################
### Takes the list of motifs (Transfac or Jaspar) from the formValues #########
### uses it to filter motifs from the file containing all motifs ##############
### and saves filtered list into a tmpFile for clover run #####################
sub filterMotifs() {
   #my $useDB; # 1 if we are using TRANSFAC; 2 if we are using JASPAR #defined above
   my $matrixFile; #transfac or jaspar
   #my @filterList;

#my $matrixFile = 'TRANSFAC2010_Matrices_forCloverWithNumbers.txt'; #transfac or $jasparMatrixFile
#my $listFile = 'motifList.txt'; #transfac or jaspar motif list from formValues
#my $outFile = 'downIFNAR.1m.filtered.matrix.txt'; #tmpFile


      #@filterList = @motifList;
      $matrixFile = $jasparMatrixFile;

   #create tmpFilteredMatrixFile name here
   #my $tmpFilteredMatrixFile; #declared above
   my $done = 0;
   my $n = 1;
   while (!$done) {
      $tmpFilteredMatrixFile = "tmp/tmpFilteredMatrixFile" . "$n" . ".matrix";
      if (-e $tmpFilteredMatrixFile) {$n++}
      else {$done++}
   }


   #open matrixFile for reading and tmpFilteredMatrixFile for writing
   #filter matrixFile, write output to the tmpFilteredMatrixFile
   open (MATRIXFILE, $matrixFile)
      or printError ("error opening matrix file($matrixFile)", $debugInfo, $testing);
   open (TMPFILTEREDMATRIXFILE, ">$tmpFilteredMatrixFile")
      or die ("could not open tmpFilteredMatrixFile for writing");


   my @data;   #2D array holding binding site info
   my $e = 0;  #var that counts how many TFs we worked on
   my $f = 0;  #counts how many TFs matched
   my $currentTF;

   while (<MATRIXFILE>) {
      #copy line to $line, remove new line
      my $line = $_;
      $line =~ s/$1//g if m/(\r\n?|\n\r?)/; #instead of chomp

      #if the line starts with the ">" check if the previous TF matches the list and print it out if it does
      if ($line =~ m/^>/) {
         # trim ">" for matching
         $line =~ s/^>//;
         if ($currentTF) { #this will skip first line
            foreach my $motifListItem (@motifList) { #go throug each of the motifs submitted by the web page
               if ($currentTF eq $motifListItem) { #if it matches current TF, write it up to out file
                  print TMPFILTEREDMATRIXFILE ">$currentTF\n";
                  foreach my $aref ( @data ) {print TMPFILTEREDMATRIXFILE "@$aref\n";}
                  $f ++;
                  last;
               }

            }
         }
         undef @data;

         $currentTF = $line;
         $currentTF =~ s/^>V\$//;
         $e++;

      }
      else {
         # split line into array, push it into 2D array
         my @tmp = split / /, $line;
         push @data, [ @tmp ];
      }

      #print "." if $e % 50 == 0;
      #last if $e> 10;
   }
   print "\nProcessed $e Transcription Factors $f saved to $tmpFilteredMatrixFile<br>\n" if $testing;
   $topMessage .= "<p>Total of $e motifs found in database, using $f selected ones</p>\n";

   close MATRIXFILE;
   close TMPFILTEREDMATRIXFILE;

}
### END filterMotifs () #######################################################

### getSequences() ############################################################
### Fethces sequences from DB, then saves them to the tmpFile #################
### It will use ID type, species and list of IDs to get seqs       ############
### ###########################################################################
sub getSequences() {
   # rename form hash variables for easier handling
   my $mappings = $formValues{'mappings'};
   my $longest = $formValues{'longest'};
   my $species = $formValues{'species'};
   my $before = $formValues{'before'};
   my $after = $formValues{'after'};
   my $mask = $formValues{'mask'};
   # process ids from the form in the format we can use ##############################
   my @ids = split /\r\n/, $formValues{'ids'}; # array list of IDs to look up
   my $numIDs = @ids; # number of IDs passed to the function
   my $ids; # formatted version of IDs
   foreach my $id (@ids) {
      $id =~ s/'/\\\'/g; #instead of chomp
      $id =~ s/"/\\\"/g; #instead of chomp
      $id =~ s/;/\\\;/g; #instead of chomp
      $ids .= "\'$id\', ";
   }
   $ids =~ s/\W*$/\'/;
   ###################################################################################

   my @results; # 2D array holding sequences and annotation for requested genes


   # assemble print staments to let users know what we are doing ##############################################################
   my $mappingStatus = "$mappings mappings";
   $mappingStatus .= " (longest sequence only)" if $longest;

   my $maskedStatus;
   if    ($mask == 0) {$maskedStatus = 'Nothing was masked'}
   elsif ($mask == 1) {$maskedStatus = 'Repeats were masked'}
   else  {$maskedStatus = 'unknown!'}

   #############################################################################################################################

   # put the query together now ###############################################################################
   my $longestOnly = 'AND longest=1' if $longest;
   my $getSeqSQL = "
      SELECT geneName, geneID, tss, longest, chromosome, start, stop, strand, species, genomeVersion, sequence
      FROM main
      WHERE geneName IN ($ids) AND species='$species' AND mappings='$mappings' $longestOnly
      ORDER BY geneName";
   ############################################################################################################

   # if we are in the testing mode, just pring query and return ##############
   if ($testing) {
      print "<br><pre>$getSeqSQL</pre>";
      #return;
   }
   ###########################################################################

   # now get sequences and annotation for the requested genes from the DB ############################################################
   connectDB('promoter2014');
   my ($t0, $t1, $t2, $t3, $t4, $t5, $t6, $t7, $t8, $t9, $t10);
   my $sth = $dbh->prepare ("$getSeqSQL")
          or printError("greb", "$debugInfo<br>\npreparing statement $DBI::err ($DBI::errstr)", $testing);
   $sth->execute()
          or printError("ff443s", "$debugInfo<br\nexecuting $DBI::err ($DBI::errstr)", $testing);
   # geneName, geneID, tss, longest, chromosome, start, stop, strand, species, genomeVersion, sequence
   $sth->bind_col (1,  \$t0);  # geneName
   $sth->bind_col (2,  \$t1);  # geneID
   $sth->bind_col (3,  \$t2);  # tss
   $sth->bind_col (4,  \$t3);  # longest
   $sth->bind_col (5,  \$t4);  # chromosome
   $sth->bind_col (6,  \$t5);  # start
   $sth->bind_col (7,  \$t6);  # stop
   $sth->bind_col (8,  \$t7);  # strand
   $sth->bind_col (9,  \$t8);  # species
   $sth->bind_col (10, \$t9);  # genomeVersion
   $sth->bind_col (11, \$t10); # sequence

   my $k = 0;
   while($sth->fetch) {
      $results[$k][0]  = $t0;  # geneName
      $results[$k][1]  = $t1;  # geneID
      $results[$k][2]  = $t2;  # tss
      $results[$k][3]  = $t3;  # longest
      $results[$k][4]  = $t4;  # chromosome
      $results[$k][5]  = $t5;  # start
      $results[$k][6]  = $t6;  # stop
      $results[$k][7]  = $t7;  # strand
      $results[$k][8]  = $t8;  # species
      $results[$k][9]  = $t9;  # genomeVersion
      $results[$k][10] = $t10; # sequence

      $k++;
   }
   $sth->finish ()
      or printError("ZZod003", "$debugInfo<br\nfinishing $DBI::err ($DBI::errstr)", $testing);
   disconnectDB();
   print  "Successfully completed SQL<br>\n" if ($testing);

   $topMessage .= "<p>Total of $numIDs genes were submitted for analysis, $k sequences retrieved from the DB</p>\n";
   $topMessage .= "<p>Promoter sequences were derived using $mappingStatus <br>($species - $results[0][9])</p>";
   $topMessage .= "<p>Promoter sequence length is $before before and $after after TSS ($maskedStatus)</p>\n";
   #save number of genes that were used in search for the statistcs later
   $totalSequencesUsed = $k;

   ###################################################################################################################################

   # trim sequences to desired length #########################################################
   # mask repeats if needed and format the sequence to 60 bp per line #########################
   # we are starting from 2000/2000 except in rare case when the before section is shorter ####
   $k = 0;
   while ($results[$k][10]) {
      my $seqLength = length $results[$k][10];
      if ($seqLength < 4000) {
      }
      my $offset = 2000 - $before;
      my $length = $before + $after;
      my $trimmed = substr $results[$k][10], $offset, $length;
      # mask repeats if that was requested
      if ($mask == 1) {
         $trimmed =~ tr/[atgc]/N/;
      }
      $trimmed =~ s/(.{60})/$1\n/g;
      $trimmed =~ s/\n\n/\n/; #in case number of BP is divisible by 60, end can have two \n ... maybe
      # save it back to the 2D array results
      $results[$k][10] = $trimmed;

      $k++;
   }
   ############################################################################################

   # finaly print the results to a tmpFile in tmp dir ####################################################################################
   #######################################################################################################################################

   #my $tmpFastaFile; Declared above, all the way at the top
   my $done = 0;
   my $n = 1;
   while (!$done) {
      $tmpFastaFile = "tmp/tmpSequences" . "$n" . ".fasta";
      if (-e $tmpFastaFile) {$n++}
      else {$done++}
   }
   open TMPFASTAFILE, ">$tmpFastaFile"
      or printError('123', "could not open TMPFASTAFILE $tmpFastaFile<br>\n", '123');
   $k = 0;
   while ($results[$k][0]) {
      print TMPFASTAFILE ">$results[$k][0]\|$results[$k][1]\|tss:$results[$k][2]\|chr:$results[$k][4]\|strand:$results[$k][7]\|longest:$results[$k][3]\|species:$results[$k][8]\|genomeVersion:$results[$k][9]\n";
      print TMPFASTAFILE "$results[$k][10]\n";
      $k++;
   }
   close TMPFASTAFILE;
}
### END getSequences() ########################################################

### runClover() ###############################################################
### runs clover program using two files prepared above ########################
### tmpSequenceFile and tmpMatrixFile #########################################
### ###########################################################################
sub runClover() {
   my $cloverCommandLine;

   #make tmpCloverResultFile name here
   #my $tmpCloverResultFile; #declared above
   my $done = 0;
   my $n = 1;
   while (1) {
      $tmpCloverResultFile = "tmp/tmpCloverResultFile" . "$n" . ".result";
      last unless -e $tmpCloverResultFile;
      $n++;
   }

   $cloverCommandLine = "bin/clover -u $formValues{'minCloverScore'} $tmpFilteredMatrixFile $tmpFastaFile > $tmpCloverResultFile";

   print "\n<br>Clover command line is $cloverCommandLine<br>\n" if $testing;
   my $cloverResult = `$cloverCommandLine`;
   #print "\n<br>Clover output was $cloverResult<br>\n" if $testing;
}
### END runClover() ###########################################################

### sub loadMotifInfo() #######################################################
### preloads the whole matrix.dat file into set of useful vars ################
### this is used to later display links/info etc. #############################
sub loadMotifInfo() {

      ### go through all selected motifs and retrieve information from the db
      my $jm = 0;
      while ($motifList[$jm]) {
         print "MotifList[jm] $motifList[$jm]<br>\n " if $testing;
         my $mNumber;
         my $mVersion;
         $motifList[$jm] =~ m/^([a-zA-Z0-9_]*)\.(\d)\s/;
         $mNumber = $1;
         $mVersion = $2;
         print "ID=$mNumber version=$mVersion<br>\n" if $testing;

         # put the query together now ###############################################################################
         my $getJasparInfoSQL = "
            SELECT a.ID, a.BASE_ID, a.VERSION, a.COLLECTION, a.NAME, b.ID, b.TAG, b.VAL
            FROM MATRIX a, MATRIX_ANNOTATION b
            WHERE a.BASE_ID = '$mNumber' and a.ID = b.ID and a.VERSION=$mVersion";

         ############################################################################################################
         print "<br><pre>$getJasparInfoSQL</pre>" if $testing; # testing only
         ###########################################################################

         # now get Jaspar info from the DB  ############################################################
         connectDB('Jaspar2014');
         my ($t0, $t1, $t2, $t3, $t4, $t5, $t6, $t7);
         my $sth = $dbh->prepare ("$getJasparInfoSQL") or printError("greb", "$debugInfo<br>\npreparing statement $DBI::err ($DBI::errstr)", $testing);
         $sth->execute() or printError("werwer3344", "$debugInfo<br\nexecuting $DBI::err ($DBI::errstr)", $testing);
         $sth->bind_col (1,  \$t0);  # ID - MATRIX
         $sth->bind_col (2,  \$t1);  # BASE_ID - MATRIX
         $sth->bind_col (3,  \$t2);  # VERSION - MATRIX
         $sth->bind_col (4,  \$t3);  # COLLECTION - MATRIX
         $sth->bind_col (5,  \$t4);  # NAME (TF) - MATRIX
         $sth->bind_col (6,  \$t5);  # ID - MATRIX_ANNOTATION
         $sth->bind_col (7,  \$t6);  # TAG - MATRIX_ANNOTATION
         $sth->bind_col (8,  \$t7);  # VAL - MATRIX_ANNOTATION

         while($sth->fetch) {
            my $fullMotifName = $t1 . "." . $t2;
            #print "FullMotifName =\.$fullMotifName\. t1=\.$t1\. t2=$t2 t3=$t3 t4=$t4 t5=$t5 t6=$t6 t7=$t7 <br>\n";
            $motifTF{$fullMotifName} = $t4;
            $motifPubmedLink{$fullMotifName} = "http://www.ncbi.nlm.nih.gov/pubmed?term=$t7" if $t6 eq 'medline';
            $fullMotifInfo{$fullMotifName} .= "$t6 - $t7\\n";
        }
        $sth->finish () or printError("ZZod003", "$debugInfo<br\nfinishing $DBI::err ($DBI::errstr)", $testing);
        disconnectDB();
        #print  "Successfully completed SQL<br>\n" if ($testing);

         $jm++;
      }
      #for my $key (keys %motifPubmedLink) {
      #   print "motif name = $key and link is $motifPubmedLink{$key}<br>\n";
      #}

}
### END sub loadMotifInfo() ###################################################


### parseCloverResults() ######################################################
### takes the output of the temporary result file, formats it #################
### and displays it on the screen (with added links etc. ######################
### ###########################################################################
sub parseAndPrintCloverResults() {
   my $geneName;
   my $geneID;
   my $tss;
   my $chr;
   my $strand;
   my $longest;
   my $species;
   my $genomeVersion;
   my $beforeTss = $formValues{'before'};
   my $totalSequenceLength = $formValues{'before'} + $formValues{'after'};


   my %totalMotifCount;
   my %totalMotifScore;
   my %totaGeneCount;
   my %geneMotifHash;
   my %genesWithMotif;

   my $result; # This is where all the results go, to be printed at the end of the function

   #print the file header first
   #print "<!DOCTYPE html PUBLIC \"XHTML 1.0 Transitional//EN\"  \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n";
   #print "<html lang=\"en\">\n";
   #print "<head>\n";
   #print "      <title>Motif Search on Penguin</title>\n";
   #print "      <link type=\"text/css\" href=\"jss/style.css\" rel=\"stylesheet\" />\n";
   #print "</head>\n";
   #print "<body>\n";


   #print topMessage first TOPMESSAGETOPMESSAGETOPMESSAGETOPMESSAGETOPMESSAGETOPMESSAGETOPMESSAGE
   $result = "\n";
   $result .= $topMessage;
   $result .= "<p>&nbsp;</p>\n";
   #end of topMessage      TOPMESSAGETOPMESSAGETOPMESSAGETOPMESSAGETOPMESSAGETOPMESSAGETOPMESSAGE

   #this will contain the complete code for the full result table, sorted by Motif
   my $resultTableByMotif = "<table class=\"detailT\" title=\"clover results\">\n";

   open (TMPCLOVERRESULTFILE, "$tmpCloverResultFile")
           or printError ("Error opening tmpCloverResultFile ($tmpCloverResultFile)", $debugInfo, $testing);
   #print "<pre>\n";
   #print "<table align=\"center\" border=\"3\" title=\"clover results\">\n";
   my $gray = 1; #every second line in the table should be gray.
   my $s = 0; #tracks 1st dimension of the fullMotifGeneSummary 2D array
   my @fullMotifGeneSummary; #2D array that holds complete information on all motifs/genes from clover results
   while(<TMPCLOVERRESULTFILE>) {
      my $line = $_;
      $line =~ s/$1//g if m/(\r\n?|\n\r?)/; #instead of chomp
      # This is why people hate Perl, but this is also why Perl is powerful. This RegEx will match and parse the whole ID line!!!
      if ($line =~ m/^>(.*)\|(.*)\|tss:(.*)\|chr:chr(.*)\|strand:(.*)\|longest:(.*)\|species:(.*)\|genomeVersion:(.*)/) {
         $geneName = $1;
         $geneID = $2;
         $tss = $3;
         $chr = $4;
         $strand = $5;
         $longest = $6;
         $species = $7;
         $genomeVersion = $8;

         $resultTableByMotif .= "<tr><td colspan=\"7\">\n";
         $resultTableByMotif .= "<h2><hr><br>$geneName</h2><h5>($geneID) Chromosome:$chr Strand:$strand TSS:" . commify($tss) . "</h5>\n";
         $resultTableByMotif .= "</td></tr>\n";
         $resultTableByMotif .= "<tr>\n";
         $resultTableByMotif .= "<th><strong>Motif</strong></th>\n";
         $resultTableByMotif .= "<th><strong>Start</strong></th>\n";
         $resultTableByMotif .= "<th><strong>Stop</strong></th>\n";
         #$resultTableByMotif .= "<td><b>Strand</b></td>\n";
         $resultTableByMotif .= "<th><strong>AbsStart</strong></th>\n";
         $resultTableByMotif .= "<th><strong>AbsStop</strong></th>\n";
         $resultTableByMotif .= "<th><strong>Sequence</strong></th>\n";
         $resultTableByMotif .= "<th><strong>Score</strong></th>\n";
         $resultTableByMotif .= "</tr>\n";
         #undef $geneName; undef $geneID; undef $tss; undef $chr; undef $strand; undef $longest; undef $species; undef $genomeVersion;

         $gray = 1;
      }
      elsif ($line =~ m/^(.*)\s+(\d+)\s+-\s+(\d+)\s+([\+-])\s+(\w+)\s+([-+]?[0-9]*\.?[0-9]+)/) {
         my $motif = $1;
         my $start = $2;
         my $stop = $3;
         #my $strand = $4; # have this from the ID line
         my $absStart;
         my $absStop;
         my $sequence = $5;
         my $score = $6;

         #cleanup for Jaspar   ###############
         $motif =~ s/\s+$//g;
         $motif =~ s/^\s+//g;
         #####################################
         #print "Motif=\.$motif\.<br>\n";

         if ($strand eq "+") {
            $start -= $beforeTss;
            $stop -= $beforeTss;
            $absStart = $tss + $start;
            $absStop = $tss + $stop;
         }
         elsif ($strand eq "-") {
            $start -= $beforeTss;
            $stop -= $beforeTss;
            $absStart = $tss - $start;
            $absStop = $tss - $stop;
         }

         my $colorCode;
         if ($gray) {
            $colorCode = "bgcolor=\"#c0c0c0\"";
            $gray = 0;
         }
         else {
            $gray++;
         }
         #add motif/gene information to the $resultTableByMotif to be displayed later   ########################################
         $resultTableByMotif .= "<tr>\n";
         $resultTableByMotif .= "<td>$motif</pre></td>\n";
         $resultTableByMotif .= "<td><pre>" . commify($start) . "</pre></td>\n";
         $resultTableByMotif .= "<td><pre>" . commify($stop) . "</pre></td>\n";
         #$resultTableByMotif .= "<td><b>$strand</b></td>\n";
         $resultTableByMotif .= "<td><pre>" . commify($absStart) . "</pre></td>\n";
         $resultTableByMotif .= "<td><pre>" . commify($absStop) . "</pre></td>\n";
         $resultTableByMotif .= "<td><pre>$sequence</pre></td>\n";
         $resultTableByMotif .= "<td><pre>$score</pre></td>\n";
         $resultTableByMotif .= "</tr>\n";
         #######################################################################################################################
         #add information to the grandSummaryArray for motif/gene statistics #####
         #########################################################################
         #$fullMotifGeneSummary[$s][0] = $geneName; #geneName
         #$fullMotifGeneSummary[$s][1] = $motif; #motif
         #$fullMotifGeneSummary[$s][2] = $score; #motif score
         $s++;
         $totalMotifCount{$motif}++; #all occurances for each motif
         $totalMotifScore{$motif} += $score; #all occurances for each motif
         $totaGeneCount{$geneName} = 1; #not sure what to do with this yet
            my $motifAndGene = $motif . "\@" . $geneName;
         $geneMotifHash{$motifAndGene}++; # gene@motif combo - used later to count numbef of genes that have each motif
         #########################################################################

      }
      else {
         #tmp
         print "ELSE LINE:$line<br>\n" if $testing;
      }

      #print "$line\n";
   }
   close TMPCLOVERRESULTFILE;
   $resultTableByMotif .= "</table>";
   #print "</pre>\n";
   #print "$resultTableByMotif"; moved to the bottom of the sub here




   ### Analyze all matches and calculate statistics for the summary-by-motif table, then print the table #############################
   ###################################################################################################################################
   ###################################################################################################################################
   $result .= "<h1 >Summary by motif</h1>\n";

   # count number of genes in which each motif is found (convoluted)
   foreach my $key (keys %geneMotifHash) {
      my $tmpMotif = $key;
      $tmpMotif =~ s/\@.*//;
      $genesWithMotif{$tmpMotif}++;
   }

   #assemble 2D array with the summary per motif
   #$motifSummaryArray[$m][0] == motif
   #$motifSummaryArray[$m][1] == TF
   #$motifSummaryArray[$m][2] == Occurences (total)
   #$motifSummaryArray[$m][3] == genes (x of y)
   #$motifSummaryArray[$m][4] == percent
   #$motifSummaryArray[$m][5] == Average motif score
   #$motifSummaryArray[$m][6] == Motifs per gene 1
   #$motifSummaryArray[$m][7] == Motifs per gene 2
   #$motifSummaryArray[$m][8] == Motifs per gene 3
   #$motifSummaryArray[$m][9] == Motifs per gene 4
   #$motifSummaryArray[$m][10] == Motifs per gene 5+
   my @motifSummaryArray;
   my $m =0;
   my $totalNumberOfMotifs = keys (%totalMotifCount); #total number of unique motifs we found
   foreach my $motifName (keys %totalMotifCount) {
      #Prepare motif name with no number for matching with the motif info from the .dat file (for TRANSFAC only, it will not affect Jaspar)
      my $motifNameOnly = $motifName;
      my $motifNumber;
      my $motifToPrint;
      # if we are using TRANSFAC
         $motifNameOnly =~ s/\s+(.*)//;
         $motifToPrint = "$motifNameOnly $1";
         #print "motifName = \.$motifName\. motifNameOnly = \.$motifNameOnly\.<br>\n";


      if($motifPubmedLink{$motifNameOnly}) {
         $motifSummaryArray[$m][0] = "<a href=\"$motifPubmedLink{$motifNameOnly}\" target=\"new\">$motifToPrint</a>";
      }
      else {
         $motifSummaryArray[$m][0] = "$motifToPrint"; #motif
      }
      $motifSummaryArray[$m][1] = $motifTF{$motifNameOnly}; #occurences
      $motifSummaryArray[$m][2] = $totalMotifCount{$motifName}; #occurences
      $motifSummaryArray[$m][3] = "$genesWithMotif{$motifName} of  $totalSequencesUsed"; #genes
      $motifSummaryArray[$m][4] =  sprintf("%.0f", ($genesWithMotif{$motifName} /  $totalSequencesUsed) * 100); #genes percent
      $motifSummaryArray[$m][5] =  sprintf("%.1f", $totalMotifScore{$motifName} /  $totalMotifCount{$motifName}); #genes percent

      $motifSummaryArray[$m][11] =  $motifNameOnly; #For Full info display below

      # finally, fill out the motifs-per-gene columns
      # geneMotifHash contains "motif@gene" as a key. First remove gene for matching
      foreach my $motifGene (keys %geneMotifHash) {
         my $tmpMotifOnly = $motifGene;
         $tmpMotifOnly =~ s/\@.*//;
         #print "tmpMotifOnly=$tmpMotifOnly and motifName=$motifName<br>\n";
         if ($tmpMotifOnly eq $motifName) {
            #print "SAME!<br>\n";
            $motifSummaryArray[$m][6]++  if($geneMotifHash{$motifGene} == 1);  #Motifs per gene 1
            $motifSummaryArray[$m][7]++  if($geneMotifHash{$motifGene} == 2);  #Motifs per gene 2
            $motifSummaryArray[$m][8]++  if($geneMotifHash{$motifGene} == 3);  #Motifs per gene 3
            $motifSummaryArray[$m][9]++  if($geneMotifHash{$motifGene} == 4);  #Motifs per gene 4
            $motifSummaryArray[$m][10]++ if($geneMotifHash{$motifGene} > 4); #Motifs per gene 5+

         }

         #$genesWithMotif{$tmpMotif}++;
      }



      $m++
   }



   $result .=  <<HTML;
<table class="resultT" title="summaryByMotif">
      <tr>
         <td class="resultT" colspan="7">
            <h3>Total of $totalNumberOfMotifs unique motifs found<br>Promoter regions of $totalSequencesUsed genes searched</h3>
         </td>
         <td class="resultT" colspan="5">
            <h3>Motifs per gene</h3>
         </td>
      </tr>
      <tr>
         <th class="tableT" >
            Full
         </th>
         <th class="tableT" >
            Motif
         </th>
         <th class="tableT" >
            TF
         </th>
         <th class="tableT" >
            Occurrences
         </th>
         <th class="tableT" >
            Genes
         </th>
         <th class="tableT" >
            %
         </th>
         <th class="tableT" >
            AvScore
         </th>
         <th class="tableT" >
            1
         </th>
         <th class="tableT" >
            2
         </th>
         <th class="tableT" >
            3
         </th>
         <th class="tableT" >
            4
         </th>
         <th class="tableT" >
            5+
         </th>
      </tr>

HTML

   #sort array here as needed
   @motifSummaryArray = sort { $b->[2] <=> $a->[2]} @motifSummaryArray;

   $m = 0;
   while ($motifSummaryArray[$m][0]) {
      $result .= "<tr class\"dd\">\n";
      $result .= "<td class=\"resultT\" align=\"center\"><input type=\"button\" onclick=\"alert(\'$fullMotifInfo{$motifSummaryArray[$m][11]}\')\" value=\"Info\" /></td>\n";
      $result .= "<td class=\"resultT\">$motifSummaryArray[$m][0] </td>\n";
      $result .= "<td class=\"resultT\">$motifSummaryArray[$m][1] </td>\n";
      $result .= "<td class=\"resultT\">$motifSummaryArray[$m][2] </td>\n";
      $result .= "<td class=\"resultT\">$motifSummaryArray[$m][3] </td>\n";
      $result .= "<td class=\"resultT\">$motifSummaryArray[$m][4] </td>\n";
      $result .= "<td class=\"resultT\">$motifSummaryArray[$m][5] </td>\n";
      $result .= "<td class=\"resultT\">$motifSummaryArray[$m][6] </td>\n";
      $result .= "<td class=\"resultT\">$motifSummaryArray[$m][7] </td>\n";
      $result .= "<td class=\"resultT\">$motifSummaryArray[$m][8] </td>\n";
      $result .= "<td class=\"resultT\">$motifSummaryArray[$m][9] </td>\n";
      $result .= "<td class=\"resultT\">$motifSummaryArray[$m][10] </td>\n";
      $result .= "</tr>\n";
      $m++;
   }
   $result .= "</table>";
   ###################################################################################################################################
   ###################################################################################################################################



   #now print the table with motif/gene details
   $result .= "<p>&nbsp;</p>";
   $result .= "<h1>Detailed results for each gene</h1>\n";
   $result .= "$resultTableByMotif";




   #tmptesttmptesttmptest###############
   #####################################
   #print "<pre>";
   #foreach my $key (keys %totalMotifCount) {
   #   print "Motif $key has total of $totalMotifCount{$key} occurences in $genesWithMotif{$key} (out of $totalSequencesUsed genes) \n";
   #}
   #foreach my $key (keys %geneMotifHash) {
   #   print "Motif-Gene $key \n";
   #}
   #print "<pre>";
   #####################################
   #####################################


   # wrap up the file
   #$result .="      </body>\n";
   #$result .="</html>\n";



   # open the saarchForm.html file
   open (RESULTTEMPLATEFILE, "$resultTemplateFile")
           or die "Error opening $resultTemplateFile";
   while(<RESULTTEMPLATEFILE>) {
     my $tmpLine = $_;
     $tmpLine =~ s/$1//g if m/(\r\n?|\n\r?)/; #instead of chomp
      if($tmpLine eq "<!-- CONTENT HERE -->") {
         print "$result\n";
      }
      else {
         print "$tmpLine\n";
      }
   }
   close RESULTTEMPLATEFILE
           or die "Error closing $resultTemplateFile";










}
### END parseCloverResults()  ##################################################

sub connectDB($) {
# We can now connect to the DB #################################################
my $dbName = shift; #store db name passed to the function into a variable dbName
#print "Connecting to the database<br>\n" if $testing;
$dbh = DBI->connect ("dbi:mysql:$dbName", 'peter', 'peter')
   or printError("Error in connecting $DBI::err ($DBI::errstr)", 1, 1);
#print  "Successfully connected<br>\n" if $testing;
################################################################################

}

sub disconnectDB() {
# All done so close the DB connection ##########################################
#print "Disconnecting<br>\n" if $testing;
$dbh->disconnect ()
   or printError("In disconnecting $DBI::err ($DBI::errstr),", 1, 1);
#print  "Successfully disconnected<br>\n"  if $testing;
################################################################################
}

### #comify() ##################################################################
### takes a number, returns same number as comma separated for thousands #######
################################################################################
sub commify($) {
  my $text = reverse $_[0];
  $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
  return scalar reverse $text;
} # commify
###  END comify()################################################################