#!/usr/bin/perl -w

#$CGI_VERSION = "(get_seq_www_results.cgi v 0.1 beta 1a)";

BEGIN{
    print "Content-type: text/html\n\n";

    # Ensure that errors will go to the web browser.
    open(STDERR, ">&STDOUT");
    $| = 1;
    print '';
}

use FileHandle;
use IPC::Open3;

use CGI;
# The CGI module is available from 
# http://www.genome.wi.mit.edu/ftp/distribution/software/WWW/

my $DATA_FILE = "holdingClones\.100219\.txt"; 
my $KIAA_FILE = "KIAA_clone\.20070830\.txt";
my $GATEWAY_FILE = "gateway_clone\.20100917\.txt";
my $MOUSE_FILE = "fantom3set_20090727\.txt";

#my $PRINT_TAG = "maker\taccession\tmaker_id\tRIKEN_cloneid\tvector\tTUID\tRelated_TUID\tgene\tGeneID\tprotein_id\tunigene_id\tunigene_title\tcell_line\tcell_type\ttissue_type\tclone_lib\tlab_host\trefseq_nuc\trefseq_pep\tstatus";
my $PRINT_TAG = "type\tmaker\taccession\tmaker_id\tclone_origin\tRIKEN_cloneid\tvector\tTUID\tRelated_TUID\tgene\tGeneID\tprotein_id\tunigene_id\tunigene_title\tcell_line\tcell_type\ttissue_type\tclone_lib\tlab_host\trefseq_nuc\trefseq_pep\trefseq_status\tstatus\tident\tgident\toverlap\tq_len\tq_start\tq_stop\ts_len\ts_start\ts_stop\tseq_verify";

main();

sub main {

  $query = new CGI;

  #if ($query->param('Reverse Complement Sequence')) {
  get_seq($query);
  #}

}

sub get_seq {
  my ( $query ) = @_;
  
  my @input_data = ();
  my $search_flgs = -1;
  my $human_flg = -1;
  my %flgs = ();
  my @names = $query->param;
  for (@names) {
    if ( /^INPUT_TEXT$/ ) {
      my $val = $query->param($_);
      my $Zspace = '(?:\xA1\xA1)';       # EUC-JP の全角スペース
      my $Zspace_sjis = '(?:\x81\x40)';  # SJIS の全角スペース
      @input_data = split(/[\s|$Zspace|$Zspace_sjis]+/, $val);
    }
    elsif ( /^HUMAN_FLG$/) {
      $human_flg = $query->param($_);
    }
    elsif ( /^SEARCH_FLG$/) {
      $search_flgs = $query->param($_);
    }
    elsif ( /_CHECK$/) {
      $flgs{$_} = $query->param($_);
    }
  }
  my $input_size = $#input_data;
  if ($input_size < 0) {
    print "error : no input data<br>\n";
    return;
  }

  # grep
  my @grep_res = ();
  # and 検索
  if ($search_flgs == 1) {
    my $grep_str = "";
    foreach my $val (@input_data) {
      my $buf = $val;
      $buf =~ s/\*//g;
      $grep_str .= " \| grep -i $buf";
    }
    # holdingClones の grep 結果取得
	my $cmd;
	if ( $human_flg == 1 ) {
    	$cmd = "cat $DATA_FILE $grep_str";
	}
	elsif ( $human_flg == 0 ) {
    	$cmd = "cat $MOUSE_FILE $grep_str";
	}
	else {
		die "ERROR: Couldn't find \$HUMAN_FLG\n";
	}
    my $childout = &getData($cmd);
    while ($cline = $childout->getline) {
      chomp($cline);
      push @grep_res, $cline;
    }
	
    # KIAA Clones の grep 結果取得
    $cmd = "cat $KIAA_FILE $grep_str";
    $childout = &getData($cmd);
    while ($cline = $childout->getline) {
      chomp($cline);
      push @grep_res, $cline;
    }
    # gatewayClones の grep 結果取得
    $cmd = "cat $GATEWAY_FILE $grep_str";
    $childout = &getData($cmd);
    while ($cline = $childout->getline) {
      chomp($cline);
      push @grep_res, $cline;
    }
  }
  # or 検索
  elsif ($search_flgs == 0) {
    # holdingClones の grep 結果取得
    foreach my $val (@input_data) {
      $val =~ s/\*//g;
      $grep_str = " \| grep -i $val";
      my $cmd;
		if ( $human_flg == 1 ) {
    		$cmd = "cat $DATA_FILE $grep_str";
		}
		elsif ( $human_flg == 0 ) {
    		$cmd = "cat $MOUSE_FILE $grep_str";
		}
		else {
			die "ERROR: Couldn't find \$HUMAN_FLG\n";
		}
      my $childout = &getData($cmd);
      while ($cline = $childout->getline) {
        chomp($cline);
        push @grep_res, $cline;
      }
    }

	if ( $human_flg == 1 ) {
    # KIAA Clones の grep 結果取得
    foreach my $val (@input_data) {
      $val =~ s/\*//g;
      my $cmd = "cat $KIAA_FILE \| grep -i $val";
      my $childout = &getData($cmd);
      while ($cline = $childout->getline) {
        chomp($cline);
        push @grep_res, $cline;
      }
    }
    # gatewayClones の grep 結果取得
    foreach my $val (@input_data) {
      $val =~ s/\*//g;
      my $cmd = "cat $GATEWAY_FILE \| grep -i $val";
      my $childout = &getData($cmd);
      while ($cline = $childout->getline) {
        chomp($cline);
        push @grep_res, $cline;
      }
    }
	}
  }

  # 重複を削除
  {
    my %tmp;
    @grep_res = sort(grep( !$tmp{$_}++, @grep_res ));
  }
  
  # holdingClones と gatewayClones はカラム数が違うので gatewayClones にそろえる
  if ( $#grep_res < 0 ) {
	print "Your search - @input_data - did not match any clones\n";
	exit;
  }
  for (my $i = 0; $i <= $#grep_res; $i++) {
    my @splits = split(/\t/, $grep_res[$i]);
    #my $column_num = $#splits;
    my $column_num = 0;
    my $buf = $grep_res[$i];
    while ($buf =~ /\t/) {
      $buf = $';
      $column_num++;
    }
    
    # 更新データ取得
    my $new_data = "";
    #if ($column_num == 19 ) {
    if ($splits[4] =~ /^p/ ) { #holdingClones
      $new_data = "cDNA clone";
      for (my $j = 0; $j <= $#splits; $j++) {
		# alignment statusが無い分追加
        $new_data .= "\t"x9 if ($j == 20);

        $new_data .= "\t$splits[$j]";
        # clone_origin が無い分追加
        $new_data .= "\t" if ($j == 2);
		# refseq_statusが無い分追加
        $new_data .= "\t" if ($j == 18);
      }
      # seq_verifyが無い分追加
      $new_data .= "\t";
    } elsif ( $splits[3] =~ /^P|^R0|^Z|^F/ ) { #fantom3set
      $new_data = "cDNA clone";
      for (my $j = 0; $j <= $#splits; $j++) {
		if ( $j == 2 || $j == 12 ) {
        	$new_data .= "\t$splits[$j]\t";
			next;
		}
		elsif ( $j == 4 ) {
        	$splits[$j] =~ s/lambda ZAP-III/modified Bluescript1/;
        	$splits[$j] =~ s/lambda F/pF/;
        	$new_data .= "\t$splits[$j]";
			next;
		}
		elsif ( $j == 6 ) {
        	$new_data .= "\t$splits[$j]";
			next;
		}
		elsif ( $j == 5 || $j == 15 ) {
			next;
		}
		
        $new_data .= "\t$splits[$j]";

      }
      # seq_verifyが無い分追加
      $new_data .= "\t";
    } else {
      # gatewayClones のデータ
      $new_data = "Entry clone\t" . $grep_res[$i];
    }

    # データ更新
    $grep_res[$i] = $new_data;
  }
  
  # 検索条件で絞込み
  my @fin_res = ();
  foreach my $one (@grep_res) {
    my @splits = split(/\t/, $one);
    my $checked_flg = 1;
    # 入力された文字で絞り込む
    foreach my $input_one (@input_data) {
      my $check_str = $input_one;
      my $checked_flg_one = 0;
      # "*" があればあいまい一致 / なければ完全一致
      my $check_flg = 0;
      if ($check_str =~ /\*$/) {
        $check_str = $`;
        $check_flg = 1;   # あいまい一致
      }
      
      # チェックされた項目どれかに一致すればOK
      foreach my $flg_one (keys %flgs) {
        my $iFlg = &check2header($flg_one);
        if ($splits[$iFlg]) {
          if ($check_flg == 0) {
            # 完全一致
            #$checked_flg_one++ if ($splits[$iFlg] eq $check_str);
            $checked_flg_one++ if ($splits[$iFlg] =~ /$check_str/i);
          } else {
            # あいまい一致
            $checked_flg_one++ if ($splits[$iFlg] =~ /$check_str/i);
          }
        }
      }
      # and 検索
      if ($search_flgs == 1) {
        $checked_flg *= $checked_flg_one;
      }
      # or 検索
      elsif ($search_flgs == 0) {
        $checked_flg += $checked_flg_one;
      }
    }
    # and 検索
    if ($search_flgs == 1) {
      push @fin_res, $one if ($checked_flg> 0);
    }
    # or 検索
    elsif ($search_flgs == 0) {
      push @fin_res, $one if ($checked_flg> 1);
    }
  }
  
  
  print <<HEAD;
<HTML>
<HEAD>
<TITLE>result</TITLE>
<style type="text/css">
<!--
BODY { font-family: "Arial";
}

TH { background-color: #DCDCDC;
     font-size: 9pt;
}
TD { font-size: 9pt;}
.cDNA_style { background-color: white; }
.cDNA_KIAA_style { background-color: #EEDDDD; }
.Entry_style { background-color: #DDDDEE; }
-->
</style>
</HEAD>

<BODY>
<table border=1 cellspacing=0 cellpadding=3 bordercolordark="#ffffff" bordercolor="#777777">
HEAD

  my $tag_num = 0;
  print "<TR>\n";
  foreach my $one (split(/\t/, $PRINT_TAG)) {
    $tag_num++;
    print "\t<TH>$one</TH>\n";
  }
  print "</TR>\n";
  foreach my $one (@fin_res) {
    print "<TR>\n";
    my @splits = split(/\t/, $one);
    for (my $i = 0; $i < $tag_num; $i++) {
      $splits[$i] = "-" if ((!$splits[$i]) or $splits[$i] =~ /^\s+$/);
      if ($i == 0) {
        if ($splits[$i] =~ /^cDNA/) {
          if ($splits[1] eq "KIAA") {
            print "\t<TD class=\"cDNA_KIAA_style\">";
          } else {
            print "\t<TD class=\"cDNA_style\">";
          }
        } else {
          print "\t<TD class=\"Entry_style\">";
        }
      } else {
        print "\t<TD>";
      }
      if ($splits[$i] ne "-" and $i == 2) {       # Link accession
		foreach my $acc ( split(/,/,$splits[$i]) ) {
        	#print "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Nucleotide&cmd=search&term=$splits[$i]\" target=\"_blank\">";
        	print "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Nucleotide&cmd=search&term=$acc\" target=\"_blank\">$acc</a><br>";
		}
		next;
      } elsif ($splits[$i] ne "-" and $i == 10) {  # Link GeneID
        print "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=full_report&list_uids=$splits[$i]\" target=\"_blank\">";
      } elsif ($i == 18 ) {
	#if ( $splits[0] eq "Entry clone" ) {
    #	$splits[$i] = "DH10B";
	#	print "<font color=\"#FF0000\">";
	#}
	if ( $splits[$i] eq "-" ) {
	#elsif ( $splits[$i] eq "-" ) {
		if ( $splits[1] eq "FLJ" ) {
			$splits[$i] = "EK1";
		}
		elsif ( $splits[1] eq "MGC" ) {
			$splits[$i] = "DH10B";
		}
		elsif ( $splits[1] eq "KIAA" ) {
			$splits[$i] = "DH5a";
		}
		elsif ( $splits[1] eq "RIKEN" ) {
			$splits[$i] = "DH10B";
		}
		elsif ( $splits[1] eq "IKAKEN" ) {
			$splits[$i] = "DH10B";
		}
		print "<font color=\"#FF0000\">";
	}
      } elsif ($splits[$i] ne "-" and $i == 19) { # Link refseq_nuc
        print "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Nucleotide&cmd=search&term=$splits[$i]\" target=\"_blank\">";
      } elsif ($splits[$i] ne "-" and $i == 20) { # Link refseq_pep
        print "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Protein&cmd=search&term=$splits[$i]\" target=\"_blank\">";
      }
      print "$splits[$i]</TD>\n";

      # print "\t<TD>$splits[$i]</TD>\n";
    }
    print "</TR>\n";
  }

  print <<FOOT;
</TABLE>
</BODY>
</HTML>
FOOT
}



sub getData {
  my ($cmd) = @_;

  my $primer3_pid;
  my ($childin, $childout) = (FileHandle->new, FileHandle->new);
  {
    $primer3_pid = open3($childin, $childout, $childout, $cmd);
  }
  if (!$primer3_pid) {
    print "Cannot excecure $cmd:<br>$!";
    exit;
  }

  return $childout;
}


sub check2header {
  my ($str) = @_;

  if ($str eq "ACCESSION_CHECK") {
    return 2;
  } elsif ($str eq "RIKEN_CHECK") {
    return 5;
  } elsif ($str eq "TUID_CHECK") {
    return 7;
  } elsif ($str eq "RELATED_CHECK") {
    return 8;
  } elsif ($str eq "GENE_CHECK") {
    return 9;
  } elsif ($str eq "GENEID_CHECK") {
    return 10;
  } elsif ($str eq "PROTEIN_CHECK") {
    return 11;
  } elsif ($str eq "UNIGENE_CHECK") {
    return 12;
  }
}