#!/usr/bin/perl -w #$CGI_VERSION = "(get_seq_www_results.cgi v 0.1 beta 1a)"; BEGIN{ print "Content-type: text/html\n\n"; # Ensure that errors will go to the web browser. open(STDERR, ">&STDOUT"); $| = 1; print ''; } use FileHandle; use IPC::Open3; use CGI; # The CGI module is available from # http://www.genome.wi.mit.edu/ftp/distribution/software/WWW/ my $DATA_FILE = "holdingClones\.100219\.txt"; my $KIAA_FILE = "KIAA_clone\.20070830\.txt"; my $GATEWAY_FILE = "gateway_clone\.20100917\.txt"; my $MOUSE_FILE = "fantom3set_20090727\.txt"; #my $PRINT_TAG = "maker\taccession\tmaker_id\tRIKEN_cloneid\tvector\tTUID\tRelated_TUID\tgene\tGeneID\tprotein_id\tunigene_id\tunigene_title\tcell_line\tcell_type\ttissue_type\tclone_lib\tlab_host\trefseq_nuc\trefseq_pep\tstatus"; my $PRINT_TAG = "type\tmaker\taccession\tmaker_id\tclone_origin\tRIKEN_cloneid\tvector\tTUID\tRelated_TUID\tgene\tGeneID\tprotein_id\tunigene_id\tunigene_title\tcell_line\tcell_type\ttissue_type\tclone_lib\tlab_host\trefseq_nuc\trefseq_pep\trefseq_status\tstatus\tident\tgident\toverlap\tq_len\tq_start\tq_stop\ts_len\ts_start\ts_stop\tseq_verify"; main(); sub main { $query = new CGI; #if ($query->param('Reverse Complement Sequence')) { get_seq($query); #} } sub get_seq { my ( $query ) = @_; my @input_data = (); my $search_flgs = -1; my $human_flg = -1; my %flgs = (); my @names = $query->param; for (@names) { if ( /^INPUT_TEXT$/ ) { my $val = $query->param($_); my $Zspace = '(?:\xA1\xA1)'; # EUC-JP の全角スペース my $Zspace_sjis = '(?:\x81\x40)'; # SJIS の全角スペース @input_data = split(/[\s|$Zspace|$Zspace_sjis]+/, $val); } elsif ( /^HUMAN_FLG$/) { $human_flg = $query->param($_); } elsif ( /^SEARCH_FLG$/) { $search_flgs = $query->param($_); } elsif ( /_CHECK$/) { $flgs{$_} = $query->param($_); } } my $input_size = $#input_data; if ($input_size < 0) { print "error : no input data<br>\n"; return; } # grep my @grep_res = (); # and 検索 if ($search_flgs == 1) { my $grep_str = ""; foreach my $val (@input_data) { my $buf = $val; $buf =~ s/\*//g; $grep_str .= " \| grep -i $buf"; } # holdingClones の grep 結果取得 my $cmd; if ( $human_flg == 1 ) { $cmd = "cat $DATA_FILE $grep_str"; } elsif ( $human_flg == 0 ) { $cmd = "cat $MOUSE_FILE $grep_str"; } else { die "ERROR: Couldn't find \$HUMAN_FLG\n"; } my $childout = &getData($cmd); while ($cline = $childout->getline) { chomp($cline); push @grep_res, $cline; } # KIAA Clones の grep 結果取得 $cmd = "cat $KIAA_FILE $grep_str"; $childout = &getData($cmd); while ($cline = $childout->getline) { chomp($cline); push @grep_res, $cline; } # gatewayClones の grep 結果取得 $cmd = "cat $GATEWAY_FILE $grep_str"; $childout = &getData($cmd); while ($cline = $childout->getline) { chomp($cline); push @grep_res, $cline; } } # or 検索 elsif ($search_flgs == 0) { # holdingClones の grep 結果取得 foreach my $val (@input_data) { $val =~ s/\*//g; $grep_str = " \| grep -i $val"; my $cmd; if ( $human_flg == 1 ) { $cmd = "cat $DATA_FILE $grep_str"; } elsif ( $human_flg == 0 ) { $cmd = "cat $MOUSE_FILE $grep_str"; } else { die "ERROR: Couldn't find \$HUMAN_FLG\n"; } my $childout = &getData($cmd); while ($cline = $childout->getline) { chomp($cline); push @grep_res, $cline; } } if ( $human_flg == 1 ) { # KIAA Clones の grep 結果取得 foreach my $val (@input_data) { $val =~ s/\*//g; my $cmd = "cat $KIAA_FILE \| grep -i $val"; my $childout = &getData($cmd); while ($cline = $childout->getline) { chomp($cline); push @grep_res, $cline; } } # gatewayClones の grep 結果取得 foreach my $val (@input_data) { $val =~ s/\*//g; my $cmd = "cat $GATEWAY_FILE \| grep -i $val"; my $childout = &getData($cmd); while ($cline = $childout->getline) { chomp($cline); push @grep_res, $cline; } } } } # 重複を削除 { my %tmp; @grep_res = sort(grep( !$tmp{$_}++, @grep_res )); } # holdingClones と gatewayClones はカラム数が違うので gatewayClones にそろえる if ( $#grep_res < 0 ) { print "Your search - @input_data - did not match any clones\n"; exit; } for (my $i = 0; $i <= $#grep_res; $i++) { my @splits = split(/\t/, $grep_res[$i]); #my $column_num = $#splits; my $column_num = 0; my $buf = $grep_res[$i]; while ($buf =~ /\t/) { $buf = $'; $column_num++; } # 更新データ取得 my $new_data = ""; #if ($column_num == 19 ) { if ($splits[4] =~ /^p/ ) { #holdingClones $new_data = "cDNA clone"; for (my $j = 0; $j <= $#splits; $j++) { # alignment statusが無い分追加 $new_data .= "\t"x9 if ($j == 20); $new_data .= "\t$splits[$j]"; # clone_origin が無い分追加 $new_data .= "\t" if ($j == 2); # refseq_statusが無い分追加 $new_data .= "\t" if ($j == 18); } # seq_verifyが無い分追加 $new_data .= "\t"; } elsif ( $splits[3] =~ /^P|^R0|^Z|^F/ ) { #fantom3set $new_data = "cDNA clone"; for (my $j = 0; $j <= $#splits; $j++) { if ( $j == 2 || $j == 12 ) { $new_data .= "\t$splits[$j]\t"; next; } elsif ( $j == 4 ) { $splits[$j] =~ s/lambda ZAP-III/modified Bluescript1/; $splits[$j] =~ s/lambda F/pF/; $new_data .= "\t$splits[$j]"; next; } elsif ( $j == 6 ) { $new_data .= "\t$splits[$j]"; next; } elsif ( $j == 5 || $j == 15 ) { next; } $new_data .= "\t$splits[$j]"; } # seq_verifyが無い分追加 $new_data .= "\t"; } else { # gatewayClones のデータ $new_data = "Entry clone\t" . $grep_res[$i]; } # データ更新 $grep_res[$i] = $new_data; } # 検索条件で絞込み my @fin_res = (); foreach my $one (@grep_res) { my @splits = split(/\t/, $one); my $checked_flg = 1; # 入力された文字で絞り込む foreach my $input_one (@input_data) { my $check_str = $input_one; my $checked_flg_one = 0; # "*" があればあいまい一致 / なければ完全一致 my $check_flg = 0; if ($check_str =~ /\*$/) { $check_str = $`; $check_flg = 1; # あいまい一致 } # チェックされた項目どれかに一致すればOK foreach my $flg_one (keys %flgs) { my $iFlg = &check2header($flg_one); if ($splits[$iFlg]) { if ($check_flg == 0) { # 完全一致 #$checked_flg_one++ if ($splits[$iFlg] eq $check_str); $checked_flg_one++ if ($splits[$iFlg] =~ /$check_str/i); } else { # あいまい一致 $checked_flg_one++ if ($splits[$iFlg] =~ /$check_str/i); } } } # and 検索 if ($search_flgs == 1) { $checked_flg *= $checked_flg_one; } # or 検索 elsif ($search_flgs == 0) { $checked_flg += $checked_flg_one; } } # and 検索 if ($search_flgs == 1) { push @fin_res, $one if ($checked_flg> 0); } # or 検索 elsif ($search_flgs == 0) { push @fin_res, $one if ($checked_flg> 1); } } print <<HEAD; <HTML> <HEAD> <TITLE>result</TITLE> <style type="text/css"> <!-- BODY { font-family: "Arial"; } TH { background-color: #DCDCDC; font-size: 9pt; } TD { font-size: 9pt;} .cDNA_style { background-color: white; } .cDNA_KIAA_style { background-color: #EEDDDD; } .Entry_style { background-color: #DDDDEE; } --> </style> </HEAD> <BODY> <table border=1 cellspacing=0 cellpadding=3 bordercolordark="#ffffff" bordercolor="#777777"> HEAD my $tag_num = 0; print "<TR>\n"; foreach my $one (split(/\t/, $PRINT_TAG)) { $tag_num++; print "\t<TH>$one</TH>\n"; } print "</TR>\n"; foreach my $one (@fin_res) { print "<TR>\n"; my @splits = split(/\t/, $one); for (my $i = 0; $i < $tag_num; $i++) { $splits[$i] = "-" if ((!$splits[$i]) or $splits[$i] =~ /^\s+$/); if ($i == 0) { if ($splits[$i] =~ /^cDNA/) { if ($splits[1] eq "KIAA") { print "\t<TD class=\"cDNA_KIAA_style\">"; } else { print "\t<TD class=\"cDNA_style\">"; } } else { print "\t<TD class=\"Entry_style\">"; } } else { print "\t<TD>"; } if ($splits[$i] ne "-" and $i == 2) { # Link accession foreach my $acc ( split(/,/,$splits[$i]) ) { #print "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Nucleotide&cmd=search&term=$splits[$i]\" target=\"_blank\">"; print "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Nucleotide&cmd=search&term=$acc\" target=\"_blank\">$acc</a><br>"; } next; } elsif ($splits[$i] ne "-" and $i == 10) { # Link GeneID print "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=full_report&list_uids=$splits[$i]\" target=\"_blank\">"; } elsif ($i == 18 ) { #if ( $splits[0] eq "Entry clone" ) { # $splits[$i] = "DH10B"; # print "<font color=\"#FF0000\">"; #} if ( $splits[$i] eq "-" ) { #elsif ( $splits[$i] eq "-" ) { if ( $splits[1] eq "FLJ" ) { $splits[$i] = "EK1"; } elsif ( $splits[1] eq "MGC" ) { $splits[$i] = "DH10B"; } elsif ( $splits[1] eq "KIAA" ) { $splits[$i] = "DH5a"; } elsif ( $splits[1] eq "RIKEN" ) { $splits[$i] = "DH10B"; } elsif ( $splits[1] eq "IKAKEN" ) { $splits[$i] = "DH10B"; } print "<font color=\"#FF0000\">"; } } elsif ($splits[$i] ne "-" and $i == 19) { # Link refseq_nuc print "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Nucleotide&cmd=search&term=$splits[$i]\" target=\"_blank\">"; } elsif ($splits[$i] ne "-" and $i == 20) { # Link refseq_pep print "<a href=\"http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Protein&cmd=search&term=$splits[$i]\" target=\"_blank\">"; } print "$splits[$i]</TD>\n"; # print "\t<TD>$splits[$i]</TD>\n"; } print "</TR>\n"; } print <<FOOT; </TABLE> </BODY> </HTML> FOOT } sub getData { my ($cmd) = @_; my $primer3_pid; my ($childin, $childout) = (FileHandle->new, FileHandle->new); { $primer3_pid = open3($childin, $childout, $childout, $cmd); } if (!$primer3_pid) { print "Cannot excecure $cmd:<br>$!"; exit; } return $childout; } sub check2header { my ($str) = @_; if ($str eq "ACCESSION_CHECK") { return 2; } elsif ($str eq "RIKEN_CHECK") { return 5; } elsif ($str eq "TUID_CHECK") { return 7; } elsif ($str eq "RELATED_CHECK") { return 8; } elsif ($str eq "GENE_CHECK") { return 9; } elsif ($str eq "GENEID_CHECK") { return 10; } elsif ($str eq "PROTEIN_CHECK") { return 11; } elsif ($str eq "UNIGENE_CHECK") { return 12; } }