#!/usr/bin/perl -w # samster2.pl # Copyright 2002-2005 Charlie Kim # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # For a copy of the GNU General Public License, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # This program uses the following modules # Perl/Tk 800.023 Copyright (c) 1995-1996 Nick Ing-Simmons. # http://starbase.neosoft.com/%7Eclaird/comp.lang.perl.tk/ptkFAQ.html # Spreadsheet-ParseExcel-Simple 0.02 Copyright (C) 2001 Tony Bowden. # Revision 1.4 to 1.5 Feb 2004 # Updated empty column problem to accommodate stricter Cluster 3 # data input requirement # Revision 2.0 June 2005 # Updated to work with SAM 2.0 use strict; use Spreadsheet::ParseExcel::Simple; use Tk; use Tk::LabEntry; use Tk::LabFrame; use Tk::ErrorDialog; use Tk::NoteBook; &maingui; sub maingui { my $xlsfile; my $xlsposout = 1; my $xlsnegout = 1; my $mw = MainWindow->new(-title=>'SAMster 2.0'); $mw->Label(-text=>"Copyright 2002-2005 Charlie Kim GNU General Public License\n")->pack(); my $nb = $mw->NoteBook()->pack(-expand=>1, -fill=>'both', ); my $xls_p = $nb->add('excel', -label => 'Analyze Excel File'); my $txt_p = $nb->add('text', -label => 'Analyze Text Files'); # $xls_p = MainWindow->new(-title=>'SAMster'); my $xlsinputfr = $xls_p->LabFrame(-relief=>'groove', -label=>'Load File', -labelside=>'acrosstop', )->pack(-side=>'top', -anchor=>'nw', -fill=>'both', ); my $xlsfile_e = $xlsinputfr->LabEntry(-label=>'SAM-Analyzed Excel File', -labelPack=> [qw/-side left/], -background=> 'white', -textvariable=> \$xlsfile, -width=>30, )->pack(-side=>'left', -pady=>3, ); $xlsinputfr->Button(-text=>'Browse', -command=> [ \&selectsinglefile , \$xls_p, \$xlsfile_e ], -borderwidth=>1, -padx=> 0, -pady=> 0, )->pack(-side=>'left', ); # SAM Output options my $xlsopt = $xls_p->LabFrame(-label=>'Options', -labelside=>'acrosstop', )->pack(-anchor=>'nw'); $xlsopt->Checkbutton(-text=> 'Retrieve Positive Genes', -variable=> \$xlsposout, )->pack(-anchor=>'nw'); $xlsopt->Checkbutton(-text=> 'Retrieve Negative Genes', -variable=> \$xlsnegout, )->pack(-anchor=>'nw'); # Command Buttons my $xlsmw12 = $xls_p->LabFrame(-label=>'Commands', -labelside=>'acrosstop', )->pack(-anchor=>'nw',); $xlsmw12->Button(-text=>'Run SAMster', -command=> [\&samster_xls,\$xls_p,\$xlsfile,\$xlsposout,\$xlsnegout], -state=> 'normal', )->grid(-row=>12, -column=>0, -padx=>20, -pady=>10, ); $xlsmw12->Button(-text=>"Exit", -command=> sub { exit }, )->grid(-row=>12, -column=>2, -padx=>20, ); my $samfile; my $txtposout = 1; my $txtnegout = 1; my $txtinputfr = $txt_p->LabFrame(-relief=>'groove', -label=>'Load Files', -labelside=>'acrosstop', )->pack(-side=>'top', -anchor=>'nw', -fill=>'both', ); my $samfile_e = $txtinputfr->LabEntry(-label=>'SAM-Analyzed Output File', -labelPack=> [qw/-side left/], -background=> 'white', -textvariable=> \$samfile, -width=>30, )->grid(-row=>0, -column=>0, -sticky=>'ne', ); $txtinputfr->Button(-text=>'Browse', -command=> [ \&selectsinglefile , \$txt_p, \$samfile_e ], -borderwidth=>1, -padx=> 0, -pady=> 0, )->grid(-row=>0, -column=>1, ); my $rawfile; my $rawfile_e = $txtinputfr->LabEntry(-label=>'Raw Ratios Text File', -labelPack=> [qw/-side left/], -background=> 'white', -textvariable=> \$rawfile, -width=>30, )->grid(-row=>1, -column=>0, -sticky=>'ne', ); $txtinputfr->Button(-text=>'Browse', -command=> [ \&selectsinglefile , \$txt_p, \$rawfile_e ], -borderwidth=>1, -padx=> 0, -pady=> 0, )->grid(-row=>1, -column=>1, ); # SAM Output options my $txtopt = $txt_p->LabFrame(-label=>'Options', -labelside=>'acrosstop', )->pack(-anchor=>'nw'); $txtopt->Checkbutton(-text=> 'Retrieve Positive Genes', -variable=> \$txtposout, )->pack(-anchor=>'nw'); $txtopt->Checkbutton(-text=> 'Retrieve Negative Genes', -variable=> \$txtnegout, )->pack(-anchor=>'nw'); # Command Buttons my $txtmw12 = $txt_p->LabFrame(-label=>'Commands', -labelside=>'acrosstop', )->pack(-anchor=>'nw',); $txtmw12->Button(-text=>'Run SAMster', -command=> [\&samster_txt,\$txt_p,\$samfile,\$rawfile,\$txtposout,\$txtnegout], -state=> 'normal', )->grid(-row=>12, -column=>0, -padx=>20, -pady=>10, ); $txtmw12->Button(-text=>"Exit", -command=> sub { exit }, )->grid(-row=>12, -column=>2, -padx=>20, ); MainLoop; } sub samster_xls { my $mwref = shift; my $mw = $$mwref; my $xlsfileref = shift; my $xlsfile = $$xlsfileref; my $posoutref = shift; my $posout = $$posoutref; my $negoutref = shift; my $negout = $$negoutref; my $skipcount = 0; my %outids; my @orderids; my $xls = Spreadsheet::ParseExcel::Simple->read($xlsfile); # get "SAM Output" sheet & read ID's foreach my $sheet ($xls->sheets) { next unless $sheet->{'sheet'}{'Name'} eq 'SAM Output'; my $posflag = 0; my $negflag = 0; while ($sheet->has_data) { my @data = $sheet->next_row; # print "$data[0]\n"; if ($data[0] && $data[0] =~ /Positive genes/) { $posflag = 1; $negflag = 0; # print "\tFound pos\n"; next; } if ($data[0] && $data[0] =~ /Negative genes/) { $posflag = 0; $negflag = 1; # print "\tFound neg\n"; next; } next unless ($posflag + $negflag > 0); next unless $data[2]; last if $data[2] eq 'Quantiles'; next if $data[2] eq 'Gene ID'; if ($posflag && $posout) { next if !$data[2]; # print "\t$data[2]\n"; $outids{$data[2]} = 1; push @orderids, $data[2]; } if ($negflag && $negout) { next if !$data[2]; # print "\t$data[2]\n"; $outids{$data[2]} = 1; push @orderids, $data[2]; } } } # get raw data from datasheet, write to output file my $datawidth = 0; foreach my $sheet ($xls->sheets) { next if $sheet->{'sheet'}{'Name'} =~ /Sam Imputed Dataset/i; next if $sheet->{'sheet'}{'Name'} =~ /SAM Plot/i; next if $sheet->{'sheet'}{'Name'} =~ /SAM Output/i; next if $sheet->{'sheet'}{'Name'} =~ /SAM Work \(Do not edit\!\)/i; next unless $sheet->has_data; my %datalines; my $defaultout = $xlsfile; $defaultout =~ s/\.\w{3}$/\./; $defaultout =~ s/\//\\/g; $defaultout .= 'txt'; my $outfile = $mw->getSaveFile(-initialfile=> $defaultout, -defaultextension=> '.txt', ); open(OUTFILE,">$outfile") or die "Couldn't open output file\n"; my @data = $sheet->next_row; $datawidth = $#data if ($#data > $datawidth); my $header; $header .= $data[0] if $data[0]; $header .= 'UNIQID' if !$data[0]; for (my $c = 1; $c <= $#data; $c++) { $data[$c] = 'NAME' if ($c == 1 && !$data[$c]); $header .= "\t$data[$c]"; } $header =~ /\w(\t+)$/; my $deletetabs = $1; if ($deletetabs) { $header =~ s/$deletetabs//; } print OUTFILE "$header\n"; while ($sheet->has_data) { my @data = $sheet->next_row; $datawidth = $#data if ($#data > $datawidth); next if !$data[0]; print "$data[0]\n"; if ($outids{$data[0]}) { $datalines{$data[0]} = $data[0]; for (my $c = 1; $c <= $#data; $c++) { $datalines{$data[0]} .= "\t$data[$c]"; } # delete lots of extra tabs if ($deletetabs) { $datalines{$data[0]} =~ s/$deletetabs//; } # add in tabs to normalize column width for (my $d = $#data; $d < $datawidth; $d++) { $datalines{$data[0]} .= "\t"; } } } foreach (@orderids) { if (!$datalines{$_}) { $skipcount++; next; } print OUTFILE "$datalines{$_}\n"; } } close OUTFILE; $mw->bell; if ($skipcount) { $mw->messageBox(-title=>'Warning', -message=> "$skipcount ID's could not be matched and were excluded from the output", -type=>'OK'); } $mw->messageBox(-title=>'Finished', -message=>'Program Complete', -type=>'OK'); } sub selectsinglefile { my $mainwindowref = shift; my $mainwindow = $$mainwindowref; my $entryref = shift; my $entry = $$entryref; my $opentypes = [ "{All files} * ", "{PCL files} {.pcl} ", "{CDT files} {.cdt} ", "{Text files} {.txt} ", ]; my $file = $mainwindow->getOpenFile(-filetypes=>$opentypes); if (defined $file and $file ne '') { $entry->delete(0, 'end'); $entry->insert(0, $file); $entry->xview('end'); } } sub samster_txt { my $mwref = shift; my $mw = $$mwref; my $samfileref = shift; my $samfile = $$samfileref; my $rawfileref = shift; my $rawfile = $$rawfileref; my $posoutref = shift; my $posout = $$posoutref; my $negoutref = shift; my $negout = $$negoutref; my $skipcount = 0; my %outids; my @orderids; # get "SAM Output" sheet & read ID's my $posflag = 0; my $negflag = 0; open(SAMFILE,$samfile) or die "SAM output file couldn't be opened\n"; while () { chomp; my @data = split(/\t/); if ($data[0]) { if ($data[0] =~ /Positive genes/) { $posflag = 1; $negflag = 0; next; } if ($data[0] =~ /Negative genes/) { $posflag = 0; $negflag = 1; next; } } next unless ($posflag + $negflag > 0); next unless $data[2]; last if $data[2] eq 'Quantiles'; next if $data[2] eq 'Gene ID'; if ($posflag && $posout) { $outids{$data[2]} = 1; push @orderids, $data[2]; } if ($negflag && $negout) { $outids{$data[2]} = 1; push @orderids, $data[2]; } } # get raw data from datasheet, write to output file open(RAWFILE,$rawfile) or die "Couldn't open raw data file\n"; my $defaultout = $rawfile; $defaultout =~ s/\.\w{3}$/\./; $defaultout =~ s/\//\\/g; $defaultout .= 'txt'; my $outfile = $mw->getSaveFile(-initialfile=> $defaultout, -defaultextension=> '.txt', ); open(OUTFILE,">$outfile") or die "Couldn't open output file\n"; chomp($_=); my @data = split(/\t/); my $header; $header .= $data[0] if $data[0]; $header .= 'UNIQID' if !$data[0]; for (my $c = 1; $c <= $#data; $c++) { $data[$c] = 'NAME' if ($c == 1 && !$data[$c]); $header .= "\t$data[$c]"; } $header =~ /\w(\t+)$/; my $deletetabs = $1; if ($deletetabs) { $header =~ s/$deletetabs//; } print OUTFILE "$header\n"; my %datalines; my $datawidth = 0; while() { chomp; my @data = split(/\t/); $datawidth = $#data if ($#data > $datawidth); if ($outids{$data[0]}) { $datalines{$data[0]} = $data[0]; for (my $c = 1; $c <= $#data; $c++) { $datalines{$data[0]} .= "\t$data[$c]"; } if ($deletetabs) { $datalines{$data[0]} =~ s/$deletetabs//; } for (my $d = $#data; $d < $datawidth; $d++) { $datalines{$data[0]} .= "\t"; } } } foreach (@orderids) { if (!$datalines{$_}) { $skipcount++; next; } print OUTFILE "$datalines{$_}\n"; } close OUTFILE; $mw->bell; if ($skipcount) { $mw->messageBox(-title=>'Warning', -message=> "$skipcount ID's could not be matched and were excluded from the output", -type=>'OK'); } $mw->messageBox(-title=>'Finished', -message=>'Program Complete', -type=>'OK'); }