#!/usr/bin/perl -w # winlock.pl # Copyright 2002-2004 Charlie Kim # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # For a copy of the GNU General Public License, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # This program uses the following modules # Perl/Tk 800.023 Copyright (c) 1995-1996 Nick Ing-Simmons. # http://starbase.neosoft.com/%7Eclaird/comp.lang.perl.tk/ptkFAQ.html # Win32::FileOp 0.11.3 # Module built by : # Jan Krynicky # Bill Luebkert # Mike Blazer # Aldo Calpini # Michael Yamada # GD 1.33 Copyright 1995-2000, Lincoln D. Stein # Perl "Artistic License" # GDGraph 1.33 # Copyright (c) 1999 Martien Verbruggen. All rights reserved. # Statistics-Descriptive 2.4 # Copyright (c) 1997,1998 Colin Kuskie. # Copyright (c) 1998 Andrea Spinelli. # Copyright (c) 1994,1995 Jason Kastner. use strict; use Tk; use Tk::ErrorDialog; use Tk::Scale; use Win32::FileOp; &lockwin; 1; sub lockwin { my $mw = MainWindow->new; $mw->title("LOCK"); $mw->Label(-text=>'Locater of Oligomers by Charlie Kim')->pack(); $mw->Label(-text=>"Copyright 2002-2004 Charlie Kim GNU General Public License\n")->pack(); # MAIN FRAMES # my $file_fr = $mw->Frame(-relief=>'groove', -borderwidth=>2, -label=>'File Input/Output', )->pack(#-fill=>'x', ); my $lvl2fr = $mw->Frame()->pack(); my $maincol1 = $lvl2fr->Frame()->pack(-side=>'left'); my $maincol2 = $lvl2fr->Frame()->pack(-side=>'right'); my $oligo_fr = $maincol1->Frame(-relief=>'groove', -borderwidth=>2, -label=>'Input Oligo Sequences', )->pack(-side=>'top', -fill=>'y', -anchor=>'nw', ); my $raw_fr = $maincol1->Frame(-relief=>'groove', -borderwidth=>2, )->pack(-side=>'top', -fill=>'both', -anchor=>'nw', ); my $analtype_fr = $maincol2->Frame(-relief=>'groove', -borderwidth=>2, -label=>'Type of analysis', )->pack(-side=>'top', -anchor=>'nw', ); my $strand_fr = $maincol2->Frame(-relief=>'groove', -borderwidth=>2, -label=>'Strands to Analyze', )->pack(-side=>'left', -fill=>'both', ); my $bin_fr = $maincol2->Frame(-relief=>'groove', -borderwidth=>2, -label=>'Bins in Histogram', )->pack(-side=>'left', -fill=>'both', ); my $command_fr = $maincol2->Frame(-relief=>'groove', )->pack(-side=>'right', -fill=>'y', ); # LOG FRAME # my $log_f = $mw->Frame(-relief=>'groove', -borderwidth=>2, )->pack(-fill=>'both', -expand=>1, -side=>'bottom', ); my $logwin = $log_f->Scrolled("Text")->pack(-fill=>'both', -expand=>1, ); $logwin->configure(-height=>15, -width=>50, -wrap=>'none', -state=>'disabled', ); # FILE FRAME # my $filec1_fr = $file_fr->Frame()->pack(-side=>'left'); my $filec2_fr = $file_fr->Frame()->pack(-side=>'left'); my $filec3_fr = $file_fr->Frame()->pack(-side=>'left'); my $seqfile = ''; my $outdir = ''; $filec1_fr->Label(-text=>'Load Sequence File', )->pack(-anchor=>'nw', ); $filec1_fr->Label(-text=>'Output Directory', )->pack(-anchor=>'nw', ); $filec2_fr->Entry(-textvariable=> \$seqfile, )->pack(-anchor=>'nw', -pady=>2, ); $filec2_fr->Entry(-textvariable=> \$outdir, )->pack(-anchor=>'nw', -pady=>2, ); $filec3_fr->Button(-text=>'browse', -command=> [\&getfile, \$mw, \$seqfile, \$outdir], -borderwidth=>1, -pady=>0, )->pack(); $filec3_fr->Button(-text=>'browse', -command=> [\&getdir, \$mw, \$outdir], -borderwidth=>1, -pady=>0, )->pack(); # OLIGO FRAME # my $oligocol1 = $oligo_fr->Frame()->pack(-side=>'left'); my $oligocol2 = $oligo_fr->Frame()->pack(-side=>'left'); my $oligocol3 = $oligo_fr->Frame()->pack(-side=>'left'); my ($oligo1, $oligo2, $oligo3, $oligo4, $oligo5, $oligo6); $oligocol2->Entry(-textvariable=> \$oligo1, )->pack(-pady=>4); $oligocol2->Entry(-textvariable=> \$oligo2, )->pack(-pady=>4); $oligocol2->Entry(-textvariable=> \$oligo3, )->pack(-pady=>4); $oligocol2->Entry(-textvariable=> \$oligo4, )->pack(-pady=>4); $oligocol2->Entry(-textvariable=> \$oligo5, )->pack(-pady=>4); $oligocol2->Entry(-textvariable=> \$oligo6, )->pack(-pady=>4); $oligocol3->Label(-text=>'Oligo 1')->pack(-pady=>4); $oligocol3->Label(-text=>'Oligo 2')->pack(-pady=>4); $oligocol3->Label(-text=>'Oligo 3')->pack(-pady=>4); $oligocol3->Label(-text=>'Oligo 4')->pack(-pady=>4); $oligocol3->Label(-text=>'Oligo 5')->pack(-pady=>4); $oligocol3->Label(-text=>'Oligo 6')->pack(-pady=>4); # RAW DATA OUTPUT # my $raw = 'n'; $raw_fr->Checkbutton(-text=>'Create raw output', -variable=> \$raw, -onvalue=>'y', -offvalue=>'n', )->pack(-anchor=>'center'); # ANALYSIS # my $analtype = 'single'; my $outprefix; $analtype_fr->Radiobutton(-text=>'Individual analysis (separate results files for each oligo)', -variable=> \$analtype, -value=>'single', -pady=>0, )->pack(-anchor=>'nw'); $analtype_fr->Radiobutton(-text=>'Combined analysis (single analysis for all selected oligos)', -variable=> \$analtype, -value=>'combo', -pady=>0, )->pack(-anchor=>'nw'); $analtype_fr->Label(-text=>' Prefix for output files')->pack(-side=>'left', -anchor=>'nw'); $analtype_fr->Entry(-textvariable=> \$outprefix, )->pack(-side=>'left', -anchor=>'nw', ); # STRANDS # my $strand = 'both'; $strand_fr->Radiobutton(-text=>'Positive', -variable=> \$strand, -value=>'pos', -pady=>0, )->pack(-anchor=>'nw'); $strand_fr->Radiobutton(-text=>'Negative', -variable=> \$strand, -value=>'neg', -pady=>0, )->pack(-anchor=>'nw'); $strand_fr->Radiobutton(-text=>'Both Strands', -variable=> \$strand, -value=>'both', -pady=>0, )->pack(-anchor=>'nw'); $strand_fr->Radiobutton(-text=>'All Analyses', -variable=> \$strand, -value=>'all', -pady=>0, )->pack(-anchor=>'nw'); # BINS # my $bins = 20; $bin_fr->Scale(-from=>3, -to=>40, -resolution=>1, -variable=> \$bins, )->pack(); # COMMAND FRAME # $command_fr->Button(-text=>'Exit', -command=> sub { exit }, )->pack(-side=>'bottom', -pady=> 5, ); $command_fr->Button(-text=>'Reset Fields')->pack(-side=>'bottom', -pady=> 5, ); $command_fr->Button(-text=>'Run LOCK', -command=> [\&lockmain,\$mw,\$oligo1,\$oligo2,\$oligo3,\$oligo4,\$oligo5,\$oligo6,\$analtype,\$outprefix,\$seqfile,\$outdir,\$bins,\$strand,\$raw,\$logwin], )->pack(-side=>'bottom', -pady=> 5, ); MainLoop; } sub getfile { my ($mwref, $seqfileref, $outdirref) = @_; my $file = OpenDialog(); $$seqfileref = $file; if ($$outdirref eq '') { my $newdir = $file; $newdir =~ /^(.*)\\/; $$outdirref = $1; } $$mwref->raise; } sub getdir { my ($mwref, $outdirref) = @_; my $dir = BrowseForFolder('Choose a directory for all output files: '); $$outdirref = $dir; $$mwref->raise; } sub printlog { my $line = shift; my $logwinref = shift; my $logwin = $$logwinref; $logwin->configure(-state=>'normal'); $logwin->insert('end',$line); $logwin->yview(moveto=>1); $logwin->configure(-state=>'disabled'); $logwin->update(); } sub lockmain { my $mwref = shift; my $oligo1ref = shift; my $oligo2ref = shift; my $oligo3ref = shift; my $oligo4ref = shift; my $oligo5ref = shift; my $oligo6ref = shift; my $analtyperef = shift; my $outprefixref = shift; my $seqfileref = shift; my $outdirref = shift; my $binsref = shift; my $strandref = shift; my $rawref = shift; my $logwinref = shift; if ($$analtyperef eq 'combo' && $$outprefixref eq '') { $$mwref->messageBox(-title => 'Error', -message => 'Please type in an output prefix', -type => 'OK'); return; } if ($$analtyperef eq 'combo') { my @oligos = (); &printlog("Running combo analysis\n",$logwinref); push @oligos, $$oligo1ref if $$oligo1ref; push @oligos, $$oligo2ref if $$oligo2ref; push @oligos, $$oligo3ref if $$oligo3ref; push @oligos, $$oligo4ref if $$oligo4ref; push @oligos, $$oligo5ref if $$oligo5ref; push @oligos, $$oligo6ref if $$oligo6ref; &runlock($mwref,$logwinref,$$analtyperef,$$seqfileref,$$outdirref,$$binsref,$$strandref,$$rawref,$$outprefixref,@oligos); } elsif ($$analtyperef eq 'single') { my @oligos = (); &printlog("Running single analyses\n",$logwinref); push @oligos, $$oligo1ref if $$oligo1ref; push @oligos, $$oligo2ref if $$oligo2ref; push @oligos, $$oligo3ref if $$oligo3ref; push @oligos, $$oligo4ref if $$oligo4ref; push @oligos, $$oligo5ref if $$oligo5ref; push @oligos, $$oligo6ref if $$oligo6ref; foreach (@oligos) { &runlock($mwref,$logwinref,$$analtyperef,$$seqfileref,$$outdirref,$$binsref,$$strandref,$$rawref,$_,$_); } } $$mwref->messageBox(-title => 'Program Finished', -message => 'Processing complete', -type => 'OK'); } sub runlock { my $mwref = shift; my $logwinref = shift; my $analtype = shift; my $seqfile = shift; my $outdir = shift; my $bins = shift; my $strand = shift; my $raw = shift; my $prefix = shift; my @mainoligos = @_; # print "oligos: @mainoligos\n"; my $longestlength = 0; foreach (@mainoligos) { my $length = length($_); $longestlength = $length if $length > $longestlength; } # print "seqfile: $seqfile\n"; open(SEQFILE,$seqfile) or die "Can't open seqfile\n"; my $seq = ''; my $count = 0; &printlog("Reading sequence ...\n",$logwinref); while() { if (/>/) { die "More than one sequence present -- only one sequence allowed" if $count == 1; $count++; next; } chomp($_); $seq .= $_; } my $wrapseq = substr($seq,0,length($longestlength)); $seq .= $wrapseq; # this accounts for the cirularity of the genome # print "seq: $seq\n"; my @revoligos = (); foreach my $entry (@mainoligos) { my $newoligo = &revcom($entry); push @revoligos,$newoligo; } # print "revoligos: @revoligos\n"; if ($strand eq 'pos' || $strand eq 'all') { &printlog("Finding matches on positive strand ...\n",$logwinref); &findoligos($mwref,$logwinref,\$seq,$outdir,$raw,$bins,'pos',$prefix,@mainoligos); # the last argument is used in the # filename prefixes } if ($strand eq 'neg' || $strand eq 'all') { &printlog("Finding matches on negative strand ...\n",$logwinref); &findoligos($mwref,$logwinref,\$seq,$outdir,$raw,$bins,'neg',$prefix,@revoligos); } if ($strand eq 'both' || $strand eq 'all') { &printlog("Finding matches on both strands ... \n",$logwinref); &findoligos($mwref,$logwinref,\$seq,$outdir,$raw,$bins,'both',$prefix,@mainoligos,@revoligos); } } sub findoligos { my $mwref = shift; my $logwinref = shift; my $seqref = shift; my $seq = $$seqref; my $outdir = shift; my $raw = shift; my $bins = shift; my $strand = shift; my $prefix = shift; my @oligos = @_; # print "Entering findoligos sub\n"; my %oligocheck = (); foreach my $entry (@oligos) { &printlog("\tMatching oligo $entry\n",$logwinref); while ($seq =~ /$entry/gi) { $oligocheck{pos($seq)-2} = 1; } } # print " size of oligos: $#oligos\n"; # print " oligo: $oligos[0]\n"; # print "prefix: $prefix\n"; chdir $outdir; while() { last if $raw =~ /^n$/i; my $oligocoord = $prefix . "_$strand" . '_coord.txt'; &open_writefile(*OUTFILE,$mwref,$oligocoord); if (!%oligocheck) { die "No matches found\n"; } &printlog("Generating output ...\n",$logwinref); print OUTFILE "$strand strand, $prefix analysis\n"; print OUTFILE "Raw match coordinates\n"; print OUTFILE "---------------------\n"; foreach my $entry (sort {$a<=>$b} keys %oligocheck ) { print OUTFILE "$entry\n"; } last; } close OUTFILE; my $length = length($seq); ############## # Statistics # ############## use Statistics::Descriptive; my $oligostat = $prefix . "_$strand" . '_stats.txt'; &open_writefile(*STATFILE,$mwref,$oligostat); my @position = sort {$a<=>$b} keys %oligocheck; my @distance = (); my %distancecoordstart = (); my %distancecoordend = (); for (my $k = 0; $k < $#position; $k++) { push @distance, $position[$k+1] - $position[$k]; $distancecoordstart{$position[$k]} = $position[$k+1]-$position[$k]; $distancecoordend{$position[$k]} = $position[$k+1]; } push @distance, $length - $position[$#position] + $position[0]; # circular $distancecoordstart{$position[$#position]} = $length-$position[$#position]+$position[0]; $distancecoordend{$position[$#position]} = $position[0]; while() { last if $raw =~ /^n$/i; my $oligodist = $prefix . "_$strand" . '_dist.txt'; &open_writefile(*DISTANCEFILE,$mwref,$oligodist); print DISTANCEFILE "$strand strand for $prefix analysis\n"; print DISTANCEFILE "The output is generated twice: first organized by\n"; print DISTANCEFILE "length, then organized by position\n"; print DISTANCEFILE "-------------------------------------------------\n\n"; print DISTANCEFILE "Organized by distance length\n"; print DISTANCEFILE "----------------------------\n"; foreach my $entry (sort {$distancecoordstart{$b} <=> $distancecoordstart{$a} } keys %distancecoordstart) { print DISTANCEFILE "From match at \t$entry\t to match at \t$distancecoordend{$entry}\t is \t$distancecoordstart{$entry}\t bp\n"; } print DISTANCEFILE "\nOrganized by position\n"; print DISTANCEFILE "---------------------\n"; foreach my $entry (sort {$a<=>$b} keys %distancecoordstart) { print DISTANCEFILE "From match at \t$entry\t to match at \t$distancecoordend{$entry}\t is \t$distancecoordstart{$entry}\t bp\n"; } close DISTANCEFILE; last; } my $stat = Statistics::Descriptive::Full->new(); $stat->add_data(@distance); my $count = $stat->count(); my $mean = $stat->mean(); my $stdev = $stat->standard_deviation(); my $median = $stat->median(); my $var = $stat->variance(); my $min = $stat->min(); my $max = $stat->max(); my %histo = $stat->frequency_distribution($bins); print STATFILE "Statistics on distance between matches (bp) for oligos\n"; foreach my $entry (@oligos) { print STATFILE "\t$entry\n"; } print STATFILE "$strand strand of $prefix analysis\n"; print STATFILE "sequence of length $length\n"; print STATFILE "------------------------------------------------------\n"; print STATFILE "Count\t$count\nMean\t$mean\nStdDev\t$stdev\n"; print STATFILE "Variance\t$var\nMedian\t$median\n"; print STATFILE "Minimum\t$min\nMaximum\t$max\n\n"; print STATFILE "Frequency Distribution\n"; print STATFILE "----------------------\n"; print STATFILE "\tDistance (bp)\t\tNumber of Matches\n"; my $marker = 0; my @xval = (); my @yval = (); for (sort {$a<=>$b} keys %histo) { print STATFILE "\t$marker to $_\t\t$histo{$_}\n"; push @xval, int($_); push @yval, $histo{$_}; $marker = $_; } close STATFILE; ################################## # Distance Distribution Graphics # ################################## use GD::Graph; use GD::Graph::bars; use GD::Graph::Data; use GD::Graph::colour; my @data = (\@xval,\@yval); my $graph = GD::Graph::bars->new(800,400); $graph->set( x_label => 'Distance (bp)', x_label_position => 1/2, y_label => 'Number of Distances', title => "Distribution of Distances Between Matches: $prefix analysis, $strand strand", fgclr => 'black', ); $graph->set_text_clr('black'); # add values to plot my $datavals = GD::Graph::Data->new(\@data); my $values = $datavals->copy; $graph->set(show_values => $values); my $plot = $graph->plot(\@data); # $graph->plot($data); my $oligodistgraph = $prefix . "_$strand" . '_dist.jpg'; &open_writefile(*DISTRIBUTION,$mwref,$oligodistgraph,); binmode DISTRIBUTION; print DISTRIBUTION $plot->jpeg(100); close DISTRIBUTION; ##################### # Position Graphics # ##################### use GD; my $innerradius = 150; my $outerradius = 200; my $im = new GD::Image($outerradius*2+1, $outerradius*2+1); my $white = $im->colorAllocate(255,255,255); my $black = $im->colorAllocate(0,0,0); my $red = $im->colorAllocate(255,0,0); $im->transparent($white); $im->interlaced('true'); # Draw Marks # foreach my $entry (keys %oligocheck) { my $radians = 2*3.14159265358979*($entry/$length); my $xin = int($outerradius+1 + (sin($radians) * $innerradius) ); my $yin = int($outerradius+1 - (cos($radians) * $innerradius) ); my $xout = int($outerradius+1 + (sin($radians) * ($outerradius-2) ) ); my $yout = int($outerradius+1 - (cos($radians) * ($outerradius-2) ) ); $im->line($xin,$yin,$xout,$yout,$red); } $im->arc($outerradius+1, $outerradius+1, $innerradius*2, $innerradius*2, 0, 360, $black); $im->arc($outerradius+1, $outerradius+1, $outerradius*2, $outerradius*2, 0, 360, $black); # Draw Minute Marks # my $fwidth; my $font; my $fheight; for (my $l = 0; $l <= 99; $l++) { if ($l % 10 == 0) { my $radians = 2*3.14159265358979*($l/100); my $x1 = int($outerradius+1 + (sin($radians) * ($innerradius-1) ) ); my $y1 = int($outerradius+1 - (cos($radians) * ($innerradius-1) ) ); my $x2 = int($outerradius+1 + (sin($radians) * ($innerradius*0.75) ) ); my $y2 = int($outerradius+1 - (cos($radians) * ($innerradius*0.75) ) ); $im->line($x1,$y1,$x2,$y2,$black); $font = GD::Font->MediumBold; $fwidth = $font->width; $fwidth *= length($l); $fheight = $font->height; my $fontx = int($outerradius+1 + (sin($radians) * ($innerradius*0.65) ) ); my $fonty = int($outerradius+1 - (cos($radians) * ($innerradius*0.65) ) ); $im->string($font,$fontx-($fwidth/3),$fonty-($fheight/3),$l,$black); next; } elsif ($l % 5 == 0) { my $radians = 2*3.14159265358979*($l/100); my $x1 = int($outerradius+1 + (sin($radians) * ($innerradius-1) ) ); my $y1 = int($outerradius+1 - (cos($radians) * ($innerradius-1) ) ); my $x2 = int($outerradius+1 + (sin($radians) * ($innerradius*0.85) ) ); my $y2 = int($outerradius+1 - (cos($radians) * ($innerradius*0.85) ) ); $im->line($x1,$y1,$x2,$y2,$black); next; } else { my $radians = 2*3.14159265358979*($l/100); my $x1 = int($outerradius+1 + (sin($radians) * ($innerradius-1) ) ); my $y1 = int($outerradius+1 - (cos($radians) * ($innerradius-1) ) ); my $x2 = int($outerradius+1 + (sin($radians) * ($innerradius*0.95) ) ); my $y2 = int($outerradius+1 - (cos($radians) * ($innerradius*0.95) ) ); $im->line($x1,$y1,$x2,$y2,$black); next; } } my $centerx1 = $outerradius+1 - ((length($length)+3)*$fwidth)/4; my $centerx2 = $outerradius+1 - ((length($prefix)+9)*$fwidth)/4; my $centerx3 = $outerradius+1 - ((length($strand)+7)*$fwidth)/4; $im->string($font,$centerx1,$outerradius+1-($fheight),"$length bp",$black); $im->string($font,$centerx2,$outerradius+1,"$prefix analysis",$black); $im->string($font,$centerx3,$outerradius+1+($fheight),"$strand strand",$black); my $oligocoordgraph = $prefix . "_$strand" . '_coord.jpg'; &open_writefile(*GRAPHICFILE,$mwref,$oligocoordgraph); binmode GRAPHICFILE; print GRAPHICFILE $im->jpeg(100); close GRAPHICFILE; } sub revcom { my $oligo = $_[0]; $oligo =~ tr/gatcGATC/ctagCTAG/; my $rcoligo = reverse($oligo); return $rcoligo; } sub open_writefile { my $filehandle = shift; my $mwref = shift; my $filename = shift; # print "Filename: $filename\n"; if (-e $filename) { # print "File exists: $filename\n"; my $response = $$mwref->messageBox(-title => 'File exists', -message => "Overwrite existing file $filename?", -type => 'YesNo', ); if ($response =~ /no/i) { die "$filename not overwritten.\n"; } else { open($filehandle, ">$filename") or die "Couldn't open $filename\n"; } } else { open($filehandle, ">$filename") or die "Couldn't open $filename\n"; } }