#!/usr/local/bin/perl
use warnings;
use strict;
#use explicit;

#----PARAMETERS:

my $maxIsoptopeRatio = 4; # maximum difference in ratio of isoptope peak intensity;
my $precision = 0.05; # +/- precision in amu
my $IgnoreLowMoverZRange = 0; #for TMT e.g. 140; # no deconv/deisotope m/z range below given amu

#----end of PARAMETERS

my $MGF_OUTFILE_NAME;
my $REP_OUTFILE_NAME;
my $MGF_INFILE_NAME;
my $MASS;
my $TITLE;
my $PEPMASS; 
my $PRECURSOR_INTENSITY=0;
my $CHARGE;
my $RTINSECONDS=0;
my $SCANS=0;
my $NUMBER_MS2SCANS_TOTAL;
my $NUMBER_MS2SCANS_FILTERED;

my @arr1 = ();
my @arr2 = ();
my @v = ();
my $precursor;
my $x;
my $y;
my $i;	

print "---SugarQBits Deconv/Deiso---\n";
print "parameters:\n";
print "mass precision = +/-$precision amu\n";
print "exclude m/z < $IgnoreLowMoverZRange amu\n---   ---\n";

print "INPUT (.mgf) FILE DIRECTORY (full path):>\n";


my $dirname = <>;

chop($dirname);

opendir(DIR, $dirname) or die "Could not open $dirname\n";

my @files = readdir(DIR);

shift @files; #get rid of "."

shift @files; #get rid of ".."

closedir(DIR);

foreach $MGF_INFILE_NAME (@files) {

	$MGF_INFILE_NAME = join ("\\\\", $dirname, $MGF_INFILE_NAME);

		if ($MGF_INFILE_NAME =~ /.MGF/ || $MGF_INFILE_NAME =~ /.mgf/ ){
			
			print "SugarQbits Deconv/Deiso $MGF_INFILE_NAME...";

			open(INFILE,"<$MGF_INFILE_NAME") || die "$MGF_INFILE_NAME not found!\n";

			$MGF_OUTFILE_NAME = ">>".substr ($MGF_INFILE_NAME,0,-4)."_DD.mgf";

			open (OUTFILE, $MGF_OUTFILE_NAME);

			#Start parsing MGF-file:

			while (<INFILE>){#feed in data line until EOF

			chomp;

			@v = split (/=|\s/, $_);
	
			if ($v[0] eq "BEGIN"){#MGF-MS2-spectrum begin
		
				#reset variables and empty arrays of data from previous MS2-spectrum
			
				@arr1 = ();
		
				@arr2 = ();
				
				$NUMBER_MS2SCANS_TOTAL++;
									
			}elsif($v[0] eq "TITLE"){#MGF-MS2-spectrum title
				
				$TITLE = $v[1];	#will truncate msconvert generated "TPP-compatible" spectrum titles 
									
			}elsif ($v[0] eq "PEPMASS"){#MGF precursor ion m/z and its intensity
			
				$PEPMASS = $v[1];
			
				if (scalar @v > 2) {
			
					$PRECURSOR_INTENSITY = $v[2];
		
				}else{ #PEAKS generated mgf files do not contain precursor intensity values
		
					$PRECURSOR_INTENSITY = 0;
				}
				
			}elsif ($v[0] eq "CHARGE"){#MGF precursor charge state
				
				$CHARGE = substr $v[1], 0, 1; #remove + sign from charge.
				
			}elsif ($v[0] eq "RTINSECONDS"){
			
				$RTINSECONDS = $v[1];
			
			}elsif ($v[0] eq "SCANS"){#MGF MS2-spectrum scan number
			
				$SCANS = $v[1]
				
			}elsif ($v[0] eq "END"){
				
				$NUMBER_MS2SCANS_FILTERED++;
		
				$PEPMASS = $PEPMASS * $CHARGE - ($CHARGE - 1 ) * 1.00727; 				
						
				push (@arr1, @arr2); #combine @arr1 @arr2
				
				@arr2 = ();
		
				@arr2 = sort { $a->[0] <=> $b->[0]} @arr1; #sort combined array by MoverZ
				
				@arr1 = ();
								
				print OUTFILE "BEGIN IONS\nTITLE=DD_".$TITLE."\nSCANS=$SCANS\nPEPMASS=$PEPMASS $PRECURSOR_INTENSITY\nRTINSECONDS=$RTINSECONDS\nCHARGE=1+\n"; #charge deconvoluted spectra will be put out as 1+ precursors
				
		
				for ($x = 0 ; $x < @arr2 ; $x++){
			
					if ($arr2[$x][1] > 0) {
				
						for ($y = $x - 1; $y > 0; $y --) {
						
							if (($arr2[$x][0] - $arr2[$y][0]) > 1 + $precision || $arr2[$x][0] < $IgnoreLowMoverZRange) { #@distance > 1 amu, assume unrelated peak; leave intensity as is and quit loop								
								
								$y = 0;

							}elsif (($arr2[$x][0] - $arr2[$y][0]) < 1 + $precision && ($arr2[$x][0] - $arr2[$y][0]) > 1 - $precision && abs($arr2[$y][1])/$arr2[$x][1] < $maxIsoptopeRatio && $arr2[$x][1]/abs($arr2[$y][1]) < $maxIsoptopeRatio){ #assume isotope peak; delete by setting intensity to 0
								
								$arr2[$x][1] = abs($arr2[$x][1])*(-1);
							
					
							
						}	#end of local deisotoping
				
					}					
			
					if ($arr2[$x][1] > 0) {# only print masses with Intensities > 0

						print OUTFILE $arr2[$x][0]." ".$arr2[$x][1]."\n";						
						
					}
				
				}
		
				print OUTFILE "END IONS\n";

					
			}elsif ($v[0] !~ /[a-df-zA-DF-Z]/ && $v[1] !~ /[a-df-zA-DF-Z]/) { #Lines that do not contain letters (but "E" for exponential numbers) are deemed MS2 data pairs
		
				#push @arr1,[$v[0], $v[1]];
		
				if (@arr1 > 1 && $v[0] > $IgnoreLowMoverZRange) {#MoverZ below limit will be ignored for CS detection
			
					for ($x = @arr1-1; $x > 0; $x --){
				
						if (($v[0] - $arr1[$x][0]) > 0.5 + $precision) { #unknown charge-state; assume 1; quit loop
							
														
							push @arr1,[$v[0], $v[1]];
							
							$x = 0;
						
						}elsif (($v[0] - $arr1[$x][0]) < 0.5 + $precision && ($v[0] - $arr1[$x][0]) > 0.5 - $precision && abs($arr1[$x][1]/$v[1]) < $maxIsoptopeRatio && abs($v[1]/$arr1[$x][1]) < $maxIsoptopeRatio){ #assume charge state 2
							
							#deconvolute MoverZ of both peaks and push positive Intentsity values into arr2:								
							
							
							push @arr2,[($arr1[$x][0] * 2 - 1 * 1.00727),abs($arr1[$x][1])];
					
							push @arr2,[($v[0] * 2 - 1 * 1.00727), $v[1]];
					
							#set intensities for peaks in @arr1 to negative value
								
							$arr1[$x][1] = abs($arr1[$x][1]) * (-1);
														
							$v[1] = $v[1] * (-1);
							
							push @arr1,[$v[0], $v[1]];
					
							#quit loop
					
							$x = 0;
					
						}elsif (($v[0] - $arr1[$x][0]) < (1/3) + $precision && ($v[0] - $arr1[$x][0]) > (1/3) - $precision && abs($arr1[$x][1]/$v[1]) < $maxIsoptopeRatio && abs($v[1]/$arr1[$x][1]) < $maxIsoptopeRatio){ #assume charge state 3
						
							#deconvolute MoverZ of both peaks and push into arr2:								
							
														
							push @arr2,[($arr1[$x][0] * 3 - 2 * 1.00727),abs($arr1[$x][1])];
					
							push @arr2,[($v[0] * 3 - 2 * 1.00727), $v[1]];
					
							#set intensities for lower peaks in @arr1 to negative value
								
							$arr1[$x][1] = abs($arr1[$x][1]) * (-1);
														
							$v[1] = $v[1] * (-1);
							
							push @arr1,[$v[0], $v[1]];
							
							#stop loop
					
							$x = 0;
					
						}
							
					}
					
				}else{#not enough peaks in array yet!
					
					push @arr1,[$v[0], $v[1]];
					
				}
	
			}else{
		
				print "dont know what to do with: $v[0]-$v[1]\n";
		
			}	
		
		}

		close OUTFILE;

		close INFILE;

		print "done!\n($NUMBER_MS2SCANS_FILTERED DeconvDeiso/$NUMBER_MS2SCANS_TOTAL total)\n";
	}
}