#!/usr/local/bin/perl
use warnings;
use strict;
#use explicit;

#----PARAMETERS:

# Reporter-ion m/z list. Exactly ONE of the alternatives below may be active:
# declaring @unsortedRepIons twice in the same scope makes perl warn that the
# second "my" masks the first, so the unused alternatives stay commented out.

# histogram
#my @unsortedRepIons = ();
# GlycoDB as released and reduced glycan masses:
#my @unsortedRepIons = (); #for testing
#TMTglycoproteomics:
my @unsortedRepIons = (
	218.0657,
	126.12772, 127.124761, 127.131081, 128.128116, 128.134436, 129.131471,
	129.137790, 130.134825, 130.141145, 131.138180, 131.144500, 132.141535,
	132.147855, 133.144890, 133.151210, 134.148245, 134.154565, 135.151600,
	126.054955, 127.038970, 128.054955, 138.054955, 144.065519, 145.049535,
	163.0601, 168.065519, 186.076084, 204.086649, 243.02643, 274.092128,
	290.08702, 292.102693, 308.09757, 323.224, 366.139472, 673.229772,
	657.234895,
	369.1420, 351.1314, 531.1943, 184.0732, #the last 4 are empirical oxonium ions of PC-decorated glycans
);
my $precision = 10; # +/- precision in ppm, default value: 10
# no deconv or deisotope m/z range below given amu
# NOTE(review): in the code visible here this value is only echoed in the
# start-up banner; confirm whether it is meant to affect processing.
my $IgnoreLowMoverZRange = 0;


#----end of PARAMETERS

# File-level working variables. Everything starts out undefined/empty except
# $RTINSECONDS and $SCANS, which start at 0.

# Output file names and the name of the input file.
my ($MGF_OUTFILE_NAME, $REP_OUTFILE_NAME, $MGF_INFILE_NAME);

# Scalars named after MGF spectrum header fields.
my ($MASS, $TITLE, $PEPMASS, $PRECURSOR_INTENSITY, $CHARGE);
my ($RTINSECONDS, $SCANS) = (0, 0);

# Spectrum counters.
my ($NUMBER_MS2SCANS_TOTAL, $NUMBER_MS2SCANS_FILTERED);

# Scratch arrays and scalars.
my (@arr1, @arr2, @v);
my ($precursor, $x, $y, $i);

# Reporter-ion list in ascending numeric (m/z) order.
my @RepIons = sort { $a <=> $b } @unsortedRepIons;

# Start-up banner: echo the active parameters, then the sorted reporter-ion
# list with one m/z value per line.
print "---SugarQBits RepX---\n",
      "parameters:\n",
      "mass precision = +/-$precision ppm\n",
      "lower mass exemption < $IgnoreLowMoverZRange amu\n",
      "Reporter Ions:\n";

print "$_\n" for @RepIons;

print "---   ---\n";

# Ask for the directory that holds the .mgf files and list its entries.
# Leaves the directory path in $dirname and every entry name except "." and
# ".." in @files. Dies when no input line is available or the directory
# cannot be opened.
print "INPUT (.mgf) FILE DIRECTORY (full path):>\n";

my $dirname = <>;

die "No input directory given\n" unless defined $dirname;

# Strip only the line terminator. chop() would delete the last character of
# the path itself whenever the input does not end in a newline.
$dirname =~ s/[\r\n]+\z//;

opendir(my $dir_handle, $dirname) or die "Could not open $dirname: $!\n";

# readdir() returns entries in no particular order, so "." and ".." are
# removed by name; shifting off the first two entries could drop real files.
my @files = grep { $_ ne '.' && $_ ne '..' } readdir($dir_handle);

closedir($dir_handle);

# Main processing loop. For every .mgf file in $dirname:
#   * each MS2 spectrum is copied to "<name>_RepX.mgf" with every peak that
#     falls inside a reporter-ion window removed, and
#   * the summed reporter-ion intensities of each spectrum are written as one
#     row of "<name>_RepIntensities.csv".
# Reads @files, $dirname, @RepIons and $precision; all other state is local.
require File::Spec;	# core module, loaded here for the portable path join below

# The +/- ppm acceptance window [low, high] of every reporter ion, in the same
# ascending order as @RepIons. Computed once instead of once per peak.
my @rep_windows = map {
	my $tolerance = ($_ / 1000000) * $precision;
	[ $_ - $tolerance, $_ + $tolerance ];
} @RepIons;

# A peak at or above the upper edge of the highest window cannot match any
# reporter ion. Undefined when no reporter ions are configured, in which case
# every peak is passed through untouched.
my $highest_window_edge = @rep_windows ? $rep_windows[-1][1] : undef;

# Header values a spectrum has until its own header lines override them.
# They are re-applied at every "BEGIN IONS" so that a spectrum lacking e.g. a
# CHARGE line does not inherit the value of the previous spectrum.
my %blank_spectrum = (
	title               => '',
	scans               => 0,
	pepmass             => '',
	precursor_intensity => 0,
	charge              => undef,
	rtinseconds         => 0,
);

for my $entry (@files) {

	# Only names ending in ".mgf" (any letter case) are input spectra; the
	# output names below are built by cutting off exactly that extension.
	next unless $entry =~ /\.mgf\z/i;

	# Do not re-process the "_RepX.mgf" files this script writes into the
	# same directory.
	next if $entry =~ /_RepX\.mgf\z/;

	my $mgf_path = File::Spec->catfile($dirname, $entry);
	my $stem     = substr($mgf_path, 0, -4);	# path without ".mgf"

	print "SugarQbits RepX $mgf_path...";

	open(my $mgf_in, '<', $mgf_path) or die "$mgf_path not found! ($!)\n";

	# Both outputs are opened in append mode: running the script again on the
	# same input adds to existing output files instead of replacing them.
	my $mgf_out_path = $stem . "_RepX.mgf";
	my $csv_out_path = $stem . "_RepIntensities.csv";

	open(my $mgf_out, '>>', $mgf_out_path) or die "Could not open $mgf_out_path for writing: $!\n";
	open(my $csv_out, '>>', $csv_out_path) or die "Could not open $csv_out_path for writing: $!\n";

	my $total_spectra   = 0;
	my $written_spectra = 0;

	#---write header for csv file
	print {$csv_out} join(',', 'MS2 Spectrum Title', 'Rtime', @RepIons), "\n";

	my %spectrum = %blank_spectrum;		# header fields of the current spectrum
	my @peaks    = ();			# [m/z, intensity] pairs of the current spectrum
	my @rep_sums = (0) x @RepIons;		# summed intensity per reporter ion

	while (my $line = <$mgf_in>) {

		chomp $line;

		my @v = split(/=|\s/, $line);

		# Blank (or whitespace-only) lines carry no information.
		next unless @v;

		my $key   = $v[0];
		my $value = $v[1];	# undefined when the line has a single token

		if ($key eq "BEGIN") {

			# Start of a spectrum: forget everything from the previous one.
			%spectrum = %blank_spectrum;
			@peaks    = ();
			@rep_sums = (0) x @RepIons;

			$total_spectra++;

		} elsif ($key eq "TITLE") {	#MGF-MS2-spectrum title

			#DISCLAIMER: will truncate msconvert generated "TPP-compatible" spectrum titles, removing all space separated content...
			$spectrum{title} = $value if defined $value;

		} elsif ($key eq "PEPMASS") {	#MGF precursor ion m/z and its intensity

			$spectrum{pepmass} = $value if defined $value;

			#PEAKS generated mgf files do not contain precursor intensity values
			$spectrum{precursor_intensity} = @v > 2 ? $v[2] : 0;

		} elsif ($key eq "CHARGE") {	#MGF precursor charge state

			# Keep all digits of the charge and drop the sign; taking only the
			# first character would turn "12+" into 1.
			if (defined $value && $value =~ /(\d+)/) {
				$spectrum{charge} = $1;
			}

		} elsif ($key eq "RTINSECONDS") {	#MGFclassics RetentionTimeINSECONDS

			$spectrum{rtinseconds} = $value if defined $value;

		} elsif ($key eq "SCANS") {	#MGF MS2-spectrum scan number

			$spectrum{scans} = $value if defined $value;

		} elsif ($key eq "END") {

			$written_spectra++;

			print {$mgf_out} "BEGIN IONS\n";
			print {$mgf_out} "TITLE=SQbRepX_$spectrum{title}\n";
			print {$mgf_out} "SCANS=$spectrum{scans}\n";
			print {$mgf_out} "PEPMASS=$spectrum{pepmass} $spectrum{precursor_intensity}\n";

			# A spectrum without a usable CHARGE line gets no CHARGE line.
			print {$mgf_out} "CHARGE=$spectrum{charge}+\n" if defined $spectrum{charge};

			for my $peak (@peaks) {
				my ($mz, $intensity) = @$peak;

				#only print value pairs with intensity > 0
				print {$mgf_out} "$mz $intensity\n" if $intensity > 0;
			}

			print {$mgf_out} "END IONS\n";

			#--------DO ALL RepIntensities-file PROCESSING FROM HERE!
			print {$csv_out} join(',', "TITLE=SQbRepX_$spectrum{title}", $spectrum{rtinseconds}, @rep_sums), "\n";

		} elsif (defined $value && $key !~ /[a-df-zA-DF-Z]/ && $value !~ /[a-df-zA-DF-Z]/) {

			#Lines that do not contain letters (but "E" for exponential numbers) are deemed MS2 data pairs
			my ($mz, $intensity) = ($key, $value);

			if (defined $highest_window_edge && $mz < $highest_window_edge) {

				for my $idx (0 .. $#rep_windows) {

					my ($low, $high) = @{ $rep_windows[$idx] };

					next unless $mz > $low && $mz < $high;

					#sum all peaks in window
					if ($rep_sums[$idx] != 0) {
						$rep_sums[$idx] += $intensity;
					} else {
						# First peak in this window: store the value exactly
						# as it appears in the input so it is written to the
						# CSV without numeric reformatting.
						$rep_sums[$idx] = $intensity;
					}

					#remove RepIons from MS2 spectrum by setting Intensity value to 0
					$intensity = 0;
				}
			}

			push @peaks, [$mz, $intensity];

		} else {

			my $second = defined $value ? $value : '';

			print "dont know what to do with: $key-$second\n";

		}

	}

	# Buffered write errors only surface at close, so check it.
	close($csv_out) or die "Could not close $csv_out_path: $!\n";
	close($mgf_out) or die "Could not close $mgf_out_path: $!\n";
	close($mgf_in);

	print "done!\n($written_spectra RepXFiltered/$total_spectra total)\n";

}