#!/usr/local/bin/perl
use strict;
use warnings;
#use explicit;

use File::Spec;
use Scalar::Util qw(looks_like_number);

#Simple converter that generates a pseudo-.mgf file from each .ms1 file.
#PEPMASS is set to the BPM value of the scan.
#CHARGE is always set to 1+.


#----PARAMETERS:

my $precision            = 0.05;    # mass precision, +/- amu (echoed in the start-up banner)
my $IgnoreLowMoverZRange = 0;       # amu limit below which no deconvolution/deisotoping is meant to happen

#----end of PARAMETERS

# File-name holders.
my ($MGF_OUTFILE_NAME, $REP_OUTFILE_NAME, $MGF_INFILE_NAME, $ORIGINAL_FILE_NAME);

# Values describing a single spectrum.
my ($MASS, $TITLE, $PEPMASS, $PRECURSOR_INTENSITY, $CHARGE, $RTINSECONDS);
my ($BPI, $BPM, $TIC, $HEADER);

# Scan number; starts at 1.
my $SCANS = 1;

# Per-file scan counters.
my ($NUMBER_MS2SCANS_TOTAL, $NUMBER_MS2SCANS_FILTERED);

# Working storage: peak lists and the fields of the current input line.
my (@arr1, @arr2, @v);

# Scratch scalars.
my ($precursor, $x, $y, $i);


# Print the banner and the active parameters, then ask for the directory that
# holds the .ms1 files and collect the names of its entries in @files.
print "---ms1 to mgf conversion---\n";
print "parameters:\n";
print "mass precision = +/-$precision amu\n";

print "INPUT (.ms1) FILE DIRECTORY (full path):>\n";

my $dirname = <>;

# End of input before any line was read: there is nothing to work on.
die "No input directory given\n" unless defined $dirname;

# Remove only the line terminator (LF or CRLF). A blind chop() would also eat
# the last real character when the input does not end with a newline.
$dirname =~ s/[\r\n]+\z//;

opendir(my $dir_handle, $dirname) or die "Could not open $dirname: $!\n";

# readdir() returns entries in no guaranteed order, so "." and ".." are not
# necessarily the first two (or present at all); filter them out by name
# instead of discarding the first two entries.
my @files = grep { $_ ne "." && $_ ne ".." } readdir($dir_handle);

closedir($dir_handle);

# Maps the keyword of an "I" (per-scan header) line to the variable that
# receives its value.
my %header_target = (
	RTime => \$RTINSECONDS,
	BPI   => \$BPI,
	BPM   => \$BPM,
	TIC   => \$TIC,
);

# Convert every .ms1 file listed in @files into "<name>_ms1.mgf" next to it.
# Each scan of the input becomes one BEGIN IONS/END IONS record whose PEPMASS
# is the scan's BPM value and whose CHARGE is fixed to 1+.
foreach $MGF_INFILE_NAME (@files) {

	# Only names that END in ".ms1" (any case) are inputs. The anchored,
	# escaped pattern keeps previously written "*_ms1.mgf" files from being
	# picked up again, and guarantees the extension is exactly 4 characters.
	next unless $MGF_INFILE_NAME =~ /\.ms1\z/i;

	$MGF_INFILE_NAME = File::Spec->catfile($dirname, $MGF_INFILE_NAME);

	print ".ms1 to pseudo.mgf conversion $MGF_INFILE_NAME...";

	open(my $in_fh, '<', $MGF_INFILE_NAME)
		or die "$MGF_INFILE_NAME not found! ($!)\n";

	$NUMBER_MS2SCANS_TOTAL = 0; #reset counter

	$NUMBER_MS2SCANS_FILTERED = 0; #reset counter

	$MGF_OUTFILE_NAME = substr($MGF_INFILE_NAME, 0, -4) . "_ms1.mgf";

	# Append mode is kept from the original: converting the same file twice
	# adds the spectra to the existing output instead of replacing it.
	open(my $out_fh, '>>', $MGF_OUTFILE_NAME)
		or die "Could not open $MGF_OUTFILE_NAME for writing: $!\n";

	# Start every file with a clean slate so nothing leaks in from the
	# previously converted file.
	@arr1 = ();
	@arr2 = ();
	($TITLE, $BPI, $BPM, $TIC, $RTINSECONDS) = ('') x 5;
	my $scan_open = 0;    # true once an "S" line has been seen in this file

	# Writes the scan collected so far (if any) as one MGF record.
	my $write_scan = sub {

		return unless $scan_open;

		my $title = $TITLE . ":BPI~" . $BPI . ":TIC~" . $TIC . ":SCANS~" . $SCANS;

		# NOTE(review): the RTime value is copied unchanged into RTINSECONDS;
		# confirm that the input really stores it in seconds.
		print {$out_fh} "BEGIN IONS\nTITLE=pseudoMGF_" . $title
			. "\nSCANS=$SCANS\nPEPMASS=$BPM\nRTINSECONDS=$RTINSECONDS\nCHARGE=1+\n"; #precursor mass is set to BPM

		foreach my $peak (@arr1) {

			#only print value pairs with intensity > 0
			print {$out_fh} "$peak->[0] $peak->[1]\n" if $peak->[1] > 0;

		}

		print {$out_fh} "END IONS\n";

		$NUMBER_MS2SCANS_TOTAL++;

	};

	while (my $line = <$in_fh>) {

		# split ' ' drops leading whitespace, treats runs of blanks/tabs as a
		# single separator and also swallows the line terminator (LF or CRLF).
		@v = split(' ', $line);

		next unless @v;    # blank line

		if ($v[0] eq "H") { #global file information on top of ms1 file

			if (defined $v[1] && $v[1] eq "Source") {

				# NOTE(review): this stores the keyword itself ("Source"),
				# not the file name that follows it; the variable is not
				# used anywhere in this script, so it is left as it was.
				$ORIGINAL_FILE_NAME = $v[1];

			}

		}
		elsif ($v[0] eq "I") { #per-scan header information line

			$HEADER = defined $v[1] ? $v[1] : '';

			if ($HEADER eq "NativeID") {

				# The whole line, fields joined with "_", becomes the title.
				$TITLE = join("_", @v);

			}
			elsif (exists $header_target{$HEADER}) {

				${ $header_target{$HEADER} } = defined $v[2] ? $v[2] : '';

			}

		}
		elsif ($v[0] eq "S") { #next scan starts; finish extraction of previous scan...

			$write_scan->();

			#reset variables and empty arrays of data for next spectrum
			@arr1 = ();
			@arr2 = ();
			($TITLE, $BPI, $BPM, $TIC, $RTINSECONDS) = ('') x 5;

			# From here on $SCANS is the number of the scan being collected,
			# so the title and the SCANS= line of its record agree.
			$SCANS = defined $v[1] ? $v[1] : '';

			$scan_open = 1;

		}
		elsif (@v >= 2 && looks_like_number($v[0]) && looks_like_number($v[1])) { #two numeric fields are deemed an MS data pair

			push @arr1, [ sprintf("%.6f", $v[0]), sprintf("%.6f", $v[1]) ];

		}
		else {

			my $second = defined $v[1] ? $v[1] : '';

			print "\ndont know what to do with: $v[0]-$second\n";

		}

	}

	# The last scan of the file has no following "S" line to trigger its
	# output, so write it here.
	$write_scan->();

	@arr1 = ();
	@arr2 = ();

	# Buffered write errors only surface when the handle is closed.
	close($out_fh) or die "Could not write $MGF_OUTFILE_NAME: $!\n";

	close($in_fh);

	print "done!\n($NUMBER_MS2SCANS_TOTAL scans converted)\n";

}