#!/usr/local/bin/perl

################################################################################

require 5.000 ;	# This is a perl 5.0 script
require MDL_sdf ;


use Getopt::Long ;
use File::Basename ;
# Program name shown in the usage and error messages (script name without
# a trailing ".perl").
$me = basename( $0, ".perl") ;

# Return the full help text for this program.
sub _sort_sdf_usage
{
	return qq{Example usage: $me [-property_name NSC]  < input.sdf > output.sdf

$me reads an MDL SDFile, sorts all records using the given property and output an MDL SDFILE.

The property should be numeric.

If the option -property_name is not defined, the content of line 1 will be used.

Warning: $me might require a lot of memory (the whole input file is stored in memory). For example, sorting the entire NCI database (about 250,000 entries with biological data added, a ~800 MB SD file) by NSC number required 1.5GB of memory and about 20 min. of computer time (this was done on galaxy.nih.gov, an SGI computer with 32 x 250 MHz R10000 processors (only one CPU was used) and 8GB RAM)

\n} ;
}

# Parse @ARGV into $main::opt_help and $main::opt_property_name.
#
# Dies (message on STDERR, non-zero exit status) when:
#   - the command line contains an option Getopt::Long rejects; previously
#     such an option was only warned about and the sort ran anyway;
#   - -help is given, in which case the message is the usage text.
# The help text is sent through die so that it never ends up on STDOUT,
# which is where the sorted records are written.
sub _sort_sdf_parse_options
{
	# Start from a clean state so a value from an earlier call cannot leak.
	undef $main::opt_help ;
	undef $main::opt_property_name ;

	# Explicit destinations make it obvious which globals the rest of the
	# script reads, instead of relying on the implicit $opt_* variables.
	my $parsed_ok = GetOptions(
		'help'            => \$main::opt_help,
		'property_name:s' => \$main::opt_property_name,
	) ;

	if( ! $parsed_ok ) {
		die( "${me}: invalid command line; run '${me} -help' for usage.\n" ) ;
	}

	if( defined($main::opt_help) ) {
		die( _sort_sdf_usage() ) ;
	}

	return ;
}

_sort_sdf_parse_options() ;
##################################################################
#Global variables
	use strict ;

	# One [sort key, record object] pair per input record, in input order.
	my @All_SDF = () ;
	# Number of records read so far; also used in the warning message.
	my $current_record_number = 0 ;
	# Filled by the sorting step further down.
	my @Sorted_all_SDF = () ;

##################################################################
#First read the whole SD file into memory

	print STDERR "Reading data ...\n" ;

	# The key source does not change during the run, so look the option up
	# once.  The variable is addressed as $main::..., not with the old
	# apostrophe separator ($main'...), which is deprecated since Perl 5.38.
	my $sort_property_name = $main::opt_property_name ;

	# An undefined return value from readFromInput() ends the loop
	# (presumably end of input -- the reader lives in MDL_sdf).
	FOR_EACH_SDF_ENTRY:
	while( defined( my $sdf_entry = MDL_sdf_non_parsed_molecule->readFromInput() ) )
	{
		$current_record_number ++ ;

		# Sort key: the named data field when -property_name was given,
		# otherwise line 1 of the record.
		my $prop = defined $sort_property_name
			? $sdf_entry->data_for_field_name($sort_property_name)
			: $sdf_entry->line1() ;

		if( ! defined $prop )
		{
			warn "Property undefined for record n. $current_record_number\n";
		}

		# The record is kept even when its key is undefined, so no input
		# record is ever dropped from the output.
		push( @All_SDF, [$prop, $sdf_entry]); #add an array ref
	}

	print STDERR "$current_record_number record(s) read.\n" ;

##################################################################
# Order the [key, record] pairs by ascending numeric key

	print STDERR "Sorting ...\n" ;

	# Comparator on element 0 (the key) of each pair; <=> compares the keys
	# as numbers.
	my $by_numeric_key = sub { $a->[0] <=> $b->[0] } ;
	@Sorted_all_SDF = sort $by_numeric_key @All_SDF ;

##################################################################
# Emit every record in sorted order
	print STDERR "Writing ...\n" ;

	for my $pair (@Sorted_all_SDF)
	{
		# Element 1 of each pair is the record object stored at read time.
		my $molecule = $pair->[1] ;
		die "Assertion failed" unless defined $molecule ;
		$molecule->write() ;
	}

##################################################################



