#!/usr/local/bin/perl


require 5.000 ;	# This is a perl 5.0 script

use Getopt::Long ;
use File::Basename ;
# Script name with any ".perl" suffix stripped; used in the messages below.
$me = basename($0, ".perl");

# Two switches are recognised: -help (flag) and -prop <name> (string value).
# In this calling style Getopt::Long leaves the values in the package
# globals $opt_help and $opt_prop.
$ret = GetOptions('help', 'prop=s');
die "Options are not correct. Try $me -help\n" unless $ret;

# -help: emit the usage text through die() and stop.
if (defined $opt_help) {
	# The text ends with a newline, so die() does not append "at ... line N".
	my $usage = <<"END_USAGE";
Example usage : $me [-help] -prop NSC  input1.sdf input2.sdf [input3.sdf ...] > list.txt
$me read the first  MDL SDF file and write it to stdout.
Thereafter, the 2nd file is read, and entries are written to stdout only if an entry with the same property has not been read from the first file.

It is assumed that the selected property is present for each entry and is a unique identifier.
Note : Options can be abbreviated as long as they are unambiguous.

END_USAGE
	die $usage;
}

#$Mylibs = "/home/brunob/lib/perl /home/bruno/lib/perl";
#push(@INC, split(/\s+/, $Mylibs));

# Load the SDF support module at run time (presumably the provider of the
# MDL_sdf_non_parsed_molecule class used below -- confirm in MDL_sdf).
require MDL_sdf;

# -prop is mandatory: it names the data field used as entry identifier.
die "Undefined option\n" unless defined $opt_prop;
my $property = $opt_prop;

# First positional argument is the reference file; whatever is left in
# @ARGV after the shift are the files to be merged into it.
my $first_file  = shift @ARGV;
my @other_files = @ARGV;

die "Not enough arguments" unless defined $first_file;
#$#other_files 

#-----------------------------------------------------#
#Properties are stored in a hash table
# Pass 1: copy every entry of the first file to the output and remember
# the value of the selected property for each of them.

# Identifier value => number of times it has been met so far.
my %UniqueProperties = ();
# Running count of the entries read from the first file.
my $entry_number = 0;

# readFromInput() is called without a handle, so the file is attached to
# STDIN (presumably what the reader consumes -- confirm in MDL_sdf).
local *STDIN;

# Three-argument open (Perl 5.6+): the file name is taken literally, so
# names containing leading/trailing blanks, '>' or '|' cannot be mistaken
# for an open mode or a command.  $! reports why the open failed.
open(STDIN, '<', $first_file) or die "Can't open $first_file: $!";

FOR_EACH_SDF_ENTRY: while (1) {
	use strict;

	# One object per SDF entry; an undefined result ends the loop.
	my $sdf_entry = MDL_sdf_non_parsed_molecule->readFromInput();
	last FOR_EACH_SDF_ENTRY unless defined $sdf_entry;

	$entry_number++;

	# Every entry must carry the identifying property.
	my $id = $sdf_entry->data_for_field_name($property);
	defined $id
		or die "$property is not defined for entry n. $entry_number\n";

	$UniqueProperties{$id}++;

	# Entries of the first file are always written, duplicates included.
	$sdf_entry->write();
}

printf STDERR "%d entries read from %s\n", $entry_number, $first_file;

#-----------------------------------------------------#
# Read the other files
# Pass 2: for each remaining file, write only the entries whose property
# value has not been met yet (in the first file or in any earlier file),
# and report per-file statistics on STDERR.
foreach my $file (@other_files) {
	use strict;

	my $entries_read    = 0;   # entries parsed from this file
	my $entries_written = 0;   # entries of this file copied to the output

	# Three-argument open (Perl 5.6+): the file name is taken literally and
	# cannot be interpreted as an open mode or a piped command.
	open(STDIN, '<', $file) or die "Can't open $file: $!";

	FOR_EACH_SDF_ENTRY_2: while (1) {
		# One object per SDF entry; an undefined result ends the loop.
		my $sdf_entry = MDL_sdf_non_parsed_molecule->readFromInput();
		last FOR_EACH_SDF_ENTRY_2 unless defined $sdf_entry;
		$entries_read++;

		my $id = $sdf_entry->data_for_field_name($property);

		# Report the position within the current file: the counter of the
		# first pass would point at the wrong entry.
		defined $id
			or die "$property is not defined for entry n. $entries_read of $file\n";

		# Post-increment returns the previous count, which is false only
		# the first time this identifier is met; the identifier is
		# recorded in every case.
		if (!$UniqueProperties{$id}++) {
			$sdf_entry->write();
			$entries_written++;
		}
	}

	printf STDERR "%d entries read and %d entries added from %s\n",
		$entries_read, $entries_written, $file;
}


