/*********************************************************
 usage:

 EndNote2bib -h
 EndNote2bib [-Q] [-q] < input.end > output.tex
 EndNote2bib [-Q] [-q] input1.end [input2.end ...] > output.tex

 This program exports/converts one or more input files in EndNote ASCII format
 to an output file in BibTeX format. As the usage line shows, it works as a
 pipe in the UNIX sense, or takes input files as arguments.

 The differences w.r.t. the original version r2bib.c are
 - from the application point of view
 .  The EndNote tag %0 is the prime source of deciding which type of
    reference this is (Book, article, thesis....)
 .  The EndNote tags %R, %U, %@, %Z, %7, %8, %9 are also converted.
 .  multiple %E fields are concatenated with "and", not just
    multiple %A fields
 .  switches -h, -q and -Q have been added: -h prints a usage
    summary, and -q can be used to mute the complaints about
    unknown EndNote tags. If -Q is added, the fields are put into
    pairs of quotes, whereas the default is placing them into pairs
    of curly braces.
 - from a programmer's point of view
 .  K&R C-style  source code has been modified to ANSI C
 .  some declarations of system functions have been replaced
    by an #include of stdlib.h. exit(0) and exit(1) have been replaced
    by exit(EXIT_SUCCESS) and exit(EXIT_FAILURE). main() ends on
    return, not on exit().
 .  some forward declarations of local functions have been
    removed by moving these local functions ahead such that
    the compiler has seen them before they are called.
 .  The use of NULL and '\0' has been corrected to remove
    some gcc warnings
 .  The structure with the hash that guesses types from tags
    has been made local to the function it is used in.

 To wrap the output lines under UNIX use:
 EndNote2bib < input.end | fold -s >  output.tex

 See also http://www.endnote.com

 For similar tools and tools that work in the reverse direction
 (BibTeX to EndNote) see http://support.isiresearchsoft.com/pub/bibtex/ .


 The original source that converts the similar refer
 format to BibTeX has been taken from
 http://www.ctan.org/tex-archive/biblio/bibtex/utils/r2bib/ .

 Richard J. Mathar, http://www.strw.leidenuniv.nl/~mathar
 Feb 22, 2010
***********************************************************/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include <string.h>

/* Print the command line synopsis and a one-line description of the
   options -q and -Q to stdout.
   argv0 is the program name that is inserted into the synopsis lines.
*/
void usage(char *argv0)
{
	/* the three synopsis lines all need the program name */
	printf("usage: %s [-Q] [-q]; # pipe stdin to stdout\n"
	       "\t %s [-Q] [-q] infile1.end [infile2.end ...]; # convert list of input files to stdout\n"
	       "\t %s -h ; # help: print these usage lines here\n",
	       argv0, argv0, argv0) ;

	/* the option descriptions are constant text */
	fputs("Option -q suppresses complains on unknown EndNote tags\n"
	      "Option -Q uses pairs of quotes to embrace fields\n", stdout) ;
}

/* Trim white space from both ends of the string bp.
   Trailing white space is cut off in place by writing a '\0' behind the last
   non-space character; leading white space is skipped by advancing the
   pointer, so the result points into the caller's buffer.
   Return NULL if bp is empty or contains white space only (bp is left
   unmodified in that case), otherwise the pointer to the first non-space
   character.
*/
char * sanz(char *bp)
{
	/* one past the last character of the line */
	char	*end = bp + strlen(bp);

	/*
	 * back up over any trailing space chars. The bounds test comes first
	 * so that nothing in front of bp is ever read, not even for an empty
	 * string. The cast keeps isspace() well defined for bytes >= 0x80.
	 */
	while (end > bp && isspace((unsigned char) end[-1]))
		end--;

	if (end == bp)
		return NULL ;	/* empty line */

	/* truncate after the last non-space character in the line */
	*end = '\0' ;

	/* at least one non-space character is left, so this stops before 'end' */
	while (isspace((unsigned char) *bp))
		bp++;

	return bp ;
}


/* EndNote standards
   %A Author
   %B secondary title (of a book or conference name)
   %C Place published
   %D Year
   %E editor / secondary author
   %F label
   %G Language
   %H Translated Author
   %I Publisher
   %J secondary title (journal name)
   %K Keywords
   %L call number
   %M accession number
   %N number (issue)
   %P pages
   %Q Translated Title
   %R DOI
   %S tertiary title
   %T title
   %U URL
   %V volume
   %W database provider
   %X Abstract
   %Y tertiary author
   %Z notes
   %0 Reference type
   %1 to %4 custom notes (Wiley stores the DOI here)
   %6 number of volumes
   %7 edition
   %8 date
   %9 type of work
   %? subsidiary author
   %@ ISBN/ISSN
   %! short title
   %#, %$ %] custom 5 to custom 7
   %& section
   %( original publication
   %) reprint edition
   %* reviewed item
   %+ author address
   %^ caption
   %> File attachments
   %< research notes
   %[ access date
   %= Custom 8
   %~ name of database
*/
/* One slot per supported EndNote tag.
   rb_data collects the text of the tag for the entry currently being read
   and is NULL while the tag has not been seen.
*/
struct Rb {
	char	rb_kl;		/* EndNote key letter (the character after '%') */
	char *	rb_kw;		/* name of the bibtex field */
	char	rb_emit;	/* print the data as a field unless this is 0 */
	char *	rb_data;	/* EndNote data, or NULL */
};

/* Table of the supported tags; the entry with key letter 0 terminates it.
   The order matters where the table is scanned front to back.
*/
struct Rb rb[] = {
	/* reference type and numbered/special tags */
	{ '0',	"",		0,	NULL	},
	{ '7',	"edition",	1,	NULL	},
	{ '8',	"month",	1,	NULL	},
	{ '9',	"type",		1,	NULL	},
	{ '@',	"isbn",		1,	NULL	},	/* To do: could also be issn */

	/* lettered tags */
	{ 'A',	"author",	1,	NULL	},
	{ 'B',	"booktitle",	1,	NULL	},
	{ 'C',	"address",	1,	NULL	},
	{ 'D',	"year",		1,	NULL	},
	{ 'E',	"editor",	1,	NULL	},
	{ 'F',	"",		0,	NULL	},
	{ 'I',	"publisher",	1,	NULL	},
	{ 'J',	"journal",	1,	NULL	},
	{ 'L',	"key",		1,	NULL	},	/* use as bibtex key; comes earlier in the list to precede K */
	{ 'K',	"note",		1,	NULL	},
	{ 'N',	"number",	1,	NULL	},
	{ 'P',	"pages",	1,	NULL	},
	{ 'Q',	"institution",	1,	NULL	},
	{ 'R',	"doi",		1,	NULL	},
	{ 'S',	"series",	1,	NULL	},
	{ 'T',	"title",	1,	NULL	},
	{ 'U',	"url",		1,	NULL	},
	{ 'V',	"volume",	1,	NULL	},
	{ 'X',	"abstract",	1,	NULL	},
	{ 'Z',	"annote",	1,	NULL	},

	/* terminator */
	{ '\0',	NULL,		0,	NULL	}
};

/* Guess the BibTeX entry type from the tags that carry data in rb[] and
   print the opening "@type{" on stdout. If none of the indicator tags is
   present, "@misc{" is printed.
*/
void guesstype()
{
	/*
	 * Indicator tags in order of precedence: the first one that has data
	 * decides. A 'J' field therefore always yields an article, even if an
	 * 'I' field is present as well, whereas an 'I' field alone yields a book.
	 */
	struct Guess {
		char	tag;
		char	*entry;
	} order[] = {
		{ 'J',	"article"	},
		{ 'B',	"inbook"	},
		{ 'R',	"techreport"	},
		{ 'I',	"book"		}
	};
	const int	norder = (int) (sizeof(order) / sizeof(order[0]));
	int		i;

	for (i = 0; i < norder; i++)
	{
		/* is there a tag with this key letter that holds data? */
		struct Rb *slot = &rb[0];
		while (slot->rb_kl != 0)
		{
			if (slot->rb_data != NULL && slot->rb_kl == order[i].tag)
			{
				printf("@%s{", order[i].entry);
				return;
			}
			slot++;
		}
	}

	/* no indicator matched */
	printf("@misc{");
}


/*
  examine %0 and print the opening of the BibTeX entry ("@book{",
  "@inproceedings{", ...) on stdout, if the reference type matches something
  known.
  The data of %0 is compared with the start of each known EndNote reference
  type, and the longest one that matches wins. This matters because some
  names are prefixes of others: "Book Section" also starts with "Book" but
  must become inbook, not book.
  If the %0 is absent or its type is not known, nothing is printed and 1 is
  returned, else 0.
*/
int tagzero()
{
	/* everything not listed will be re-evaluated in the caller function */
	static char *ZMatch[][2] = {
		{"Book", "book"},
		{"Electronic Book", "book"},
		{"Manuscript", "manual"},
		{"Magazine Article", "article"},
		{"In Proceedings", "inproceedings"},
		{"Conference Paper", "inproceedings"},
		{"Conference Proceedings", "proceedings"},
		{"Report", "techreport"},
		{"Journal Article", "article"},
		{"Electronic Article", "article"},
		{"Book Section", "inbook"},
		{"Edited Book", "book"},
		{"Newspaper Article","article"},
		{"Thesis", "phdthesis"},		/* could be also masterthesis, %9 = Dissertation to be investigated */
		{"Personal Communication", "unpublished"},
		{"Unpublished Work", "unpublished"},
		{"Generic", "misc"}
	} ;
	const size_t	nmatch = sizeof(ZMatch) / sizeof(ZMatch[0]) ;
	struct Rb	*trb;

	/* search through the tags of this entry */
	for (trb = &rb[0]; trb->rb_kl ; trb++)
	{
		const char	*best = NULL ;	/* BibTeX type of the longest match so far */
		size_t		bestlen = 0 ;
		size_t		zm ;

		/* only tag %0 is of interest, and only if there is actually a line in it */
		if ( trb->rb_kl != '0' || trb->rb_data == NULL )
			continue ;

		for (zm = 0 ; zm < nmatch ; zm++)
		{
			const size_t len = strlen(ZMatch[zm][0]) ;
			if ( len > bestlen && strncmp(trb->rb_data, ZMatch[zm][0], len) == 0 )
			{
				bestlen = len ;
				best = ZMatch[zm][1] ;
			}
		}

		if ( best != NULL )
		{
			printf("@%s{", best);
			return 0 ;
		}
	}
	return 1 ;	/* unsuccessful */
}

/* Return nonzero if s contains no white space, which is the condition for
   using it as a bibtex key.
   The cast keeps isspace() well defined for bytes >= 0x80, which occur in
   non-ASCII reference data.
*/
static int singleword(const char *s)
{
	for ( ; *s ; s++)
		if (isspace((unsigned char) *s))
			return 0;
	return 1;
}

/* Print the entry collected in rb[] as one BibTeX record on stdout and
   release the collected data, so that rb[] is empty again afterwards.
   If brakquot is nonzero, the field values are put into pairs of quotes,
   else into pairs of curly braces.
*/
void dumprb(const int brakquot)
{
	struct Rb	*trb;
	static int	key;	/* number of entries dumped so far in this run */
	int		first = 1;

	/* try to consider %0 ...; if this doesn't resolve, look at the availability of other key tags...*/
	if( tagzero() )
		guesstype() ;

	/*
	 * in order of precedence, the bibtex key is determined:
	 *	1. use EndNote label (%F).
	 *	2. use EndNote Call Number (%L).
	 *	3. use keyword (%K).
	 *	   Each of these is used only if it is a single word.
	 *	4. otherwise just use the string "keyN" where N
	 *	   is the running count of the entries dumped so far.
	 * The precedence follows from the order of F, L and K in rb[].
	 */
	key++;
	for (trb = &rb[0]; trb->rb_kl != 0; trb++)
	{
		const char kl = trb->rb_kl;

		if (trb->rb_data == NULL || (kl != 'F' && kl != 'L' && kl != 'K'))
			continue;

		if (! singleword(trb->rb_data))
			continue;

		printf("%s,\n", trb->rb_data);

		/*
		 * %F and %K are used up by serving as the key: free & zero them.
		 * %L is kept, so it is printed as a field below as well.
		 */
		if (kl != 'L')
		{
			free(trb->rb_data);
			trb->rb_data = NULL;
		}
		break;
	}

	/* nothing reasonable to use, punt */
	if (trb->rb_kl == 0)
		printf("key%d,\n", key);

	for (trb = &rb[0]; trb->rb_kl != 0; trb++)
	{
		if (trb->rb_data == NULL)
			continue;

		if (trb->rb_emit != 0)
		{
			/*
			 * clank,
			 * short field names get a second tab so that things will line up.
			 */
			const char *pad = (strlen(trb->rb_kw) < 6) ? "\t\t" : "\t";

			/* the separator goes in front, so the last field has no trailing comma */
			if (! first)
				printf(",\n");

			if ( brakquot)
				printf("\t%s =%s\"%s\"", trb->rb_kw, pad, trb->rb_data);
			else
				printf("\t%s =%s{%s}", trb->rb_kw, pad, trb->rb_data);
			first = 0;
		}

		/* tags that are not printed are released, too */
		free(trb->rb_data);
		trb->rb_data = NULL;
	}

	printf("\n}\n\n");
}

/* swallow multiple author case:
   Replace each " and " inside string by " {and} ". This keeps names that were
   joined by "and" within one EndNote line together, as opposed to the plain
   " and " that is put between separate %A or %E lines.
   Occurrences are found left to right and do not overlap.

   Return string itself, untouched, if it contains no " and ". Otherwise
   return a newly malloc()ed copy with all replacements done; the caller owns
   that copy and has to free() it. Exits the program if memory runs out.
*/
char * andfix(char *string)
{
	static const char	plain[] = " and ";
	static const char	braced[] = " {and} ";
	const size_t		plainlen = sizeof(plain) - 1;
	const size_t		bracedlen = sizeof(braced) - 1;
	size_t			hits = 0;
	const char		*in;
	char			*fixed;
	char			*out;

	/* first pass: count the occurrences to learn the size of the result */
	for (in = string; *in ; )
	{
		if (strncmp(in, plain, plainlen) == 0)
		{
			hits++;
			in += plainlen;
		}
		else
			in++;
	}

	if (hits == 0)
		return string ;

	/*
	 * each replacement adds the two curly braces,
	 * +1 for the null at the end.
	 */
	if ((fixed = malloc(strlen(string) + hits * (bracedlen - plainlen) + 1)) == NULL) {
		perror("malloc");
		exit(EXIT_FAILURE);
	}

	/* second pass: copy, substituting on the fly */
	out = fixed;
	for (in = string; *in ; )
	{
		if (strncmp(in, plain, plainlen) == 0)
		{
			memcpy(out, braced, bracedlen);
			out += bracedlen;
			in += plainlen;
		}
		else
			*out++ = *in++;
	}
	*out = '\0';

	return fixed ;
}

/* handle one EndNote line
   'lrb' is the slot of rb[] the text belongs to, 'cp' points to the text,
   i.e. to the 3rd character in a tag line (just after the tag) or to the
   start of a continuation line.
   The text is trimmed and stored in lrb->rb_data. If the slot holds data
   already, the text is appended, separated by " and " for authors and
   editors and by a blank for all other tags.
   Nothing happens if the text is empty or if lrb is NULL or the terminating
   entry of rb[]. Exits the program if memory runs out.
*/
void stuffrb(struct Rb *lrb, char *cp)
{
	char	*fixed = NULL;	/* copy made by andfix(), to be freed here */
	int	isname;

	/* no slot to store into; the terminator of rb[] must stay empty */
	if (lrb == NULL || lrb->rb_kl == 0)
		return;

	/* empty data field */
	if ((cp = sanz(cp)) == NULL)
		return;

	isname = (lrb->rb_kl == 'A' || lrb->rb_kl == 'E');

	if (isname)
	{
		/* andfix() hands back either cp itself or a fresh allocation */
		char *res = andfix(cp);
		if (res != cp)
			fixed = res;
		cp = res;
	}

	if (lrb->rb_data == NULL)
	{
		/* first text for this tag */
		if ((lrb->rb_data = malloc(strlen(cp) + 1)) == NULL)
		{
			perror("malloc");
			exit(EXIT_FAILURE);
		}
		strcpy(lrb->rb_data, cp);
	}
	else
	{
		/* repeated tag or continuation line */
		const char	*conj = isname ? " and " : " ";
		char		*grown = realloc(lrb->rb_data,
					strlen(lrb->rb_data) + strlen(conj) + strlen(cp) + 1);

		if (grown == NULL)
		{
			perror("realloc");
			exit(EXIT_FAILURE);
		}
		lrb->rb_data = grown;

		strcat(lrb->rb_data, conj);
		strcat(lrb->rb_data, cp);
	}

	/* the text has been copied into rb_data, the temporary is not needed any more */
	free(fixed);
}

/* Handle one file
   Read the EndNote entries from 'fid' and print them in BibTeX format on
   stdout, preceded by a @comment that names the source. Entries are
   separated by empty lines.
   'file' is the name used in that comment and in the diagnostics.
   If 'quiet' is nonzero, unknown tags and misplaced text lines are skipped
   without a message. 'brakquot' is handed on to dumprb().
   Return 1 if a diagnostic has been printed, else 0.
*/
int r2bib(char *file, FILE *fid, int quiet, const int brakquot)
{
	struct Rb	*lrb = NULL;	/* slot of the last tag line, NULL if none or unknown */
	int		inrec = 0;	/* nonzero once the current entry has a tag line */
	int		line=0 ;
	char		buf[BUFSIZ];
	int		err= 0;
	const time_t	now = time(NULL) ;
	const char	*user = getenv("USERNAME") ;

	/* the variable need not be set, and NULL must not be passed to %s */
	if (user == NULL)
		user = "unknown" ;

	/* note that the string of ctime() ends on a newline */
	printf("@comment { created %s from %s by %s (Richard J. Mathar) run by %s}\n\n",
		ctime(&now),file,__FILE__,user)  ;

	while ( fgets(buf, sizeof(buf), fid) )
	{
		char	*cp;
		line++;

		/* an empty line? If yes, assume start of a new entry */
		if ((cp = sanz(buf)) == NULL)
		{
			if (inrec)
			{
				dumprb(brakquot);
				inrec = 0;
				lrb = NULL;
			}
			continue;
		}

		/*
		 * if the first letter is a % then it's the
		 * a new record, otherwise it's a continuation
		 * of the previous one.
		 */
		if (cp[0] == '%')
		{
			struct Rb	*trb;

			/* even an unknown tag opens an entry */
			inrec = 1;
			lrb = NULL;

			/* search through the supported list of tags */
			for (trb = &rb[0]; trb->rb_kl != 0; trb++)
				if (trb->rb_kl == cp[1])
				{
					lrb = trb;
					break;
				}

			if (lrb != NULL)
				stuffrb(lrb, &cp[2]);
			else if (!quiet)
			{
				fprintf(stderr, "r2b: %s: line %d: unknown key letter %c, ignoring\n", file, line, cp[1]);
				err = 1;
			}
		}
		else if (lrb != NULL)
		{
			stuffrb(lrb, &cp[0]);
		}
		else if (!inrec)
		{
			/* text in front of the first tag of an entry */
			if (!quiet)
			{
				fprintf(stderr, "r2b: %s: line %d: bad format, ignoring\n", file, line);
				err = 1;
			}
		}
		/* else: continuation of an unknown tag, dropped together with it */
	}

	/* the last entry need not be followed by an empty line */
	if (inrec)
		dumprb(brakquot);

	return err ;
}

/* Parse the options -q, -Q and -h, then loop over the input files in the
   argument list; if this list is empty, scan the standard input.
   Return 0 on success and for -h, 1 if an input file could not be opened or
   a diagnostic was printed during the conversion.
*/
int main(int argc, char *argv[])
{
	int	err=0;
	int	oc ;		/* int, not char: getopt() signals the end with -1 */
	int	quiet=0 ;
	int	brakquot=0 ;

	while (  (oc=getopt(argc,argv,"qQh")) != -1 )
	{
		switch(oc)
		{
		case 'q' :
			quiet = 1 ;
			break ;
		case 'Q' :
			brakquot = 1 ;
			break ;
		case 'h' :
			usage(argv[0]) ;
			return 0 ;
		case '?' :
		default :
			/* getopt() returns '?'; the offending letter itself is in optopt */
			fprintf(stderr,"Invalid command line option %c\n",optopt) ;
			usage(argv[0]) ;
			break ;
		}
	}

	/* optind is the index of the first non-option argument, if there is one */
	if (optind < argc )
	{
		int i ;
		for (i = optind; i < argc; i++)
		{
			FILE *fid = fopen(argv[i], "r") ;
			if ( fid == NULL)
			{
				fprintf(stderr, "fopen: ");
				perror(argv[i]);
				/* go on with the other files, but report the failure in the exit status */
				err++ ;
				continue;
			}
			err += r2bib(argv[i], fid, quiet, brakquot);
			fclose(fid) ;
		}
	}
	else
		err += r2bib("stdin", stdin, quiet, brakquot);

	return err ? 1 : 0 ;
}