/*********************************************************
usage:
    EndNote2bib -h
    EndNote2bib [-Q] [-q] < input.end > output.tex
    EndNote2bib [-Q] [-q] input1.end [input2.end ...] > output.tex

This program exports/converts one or more input files in EndNote ASCII
format to an output file in BibTeX format. As the usage line shows, it
works as a pipe in the UNIX sense, or takes input files as arguments.

The differences w.r.t. the original version r2bib.c are
- from the application point of view
  . The EndNote tag %0 is the prime source of deciding which type of
    reference this is (Book, article, thesis....)
  . The EndNote tags %R, %U, %@, %Z, %7, %8, %9 are also converted.
  . multiple %E fields are concatenated with "and", not just multiple
    %A fields
  . switches -h, -q and -Q have been added: -h prints some usage summary,
    and -q can be used to mute the complaints about unknown EndNote tags.
    If -Q is added, the fields are put into pairs of quotes, whereas the
    default is placing them into pairs of curly parentheses.
- from a programmer's point of view
  . K&R C-style source code has been modified to ANSI C
  . some declarations of system functions have been replaced by an
    include stdlib.h. exit(0,1) have been replaced by
    exit(EXIT_SUCCESS,EXIT_FAILURE). main() ends on return 0, not on exit().
  . some forward declarations of local functions have been removed by
    moving these local functions ahead such that the compiler has seen
    them before they are called.
  . The use of NULL and '\0' has been corrected to remove some gcc warnings
  . The structure with the hash that guesses types from tags has been made
    local to the function it is used in.

To wrap the output lines under UNIX use:
    EndNote2bib < input.end | fold -s > output.tex

See also http://www.endnote.com
For similar tools and tools that work in the reverse direction
(BibTeX to EndNote) see http://support.isiresearchsoft.com/pub/bibtex/ .
The original source that converts the similar refer format to BibTeX has
been taken from http://www.ctan.org/tex-archive/biblio/bibtex/utils/r2bib/ .

Richard J. Mathar, http://www.strw.leidenuniv.nl/~mathar
Feb 22, 2010
***********************************************************/

/* getopt(), optind and optopt are POSIX; ask <unistd.h> to declare them
 * even when the compiler runs in a strict ISO C mode. */
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L
#endif

/* NOTE(review): the header names of the six #include lines were missing in
 * the copy under review; these are the headers the code below requires. */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

/* Print the command line synopsis on stdout.
 * argv0: the program name as it was invoked.
 */
void usage(char *argv0)
{
    printf("usage: %s [-Q] [-q]; # pipe stdin to stdout\n", argv0);
    printf("\t %s [-Q] [-q] infile1.end [infile2.end ...]; # convert list of input files to stdout\n", argv0);
    printf("\t %s -h ; # help: print these usage lines here\n", argv0);
    printf("Option -q suppresses complains on unknown EndNote tags\n");
    printf("Option -Q uses pairs of quotes to embrace fields\n");
}

/* Return NULL if bp points to an (essentially) empty string.
 * "Essentially" means ignoring white space.
 * Otherwise cut off the trailing white space in place and return the
 * pointer to the first non-space character.
 */
char *sanz(char *bp)
{
    size_t len = strlen(bp);

    /* Back up over trailing white space. Working with a length instead of
     * a pointer keeps us inside the buffer even if the string is empty. */
    while (len > 0 && isspace((unsigned char)bp[len - 1]))
        len--;
    if (len == 0)
        return NULL;    /* empty line */

    /* truncate after the last non-space character in the line */
    bp[len] = '\0';

    /* at least one non-space character exists, so this stops before the end */
    while (isspace((unsigned char)*bp))
        bp++;
    return bp;
}

/* EndNote standards
    %A Author
    %B secondary title (of a book or conference name)
    %C Place published
    %D Year
    %E editor / secondary author
    %F label
    %G Language
    %H Translated Author
    %I Publisher
    %J secondary title (journal name)
    %K Keywords
    %L call number
    %M accession number
    %N number (issue)
    %P pages
    %Q Translated Title
    %R DOI
    %S tertiary title
    %T title
    %U URL
    %V volume
    %W database provider
    %X Abstract
    %Y tertiary author
    %Z notes
    %0 Reference type
    %1 to %4 custom notes (Wiley stores the DOI here)
    %6 number of volumes
    %7 edition
    %8 date
    %9 type of work
    %? subsidiary author
    %@ ISBN/ISSN
    %! short title
    %#, %$ %] custom 5 to custom 7
    %& section
    %( original publication
    %) reprint edition
    %* reviewed item
    %+ author address
    %^ caption
    %> File attachments
    %< research notes
    %[ access date
    %= Custom 8
    %~ name of database
*/
struct Rb {
    char rb_kl;          /* EndNote key letter */
    const char *rb_kw;   /* bibtex string */
    char rb_emit;        /* don't print data if 0 */
    char *rb_data;       /* EndNote data (malloc'ed, NULL if absent) */
} rb[] = {
    { '0', "",            0, NULL },
    { '7', "edition",     1, NULL },
    { '8', "month",       1, NULL },
    { '9', "type",        1, NULL },
    { '@', "isbn",        1, NULL },  /* To do: could also be issn */
    { 'A', "author",      1, NULL },
    { 'B', "booktitle",   1, NULL },
    { 'C', "address",     1, NULL },
    { 'D', "year",        1, NULL },
    { 'E', "editor",      1, NULL },
    { 'F', "",            0, NULL },
    { 'I', "publisher",   1, NULL },
    { 'J', "journal",     1, NULL },
    { 'L', "key",         1, NULL },  /* use as bibtex key; comes earlier in the list to precede K */
    { 'K', "note",        1, NULL },
    { 'N', "number",      1, NULL },
    { 'P', "pages",       1, NULL },
    { 'Q', "institution", 1, NULL },
    { 'R', "doi",         1, NULL },
    { 'S', "series",      1, NULL },
    { 'T', "title",       1, NULL },
    { 'U', "url",         1, NULL },
    { 'V', "volume",      1, NULL },
    { 'X', "abstract",    1, NULL },
    { 'Z', "annote",      1, NULL },
    { '\0', NULL,         0, NULL }   /* sentinel: end of table */
};

/* Guess the BibTeX entry type from the tags present in the current entry
 * and print the opening "@type{" on stdout ("@misc{" if nothing fits).
 */
void guesstype(void)
{
    /*
     * entries are in order of precedence.
     * Any entry with a 'J' field must be an article, but anything with an 'I'
     * field doesn't have to be a book (if an entry has both 'J' and 'I' it is
     * considered to be an article).
     */
    static const struct Bmap {
        char bm_kl;
        const char *bm_entry;
    } bmap[] = {
        { 'J', "article" },
        { 'B', "inbook" },
        { 'R', "techreport" },
        { 'I', "book" },
        { '\0', NULL }
    };
    const struct Bmap *bm;

    /* the first map entry whose tag carries data decides the type */
    for (bm = bmap; bm->bm_kl != '\0'; bm++) {
        const struct Rb *trb;

        for (trb = rb; trb->rb_kl != '\0'; trb++) {
            if (trb->rb_kl == bm->bm_kl && trb->rb_data != NULL) {
                printf("@%s{", bm->bm_entry);
                return;
            }
        }
    }
    printf("@misc{");
}

/* Examine %0 and print a @book, @incollection or any other type on stdout,
 * if the tag matches something known.
 * If the %0 is absent or this cannot be done for other reasons, return 1,
 * else 0.
 */
int tagzero(void)
{
    /* everything not listed will be re-evaluated in the caller function */
    static const char *const ZMatch[][2] = {
        { "Book",                   "book" },
        { "Electronic Book",        "book" },
        { "Manuscript",             "manual" },
        { "Magazine Article",       "article" },
        { "In Proceedings",         "inproceedings" },
        { "Conference Paper",       "inproceedings" },
        { "Conference Proceedings", "proceedings" },
        { "Report",                 "techreport" },
        { "Journal Article",        "article" },
        { "Electronic Article",     "article" },
        { "Book Section",           "inbook" },
        { "Edited Book",            "book" },
        { "Newspaper Article",      "article" },
        { "Thesis",                 "phdthesis" }, /* could be also masterthesis, %9 = Dissertation to be investigated */
        { "Personal Communication", "unpublished" },
        { "Unpublished Work",       "unpublished" },
        { "Generic",                "misc" }
    };
    const size_t nmatch = sizeof ZMatch / sizeof ZMatch[0];
    const struct Rb *trb;

    /* search through the tags of this entry */
    for (trb = rb; trb->rb_kl != '\0'; trb++) {
        const char *best = NULL;
        size_t bestlen = 0;
        size_t zm;

        /* need tag %0 with an actual line in it */
        if (trb->rb_kl != '0' || trb->rb_data == NULL)
            continue;

        /* The comparison is a prefix match, so take the LONGEST matching
         * name: otherwise "Book" would shadow "Book Section". */
        for (zm = 0; zm < nmatch; zm++) {
            const size_t len = strlen(ZMatch[zm][0]);

            if (len > bestlen && strncmp(trb->rb_data, ZMatch[zm][0], len) == 0) {
                best = ZMatch[zm][1];
                bestlen = len;
            }
        }
        if (best != NULL) {
            printf("@%s{", best);
            return 0;
        }
    }
    return 1;   /* unsuccessful */
}

/* Return nonzero if s contains no white space at all. */
static int single_word(const char *s)
{
    for (; *s != '\0'; s++)
        if (isspace((unsigned char)*s))
            return 0;
    return 1;
}

/* Print the entry collected in rb[] as one BibTeX record on stdout and
 * release all collected data, leaving rb[] ready for the next entry.
 * brakquot: if nonzero, field values are enclosed in "..." instead of {...}.
 */
void dumprb(const int brakquot)
{
    static int key;     /* running count of the entries written so far */
    const char open = brakquot ? '"' : '{';
    const char close = brakquot ? '"' : '}';
    struct Rb *trb;
    int first = 1;

    /* try to consider %0 ...; if this doesn't resolve, look at the
     * availability of other key tags... */
    if (tagzero())
        guesstype();

    /*
     * in order of precedence, the bibtex key is determined:
     * 1. use EndNote label (%F).
     * 2. use EndNote Call Number (%L).
     * 3. use keyword (%K) if only one word.
     * 4. otherwise just use the string "keyN" where N
     *    is the count of this bibliographic entry in
     *    the EndNote file.
     * The precedence follows from the order of F, L and K in rb[].
     */
    key++;
    for (trb = rb; trb->rb_kl != '\0'; trb++) {
        const char kl = trb->rb_kl;

        if (kl != 'F' && kl != 'L' && kl != 'K')
            continue;
        if (trb->rb_data == NULL || !single_word(trb->rb_data))
            continue;

        printf("%s,\n", trb->rb_data);
        /* %F and %K are consumed by the key; %L is kept so that it is
         * also written as a "key" field below */
        if (kl != 'L') {
            free(trb->rb_data);
            trb->rb_data = NULL;
        }
        break;
    }

    /* nothing reasonable to use, punt */
    if (trb->rb_kl == '\0')
        printf("key%d,\n", key);

    for (trb = rb; trb->rb_kl != '\0'; trb++) {
        if (trb->rb_data == NULL)
            continue;

        if (trb->rb_emit != 0) {
            /* short field names get a second tab so that things line up */
            const char *pad = (strlen(trb->rb_kw) < 6) ? "\t\t" : "\t";

            if (!first)
                printf(",\n");
            printf("\t%s =%s%c%s%c", trb->rb_kw, pad, open, trb->rb_data, close);
            first = 0;
        }
        free(trb->rb_data);
        trb->rb_data = NULL;
    }
    printf("\n}\n\n");
}

/* Swallow the multiple author case: protect every " and " inside a single
 * name by curly braces (" {and} ") so that BibTeX does not split the name.
 * Returns 'string' itself if it contains no " and "; otherwise a newly
 * malloc'ed copy which the caller must free(). Exits on allocation failure.
 */
char *andfix(char *string)
{
    static const char needle[] = " and ";
    const size_t nlen = sizeof needle - 1;
    const char *src;
    size_t hits = 0;
    char *out;
    char *dst;

    /* Count the matches. The trailing blank of one match may serve as the
     * leading blank of the next, hence the restart at nlen - 1. */
    for (src = strstr(string, needle); src != NULL;
         src = strstr(src + nlen - 1, needle))
        hits++;
    if (hits == 0)
        return string;

    /* +2 per match for the curly braces, +1 for the null at the end */
    out = malloc(strlen(string) + 2 * hits + 1);
    if (out == NULL) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }

    dst = out;
    src = string;
    while (*src != '\0') {
        if (strncmp(src, needle, nlen) == 0) {
            /* emit " {and}" and leave the trailing blank to the next round */
            memcpy(dst, " {and}", 6);
            dst += 6;
            src += nlen - 1;
        } else {
            *dst++ = *src++;
        }
    }
    *dst = '\0';
    return out;
}

/* Handle one EndNote line.
 * lrb: the table entry of the tag that receives the text.
 * cp:  points to the 3rd character in the line (just after the tag), or to
 *      the start of a continuation line.
 * The text is stored in lrb->rb_data; if data is already present the new
 * text is appended, joined by " and " for %A/%E and by a blank otherwise.
 */
void stuffrb(struct Rb *lrb, char *cp)
{
    char *text;
    int is_name;

    if (lrb == NULL)
        return;
    /* empty data field */
    if ((cp = sanz(cp)) == NULL)
        return;

    is_name = (lrb->rb_kl == 'A' || lrb->rb_kl == 'E');
    text = is_name ? andfix(cp) : cp;

    if (lrb->rb_data == NULL) {
        lrb->rb_data = malloc(strlen(text) + 1);
        if (lrb->rb_data == NULL) {
            perror("malloc");
            exit(EXIT_FAILURE);
        }
        strcpy(lrb->rb_data, text);
    } else {
        const char *conj = is_name ? " and " : " ";
        const size_t need = strlen(lrb->rb_data) + strlen(conj) + strlen(text) + 1;
        char *grown = realloc(lrb->rb_data, need);

        if (grown == NULL) {
            perror("realloc");
            exit(EXIT_FAILURE);
        }
        strcat(grown, conj);
        strcat(grown, text);
        lrb->rb_data = grown;
    }

    /* andfix() hands out a private copy whenever it changed something */
    if (text != cp)
        free(text);
}

/* Handle one file.
 * file:     name used in diagnostics and in the leading @comment.
 * fid:      stream to read the EndNote records from.
 * quiet:    if nonzero, malformed or unknown lines are skipped silently.
 * brakquot: passed on to dumprb().
 * Returns 1 if a complaint has been printed on stderr, else 0.
 */
int r2bib(char *file, FILE *fid, int quiet, const int brakquot)
{
    struct Rb *lrb = NULL;  /* field that receives continuation lines, or NULL */
    int pending = 0;        /* a tag line has been seen since the last dump */
    int line = 0;
    int err = 0;
    char buf[BUFSIZ];
    const time_t now = time(NULL);
    const char *stamp = ctime(&now);
    const char *user = getenv("USERNAME");

    /* never hand a null pointer to %s */
    if (stamp == NULL)
        stamp = "(unknown time)\n";
    if (user == NULL)
        user = getenv("USER");
    if (user == NULL)
        user = "unknown";

    printf("@comment { created %s from %s by %s (Richard J. Mathar) run by %s}\n\n",
           stamp, file, __FILE__, user);

    while (fgets(buf, sizeof(buf), fid)) {
        char *cp;

        line++;

        /* an empty line? If yes, assume start of a new entry */
        if ((cp = sanz(buf)) == NULL) {
            if (pending) {
                dumprb(brakquot);
                pending = 0;
                lrb = NULL;
            }
            continue;
        }

        /*
         * if the first letter is a % then it's a new record,
         * otherwise it's a continuation of the previous one.
         */
        if (cp[0] == '%') {
            /* search through the supported list of tags */
            for (lrb = rb; lrb->rb_kl != '\0'; lrb++)
                if (lrb->rb_kl == cp[1])
                    break;

            pending = 1;
            if (lrb->rb_kl == '\0') {
                /* unknown tag: never store anything into the sentinel */
                lrb = NULL;
                if (!quiet) {
                    fprintf(stderr, "r2b: %s: line %d: unknown key letter %c, ignoring\n",
                            file, line, cp[1]);
                    err = 1;
                }
            } else {
                stuffrb(lrb, &cp[2]);
            }
        } else if (lrb != NULL) {
            stuffrb(lrb, cp);
        } else if (!pending) {
            /* text that does not belong to any tag */
            if (!quiet) {
                fprintf(stderr, "r2b: %s: line %d: bad format, ignoring\n", file, line);
                err = 1;
            }
        }
        /* else: continuation of an ignored (unknown) tag, dropped silently */
    }

    /* the last entry need not be followed by an empty line */
    if (pending)
        dumprb(brakquot);
    return err;
}

/* Define ENDNOTE2BIB_NO_MAIN to compile the converter functions without the
 * command line front end, e.g. for unit tests. */
#ifndef ENDNOTE2BIB_NO_MAIN
/* loop over the input files in the argument list;
 * If this list is empty, scan the standard input.
 * Returns 1 if any file could not be opened or caused complaints, else 0.
 */
int main(int argc, char *argv[])
{
    int err = 0;
    int quiet = 0;
    int brakquot = 0;
    int oc;     /* int, not char: getopt() signals the end with -1 */

    while ((oc = getopt(argc, argv, "qQh")) != -1) {
        switch (oc) {
        case 'q':
            quiet = 1;
            break;
        case 'Q':
            brakquot = 1;
            break;
        case 'h':
            usage(argv[0]);
            return 0;
        case '?':
        default:
            /* getopt() returns '?'; the offending letter is in optopt */
            fprintf(stderr, "Invalid command line option %c\n", optopt);
            usage(argv[0]);
            break;
        }
    }

    if (optind < argc) {
        int i;

        for (i = optind; i < argc; i++) {
            FILE *fid = fopen(argv[i], "r");

            if (fid == NULL) {
                fprintf(stderr, "fopen: ");
                perror(argv[i]);
                err++;
                continue;
            }
            err += r2bib(argv[i], fid, quiet, brakquot);
            fclose(fid);
        }
    } else {
        err += r2bib("stdin", stdin, quiet, brakquot);
    }

    return err ? 1 : 0;
}
#endif /* ENDNOTE2BIB_NO_MAIN */