Motivation: I was looking for a way to store and read my linkage data in a binary format, so that the files would be smaller and faster to read.
I had a look at the NCBI C toolbox, which uses ASN.1 as its main format to encode and structure data, much as XML does.
About ASN1: ASN.1 is a standard that describes data structures for representing, encoding, transmitting, and decoding data. It provides a set of formal rules for describing the structure of objects that are independent of machine-specific encoding techniques and is a precise, formal notation that removes ambiguities. Its usage can be compared to the more recent XML Schema (see also my previous post about JAXB).
The NCBI ASN1 C library is described here: http://www.ncbi.nlm.nih.gov/IEB/ToolBox/SDKDOCS/ASNLIB.HTML
Although I still have a problem in the last part, this post can be considered as my first experience with the ASN.1 library.
I started my test by defining a small ASN.1 module to store some PCR primers and their hits on the genome
-- PCRMod: module describing PCR primers and their hits on the genome.
PCRMod DEFINITIONS ::=
BEGIN
-- Orientation of a hit: forward or reverse.
Orientation ::= ENUMERATED
{
forward(0),
reverse(1)
}
-- One hit of a primer: where it lands and in which orientation.
Hit ::= SEQUENCE
{
chromosome VisibleString, -- chromosome
start INTEGER, -- starting position in chromosome 5' + strand
orient Orientation -- orientation
}
-- A primer. 'name' and 'sequence' are mandatory; 'tm' and 'hits' are OPTIONAL
-- and may be left out of an input record.
Primer ::= SEQUENCE {
name VisibleString, -- name
tm REAL OPTIONAL, -- melting temperature
sequence VisibleString, -- sequence
hits SET OF Hit OPTIONAL -- hits
}
-- Top-level type: an ordered list of Primer records.
PrimerInput ::=SEQUENCE OF Primer
END
This schema 'primer.asn' was digested by the asntool
asntool -Z T -w 128 -m primer.asn -G T -B primer -K primerasn
asntool -Z T -w 128 -m primer.asn -o primerasn.h
here asntool will read the 'primer.asn' and will generate three source files:
primerasn.h: is a C header containing all states that will be used to parse the ASN1 files.
/***********************************************************************
*
**
* Automatic header module from ASNTOOL
*
************************************************************************/
(...)
/*
 * Parse tables generated by asntool for module PCRMod.
 * Every object below is declared `static`, so each translation unit that
 * includes this header gets its own private copy of the tables.
 */
/* Name of this generated header file. */
static char * asnfilename = "primerasn.h";
/* Named values of the ENUMERATED type Orientation (forward=0, reverse=1).
 * Each node points to the next one; the last node points to NULL. */
static AsnValxNode avnx[2] = {
{20,"forward" ,0,0.0,&avnx[1] } ,
{20,"reverse" ,1,0.0,NULL } };
/* Type table: one entry per named type, per field and per primitive type
 * used by the schema. Entries refer to each other by address inside this
 * array; the entries whose name is NULL (indexes 15 and 18) are the unnamed
 * elements of "hits" (SET OF Hit) and "PrimerInput" (SEQUENCE OF Primer).
 * NOTE(review): the meaning of the individual numeric columns is defined by
 * the AsnType struct in the NCBI library, not shown here. */
static AsnType atx[20] = {
{401, "Orientation" ,1,0,0,0,0,0,0,0,NULL,&atx[1],&avnx[0],0,&atx[2]} ,
{310, "ENUMERATED" ,0,10,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} ,
{402, "Hit" ,1,0,0,0,0,0,0,0,NULL,&atx[8],&atx[3],0,&atx[9]} ,
{0, "chromosome" ,128,0,0,0,0,0,0,0,NULL,&atx[4],NULL,0,&atx[5]} ,
{323, "VisibleString" ,0,26,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} ,
{0, "start" ,128,1,0,0,0,0,0,0,NULL,&atx[6],NULL,0,&atx[7]} ,
{302, "INTEGER" ,0,2,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} ,
{0, "orient" ,128,2,0,0,0,0,0,0,NULL,&atx[0],NULL,0,NULL} ,
{311, "SEQUENCE" ,0,16,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} ,
{403, "Primer" ,1,0,0,0,0,0,0,0,NULL,&atx[8],&atx[10],0,&atx[17]} ,
{0, "name" ,128,0,0,0,0,0,0,0,NULL,&atx[4],NULL,0,&atx[11]} ,
{0, "tm" ,128,1,0,1,0,0,0,0,NULL,&atx[12],NULL,0,&atx[13]} ,
{309, "REAL" ,0,9,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} ,
{0, "sequence" ,128,2,0,0,0,0,0,0,NULL,&atx[4],NULL,0,&atx[14]} ,
{0, "hits" ,128,3,0,1,0,0,0,0,NULL,&atx[16],&atx[15],0,NULL} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[2],NULL,0,NULL} ,
{314, "SET OF" ,0,17,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} ,
{404, "PrimerInput" ,1,0,0,0,0,0,0,0,NULL,&atx[19],&atx[18],0,NULL} ,
{0, NULL,1,-1,0,0,0,0,0,0,NULL,&atx[9],NULL,0,NULL} ,
{312, "SEQUENCE OF" ,0,16,0,0,0,0,0,0,NULL,NULL,NULL,0,NULL} };
/* Module descriptor: module name, header file name, first type-table entry. */
static AsnModule ampx[1] = {
{ "PCRMod" , "primerasn.h",&atx[0],NULL,NULL,0,0} };
/* Pointers to the tables above; the type macros of this header index `at`. */
static AsnValxNodePtr avn = avnx;
static AsnTypePtr at = atx;
static AsnModulePtr amp = ampx;
/**************************************************
*
* Defines for Module PCRMod
*
**************************************************/
/*
 * Each macro is the address of one entry of the type table reached through
 * `at`. TYPE_field names a field of TYPE; the `_E` suffix names the unnamed
 * element entry of a SET OF / SEQUENCE OF (at[15] refers to Hit, at[18]
 * refers to Primer).
 */
#define ORIENTATION &at[0]
#define HIT &at[2]
#define HIT_chromosome &at[3]
#define HIT_start &at[5]
#define HIT_orient &at[7]
#define PRIMER &at[9]
#define PRIMER_name &at[10]
#define PRIMER_tm &at[11]
#define PRIMER_sequence &at[13]
#define PRIMER_hits &at[14]
#define PRIMER_hits_E &at[15]
#define PRIMERINPUT &at[17]
#define PRIMERINPUT_E &at[18]
primer.h: contains the C headers used to parse the structure declared in the ASN1 schema. There is a method to allocate/free/read and write each structure.
#
(...)
/**************************************************
*
* Generated objects for Module PCRMod
*
**************************************************/
/* Loader for the PCRMod specification; returns a Boolean.
 * NOTE(review): implemented in primer.c (not shown) - presumably must
 * succeed before any of the read/write functions below is used; confirm. */
NLM_EXTERN Boolean LIBCALL
primerAsnLoad PROTO((void));
/* following #defines are for enumerated type, not used by object loaders */
/* Values match forward(0) / reverse(1) of the ASN.1 Orientation type. */
#define Orientation_forward 0
#define Orientation_reverse 1
/**************************************************
* Hit
**************************************************/
/* C counterpart of the ASN.1 type Hit (chromosome, start, orient). */
typedef struct struct_Hit {
struct struct_Hit PNTR next; /* pointer to another Hit - presumably chains the elements of a SET OF Hit */
Uint4 OBbits__; /* NOTE(review): looks like a bit field tracking which optional members are set - confirm */
CharPtr chromosome; /* ASN.1 field 'chromosome' (VisibleString) */
Int4 start; /* ASN.1 field 'start' (INTEGER) */
Uint2 orient; /* ASN.1 field 'orient'; see Orientation_forward / Orientation_reverse */
} Hit, PNTR HitPtr;
/* Free / allocate / read / write functions generated for Hit. */
NLM_EXTERN HitPtr LIBCALL HitFree PROTO ((HitPtr ));
NLM_EXTERN HitPtr LIBCALL HitNew PROTO (( void ));
NLM_EXTERN HitPtr LIBCALL HitAsnRead PROTO (( AsnIoPtr, AsnTypePtr));
NLM_EXTERN Boolean LIBCALL HitAsnWrite PROTO (( HitPtr , AsnIoPtr, AsnTypePtr));
/**************************************************
* Primer
**************************************************/
/* C counterpart of the ASN.1 type Primer (name, tm, sequence, hits). */
typedef struct struct_Primer {
struct struct_Primer PNTR next; /* pointer to another Primer - presumably chains the elements of a PrimerInput */
Uint4 OBbits__; /* NOTE(review): looks like a bit field tracking which optional members are set - confirm */
CharPtr name; /* ASN.1 field 'name' (VisibleString) */
/* NOTE(review): presumably the bit index of 'tm' inside OBbits__ - confirm */
#define OB__Primer_tm 0
FloatHi tm; /* ASN.1 field 'tm' (REAL OPTIONAL) */
CharPtr sequence; /* ASN.1 field 'sequence' (VisibleString) */
struct struct_Hit PNTR hits; /* ASN.1 field 'hits' (SET OF Hit OPTIONAL) */
} Primer, PNTR PrimerPtr;
/* Free / allocate / read / write functions generated for Primer. */
NLM_EXTERN PrimerPtr LIBCALL PrimerFree PROTO ((PrimerPtr ));
NLM_EXTERN PrimerPtr LIBCALL PrimerNew PROTO (( void ));
NLM_EXTERN PrimerPtr LIBCALL PrimerAsnRead PROTO (( AsnIoPtr, AsnTypePtr));
NLM_EXTERN Boolean LIBCALL PrimerAsnWrite PROTO (( PrimerPtr , AsnIoPtr, AsnTypePtr));
/**************************************************
* PrimerInput
**************************************************/
/* PrimerInput (SEQUENCE OF Primer) has no struct of its own: it is an alias
 * of struct_Primer, so a PrimerInputPtr points to a Primer. */
typedef struct struct_Primer PrimerInput;
typedef struct struct_Primer PNTR PrimerInputPtr;
/* NOTE(review): PrimerInputNew is defined here as a function-like macro and
 * also prototyped two lines below, exactly as emitted by asntool - confirm
 * that this is intended. */
#define PrimerInputNew() PrimerNew()
/* Free / allocate / read / write functions generated for PrimerInput. */
NLM_EXTERN PrimerInputPtr LIBCALL PrimerInputFree PROTO ((PrimerInputPtr ));
NLM_EXTERN PrimerInputPtr LIBCALL PrimerInputNew PROTO (( void ));
NLM_EXTERN PrimerInputPtr LIBCALL PrimerInputAsnRead PROTO (( AsnIoPtr, AsnTypePtr));
NLM_EXTERN Boolean LIBCALL PrimerInputAsnWrite PROTO (( PrimerInputPtr , AsnIoPtr, AsnTypePtr));
(...)
and
primer.c the C implementation of those methods.
(...)
OK, here comes the problem: I wrote a simple ASN1 input
PrimerInput::={
{
name "Primer0",
sequence "ATAGCTACTGATGCATGCATCG"
}
}
and I wanted to read each primer. Here is my source:
#include <cassert>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <asn.h>
/** include the files generated by the ASN1 tool */
#include <primer.h>
#include <primerasn.h>
/**
 * Reads a text ASN.1 file holding a PrimerInput and prints, on stderr, the
 * sequence of every primer found in it.
 *
 * Usage: <program> <ASN1 file>
 *
 * @param argc number of command-line arguments; exactly one file is expected
 * @param argv argv[1] is the path of the ASN.1 file to read
 * @return EXIT_FAILURE when the specification cannot be loaded, the arguments
 *         are wrong or the file cannot be opened; 0 otherwise
 */
int main(int argc, char** argv)
{
    /* index of the first non-option argument (no option is parsed so far) */
    int first_arg = 1;

    /* init my specification */
    if (!primerAsnLoad())
    {
        fprintf(stderr, "#%s: cannot load ASN1 specification \"%s\".\n",
                argv[0], asnfilename);
        return (EXIT_FAILURE);
    }

    if (first_arg + 1 != argc)
    {
        fprintf(stderr, "bad input : usage %s ASN1 file\n", argv[0]);
        return (EXIT_FAILURE);
    }

    /** open the input file */
    AsnIoPtr in = AsnIoOpen(argv[1], "r");
    if (in == NULL)
    {
        fprintf(stderr, "Cannot Read %s\n", argv[1]);
        return (EXIT_FAILURE);
    }

    /*
     * init the state of the parser
     *
     * NOTE(review): primerasn.h declares its tables `static`, so the `at` /
     * `amp` used here belong to this translation unit only. If primer.c
     * includes the same header, PRIMERINPUT_E below is a different address
     * from the one PrimerAsnRead() works with - this may be why no field is
     * recognised; confirm against primer.c.
     */
    AsnTypePtr asn_type_ptr = PRIMERINPUT;

    /** while we can read a primer... */
    while ((asn_type_ptr = AsnReadId(in, amp, asn_type_ptr)) != NULL)
    {
        if (asn_type_ptr != PRIMERINPUT_E)
        {
            /* not a primer element: consume its value and move on */
            AsnReadVal(in, asn_type_ptr, NULL);
            continue;
        }

        PrimerPtr primer = PrimerAsnRead(in, asn_type_ptr);
        if (primer == NULL)
        {
            continue;
        }

        /* Passing a NULL pointer to %s is undefined behaviour, so print the
         * placeholder explicitly when the sequence was not filled in. */
        const char* sequence =
            (primer->sequence != NULL) ? primer->sequence : "(null)";
        fprintf(stderr, "sequence: %s\n", sequence);
        PrimerFree(primer);
    }

    AsnIoClose(in);
    return (0);
}
The problem: my input file is processed silently, but the output shows two primers instead of one and the sequence is reported as NULL:
sequence: (null)
sequence: (null)
I'm blocked here :-)
Pierre