09 September 2011

Customizing "C printf" to print a FASTA sequence.

The GNU C library lets you define your own custom conversion specifiers for printf template strings. In this post, I will show how to add a new conversion specifier to printf that prints a FASTA sequence. Say we have a C structure holding a FASTA sequence:

/** One FASTA record: a header name and its sequence. */
typedef struct fasta_t {
	char *name;     /* NUL-terminated header text printed after '>'; may be NULL */
	char *sequence; /* NUL-terminated sequence characters; may be NULL */
} Fasta;
The new output conversion is registered with register_printf_specifier.
/* Bind the 'W' conversion to its two callbacks; a non-zero return value means
   the registration failed. */
register_printf_specifier(
               'W',/* the new conversion specifier character */
		/* handler function: writes the record to the output stream */
		printf_fasta_handler,
		/* arginfo function: reports how many arguments are consumed and their types */
		printf_fasta_arginfo )

The handler function

"The handler-function is the function called by printf and friends when this conversion appears in a template string... The structure printf_info is used to pass information about the options appearing in an instance of a conversion specifier in a printf template string to the handler and arginfo functions for that specifier." Here the width is used to restrict the length of the FASTA sequence (the number of sequence characters printed) and the precision defines the length of the lines (60 characters when no precision is given).
/* Write one character to `stream`; add 1 to `ret` only when the write succeeds. */
#define FPUTC(C) (ret+=(fputc((C),stream)==EOF?0:1))
/**
 * printf handler for the custom 'W' conversion (registered with
 * register_printf_specifier): prints a Fasta record as a '>' header line
 * followed by the sequence folded into fixed-length lines.
 *
 * stream: output stream to write to.
 * info:   conversion options; info->width limits how many sequence
 *         characters are printed (no limit when < 1) and info->prec is the
 *         line length (60 when < 1).
 * args:   args[0] points to the argument, which is itself a pointer to a Fasta.
 *
 * Returns the number of characters successfully written.
 */
static int printf_fasta_handler(
	FILE *stream, /*  stream output */
	const struct printf_info *info, /* information about the various options */
	const void *const *args /* arguments */
	)
	{
	int n=0;
	int ret=0;
	const char* p;

	int lline =(info->prec<1?60:info->prec);
	const Fasta* fasta=*((const Fasta* const*)(args[0]));

	/* header line: '>' followed by the name, when there is one */
	FPUTC('>');
	if(fasta->name!=NULL)
		{
		/* fputs() only reports success or failure, not a length, so the
		   name is written one character at a time to keep `ret` exact */
		for(p=fasta->name;*p!=0;++p)
			{
			FPUTC(*p);
			}
		}

	if(fasta->sequence!=NULL)
		{
		p=fasta->sequence;
		while(p[n]!=0 && (info->width<1 || n< info->width))
			{
			/* start a new line every `lline` characters; at n==0 this
			   also terminates the header line */
			if(n%lline==0)
				{
				FPUTC('\n');
				}
			FPUTC(p[n]);
			++n;
			}
		}
	FPUTC('\n');
	/* number of characters written */
	return ret;
	}

The arginfo function

"The arginfo-function is the function called by parse_printf_format when this conversion appears in a template string."
/**
 * arginfo callback for the 'W' conversion.
 * Declares that the conversion consumes one pointer argument: when the caller
 * provides room for at least one entry (n > 0), argtypes[0] is set to
 * PA_POINTER. `info` and `size` are not used.
 * Always returns 1.
 */
static int printf_fasta_arginfo(
	const struct printf_info *info,
	size_t n,
	int *argtypes,
	int *size)
	{
	enum { FASTA_ARG_COUNT = 1 };

	if (n != 0)
		{
		/* the single argument is a pointer to the Fasta structure */
		argtypes[0] = PA_POINTER;
		}
	return FASTA_ARG_COUNT;
	}

Printing the FASTA sequence:


Fasta record;
(...)
/* width 80: at most 80 sequence characters, default 60 per line */
printf("%80W\n",&record);
/* width 200, precision 90: at most 200 sequence characters, 90 per line */
printf("%200.90W\n",&record);
/* width 200: at most 200 sequence characters, default 60 per line */
printf("%200W\n",&record);
/* precision 80 only: the whole sequence, 80 characters per line */
printf("%.80W\n",&record);

Compile and run

$ gcc -Wall source.c
$ ./a.out < sequence.fa

>gi|27592135
GGAAGGGCTGCCCCACCATTCATCCTTTTCTCGTAGTTTGTGCACGGTGCGGGAGGTTGT
CTGAGTGACTTCACGGGTCG

>gi|27592135
GGAAGGGCTGCCCCACCATTCATCCTTTTCTCGTAGTTTGTGCACGGTGCGGGAGGTTGTCTGAGTGACTTCACGGGTCGCCTTTGTGCA
GTACTAGATATGCAGCAGACCTATGACATGTGGCTAAAGAAACACAATCCTGGGAAGCCTGGAGAGGGAACACCACTCACTTCGCGAGAA
GGGGAGAAACAGATCCAGAT

>gi|27592135
GGAAGGGCTGCCCCACCATTCATCCTTTTCTCGTAGTTTGTGCACGGTGCGGGAGGTTGT
CTGAGTGACTTCACGGGTCGCCTTTGTGCAGTACTAGATATGCAGCAGACCTATGACATG
TGGCTAAAGAAACACAATCCTGGGAAGCCTGGAGAGGGAACACCACTCACTTCGCGAGAA
GGGGAGAAACAGATCCAGAT

>gi|27592135
GGAAGGGCTGCCCCACCATTCATCCTTTTCTCGTAGTTTGTGCACGGTGCGGGAGGTTGTCTGAGTGACTTCACGGGTCG
CCTTTGTGCAGTACTAGATATGCAGCAGACCTATGACATGTGGCTAAAGAAACACAATCCTGGGAAGCCTGGAGAGGGAA
CACCACTCACTTCGCGAGAAGGGGAGAAACAGATCCAGATGCCCACTGACTATGCTGACATCATGATGGGCTACCACTGC
TGGCTCTGCGGGAAGAACAGCAACAGCAAGAAGCAATGGCAGCAGCACATCCAGTCAGAGAAGCACAAGGAGAAGGTCTT
CACCTCAGACAGTGACTCCAGCTGCTGGAGCTATCGCTTCCCTATGGGCGAGTTCCAGCTCTGTGAAAGGTACCATGCAC
ATGGCTCTGTTTGATCCCAGAAGTGATGACTACTTAGTGGTAAAAACACATTTCCAGACACACAACTTCAGAAAATGAGT
GCAAGCTTCAAGTCTGCCCTTTGTAGCCATAATGTGCTCAGCTCTCGGTCTGCTGAACAGAGTCTACTTGGCTCAATTCT
TGGGGGAATCCCAGATGCTTTATTAGATTGTTTGAATGTCTCACGCCCTCTGAATCAGTGCCTTGAGGTGCCTTCAGAAG
GCTTGTGATGGTTAGNNNTNGCATTTTGGTT

Code

/**
* Author:
* Pierre Lindenbaum PhD
* Contact:
* plindenbaum@yahoo.fr
* Date:
* 2011-09-09
* WWW:
* http://plindenbaum.blogspot.com
* Reference:
* http://www.gnu.org/s/hello/manual/libc/Customizing-Printf.html
* Motivation:
* Customizing printf to print a fasta sequence
* Usage:
* gcc -Wall source.c
* ./a.out < file.fasta
*/
#include <stdio.h>
#include <stdlib.h>
#include <printf.h>
#include <ctype.h>
/** One FASTA record: a header name and its sequence. */
typedef struct fasta_t {
    char *name;     /* NUL-terminated header text printed after '>'; may be NULL */
    char *sequence; /* NUL-terminated sequence characters; may be NULL */
} Fasta;
/* Write one character to `stream`; add 1 to `ret` only when the write succeeds. */
#define FPUTC(C) (ret+=(fputc((C),stream)==EOF?0:1))
/**
 * printf handler for the custom 'W' conversion (registered with
 * register_printf_specifier): prints a Fasta record as a '>' header line
 * followed by the sequence folded into fixed-length lines.
 *
 * stream: output stream to write to.
 * info:   conversion options; info->width limits how many sequence
 *         characters are printed (no limit when < 1) and info->prec is the
 *         line length (60 when < 1).
 * args:   args[0] points to the argument, which is itself a pointer to a Fasta.
 *
 * Returns the number of characters successfully written.
 */
static int printf_fasta_handler(
    FILE *stream, /* stream output */
    const struct printf_info *info, /* information about the various options */
    const void *const *args /* arguments */
    )
{
    int n=0;
    int ret=0;
    const char* p;
    int lline =(info->prec<1?60:info->prec);
    const Fasta* fasta=*((const Fasta* const*)(args[0]));

    /* header line: '>' followed by the name, when there is one */
    FPUTC('>');
    if(fasta->name!=NULL)
    {
        /* fputs() only reports success or failure, not a length, so the
           name is written one character at a time to keep `ret` exact */
        for(p=fasta->name;*p!=0;++p)
        {
            FPUTC(*p);
        }
    }
    if(fasta->sequence!=NULL)
    {
        p=fasta->sequence;
        while(p[n]!=0 && (info->width<1 || n< info->width))
        {
            /* start a new line every `lline` characters; at n==0 this
               also terminates the header line */
            if(n%lline==0)
            {
                FPUTC('\n');
            }
            FPUTC(p[n]);
            ++n;
        }
    }
    FPUTC('\n');
    /* number of characters written */
    return ret;
}
/**
 * arginfo callback for the 'W' conversion.
 * Declares that the conversion consumes one pointer argument: when the caller
 * provides room for at least one entry (n > 0), argtypes[0] is set to
 * PA_POINTER. `info` and `size` are not used.
 * Always returns 1.
 */
static int printf_fasta_arginfo(
    const struct printf_info *info,
    size_t n,
    int *argtypes,
    int *size)
{
    enum { FASTA_ARG_COUNT = 1 };

    if (n != 0)
    {
        /* the single argument is a pointer to the Fasta structure */
        argtypes[0] = PA_POINTER;
    }
    return FASTA_ARG_COUNT;
}
/**
 * Append character `c` to the heap-allocated, NUL-terminated string `ptr`
 * whose current length is `len`, then increment `len`.
 * `ptr` may be NULL when `len` is 0. The buffer is resized to len+2 bytes
 * (the new character plus the terminator). On allocation failure an error
 * is printed to stderr and the program exits with EXIT_FAILURE.
 * `ptr` and `len` must be modifiable lvalues; they are evaluated more than
 * once, so pass expressions without side effects.
 * Wrapped in do { } while(0) so that `APPEND(...);` is a single statement
 * and can be used safely in an unbraced if/else.
 */
#define APPEND(ptr,len,c) do {\
    void *append_tmp_=realloc((ptr),sizeof(char)*((len)+2));\
    if(append_tmp_==NULL)\
    {\
        fputs("Out of memory\n",stderr);\
        exit(EXIT_FAILURE);\
    }\
    (ptr)=append_tmp_;\
    (ptr)[(len)]=(char)(c);\
    (ptr)[(len)+1]=0;\
    (len)++;\
} while(0)
/**
 * Reads FASTA records from standard input and prints each record that has a
 * non-empty sequence four times through the custom 'W' printf conversion,
 * using different width/precision combinations.
 *
 * Returns EXIT_FAILURE if the conversion cannot be registered, 0 otherwise.
 */
int main(int argc,char** argv)
{
    size_t len_name=0;
    size_t len_seq=0;
    Fasta record;
    int c;
    /* register a new output conversion */
    if(register_printf_specifier('W',
        /* handler function */
        printf_fasta_handler,
        /* arginfo function */
        printf_fasta_arginfo
        )!=0)
    {
        fputs("Cannot register print function",stderr);
        return EXIT_FAILURE;
    }
    record.name=NULL;
    record.sequence=NULL;
    for(;;)
    {
        c=fgetc(stdin);
        if(c==EOF || c=='>')
        {
            /* a record ends at the next '>' or at end of input */
            if(len_seq>0)
            {
                printf("%80W\n",&record);
                printf("%200.90W\n",&record);
                printf("%200W\n",&record);
                printf("%.80W\n",&record);
            }
            if(c==EOF) break;
            /* start a new record: the buffers are reused, so truncate them;
               otherwise an empty header line would keep the previous
               record's name */
            len_seq=0;
            len_name=0;
            if(record.name!=NULL) record.name[0]=0;
            if(record.sequence!=NULL) record.sequence[0]=0;
            /* the rest of the header line is the record name */
            while((c=fgetc(stdin))!=EOF && c!='\n')
            {
                APPEND(record.name,len_name,c);
            }
        }
        else if(!isspace(c))
        {
            /* sequence character; whitespace (including newlines) is skipped */
            APPEND(record.sequence,len_seq,c);
        }
    }
    free(record.name);
    free(record.sequence);
    return 0;
}
view raw source.c hosted with ❤ by GitHub

That's it,

Pierre

No comments: