GRegex API



Hi folks,

I've been doing some work on a nice wrapper around the wonderful
regex implementation in PCRE (Perl Compatible Regular Expressions).

As a Perl programmer, I've found that having functions that make
using regular expressions easy tends to change the way you go
about solving problems (you ditch all that ugly strncmp crap and
just use regexes).  So, I'm doing my best to have a library that
does the same for me when I'm working in C.  Hopefully, the
library will be useful enough to get included in glib, but that's
somebody else's decision.

The code I've written to date lives at:
http://dev.cgibuilder.com/scottw/code/g_regex/

The API is covered in GRegex.h, which is copied below.  If you'd
like to see some modifications or additions to the library, 
please let me know.  No point in me doing something stupid on
this.


/* GRegex -- regular expression API wrapper around PCRE.
 * Copyright (C) 1999 Scott Wimer
 *
 * This is basically an ease-of-use wrapper around the functionality of
 * PCRE.
 *
 * With this library, we are, hopefully, drastically reducing the code
 * complexity necessary by making use of a more complex and detailed
 * data structure to store the regex info.  I am hoping to have a regex
 * interface that is almost as easy to use as Perl's.  <fingers crossed>
 *
 * Author: Scott Wimer <scottw@cgibuilder.com>
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
 *
 * This library is free software, you can distribute it or modify it
 * under the following terms:
 *  1) The GNU General Public License (GPL)
 *  2) The GNU Library General Public License (LGPL)
 *  3) The Perl Artistic license (Artistic)
 *  4) The BSD license (BSD)
 *
 * In short, you can use this library in any code you desire, so long as
 * the Copyright notice above remains intact.  If you do make changes to
 * it, I would appreciate that you let me know so I can improve this 
 * library for everybody, but I'm not gonna force you to.
 * 
 * Please note that this library is just a wrapper around Philip Hazel's
 * PCRE library.  Please see the file 'LICENSE' in your PCRE distribution.
 *
 */

#include <glib.h>
#include <pcre.h>

/* State for one compiled regular expression, together with the results
 * of the most recent match attempt made with it. */
typedef struct {
	gchar  *pattern;      /* the regular expression, as source text */
	pcre   *regex;        /* compiled form of the pattern */
	gint   *offsets;      /* array of offsets paired 0,1 ; 2,3 ; 4,5 etc
	                       * (presumably start/end of each sub match --
	                       * TODO confirm against the implementation) */
	gint    offset_ct;    /* number of offsets */
	gint    compile_opts; /* options used at compile time on the pattern */
	gint    match_opts;   /* options used at match time on the regex */
	const char *errptr;   /* error message from PCRE on compile */
	gint    erroffset;    /* location in pattern of the error */
	gint    error;        /* boolean flag, 1 if there was an error, 0 if not */
	gint    match;        /* boolean flag, 1 if a match was made, 0 if not */
	gint    matches;      /* number of matching sub patterns */
	gint    string_len;   /* length of the string last matched against */
	gint    pos;          /* position in the string where last match left off */
	GSList *delims;       /* delimiter sub strings from split_next */
	pcre_extra *study_data; /* data stored when g_regex_study is used */
} GRegex;

/* Really quick outline of features... functions are preceded by 'g_regex_'
 *   new         - compile a pattern and put it in a g_regex structure
 *   free        - free up the memory used by the g_regex structure
 *   study       - study the pattern to make matching more efficient
 *   match       - try matching a pattern in the string
 *   match_next  - try matching pattern again in the string
 *   fetch       - fetch a particular matching sub pattern
 *   fetch_all   - get all of the matching sub patterns
 *   split       - split the string on a regex
 *   split_next  - for using split as an iterator of sorts
 *   replace     - replace occurrences of a pattern with some text
 *   reuse       - clear out the structure to match against a new string
 */


/* Create a new compiled regex from a pattern.
 *
 * g_regex_new_with_options presumably also applies compile-time options.
 * NOTE(review): `options` here is a single gchar, whereas every other
 * function in this header takes an option string (gchar *) -- confirm
 * whether this was meant to be `gchar * options`.
 *
 * The result is presumably released with g_regex_free().  How a compile
 * failure is reported is not visible from this header (the GRegex struct
 * carries error / errptr / erroffset fields) -- TODO confirm. */
GRegex * g_regex_new(gchar * pattern);
GRegex * g_regex_new_with_options(gchar * pattern, gchar options);

/* Clear the GRegex structure of the state left over from the last match,
 * so that it can be used to match against a new string.
 * This should probably be a macro rather than a function. */
void g_regex_reuse(GRegex * regex);

/* Destroy a GRegex, freeing the memory used by the structure. */
void g_regex_free(GRegex * regex);


/* Match a compiled regex against string.  Returns true on a match (the
 * number of matches).
 * `options` is an option string; presumably the same format accepted by
 * g_regex_options() -- TODO confirm. */
gint g_regex_match(GRegex * regex, gchar * string, gchar * options);

/* Match the next occurrence of a regex in a string (Perl's m//g).
 * Presumably resumes from the position recorded in regex->pos by the
 * previous match -- TODO confirm. */
gint g_regex_match_next(GRegex * regex, gchar * string, gchar * options);


/* Match a pattern (not a compiled regex) against string.  Returns true
 * on a match (the number of matches).  If the "o" option is used, the
 * pattern string does not get recompiled.  */
gint g_regex_match_pattern(gchar * pattern, gchar * string, gchar * options);

/* Match the next occurrence of a pattern (not a compiled regex) in a
 * string (Perl's m//g).  If the "o" option is used, the pattern string
 * does not get recompiled.  */
gint g_regex_match_pattern_next(gchar * pattern, gchar * string, 
								gchar * options);


/* Get a single sub expression match, selected by match_num.
 * Presumably match_num 0 is the full match and 1..n are the capturing
 * groups, mirroring g_regex_fetch_all -- TODO confirm.
 * NOTE(review): match_num is a plain `int` while the rest of this header
 * uses `gint`. */
gchar * g_regex_fetch(GRegex * regex, gchar * string, int match_num);

/* Get all sub expression matches.  The first gchar* in the array or list
 * is the full match string; the remaining gchar*'s are the substrings
 * from capturing parentheses in the regex.
 * The three variants differ only in the container returned (GList,
 * gchar* array, GSList).  Ownership of the returned strings is not
 * stated here -- TODO confirm who frees them. */
GList  * g_regex_fetch_all(GRegex * regex, gchar * string);
gchar ** g_regex_fetch_all_array(GRegex * regex, gchar * string);
GSList * g_regex_fetch_all_gslist(GRegex * regex, gchar * string);


/* Split the string into a list of pieces.  If the pattern used in the
 * regex has capturing parens, elements are also added for each matching
 * substring in the delimiter.
 * max_pieces presumably caps the number of pieces returned; the meaning
 * of zero or negative values is not stated here -- TODO confirm.
 * The two variants differ only in the list type returned. */
GList  * g_regex_split(gchar * pattern, gchar * string, 
					   gchar * options, gint max_pieces);
GSList * g_regex_split_gslist(gchar * pattern, gchar * string, 
					   gchar * options, gint max_pieces);

/* Split the string into pieces one at a time, returning the next piece
 * on each call.  Useful inside of a while loop.
 * Because we don't want to keep recompiling the pattern string, this
 * takes a GRegex pointer, not a gchar pointer.
 * The end-of-input return value is not stated here; presumably NULL --
 * TODO confirm. */
gchar * g_regex_split_next(GRegex * regex, gchar * string, gchar * options);


/* Replace matching substrings in string with different strings.  Like
 * Perl's s/// operator.
 * g_regex_replace takes a fixed replacement string.  g_regex_replace_e
 * takes a callback instead; presumably it is passed the matched text and
 * returns the replacement, like Perl's s///e -- TODO confirm.
 * Whether the result is a newly allocated string is not stated here. */
gchar * g_regex_replace(GRegex * regex, gchar * string, gchar * replacement);
gchar * g_regex_replace_e(GRegex * regex, gchar * string, 
                          gchar * (*eval)(gchar *) );


/* Study the regular expression, in preparation for making lots of
 * comparisons with it.  The resulting data is kept in the GRegex
 * structure's study_data field. */
void g_regex_study(GRegex * regex);


/* Return the integer equivalent of the option string.  This should be
 * inlined.
 * The set of recognised option letters is not listed in this header
 * (only "o" is mentioned, for the *_pattern functions) -- TODO document. */
gint g_regex_options(gchar * options);



--
Scott Wimer
play  --->    scottw@cgibuilder.com         http://www.cgibuilder.com/
work  --->    scottw@corp.earthlink.net     http://www.earthlink.net/



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]