/* Copyright 2009, UCAR/Unidata and OPeNDAP, Inc. See the COPYRIGHT file for more information. */
struct Symbol Fields | ||
---|---|---|
struct Symbol* next | - | The Symbol objects are all kept on a single linked list. No symbol is ever deleted until the end of the program. |
nc_class objectclass | - | This defines the general class of symbol, one of: NC_GRP, NC_DIM, NC_VAR, NC_ATT, or NC_TYPE. |
nc_class subclass | - | This defines the sub class of symbol, one of: NC_PRIM, NC_OPAQUE, NC_ENUM, NC_FIELD, NC_VLEN, NC_COMPOUND, NC_ECONST, NC_ARRAY, or NC_FILLVALUE. |
char* name | - | The symbol's name. |
struct Symbol* container | - | The symbol that is the container for this symbol. Typically, this is the group symbol that contains this symbol. |
struct Symbol* location | - | The current group that was open when this symbol was created. |
List* subnodes | - | The list of child symbols of this symbol. For example, a group symbol will have its dimensions, types, vars, and subgroups in this list. |
int is_prefixed | - | True if the name of this symbol contains a complete prefix path (e.g. /x/y/z). |
List* prefix | - | A list of the prefix names for this node. Note that if is_prefixed is false, then this list was constructed from the set of enclosing groups. |
struct Datalist* data | - | Stores the constants from attribute or datalist constructs. |
Typeinfo typ | - | Type information about this symbol as defined by the Typeinfo structure. |
Varinfo var | - | Variable information about a variable symbol as defined by the Varinfo structure. |
Attrinfo att | - | Attribute information about an attribute symbol as defined by the Attrinfo structure. |
Diminfo dim | - | Dimension information about a dimension symbol as defined by the Diminfo structure. |
Groupinfo grp | - | Group information about a group symbol as defined by the Groupinfo structure. |
int lineno | - | The source line in which this symbol was created. |
int touched | - | Used in transitive closure operations to prevent revisiting symbols. |
char* lname | - | Cached C or FORTRAN name (not used?). |
int ncid | - | The ncid/varid/dimid, etc when defining netcdf objects. |
struct Groupinfo Fields | ||
---|---|---|
int is_root | - | Is this the root group? |
struct Diminfo Fields | ||
---|---|---|
int isconstant | - | Is this an anonymous dimension? |
unsigned int size | - | The size of the dimension. |
struct Varinfo Fields | ||
---|---|---|
int nattributes | - | The number of attributes; this is redundant but useful. |
List* attributes | - | The list of all attribute symbols associated with this variable. |
Specialdata special | - | Special attribute values. |
struct Typeinfo Fields | ||
---|---|---|
struct Symbol* basetype | - | Provide a reference to the base type of this symbol. This applies to other types, variables, and attributes. |
int hasvlen | - | Does the type have a vlen definition anywhere within it. This is used as an optimization to avoid searching datalists for vlen constants. |
nc_type typecode | - | The typecode of the basetype. This is most useful when the basetype is a primitive type. |
unsigned long size | - | The size of this object. |
unsigned long offset | - | The field offset for fields in compound types. |
unsigned long alignment | - | The memory alignment (i.e. 1,2,4,or 8). |
Constant econst | - | For enumeration constants, the actual value of the constant. |
Dimset dimset | - | The dimension information for the type or variable. The dimset stores the number of dimensions and a list of pointers to the corresponding dimension symbols. |
struct Attrinfo Fields | ||
---|---|---|
struct Symbol* var | - | The variable with which this attribute is associated; it is NULL for global attributes. |
unsigned long count | - | The number of instances. |
Each datalist instance contains the following information.
struct Datalist Fields | ||
---|---|---|
struct Datalist* next | - | All datalists are chained for reclamation. |
int readonly | - | Can this datalist be modified? |
unsigned int length | - | The number of Constant instances in the data field. |
unsigned int alloc | - | The memory space allocated to the data field. |
Constant* data | - | The vector in sequential memory of the constants comprising this datalist. |
struct Symbol* schema | - | The symbol (type, variable, or attribute) defining the structure of this datalist. |
struct Vlen { | - | Information about the vlen instances contained in this datalist. |
unsigned int count; | ||
unsigned int uid; | ||
} vlen | ||
Odometer* dimdata | - | A tracker to count through dimensions associated with this datalist via the schema. |
In turn, a Constant instance is defined as follows.
typedef struct Constant {
    nc_type nctype;
    int lineno;
    Constvalue value;
} Constant;
It indicates the type of the value and the source line number in which this constant was created.
The Constvalue type is a union of all possible values that can occur in a datalist.
typedef union Constvalue {
    struct Datalist* compoundv; // NC_COMPOUND
    char charv;                 // NC_CHAR
    signed char int8v;          // NC_BYTE
    unsigned char uint8v;       // NC_UBYTE
    short int16v;               // NC_SHORT
    unsigned short uint16v;     // NC_USHORT
    int int32v;                 // NC_INT
    unsigned int uint32v;       // NC_UINT
    long long int64v;           // NC_INT64
    unsigned long long uint64v; // NC_UINT64
    float floatv;               // NC_FLOAT
    double doublev;             // NC_DOUBLE
    struct Stringv {            // NC_STRING
        int len;
        char* stringv;
    } stringv;
    struct Opaquev {            // NC_OPAQUE
        int len;        // length as originally written (rounded to even number)
        char* stringv;  // as constant was written
                        // (padded to even # chars >= 16)
                        // without leading 0x
    } opaquev;
    struct Symbol* enumv;       // NC_ECONST
} Constvalue;
Several fields are of particular interest:
Selected Constvalue Fields | ||
---|---|---|
struct Datalist* compoundv | - | This stores nested datalists - typically of the form "{...{...}...}". |
struct Stringv {int len; char* stringv;} stringv | - | Store string constants. |
struct Opaquev {int len; char* stringv;} opaquev | - | Store opaque constants as written (i.e. abc...), without the leading 0x, and padded to an even number of characters to be at least 16 characters long. |
struct Symbol* enumv | - | Pointer to an enumeration constant definition. |
typedef struct Datasrc {
    unsigned int index;    // 0..length-1
    unsigned int length;
    int autopop;           // pop when at end
    Constant* data;        // duplicate pointer; so do not free.
    struct Datasrc* stack;
} Datasrc;
The Datasrc tracks the "current" location in the sequence of Constants (taken from a Datalist). The index field indicates the current location. In effect, Datasrc is the lexer, and the code that walks it is parsing the data sequence. The following operations are supported (see data.[ch]).
The node stacking set of cliches is ubiquitous in the parser, so they must be understood to understand how the parser works. The cliche here is shown in the handling of, for example, the varlist rule, which is defined as follows.
varlist:
      varspec             {$$=listlength(stack); listpush(stack,(elem_t)$1);}
    | varlist ',' varspec {$$=$1; listpush(stack,(elem_t)$3);}
    ;
The varlist rule collects variable name declarations (via the varspec rule). The idea is to use a separate stack named "stack", and to track the index into the stack of the start of the collection of objects. The varlist value (in the YACC sense) is defined as an integer representing the size of the stack at the start of a list of variables. That is what this code does:
$$=listlength(stack)
.
At the point where the set of varspecs should be processed, the following code cliche is used.
vardecl: typeref varlist
    {...
        stackbase=$2;
        stacklen=listlength(stack);
        for(i=stackbase;i<stacklen;i++) {
            Symbol* sym = (Symbol*)listget(stack,i);
            ...
        }
        listsetlength(stack,stackbase); // remove stack nodes
    }
    ...
The start of the set of variable declaration symbols is extracted as the integer associated with right-side non-terminal $2, e.g.
stackbase=$2
.
The current stack length is obtained from stacklen=listlength(stack)
.
Then the elements of the stack are extracted one by one using the above loop.
Finally, the nodes on the stack are cleared by the code segment
listsetlength(stack,stackbase)
.
The files genc.[ch] and cdata.c are the primary files for C code generation. The files data.[ch] are also important.
The output routines are as follows.
It has at its disposal several global lists of Symbols. Note that the lists cross all groups.
The superficial operation of gen_ncc is as follows; the details are provided later where the operation is complex.
The following code generates C code for defining the groups. It is fairly canonical and can be seen repeated in variant form when defining dimensions, types, variables, and attributes.
This code is redundant, but for consistency the root group ncid is stored like all other group ncids. Note that nprintf is a macro wrapper around snprintf.
nprintf(stmt,sizeof(stmt)," %s = ncid;",groupncid(rootgroup)); cline(stmt);
The loop walks all group symbols in preorder form and generates a C code call to nc_def_grp using parameters taken from the group Symbol instance (gsym). The call to nc_def_grp is followed by a call to the check_err procedure to verify the operation's result code.
for(igrp=0;igrp<listlength(grpdefs);igrp++) {
    Symbol* gsym = (Symbol*)listget(grpdefs,igrp);
    if(gsym == rootgroup) continue; // ignore root
    if(gsym->container == NULL) PANIC("null container");
    nprintf(stmt,sizeof(stmt),
            " stat = nc_def_grp(%s, \"%s\", &%s);",
            groupncid(gsym->container),
            gsym->name, groupncid(gsym));
    cline(stmt); // print the def_grp call
    clined(1,"check_err(stat,__LINE__,__FILE__);");
}
flushcode();
The code to generate dimensions, types, attributes, variables is similar, although often more complex.
The code to generate C equivalents of CDL types is in the procedure definectype(). Note that this code is not the code that invokes e.g. nc_def_vlen. The generated C types are used when generating datalists so that the standard C constant assignment mechanism will produce the correct memory values.
The genc_deftype procedure is the one that actually generates C code to define the netcdf types. The generated C code is designed to store the resulting typeid into the C variable defined earlier for holding that typeid.
Note that for compound types, the NC_COMPOUND_OFFSET macro is normally used to match netcdf offsets to the corresponding struct type generated in definectype. However, there is a flag, TESTALIGNMENT, that can be set to use a computed value for the offset.
For attributes, the general form generated is
T* attributevar = {...};
Except for VLENs, the datalist is completely contained in the brackets, with bracket nesting as required. A generated pointer to the attributevar is included in the generated call to nc_put_att().
For variables, the general form generated is similar to attributes.
T* varvar = {...};
Again, VLENs are handled specially. Also, for performance purposes, the datalist is loaded in pieces using nc_put_vara(). This is required if there are UNLIMITED dimensions, but is used for all cases for uniformity.
typedef struct Putvar {
    int (*putvar)(struct Putvar*, Odometer*, Bytebuffer*);
    int rank;
    Bytebuffer* code;
    size_t startset[NC_MAX_DIMS];
    struct CDF {
        int grpid;
        int varid;
    } cdf;
    struct C {
        Symbol* var;
    } c;
} Putvar;
An instance of the closure is created for each variable that is the target of nc_put_vara(). It is initialized with the variable's symbol, rank, group id and variable id. It is also provided with a Bytebuffer into which it is supposed to store the generated C code. The startset is the cached previous set of dimension indices used for generating the nc_put_vara (see below).
The callback procedure (field "putvar") for generating C code putvar is assigned to the procedure called cputvara() (defined in genc.c). This procedure takes as arguments the closure object, an odometer describing the current set of dimension indices, and a Bytebuffer containing the generated C constants to be assigned to this slice of the variable.
Every time the closure procedure is called, it generates a C variable to hold the generated C constant. It then generates an nc_put_vara() call. The start vector argument for the nc_put_vara is defined by the startset field of the closure. The count vector argument to nc_put_vara is computed from the current cached start vector and from the indices in the odometer. After the nc_put_vara() is generated, the odometer vector is assigned to the startset field in the closure for use on the next call.
There are some important assumptions about the state of the odometer when it is called.
In particular, this means that the start vector is zero for all positions except position zero. For all positions except zero, the count vector is the index in the odometer, which is assumed to be the max.
For start position zero, the position is taken from the last saved startset. The count position zero is the difference between that last start position and the current odometer zeroth index.
If all of this sounds complex, it is, and if/when I have time I will rethink the whole process of datalist generation from beginning to end.
As an optimization, ncgen4 tracks which datatypes will require use of vlen constants. This is any type whose definition is a vlen or whose basetype contains a vlen type.
The vlen generation process is two-fold. First, in the procedure processdatalist1() in semantics.c, the location of the struct Datalist objects that correspond to vlen constants is stored in a list called vlenconstants. When detected, each such struct Datalist object is tagged with a unique identifier and the vlen length (count). These will be used later to generate references to the vlen constant.
The second vlen constant processing action is in the procedure genc_vlenconstant() in cdata.c. First, it walks the vlenconstants list and generates C code for variables to define the vlen constant and C code to assign the vlen constant's data to that variable.
When, later, the genc_datalist procedure encounters a Datalist tagged as representing a vlen constant, it can generate an nc_vlen_t constant as {<count>,<vlenconstantname>} and use it directly in the generated C datalist constant.
For better or worse, the code acts like a 1-lookahead parser. This means that it decides what to do based on the current type, the current constant and, when necessary, the next constant in the Datasrc. In practice, the lookahead is hidden, so it is not represented in the following table.
Current Type | Current Constant | action |
---|---|---|
NC_PRIM | Primitive Constant | Generate the C constant; convert as necessary. |
NC_OPAQUE | '' | '' |
NC_ENUM | '' | '' |
NC_ENUM | '' | '' |
NC_COMPOUND | Nested Datalist Constant | Push into the datalist and recurse on each field; When done, pop back to previous datalist. |
NC_COMPOUND | Any other Constant | Continue to recurse on each field; This allows specification of fields without enclosing in {...}. |
NC_VLEN | Nested Datalist Constant | Generate the nc_vlen_t instance using the tagged information in the struct Datalist. |
NC_FIELD | NA | If this field is dimensioned, then call genc_fielddata to walk the dimensions. Otherwise, just recurse on genc_datalist1. |
The genc_fielddata() procedure iterates over a field dimension and calls itself recursively to walk the remaining dimensions. If this is the last dimension, then it calls genc_datalist1 to generate C code for the basetype of the field.
The genc_vardata1() procedure, like genc_fielddata, iterates over a top-level dimension and calls itself recursively to iterate over the remaining dimensions. The term "top-level" refers to the fact that these are the dimensions specified for a variable as opposed to field dimensions.
When iterating an UNLIMITED dimension, or when iterating the first dimension, the code generates a datalist for this subslice and then calls the closure to generate the C code.
The pool mechanism wraps malloc and records the malloc'd memory in a circular buffer. When the buffer reaches its maximum size, previously allocated pool buffers are free'd. This is good in that the user does not have to litter code with free() statements. It is bad in that the pool allocated memory can be free'd too early if the memory does not have a short enough life. If you suspect the latter, then bump the size of the circular buffer and see if the problem goes away. If so, then your code is probably holding on to a pool buffer too long and should use regular malloc/free.
In the end, I am not sure if this is a good idea, but it does make the code simpler.
The canonical code for non-destructive walking of a List
Bytebuffer provides two ways to access its internal buffer of characters.
One is "bbContents()", which returns a direct pointer to the buffer,
and the other is "bbDup()", which returns a malloc'd string containing
the contents and is guaranteed to be null terminated.
Suppose we have the declaration
A particular point in the three dimensions, say [x][y][z], is reduced to
a number in the range 0..29 by computing
The Odometer type stores a set of dimensions
and supports operations to iterate over all possible
dimension combinations.
The definition of Odometer is given by the types Odometer and Dimdata.
for(i=0;i<listlength(list);i++) {
T* element = (T*)listget(list,i);
...
}
Odometer: Multi-Dimensional Array Handling
The odometer data type is used to convert
multiple dimensions into a single integer.
The rule for converting a multi-dimensional
array to a single dimension is as follows.
int F[2][5][3];
.
There are obviously a total of 2 X 5 X 3 = 30 integers in F.
Thus, these three dimensions will be reduced to a single dimension of size 30.
((x*5)+y)*3+z
.
The corresponding general C code is as follows.
size_t
dimmap(int rank, size_t* indices, size_t* sizes)
{
int i;
size_t count = 0;
for(i=0;i<rank;i++) {
count *= sizes[i];
count += indices[i];
}
return count;
}
In this code, the indices variable corresponds to the x,y, and z.
The sizes variable corresponds to the 2,5, and 3.
typedef struct Dimdata {
unsigned long datasize; // actual size of the datalist item
unsigned long index; // 0 <= index < datasize
unsigned long declsize;
} Dimdata;
typedef struct Odometer {
int rank;
Dimdata dims[NC_MAX_DIMS];
} Odometer;
The following primary operations are defined.
Change Log