golem.golem
index
/opt/local/lib/python2.4/site-packages/golem/golem.py

The Golem ontology parsing library.
 
This module contains the main class which parses Golem/CML dictionaries, 
as defined by the CML and Golem schemata, and allows you to use them to 
extract and convert information found in CML datafiles.

 
Modules
       
copy
lxml.etree
new
os
simplejson
sys

 
Classes
       
__builtin__.dict(__builtin__.object)
Dictionary
__builtin__.object
Entry
GXpath
ImpOnlyEntry

 
class Dictionary(__builtin__.dict)
    Main class for representing CML/Golem dictionaries.
 
Example of usage:
    
>>> from StringIO import StringIO
>>> dictionarystring = """<?xml version="1.0"?>
... <dictionary 
...   namespace="http://www.materialsgrid.org/castep/dictionary"
...   dictionaryPrefix="castep" 
...   title="CASTEP Dictionary"
...   xmlns="http://www.xml-cml.org/schema"
...   xmlns:h="http://www.w3.org/1999/xhtml/"
...   xmlns:cml="http://www.xml-cml.org/schema"
...   xmlns:xsd="http://www.w3.org/2001/XMLSchema"
...   xmlns:golem="http://www.lexical.org.uk/golem"
...   xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
...   <entry id="xcFunctional" term="Exchange-Correlation Functional">
...     <annotation />
...     <definition>
...       The exchange-correlation functional used.
...     </definition>
...     <description>
...      <h:div class="dictDescription">
...         Available values for this are:
...         <h:ul>
...           <h:li>
...             <h:strong>LDA</h:strong>
...             , the Local Density Approximation
...           </h:li>
...           <h:li>
...             <h:strong>PW91</h:strong>
...             , Perdew and Wang's 1991 formulation
...           </h:li>
...           <h:li>
...             <h:strong>PBE</h:strong>
...             Perdew, Burke and Ernzerhof's original GGA
...             functional
...           </h:li>
...           <h:li>
...             <h:strong>RPBE</h:strong>
...             , Hammer et al's revised PBE functional
...           </h:li>
...         </h:ul>
...       </h:div>
...     </description>
...     
...     <metadataList>
...       <metadata name="dc:author" content="golem-kiln" />
...     </metadataList>
...     <golem:xpath>/cml:cml/cml:parameterList[@dictRef="input"]/cml:parameter[@dictRef="castep:xcFunctional"]</golem:xpath>
...     <golem:template call="scalar" role="getvalue" binding="pygolem_serialization" />
...     <golem:template role="arb_to_input" binding="input" input="external">
...       <xsl:stylesheet version='1.0' 
...                       xmlns:xsl='http://www.w3.org/1999/XSL/Transform'
...                       xmlns:cml='http://www.xml-cml.org/schema'>
...         <xsl:strip-space elements="*" />
...         <xsl:output method="text" />
...         <xsl:param name="p1" />
...         <xsl:template match="/">
...           <xsl:text>XC_FUNCTIONAL </xsl:text><xsl:value-of select="$p1" />      
...   </xsl:template>
...       </xsl:stylesheet>
...     </golem:template>
...     <golem:implements>convertibleToInput</golem:implements>
...     <golem:implements>value</golem:implements>
...     <golem:implements>absolute</golem:implements>
...     <golem:childOf>input</golem:childOf>
... 
...     <golem:possibleValues type="string">
...       <golem:enumeration>
...         <golem:value>LDA</golem:value>
...         <golem:value>PW91</golem:value>
...         <golem:value>PBE</golem:value>
...         <golem:value>RPBE</golem:value>
...         <golem:value>HF</golem:value>
...         <golem:value>SHF</golem:value>
...         <golem:value>EXX</golem:value>
...         <golem:value>SX</golem:value>
...         <golem:value>ZERO</golem:value>
...         <golem:value>HF-LDA</golem:value>
...         <golem:value>SHF-LDA</golem:value>
...         <golem:value>EXX-LDA</golem:value>
...         <golem:value>SX-LDA</golem:value>
...       </golem:enumeration>
...     </golem:possibleValues>
...   </entry>
... 
... <entry id="scalar" term="Scalar default call">
...     <annotation />
...     <definition />
...     <description />
...     <metadataList />
...     <golem:template role="getvalue" binding="pygolem_serialization">
...         <xsl:stylesheet version='1.0' 
...                 xmlns:xsl='http://www.w3.org/1999/XSL/Transform'
...                 xmlns:cml='http://www.xml-cml.org/schema'
...                 xmlns:str="http://exslt.org/strings"
...                 xmlns:func="http://exslt.org/functions"
...                 xmlns:exsl="http://exslt.org/common"
...                 xmlns:tohw="http://www.uszla.me.uk/xsl/1.0/functions"
...                 extension-element-prefixes="func exsl tohw str"
...                 exclude-result-prefixes="exsl func tohw xsl str">
...         <xsl:output method="text" />
...   
...   
...   <func:function name="tohw:isAListOfDigits">
...     <!-- look only for [0-9]+ -->
...     <xsl:param name="x_"/>
...     <xsl:variable name="x" select="normalize-space($x_)"/>
...     <xsl:choose>
...       <xsl:when test="string-length($x)=0">
...         <func:result select="false()"/>
...       </xsl:when>
...       <xsl:when test="substring($x, 1, 1)='0' or
...                       substring($x, 1, 1)='1' or
...                       substring($x, 1, 1)='2' or
...                       substring($x, 1, 1)='3' or
...                       substring($x, 1, 1)='4' or
...                       substring($x, 1, 1)='5' or
...                       substring($x, 1, 1)='6' or
...                       substring($x, 1, 1)='7' or
...                       substring($x, 1, 1)='8' or
...                       substring($x, 1, 1)='9'">
...         <xsl:choose>
...           <xsl:when test="string-length($x)=1">
...             <func:result select="true()"/>
...           </xsl:when>
...           <xsl:otherwise>
...             <func:result select="tohw:isAListOfDigits(substring($x, 2))"/>
...           </xsl:otherwise>
...         </xsl:choose>
...       </xsl:when>
...       <xsl:otherwise>
...         <func:result select="false()"/>
...       </xsl:otherwise>
...     </xsl:choose>
...   </func:function>
... 
...   <func:function name="tohw:isAnInteger">
...     <!-- numbers fitting [\+-][0-9]+ -->
...     <xsl:param name="x_"/>
...     <xsl:variable name="x" select="normalize-space($x_)"/>
...     <xsl:variable name="try">
...       <xsl:choose>
...         <xsl:when test="starts-with($x, '+')">
...           <xsl:value-of select="substring($x,2)"/>
...         </xsl:when>
...         <xsl:when test="starts-with($x, '-')">
...           <xsl:value-of select="substring($x,2)"/>
...         </xsl:when>
...         <xsl:otherwise>
...           <xsl:value-of select="$x"/>
...         </xsl:otherwise>
...       </xsl:choose>
...     </xsl:variable>
...     <func:result select="tohw:isAListOfDigits($try)"/>
...   </func:function>
... 
...   <func:function name="tohw:isANumberWithoutExponent">
...     <!-- numbers fitting [\+-][0-9]+(\.[0-9]*) -->
...     <xsl:param name="x"/>
...     <xsl:choose>
...       <xsl:when test="contains($x, '.')">
...         <func:result select="tohw:isAnInteger(substring-before($x, '.')) and
...                              tohw:isAListOfDigits(substring-after($x, '.'))"/>
...       </xsl:when>
...       <xsl:otherwise>
...         <func:result select="tohw:isAnInteger($x)"/>
...       </xsl:otherwise>
...     </xsl:choose>
...   </func:function>
... 
...   <func:function name="tohw:isAnFPNumber">
...     <!-- Try and interpret a string as an exponential number -->
...     <!-- should only recognise strings of the form: [\+-][0-9]*\.[0-9]*([DdEe][+-][0-9]+)? -->
...     <xsl:param name="x"/>
...     <xsl:choose>
...       <xsl:when test="contains($x, 'd')">
...         <func:result select="tohw:isANumberWithoutExponent(substring-before($x, 'd')) and
...                              tohw:isAnInteger(substring-after($x, 'd'))"/>
...       </xsl:when>
...       <xsl:when test="contains($x, 'D')">
...         <func:result select="tohw:isANumberWithoutExponent(substring-before($x, 'D')) and
...                              tohw:isAnInteger(substring-after($x, 'D'))"/>
...       </xsl:when>
...       <xsl:when test="contains($x, 'e')">
...         <func:result select="tohw:isANumberWithoutExponent(substring-before($x, 'e')) and
...                              tohw:isAnInteger(substring-after($x, 'e'))"/>
...       </xsl:when>
...       <xsl:when test="contains($x, 'E')">
...         <func:result select="tohw:isANumberWithoutExponent(substring-before($x, 'E')) and
...                              tohw:isAnInteger(substring-after($x, 'E'))"/>
...       </xsl:when>
...       <xsl:otherwise>
...          <func:result select="tohw:isANumberWithoutExponent($x)"/>
...       </xsl:otherwise>
...     </xsl:choose>
...   </func:function>
...         
...   <xsl:template match="/">
...     <xsl:apply-templates />
...   </xsl:template>
...     
...   <xsl:template match="cml:scalar">
...     <xsl:variable name="value">
...       <xsl:choose>
...         <xsl:when test="tohw:isAnFPNumber(.)">
...           <xsl:value-of select="." />
...         </xsl:when>
...         <xsl:otherwise>
...           <xsl:text>"</xsl:text><xsl:value-of select="." /><xsl:text>"</xsl:text>
...         </xsl:otherwise>
...       </xsl:choose>
...     </xsl:variable>
...     <xsl:variable name="units">
...       <xsl:choose>
...         <xsl:when test="@units">
...           <xsl:text>"</xsl:text><xsl:value-of select="@units" /><xsl:text>"</xsl:text>
...         </xsl:when>
...         <xsl:otherwise>
...           <xsl:text>""</xsl:text>
...         </xsl:otherwise>
...       </xsl:choose>
...     </xsl:variable>
...     <xsl:text>[</xsl:text><xsl:value-of select="$value"/><xsl:text>,</xsl:text><xsl:value-of select="$units" /><xsl:text>]</xsl:text>
...   </xsl:template>
... </xsl:stylesheet>
...     </golem:template>
... 
...     <golem:template role="defaultoutput">
...       <xsl:stylesheet version='1.0' 
...                       xmlns:xsl='http://www.w3.org/1999/XSL/Transform'
...                       xmlns:cml='http://www.xml-cml.org/schema'
...                       xmlns:str="http://exslt.org/strings"
...                       extension-element-prefixes="str"
...                       >
...         <xsl:output method="text" />
...         <xsl:param name="name" />
...         <xsl:param name="value" />
...         <xsl:template match="/">
...           <xsl:value-of select='$name' /><xsl:value-of select='$value' />
...         </xsl:template>
...       </xsl:stylesheet>
...     </golem:template>
...     <golem:seealso>gwtsystem</golem:seealso>
...   </entry>
... </dictionary>
... """
>>> d = Dictionary(StringIO(dictionarystring))
>>> xcf = d["{http://www.materialsgrid.org/castep/dictionary}xcFunctional"]
>>> cmlstr = """<?xml version="1.0" encoding="UTF-8"?>
... <?xml-stylesheet href="display.xsl" type="text/xsl"?>
... <cml convention="FoX_wcml-2.0" fileId="NaCl_00GPa.xml" version="2.4"
...   xmlns="http://www.xml-cml.org/schema"
...   xmlns:castep="http://www.materialsgrid.org/castep/dictionary"
...   xmlns:castepunits="http://www.materialsgrid.org/castep/units"
...   xmlns:cml="http://www.xml-cml.org/dict/cmlDict"
...   xmlns:xsd="http://www.w3.org/2001/XMLSchema"
...   xmlns:dc="http://purl.org/dc/elements/1.1/title"
...   xmlns:units="http://www.uszla.me.uk/FoX/units"
...   xmlns:atomicUnits="http://www.xml-cml.org/units/atomic">
...   <metadataList title="Autocaptured metadata">
...     <metadata name="dc:date" content="2007-02-09"/>
...   </metadataList>
...   <parameterList dictRef="input" convention="Input Parameters">
...     <parameter dictRef="castep:xcFunctional"
...       name="Exchange-Correlation Functional">
...       <scalar dataType="xsd:string">PBE</scalar>
...     </parameter>
...   </parameterList>
... </cml>
... """
>>> tree = etree.parse(StringIO(cmlstr))
>>> xcfd = xcf.findin(tree)
>>> print len(xcfd)
1
>>> xcval = xcf.getvalue(xcfd[0])
>>> print xcf.getvalue(xcfd[0])
PBE
>>> # units are not defined on XCFunctional, so:
>>> print xcval.unit
golem:undefined
>>> # by convention
>>> print xcval.entry.definition
<BLANKLINE>
      The exchange-correlation functional used.
<BLANKLINE>
 
 
Method resolution order:
Dictionary
__builtin__.dict
__builtin__.object

Methods defined here:
__init__(self, filename=None, asModel=False)
parsexml(self, filename, asModel=False)
Load and parse a CML dictionary.
serialize(self)
Serialize a dictionary back to XML.

Data and other attributes defined here:
__dict__ = <dictproxy object at 0x23bdf30>
dictionary for instance variables (if defined)
__weakref__ = <attribute '__weakref__' of 'Dictionary' objects>
list of weak references to the object (if defined)

Methods inherited from __builtin__.dict:
__cmp__(...)
x.__cmp__(y) <==> cmp(x,y)
__contains__(...)
D.__contains__(k) -> True if D has a key k, else False
__delitem__(...)
x.__delitem__(y) <==> del x[y]
__eq__(...)
x.__eq__(y) <==> x==y
__ge__(...)
x.__ge__(y) <==> x>=y
__getattribute__(...)
x.__getattribute__('name') <==> x.name
__getitem__(...)
x.__getitem__(y) <==> x[y]
__gt__(...)
x.__gt__(y) <==> x>y
__hash__(...)
x.__hash__() <==> hash(x)
__iter__(...)
x.__iter__() <==> iter(x)
__le__(...)
x.__le__(y) <==> x<=y
__len__(...)
x.__len__() <==> len(x)
__lt__(...)
x.__lt__(y) <==> x<y
__ne__(...)
x.__ne__(y) <==> x!=y
__repr__(...)
x.__repr__() <==> repr(x)
__setitem__(...)
x.__setitem__(i, y) <==> x[i]=y
clear(...)
D.clear() -> None.  Remove all items from D.
copy(...)
D.copy() -> a shallow copy of D
get(...)
D.get(k[,d]) -> D[k] if k in D, else d.  d defaults to None.
has_key(...)
D.has_key(k) -> True if D has a key k, else False
items(...)
D.items() -> list of D's (key, value) pairs, as 2-tuples
iteritems(...)
D.iteritems() -> an iterator over the (key, value) items of D
iterkeys(...)
D.iterkeys() -> an iterator over the keys of D
itervalues(...)
D.itervalues() -> an iterator over the values of D
keys(...)
D.keys() -> list of D's keys
pop(...)
D.pop(k[,d]) -> v, remove specified key and return the corresponding value
If key is not found, d is returned if given, otherwise KeyError is raised
popitem(...)
D.popitem() -> (k, v), remove and return some (key, value) pair as a
2-tuple; but raise KeyError if D is empty
setdefault(...)
D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D
update(...)
D.update(E, **F) -> None.  Update D from E and F: for k in E: D[k] = E[k]
(if E has keys else: for (k, v) in E: D[k] = v) then: for k in F: D[k] = F[k]
values(...)
D.values() -> list of D's values

Data and other attributes inherited from __builtin__.dict:
__new__ = <built-in method __new__ of type object at 0x2df580>
T.__new__(S, ...) -> a new object with type S, a subtype of T
fromkeys = <built-in method fromkeys of type object at 0x6781d0>
dict.fromkeys(S[,v]) -> New dict with keys from S and values equal to v.
v defaults to None.

 
class Entry(__builtin__.object)
    The Entry class represents an entry in a Golem/CML dictionary.
 
Entries have the following structure.
   <entry id="template" term="Template entry">
     <annotation>
       <appinfo><!-- CML-specific machine-processable information --></appinfo>
     </annotation>
     <definition>Human-readable one-liner definition</definition>
     <description>Substantial human-readable documentation</description>
     <metadataList><!-- Dublin Core semantics -->
       <metadata name="dc:creator" content="Test Author" />
     </metadataList>
     <golem:xpath></golem:xpath>
     <golem:template role="role" binding="binding"> <!-- and optionally "@input" -->
     </golem:template>
     <golem:possibleValues type="DATATYPE">
       <golem:range>
         <golem:minimum>1</golem:minimum>
         <golem:maximum>100</golem:maximum>
       </golem:range>
       <!-- or -->
       <golem:enumeration>
         <golem:value>1</golem:value>
         <golem:value>2</golem:value>
         <golem:value>3</golem:value>
       </golem:enumeration>
     </golem:possibleValues> <!-- or matrix ... -->
     <golem:implements>otherEntry</golem:implements> <!-- times n -->
     <golem:synonym>synonymousEntry</golem:synonym> <!-- times n -->
     <golem:seealso>similarEntry</golem:seealso> <!-- times n -->
     <golem:childOf>parentEntry</golem:childOf> <!-- times n -->
   </entry>
 
  Methods defined here:
__init__(self, d, xml=None, asModel=False)
boundscheck(self, arb, ctype='')
Check that a given piece of data is of the type, and lies within the bounds,
defined in this dictionary entry.
dcall(self, template, arb)
Internal method (you'll never call this directly); bounds-check a piece
of data and template it into an associated <golem:template> defined in
the dictionary. These are mapped onto Python methods named after the
name of the <golem:template>.
 
In other words, this is where entry.to_value calls come from.
findin(self, *trees)
Find instances of this dictionary entry in a given ElementTree
or set of ElementTrees. This version supplies *new*, rerooted
ElementTrees, not just the old ElementTrees with a pointer to 
the right context - use findin_context for that.
findin_context(self, *trees)
Find instances of this dictionary entry in a (set of) 
ElementTrees or filenames. Returns a set of nodes pointing into the
searched ElementTrees.
getAllImplementations(self)
Recursively identify and return all entries which <golem:implements> the
current class (and which are in currently-loaded dictionaries).
getChildren(self)
Recursively identify and return all entries which are children (declared
via <golem:childOf>) of the current concept - i.e. which only ever appear
as childNodes of the (XML) node, or nodes, with which this dictionary
entry is associated.
list_to_arbdict(self, l)
Map a matrix onto a dictionary for subsequent output using XSLT.
 
The algorithm used is:
i) Check that the matrix is of the correct shape and within bounds.
ii) From left to right row-wise from the upper left, number off the 
    matrix elements p1, p2, p3... pn (for an n-element matrix), and
    store these in a dictionary {"p1": p1, "p2": p2 ...}.
iii) Return the resulting dictionary.
matrix_boundscheck(self, l)
Check that the elements of a given matrix have the type, and lie within
the bounds, defined in the current dictionary entry.
matrix_coercelist(self, l)
Coerce a matrix into a list, left-to-right, top-to-bottom.
matrix_shapecheck(self, l)
Check that a given matrix is of, or can be coerced into, the shape 
defined in this dictionary entry.
parsexml(self, x, d, asModel=False)
Load a dictionary entry from its XML representation. 
 
arguments: (etree for the entry, parent dictionary object).
 
Set asModel to true if you're using this dictionary as a model
for building a new one: it stashes way more of the native XML in that
case, allowing you to serialize it out directly into your new dictionary.
At present, this is only used by the dictionary generator
(bin/make_dictionary.py in your Golem distribution.)
serialize(self)
Write out this dictionary entry as XML.
with_predicate(self, predicate)
Set a predicate (condition) on a particular Entry instance.
 
This predicate will be honoured on subsequent calls to x.findin for
entry x; it takes the form of an XPath function.

Data and other attributes defined here:
__dict__ = <dictproxy object at 0x23bdc50>
dictionary for instance variables (if defined)
__weakref__ = <attribute '__weakref__' of 'Entry' objects>
list of weak references to the object (if defined)

 
class GXpath(__builtin__.object)
     Methods defined here:
__init__(self, xqnode)

Data and other attributes defined here:
__dict__ = <dictproxy object at 0x23bdfb0>
dictionary for instance variables (if defined)
__weakref__ = <attribute '__weakref__' of 'GXpath' objects>
list of weak references to the object (if defined)

 
class ImpOnlyEntry(__builtin__.object)
    Dictionary helper class: this is used to store information on entries
which have been pointed to (by, say, <golem:implements>), but which haven't
themselves been parsed yet.
 
  Methods defined here:
__init__(self)

Data and other attributes defined here:
__dict__ = <dictproxy object at 0x23bdf30>
dictionary for instance variables (if defined)
__weakref__ = <attribute '__weakref__' of 'ImpOnlyEntry' objects>
list of weak references to the object (if defined)

 
Functions
       
loadDictionary(filename)
Load a dictionary from a default location on the filesystem.
    
On Windows, this is C:\cmldictionaries\ and must be changed by 
editing golem.py by hand; on Unix, it defaults to ~/.cmldictionaries/
but can be overridden by setting the environment variable
CMLDICTIONARIES.
setDataWarning(val)
Set whether warnings will be emitted when unit/type-bearing data is modified.
 
Default is True.
setTypeWarning(val)
Set whether warnings will be emitted when a dictionary Entry without 
a defined type is used.
 
Default is True.

 
Data
        data_warning = True
type_warning = True
version = '0.9beta'