Top

core module

#! /usr/bin/env python
from jpype import *
import sys, os
import uuid
import random
import csv

"""
pyopg core functions
Copyright 2015, Oracle and/or its affiliates. All rights reserved.
"""
# Java classpath handed to the JVM, read from the OPG_CP environment variable.
# NOTE: when OPG_CP is unset this is None and the string concatenation in the
# startJVM call below raises TypeError.
pyopg_cp = os.environ.get('OPG_CP', None)

#start the JVM
startJVM(getDefaultJVMPath(), "-ea", "-Djava.class.path="+pyopg_cp)

#define specific Java classes
HashMap = java.util.HashMap
HBaseBytes = JClass('org.apache.hadoop.hbase.util.Bytes')
pgxf = JClass('oracle.pgx.config.PgxConfig$Field')
# One utility class per backend: the import/export helpers call `loader` when
# the store is NoSQL and `hloader` when it is HBase, so each needs its own name.
loader = JClass('oracle.pg.nosql.OraclePropertyGraphUtils')
hloader = JClass('oracle.pg.hbase.OraclePropertyGraphUtils')
bulkloader = JClass('oracle.pg.nosql.OraclePropertyGraphDataLoader')
hbulkloader = JClass('oracle.pg.hbase.OraclePropertyGraphDataLoader')
hbasekeyfilter = JClass('oracle.pg.hbase.OracleKeyFilter')
pstream = JClass('java.io.PrintStream')
pgx = JClass('oracle.pgx.api.Pgx')
analyst_class = JClass('oracle.pgx.api.Analyst')
BAOS = JClass('java.io.ByteArrayOutputStream')

#define Java packages
common = JPackage('oracle.pg.common')
nosql = JPackage('oracle.pg.nosql')
nosql_index = JPackage('oracle.pg.nosql.index')
hbase = JPackage('oracle.pg.hbase')
hbase_index = JPackage('oracle.pg.hbase.index')
text = JPackage('oracle.pg.text')
kv = JPackage('oracle.kv')
blueprints = JPackage('com.tinkerpop.blueprints')
pgx_config = JPackage('oracle.pgx.config')
pgx_types = JPackage('oracle.pgx.common.types')
pgx_control = JPackage('oracle.pgx.api')
fexp = JPackage('oracle.pgx.filter.expressions')
jutil = JPackage('java.util')
hadoop_conf = JPackage('org.apache.hadoop.conf')
hadoop_hbase = JPackage('org.apache.hadoop.hbase')
hbase_client = JPackage('org.apache.hadoop.hbase.client')
hbase_filter = JPackage('org.apache.hadoop.hbase.filter')

#from janalyst import *

#set PGX Configuration -- TODO: make this readable from a config file
## do we need to do this in PGX 1.1?
pgx_param_map = HashMap()
pgx_param_map.put("ENABLE_GM_COMPILER", False)
#thisPgxConfig = pgx_config.PgxConfig().init()#.parse(pgx_param_map, False, None)

# Module-wide graph handle; the connect* functions and analyst_config rebind it.
opg = None #this is the context object

class OPGDALException(Exception):
    """
    Error raised by the data-access helpers in this module.
    The payload given to the constructor is kept on the value attribute
    and str() of the exception is the repr() of that payload.
    """

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return "%r" % (self.value,)

class OPGAnalystException(Exception):
    """
    Error type for the analyst helpers in this module.
    The payload given to the constructor is kept on the value attribute
    and str() of the exception is the repr() of that payload.
    """

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return "%r" % (self.value,)

class OPGIterator:
    """
    Adapts a Java-style iterator (an object exposing hasNext() and next())
    to the Python iterator protocol so it can be used in for loops.
    """

    def __init__(self, jitter):
        # jitter: the wrapped object; only hasNext() and next() are called on it
        self.jitter = jitter

    def __iter__(self):
        return self

    def next(self):
        """
        Return the next element, raising StopIteration once hasNext() is false.
        """
        if not self.jitter.hasNext():
            raise StopIteration
        return self.jitter.next()

    # Python 3 spells the protocol method __next__; keep both names working.
    __next__ = next

""" OPG DAL Commands begin here
This is the base set of commands available when working with the pyopg layer.
"""

def _checkIDType(iid):
    """
    Checks to see if an id is convertable to Java PG types.
    Returns True for int, long and java.lang.Long values and raises
    OPGDALException for anything else.
    """
    if not isinstance(iid, (int, long, java.lang.Long)):
        raise OPGDALException("IDs must be ints or longs")
    return True

def _checkDBType():
    """
    Report which store backs the current opg context.
    Returns "nosql" or "hbase" according to the class name of opg,
    and None when the class name matches neither.
    """
    backends = {
        "oracle.pg.nosql.OraclePropertyGraph": "nosql",
        "oracle.pg.hbase.OraclePropertyGraph": "hbase",
    }
    return backends.get(type(opg).__name__)

def connect(graph_name, store_name, hosts):
    """
    Open graph_name in the Oracle NoSQL store store_name and make it the
    module context.
    hosts is passed to KVStoreConfig as-is when it is a list; any other
    value is wrapped in a one-element list first.
    Returns the graph instance, which is also stored in the global opg.
    """
    global opg
    host_list = hosts if isinstance(hosts, list) else [hosts]
    kconfig = kv.KVStoreConfig(store_name, host_list)
    opg = nosql.OraclePropertyGraph.getInstance(kconfig, graph_name)
    return opg

def connectONDB(graph_name, store_name, hosts):
    """
    Open graph_name in the Oracle NoSQL store store_name and make it the
    module context.
    hosts is passed to KVStoreConfig as-is when it is a list; any other
    value is wrapped in a one-element list first.
    Returns the graph instance, which is also stored in the global opg.
    """
    global opg
    host_list = hosts if isinstance(hosts, list) else [hosts]
    kconfig = kv.KVStoreConfig(store_name, host_list)
    opg = nosql.OraclePropertyGraph.getInstance(kconfig, graph_name)
    return opg

def connectHBase(graph_name, zk_quorum, client_port="2181"):
    """
    Open graph_name in HBase and make it the module context.
    zk_quorum is stored under hbase.zookeeper.quorum and client_port under
    hbase.zookeeper.property.clientPort; client_port may be a string or an
    integer. The scanner and ZooKeeper session timeouts are set to "1000000".
    Returns the graph instance, which is also stored in the global opg.
    """
    global opg
    hbconf = hadoop_hbase.HBaseConfiguration.create()
    hbconf.set("hbase.zookeeper.quorum", zk_quorum)
    # Every value set here is a string, so coerce a numeric port as well.
    hbconf.set("hbase.zookeeper.property.clientPort", str(client_port))
    hbconf.set("hbase.client.scanner.timeout.period", "1000000")
    hbconf.set("zookeeper.session.timeout", "1000000")
    hconn = hbase_client.HConnectionManager.createConnection(hbconf)
    opg = hbase.OraclePropertyGraph.getInstance(hbconf, hconn, graph_name)
    return opg


def getGraphName():
    """
    Name of the graph held in the opg context, as reported by opg.getGraphName().
    """
    graph_name = opg.getGraphName()
    return graph_name

def clearRepository():
    """
    Remove every vertex and edge from the graph held in the opg context
    by delegating to opg.clearRepository(). Returns nothing.
    """
    graph = opg
    graph.clearRepository()

def dropAllIndicies():
    """
    Drop all existing indices by calling opg.dropAllIndices().
    The name is a misspelling of dropAllIndices, which this module also
    defines with the same behaviour.
    """
    graph = opg
    graph.dropAllIndices()

def getEdges(*keys):
    """
    Iterate over the edges returned by opg.getEdges for the given keys.
    The positional arguments are forwarded together as one tuple.
    Returns an OPGIterator.
    """
    matches = opg.getEdges(keys)
    return OPGIterator(matches.iterator())


def addVertex(vid):
    """
    Create a vertex with ID vid and return it.
    int IDs are converted with long(); long IDs are passed through unchanged.
    Raises OPGDALException for any other type.
    """
    if isinstance(vid, int):
        return opg.addVertex(long(vid))
    if isinstance(vid, long):
        return opg.addVertex(vid)
    raise OPGDALException("IDs must be ints or longs")

def addVertexByName(name):
    """
    Create a vertex carrying the property 'name' and return it.
    The vertex ID is a random 32-bit value; nothing here checks whether
    that ID is already in use.
    """
    new_id = random.getrandbits(32)
    vertex = opg.addVertex(new_id)
    vertex.setProperty("name", name, True)
    return vertex

def getVertexByName(name):
    """
    Retrieve a vertex by the property 'name'.
    Returns the first vertex yielded by opg.getVertices("name", name), or
    None when there is none or the lookup raises an ordinary exception.
    """
    try:
        return OPGIterator(opg.getVertices("name", name).iterator()).next()
    except Exception:
        # Best-effort lookup: callers treat None as "not found". Unlike a bare
        # except, this lets KeyboardInterrupt and SystemExit propagate.
        return None

def setVertexProperty(vid, label, value):
    """
    Convenience wrapper: set property label to value on the vertex with ID vid.
    The ID is validated with _checkIDType (which raises OPGDALException on
    a bad type) and converted with long() before the lookup.
    """
    _checkIDType(vid)
    vertex = opg.getVertex(long(vid))
    vertex.setProperty(label, value, True)

def addEdge(eid, out_vid, in_vid, label):
    """
    Create an edge with ID eid and the given label from the vertex with ID
    out_vid to the vertex with ID in_vid, and return it.
    All three IDs are validated with _checkIDType first.
    """
    for candidate in (eid, out_vid, in_vid):
        _checkIDType(candidate)
    source = opg.getVertex(out_vid)
    target = opg.getVertex(in_vid)
    return opg.addEdge(eid, source, target, label)

def addEdgeByName(ename, out_vname, in_vname):
    """
    Create an edge labelled ename between two vertices identified by their
    'name' property and return it.
    Both endpoints are looked up first; any that is missing is then created
    with addVertexByName. The edge ID is a random 32-bit value.
    """
    source = getVertexByName(out_vname)
    target = getVertexByName(in_vname)
    # Both lookups run before either vertex is created.
    if not source:
        source = addVertexByName(out_vname)
    if not target:
        target = addVertexByName(in_vname)
    source_id = long(str(source.getId()))
    target_id = long(str(target.getId()))
    return addEdge(random.getrandbits(32), source_id, target_id, ename)

def getEdgeByEndpoints(edge_label, outv_label, inv_label):
    """
    Find an edge labelled edge_label that leaves the vertex named outv_label
    and enters the vertex named inv_label.
    Returns the first matching edge, or None when there is no match or an
    AttributeError occurs (for example when the out vertex was not found).
    """
    source = getVertexByName(outv_label)
    target = getVertexByName(inv_label)
    try:
        # The label is wrapped in a list to satisfy the Java method signature.
        outgoing = source.getEdges(blueprints.Direction.OUT, [edge_label])
        for edge in OPGIterator(outgoing.iterator()):
            if target == edge.getInVertex():
                return edge
    except AttributeError:
        pass
    return None

def setEdgeProperty(eid, label, value):
    """
    Convenience wrapper: set property label to value on the edge with ID eid.
    The ID is validated with _checkIDType (which raises OPGDALException on
    a bad type) and converted with long() before the lookup.
    """
    _checkIDType(eid)
    edge = opg.getEdge(long(eid))
    edge.setProperty(label, value, True)

def getAllVertexPropertyKeys(getTypes=False, sample=0.1):
    """
    Collect the property keys found on vertices.
    With getTypes false, every vertex is visited and a set of keys is returned.
    With getTypes true, each vertex is included only when a random draw is
    <= sample, and the result is a set of (key, value type name) tuples.
    """
    found = set()
    for vertex in OPGIterator(opg.getVertices().iterator()):
        if not getTypes:
            found.update(vertex.getPropertyKeys())
            continue
        # One random draw per vertex decides whether it is sampled.
        if random.random() <= sample:
            names = list(OPGIterator(vertex.getPropertyKeys().iterator()))
            kinds = [type(vertex.getProperty(n)).__name__ for n in names]
            found.update(zip(names, kinds))
    return found

def getAllEdgePropertyKeys(getTypes=False, sample=0.1):
    """
    Collect the property keys found on edges.
    With getTypes false, every edge is visited and a set of keys is returned.
    With getTypes true, each edge is included only when a random draw is
    <= sample, and the result is a set of (key, value type name) tuples.
    """
    found = set()
    for edge in OPGIterator(opg.getEdges().iterator()):
        if not getTypes:
            found.update(edge.getPropertyKeys())
            continue
        # One random draw per edge decides whether it is sampled.
        if random.random() <= sample:
            names = list(OPGIterator(edge.getPropertyKeys().iterator()))
            kinds = [type(edge.getProperty(n)).__name__ for n in names]
            found.update(zip(names, kinds))
    return found

def getVertexDict(vid):
    """
    Return the properties of the vertex with ID vid as a dict mapping each
    property key to its value. The ID is validated with _checkIDType.
    """
    _checkIDType(vid)
    vertex = opg.getVertex(vid)
    return dict((key, vertex.getProperty(key)) for key in vertex.getPropertyKeys())

def getEdgeDict(eid):
    """
    Get the properties of an edge as a dict.
    The ID is validated with _checkIDType and converted with long() before
    the lookup; the result maps each property key of the edge to its value.
    """
    global opg
    d = {}
    _checkIDType(eid)
    e = opg.getEdge(long(eid))
    # Read keys and values from the edge that was just fetched.
    for k in e.getPropertyKeys():
        d[k] = e.getProperty(k)
    return d

def getVertices(*keys):
    """
    Iterate over the vertices returned by opg.getVertices for the given keys.
    The positional arguments are forwarded together as one tuple.
    Returns an OPGIterator.
    """
    matches = opg.getVertices(keys)
    return OPGIterator(matches.iterator())

def getVerticesByValue(key, value, wildcard=False):
    """
    Iterate over the vertices returned by opg.getVertices(key, value, wildcard).
    Returns an OPGIterator.
    """
    matches = opg.getVertices(key, value, wildcard)
    return OPGIterator(matches.iterator())

def V(vid):
    """
    Short alias for opg.getVertex(vid); the ID is passed through unchecked.
    """
    vertex = opg.getVertex(vid)
    return vertex

def E(eid):
    """
    Short alias for opg.getEdge(eid); the ID is passed through unchecked.
    """
    edge = opg.getEdge(eid)
    return edge

def countV(*keys):
    """
    Count the vertices returned by opg.getVertices for the given keys by
    iterating over all of them.
    """
    vertices = OPGIterator(opg.getVertices(keys).iterator())
    return sum(1 for _ in vertices)

def countE(*keys):
    """
    Count the edges returned by opg.getEdges for the given keys by
    iterating over all of them.
    """
    edges = OPGIterator(opg.getEdges(keys).iterator())
    return sum(1 for _ in edges)

def createVertexIndex(*index_keys):
    """
    Create key indices on vertices for every key in index_keys.
    Default index parameters are (re)set first, built with buildFS and the
    directory "./lucene-index".
    """
    # NOTE(review): meaning of the numeric buildFS arguments is not visible here.
    params = text.OracleIndexParameters.buildFS(1, 4, 10000, 50000, True, "./lucene-index")
    opg.setDefaultIndexParameters(params)
    vertex_class = JClass("com.tinkerpop.blueprints.Vertex")
    opg.createKeyIndex(index_keys, vertex_class)

def createEdgeIndex(*index_keys):
    """
    Create key indices on edges for every key in index_keys.
    Default index parameters are (re)set first, built with buildFS and the
    directory "./lucene-index".
    """
    # NOTE(review): meaning of the numeric buildFS arguments is not visible here.
    params = text.OracleIndexParameters.buildFS(1, 4, 10000, 50000, True, "./lucene-index")
    opg.setDefaultIndexParameters(params)
    edge_class = JClass("com.tinkerpop.blueprints.Edge")
    opg.createKeyIndex(index_keys, edge_class)

def searchVertexIndex(key, term):
    """
    Look up vertices whose property key matches term, calling
    opg.getVertices with its third argument set to True.
    Returns an OPGIterator over the matches.
    """
    hits = opg.getVertices(key, term, True)
    return OPGIterator(hits.iterator())

def searchEdgeIndex(key, term):
    """
    Look up edges whose property key matches term, calling
    opg.getEdges with its third argument set to True.
    Returns an OPGIterator over the matches.
    """
    hits = opg.getEdges(key, term, True)
    return OPGIterator(hits.iterator())

def dropAllIndices():
    """
    Drop every index on the graph by calling opg.dropAllIndices().
    """
    graph = opg
    graph.dropAllIndices()

def dropVertexIndex(index_key):
    """
    Drop the vertex key index for index_key.
    """
    vertex_class = JClass("com.tinkerpop.blueprints.Vertex")
    opg.dropKeyIndex(index_key, vertex_class)

def dropEdgeIndex(index_key):
    """
    Drop the edge key index for index_key.
    """
    edge_class = JClass("com.tinkerpop.blueprints.Edge")
    opg.dropKeyIndex(index_key, edge_class)

def importGML(filename):
    """
    Import the GML file at filename into the connected graph.
    _checkDBType() selects the utility class: loader for "nosql", hloader
    for "hbase"; for any other result nothing is done.
    The PrintStream passed as third argument writes to /dev/null.
    """
    backend = _checkDBType()
    if backend == "nosql":
        utils = loader
    elif backend == "hbase":
        utils = hloader
    else:
        return
    utils.importGML(opg, filename, pstream("/dev/null"))

def importGraphSON(filename):
    """
    Import the GraphSON file at filename into the connected graph.
    _checkDBType() selects the utility class: loader for "nosql", hloader
    for "hbase"; for any other result nothing is done.
    The PrintStream passed as third argument writes to /dev/null.
    """
    backend = _checkDBType()
    if backend == "nosql":
        utils = loader
    elif backend == "hbase":
        utils = hloader
    else:
        return
    utils.importGraphSON(opg, filename, pstream("/dev/null"))

def importGraphML(filename):
    """
    Import the GraphML file at filename into the connected graph.
    _checkDBType() selects the utility class: loader for "nosql", hloader
    for "hbase"; for any other result nothing is done.
    The PrintStream passed as third argument writes to /dev/null.
    """
    backend = _checkDBType()
    if backend == "nosql":
        utils = loader
    elif backend == "hbase":
        utils = hloader
    else:
        return
    utils.importGraphML(opg, filename, pstream("/dev/null"))

def exportGML(filename):
    """
    Export the connected graph to filename in GML format.
    _checkDBType() selects the utility class: loader for "nosql", hloader
    for "hbase"; for any other result nothing is done.
    The PrintStream passed as third argument writes to /dev/null.
    """
    backend = _checkDBType()
    if backend == "nosql":
        utils = loader
    elif backend == "hbase":
        utils = hloader
    else:
        return
    utils.exportGML(opg, filename, pstream("/dev/null"))

def exportGraphSON(filename):
    """
    Export the connected graph to filename in GraphSON format.
    _checkDBType() selects the utility class: loader for "nosql", hloader
    for "hbase"; for any other result nothing is done.
    The PrintStream passed as third argument writes to /dev/null.
    """
    backend = _checkDBType()
    if backend == "nosql":
        utils = loader
    elif backend == "hbase":
        utils = hloader
    else:
        return
    utils.exportGraphSON(opg, filename, pstream("/dev/null"))

def exportGraphML(filename):
    """
    Export the connected graph to filename in GraphML format.
    _checkDBType() selects the utility class: loader for "nosql", hloader
    for "hbase"; for any other result nothing is done.
    The PrintStream passed as third argument writes to /dev/null.
    """
    backend = _checkDBType()
    if backend == "nosql":
        utils = loader
    elif backend == "hbase":
        utils = hloader
    else:
        return
    utils.exportGraphML(opg, filename, pstream("/dev/null"))

def exportFlatFiles(v_filename, e_filename):
    """
    Export the connected graph as a vertex file and an edge file suitable
    for bulk loading.
    _checkDBType() selects the utility class: loader for "nosql", hloader
    for "hbase"; for any other result nothing is done.
    """
    backend = _checkDBType()
    if backend == "nosql":
        utils = loader
    elif backend == "hbase":
        utils = hloader
    else:
        return
    # NOTE(review): meaning of the trailing False flag is not visible here.
    utils.exportFlatFiles(opg, v_filename, e_filename, False)

def importFlatFiles(v_filename, e_filename, dop=4):
    """
    Bulk load the graph from a vertex file and an edge file.
    dop controls the number of JVM threads used for loading.
    _checkDBType() selects the data loader class: bulkloader for "nosql",
    hbulkloader for "hbase"; for any other result nothing is done.
    """
    backend = _checkDBType()
    if backend == "nosql":
        factory = bulkloader
    elif backend == "hbase":
        factory = hbulkloader
    else:
        return
    data_loader = factory.getInstance()
    data_loader.loadData(opg, v_filename, e_filename, dop)

def unparseMetric(c):
    """
    Turn one key/value entry of a metric result into Python-friendly values.
    The entry key is used as a vertex ID and the entry value is converted
    to float via its toString(). Returns a (vertex, float) tuple.
    """
    entry_key = c.getKey()
    entry_value = c.getValue()
    vertex = opg.getVertex(entry_key)
    return vertex, float(entry_value.toString())

def analyst_config(props = []):
    """
    Build a PGX graph configuration for the current graph and reopen the
    graph from it.
    Property keys and their Java type names are sampled from vertices and
    edges; only keys listed in props are added to the configuration.
    Integer, Double, Float and Long properties get the matching PGX type,
    anything else is declared as STRING with the key itself as default.
    Rebinds the global opg to the reopened graph and returns it.
    """
    global opg

    vertex_sample = getAllVertexPropertyKeys(True)
    edge_sample = getAllEdgePropertyKeys(True)

    backend = _checkDBType()
    builder = None
    if backend == "nosql":
        store_cfg = opg.kVStoreConfig
        builder = pgx_config.PgNosqlGraphConfigBuilder()
        builder = builder.setName(opg.getGraphName())
        builder = builder.setHosts(jutil.Arrays.asList(store_cfg.helperHosts))
        builder = builder.setStoreName(store_cfg.storeName)
        #analyst_cfg = pgx_config.GraphConfigBuilder.forNosql().setName(opg.getGraphName()).setHosts(jutil.Arrays.asList(kvcfg.helperHosts)).setStoreName(kvcfg.storeName)
    elif backend == "hbase":
        quorum = opg.getConfiguration().get("hbase.zookeeper.quorum")
        client_port = int(opg.getConfiguration().get("hbase.zookeeper.property.clientPort"))
        builder = pgx_config.PgHbaseGraphConfigBuilder()
        builder = builder.setName(opg.getGraphName())
        builder = builder.setZkQuorum(quorum)
        builder = builder.setZkClientPort(client_port)
        builder = builder.setSplitsPerRegion(opg.numSplitsPerRegion)
        builder = builder.setZkSessionTimeout(1000000)

    for key, java_type in vertex_sample:
        if key not in props:
            continue
        if java_type == "java.lang.Integer":
            ptype, default = pgx_types.PropertyType.INTEGER, 0
        elif java_type == "java.lang.Double":
            ptype, default = pgx_types.PropertyType.DOUBLE, 1.0
        elif java_type == "java.lang.Float":
            ptype, default = pgx_types.PropertyType.FLOAT, 1.0
        elif java_type == "java.lang.Long":
            ptype, default = pgx_types.PropertyType.LONG, long(0)
        else:
            ptype, default = pgx_types.PropertyType.STRING, key
        builder.addNodeProperty(key, ptype, default)

    # Edge defaults differ from the vertex ones for Float (0.0 here, 1.0 above).
    for key, java_type in edge_sample:
        if key not in props:
            continue
        if java_type == "java.lang.Integer":
            ptype, default = pgx_types.PropertyType.INTEGER, 0
        elif java_type == "java.lang.Double":
            ptype, default = pgx_types.PropertyType.DOUBLE, 1.0
        elif java_type == "java.lang.Float":
            ptype, default = pgx_types.PropertyType.FLOAT, 0.0
        elif java_type == "java.lang.Long":
            ptype, default = pgx_types.PropertyType.LONG, long(0)
        else:
            ptype, default = pgx_types.PropertyType.STRING, key
        builder.addEdgeProperty(key, ptype, default)

    graph_cfg = builder.build()
    if backend == "nosql":
        opg = nosql.OraclePropertyGraph.getInstance(graph_cfg)
    elif backend == "hbase":
        opg = hbase.OraclePropertyGraph.getInstance(graph_cfg)
    return opg

def analyst(url=None, sessionLabel=None, *properties):
    """
    Creates an in-memory analyst for the graph.
    url selects the PGX instance (a local engine is started when url is None)
    and sessionLabel names the session ("my-session" when empty).
    Properties loaded into the analyst must be named after those two arguments:
    e.g. a = analyst(None, None, "label", "weight")
    """
    global opg
    opg = analyst_config(properties)

    if url:
        instance = pgx.getInstance(url)
    else:
        instance = pgx.getInstance()
        # Only a missing url (None) starts the engine; other falsy values do not.
        if url is None:
            instance.startEngine()

    session = instance.createSession(sessionLabel or "my-session")

    pgx_graph = session.readGraphWithProperties(opg.getConfig())
    analyst_ctx = session.createAnalyst()
    return Janalyst(pgx_graph, analyst_ctx)

def subgraph_from_filter(analyst_obj, filter_exp):
    """
    Create a subgraph of the graph held by analyst_obj using a
    PGX filter expression.
    Returns a new Janalyst wrapping the filtered graph together with the
    analyst context of analyst_obj.
    """
    # Use the analyst that was passed in rather than a name from outer scope.
    subgraph = analyst_obj.graph.filter(filter_exp)
    return Janalyst(subgraph, analyst_obj.analyst_context) #Janalyst(new_ac)

def saveAnalyst(analyst_obj, filename, overwrite=True):
    """
    Write the graph of analyst_obj to filename in GraphML format.
    The session ID and graph name are read from analyst_obj.analyst_context;
    overwrite is forwarded to storeGraphWithProperties.
    """
    core = pgx.getCore()
    context = analyst_obj.analyst_context
    core.storeGraphWithProperties(
        context.getSessionId(),
        context.getGraphName(),
        pgx_config.Format.GRAPHML,
        filename,
        overwrite)


def shutdown():
    """
    Shut down the graph held in the opg context, if there is one.
    The JVM itself is left running.
    """
    if opg is None:
        return
    opg.shutdown()
    #shutdownJVM()


if __name__ == "__main__":
    if len(sys.argv) > 1:
        if sys.argv[1] == "test" or sys.argv[1] == "testclean":
            print "entering test"
            connectONDB("marvel", "kvstore", ["localhost:5000"])
            #connectHBase("marvel", "localhost")
            if sys.argv[1] == "testclean":
                print "entering testclean"
                dropVertexIndex("name")
                dropEdgeIndex("appearedWith")
                clearRepository()
                marvel_1 = csv.reader(open("hero-network.csv"), delimiter=',', quotechar='"')
                for hero in marvel_1:
                    hedge = getEdgeByEndpoints("appearedWith", hero[0], hero[1])
                    if hedge == None:
                        hedge = addEdgeByName("appearedWith", hero[0], hero[1])
                        hedge.setProperty("weight", 1.0)
                    else:
                        weight = hedge.getProperty("weight")
                        weight = weight.floatValue() + 1.0
                        hedge.setProperty("weight", weight)
                    opg.commit()
                createVertexIndex("name")
                createEdgeIndex("appearedWith")
            print "edges", countE()
            print "searching index"
            for v in searchVertexIndex("name", "IRON*"):
                print v
            a = analyst(None, "marvel-analysis", "name", "weight")
            #example against a jetty-hosted PGX instance
            #a = analyst("http://scott:tiger@localhost:8080/pgx", "marvel-analysis", "name", "weight")
            print a.countTriangles(), "triangles"
            print "in degree centrality"
            ind =  a.inDegreeCentrality().topK(10)
            for ddist in ind:
                v = opg.getVertex(ddist.getKey().getId())
                print v.getProperty("name"), ddist.getValue()
            print "10 largest PR values"
            for tk in a.pagerank().getTopKValues(10):
                v = opg.getVertex(tk.getKey().getId())
                print v.getProperty("name"), tk.getValue()
            print "triangles before subgraph", a.countTriangles()
            a = subgraph_from_filter(a, fexp.EdgeFilter("edge.weight > 1.0"))
            print "triangles after subgraph", a.countTriangles()
            shutdown()

Module variables

var KEYWORDS

var opg

var pyopg_cp

Functions

def E(

eid)

Shorthand to fetch an edge by id

def E(eid):
    """
    Short alias for opg.getEdge(eid); the ID is passed through unchecked.
    """
    edge = opg.getEdge(eid)
    return edge

def V(

vid)

Shorthand to fetch a vertex by id

def V(vid):
    """
    Short alias for opg.getVertex(vid); the ID is passed through unchecked.
    """
    vertex = opg.getVertex(vid)
    return vertex

def addEdge(

eid, out_vid, in_vid, label)

Add a labeled edge to the graph.

def addEdge(eid, out_vid, in_vid, label):
    """
    Create an edge with ID eid and the given label from the vertex with ID
    out_vid to the vertex with ID in_vid, and return it.
    All three IDs are validated with _checkIDType first.
    """
    for candidate in (eid, out_vid, in_vid):
        _checkIDType(candidate)
    source = opg.getVertex(out_vid)
    target = opg.getVertex(in_vid)
    return opg.addEdge(eid, source, target, label)

def addEdgeByName(

ename, out_vname, in_vname)

Add an edge to the graph by name. Takes names for the edge, out vertex and in vertex as input.

def addEdgeByName(ename, out_vname, in_vname):
    """
    Create an edge labelled ename between two vertices identified by their
    'name' property and return it.
    Both endpoints are looked up first; any that is missing is then created
    with addVertexByName. The edge ID is a random 32-bit value.
    """
    source = getVertexByName(out_vname)
    target = getVertexByName(in_vname)
    # Both lookups run before either vertex is created.
    if not source:
        source = addVertexByName(out_vname)
    if not target:
        target = addVertexByName(in_vname)
    source_id = long(str(source.getId()))
    target_id = long(str(target.getId()))
    return addEdge(random.getrandbits(32), source_id, target_id, ename)

def addVertex(

vid)

Add a vertex with ID vid to the graph

def addVertex(vid):
    """
    Create a vertex with ID vid and return it.
    int IDs are converted with long(); long IDs are passed through unchanged.
    Raises OPGDALException for any other type.
    """
    if isinstance(vid, int):
        return opg.addVertex(long(vid))
    if isinstance(vid, long):
        return opg.addVertex(vid)
    raise OPGDALException("IDs must be ints or longs")

def addVertexByName(

name)

Add a new vertex to the graph with a property 'name'

def addVertexByName(name):
    """
    Create a vertex carrying the property 'name' and return it.
    The vertex ID is a random 32-bit value; nothing here checks whether
    that ID is already in use.
    """
    new_id = random.getrandbits(32)
    vertex = opg.addVertex(new_id)
    vertex.setProperty("name", name, True)
    return vertex

def analyst(

url=None, sessionLabel=None, *properties)

Creates an in-memory analyst for the graph. Properties loaded into the analyst must be named after the url and sessionLabel arguments: e.g. a = analyst(None, None, "label", "weight")

def analyst(url=None, sessionLabel=None, *properties):
    """
    Creates an in-memory analyst for the graph.
    url selects the PGX instance (a local engine is started when url is None)
    and sessionLabel names the session ("my-session" when empty).
    Properties loaded into the analyst must be named after those two arguments:
    e.g. a = analyst(None, None, "label", "weight")
    """
    global opg
    opg = analyst_config(properties)

    if url:
        instance = pgx.getInstance(url)
    else:
        instance = pgx.getInstance()
        # Only a missing url (None) starts the engine; other falsy values do not.
        if url is None:
            instance.startEngine()

    session = instance.createSession(sessionLabel or "my-session")

    pgx_graph = session.readGraphWithProperties(opg.getConfig())
    analyst_ctx = session.createAnalyst()
    return Janalyst(pgx_graph, analyst_ctx)

def analyst_config(

props=[])

Create an in-memory analyst configuration on this graph for complex analyses

def analyst_config(props = []):
    """
    Build a PGX graph configuration for the current graph and reopen the
    graph from it.
    Property keys and their Java type names are sampled from vertices and
    edges; only keys listed in props are added to the configuration.
    Integer, Double, Float and Long properties get the matching PGX type,
    anything else is declared as STRING with the key itself as default.
    Rebinds the global opg to the reopened graph and returns it.
    """
    global opg

    vertex_sample = getAllVertexPropertyKeys(True)
    edge_sample = getAllEdgePropertyKeys(True)

    backend = _checkDBType()
    builder = None
    if backend == "nosql":
        store_cfg = opg.kVStoreConfig
        builder = pgx_config.PgNosqlGraphConfigBuilder()
        builder = builder.setName(opg.getGraphName())
        builder = builder.setHosts(jutil.Arrays.asList(store_cfg.helperHosts))
        builder = builder.setStoreName(store_cfg.storeName)
        #analyst_cfg = pgx_config.GraphConfigBuilder.forNosql().setName(opg.getGraphName()).setHosts(jutil.Arrays.asList(kvcfg.helperHosts)).setStoreName(kvcfg.storeName)
    elif backend == "hbase":
        quorum = opg.getConfiguration().get("hbase.zookeeper.quorum")
        client_port = int(opg.getConfiguration().get("hbase.zookeeper.property.clientPort"))
        builder = pgx_config.PgHbaseGraphConfigBuilder()
        builder = builder.setName(opg.getGraphName())
        builder = builder.setZkQuorum(quorum)
        builder = builder.setZkClientPort(client_port)
        builder = builder.setSplitsPerRegion(opg.numSplitsPerRegion)
        builder = builder.setZkSessionTimeout(1000000)

    for key, java_type in vertex_sample:
        if key not in props:
            continue
        if java_type == "java.lang.Integer":
            ptype, default = pgx_types.PropertyType.INTEGER, 0
        elif java_type == "java.lang.Double":
            ptype, default = pgx_types.PropertyType.DOUBLE, 1.0
        elif java_type == "java.lang.Float":
            ptype, default = pgx_types.PropertyType.FLOAT, 1.0
        elif java_type == "java.lang.Long":
            ptype, default = pgx_types.PropertyType.LONG, long(0)
        else:
            ptype, default = pgx_types.PropertyType.STRING, key
        builder.addNodeProperty(key, ptype, default)

    # Edge defaults differ from the vertex ones for Float (0.0 here, 1.0 above).
    for key, java_type in edge_sample:
        if key not in props:
            continue
        if java_type == "java.lang.Integer":
            ptype, default = pgx_types.PropertyType.INTEGER, 0
        elif java_type == "java.lang.Double":
            ptype, default = pgx_types.PropertyType.DOUBLE, 1.0
        elif java_type == "java.lang.Float":
            ptype, default = pgx_types.PropertyType.FLOAT, 0.0
        elif java_type == "java.lang.Long":
            ptype, default = pgx_types.PropertyType.LONG, long(0)
        else:
            ptype, default = pgx_types.PropertyType.STRING, key
        builder.addEdgeProperty(key, ptype, default)

    graph_cfg = builder.build()
    if backend == "nosql":
        opg = nosql.OraclePropertyGraph.getInstance(graph_cfg)
    elif backend == "hbase":
        opg = hbase.OraclePropertyGraph.getInstance(graph_cfg)
    return opg

def clearRepository(

)

This method removes all vertices and edges from this property graph instance.

def clearRepository():
    """
    Remove every vertex and edge from the graph held in the opg context
    by delegating to opg.clearRepository(). Returns nothing.
    """
    graph = opg
    graph.clearRepository()

def connect(

graph_name, store_name, hosts)

def connect(graph_name, store_name, hosts):
    """
    Open graph_name in the Oracle NoSQL store store_name and make it the
    module context.
    hosts is passed to KVStoreConfig as-is when it is a list; any other
    value is wrapped in a one-element list first.
    Returns the graph instance, which is also stored in the global opg.
    """
    global opg
    host_list = hosts if isinstance(hosts, list) else [hosts]
    kconfig = kv.KVStoreConfig(store_name, host_list)
    opg = nosql.OraclePropertyGraph.getInstance(kconfig, graph_name)
    return opg

def connectHBase(

graph_name, zk_quorum, client_port='2181')

def connectHBase(graph_name, zk_quorum, client_port="2181"):
    """
    Open graph_name in HBase and make it the module context.
    zk_quorum is stored under hbase.zookeeper.quorum and client_port under
    hbase.zookeeper.property.clientPort; client_port may be a string or an
    integer. The scanner and ZooKeeper session timeouts are set to "1000000".
    Returns the graph instance, which is also stored in the global opg.
    """
    global opg
    hbconf = hadoop_hbase.HBaseConfiguration.create()
    hbconf.set("hbase.zookeeper.quorum", zk_quorum)
    # Every value set here is a string, so coerce a numeric port as well.
    hbconf.set("hbase.zookeeper.property.clientPort", str(client_port))
    hbconf.set("hbase.client.scanner.timeout.period", "1000000")
    hbconf.set("zookeeper.session.timeout", "1000000")
    hconn = hbase_client.HConnectionManager.createConnection(hbconf)
    opg = hbase.OraclePropertyGraph.getInstance(hbconf, hconn, graph_name)
    return opg

def connectONDB(

graph_name, store_name, hosts)

def connectONDB(graph_name, store_name, hosts):
    """
    Open graph_name in the Oracle NoSQL store store_name and make it the
    module context.
    hosts is passed to KVStoreConfig as-is when it is a list; any other
    value is wrapped in a one-element list first.
    Returns the graph instance, which is also stored in the global opg.
    """
    global opg
    host_list = hosts if isinstance(hosts, list) else [hosts]
    kconfig = kv.KVStoreConfig(store_name, host_list)
    opg = nosql.OraclePropertyGraph.getInstance(kconfig, graph_name)
    return opg

def countE(

*keys)

Return the edge count for the graph.

def countE(*keys):
    """
    Count the edges returned by opg.getEdges for the given keys by
    iterating over all of them.
    """
    edges = OPGIterator(opg.getEdges(keys).iterator())
    return sum(1 for _ in edges)

def countV(

*keys)

Return the vertex count for the graph.

def countV(*keys):
    """
    Count the vertices returned by opg.getVertices for the given keys by
    iterating over all of them.
    """
    vertices = OPGIterator(opg.getVertices(keys).iterator())
    return sum(1 for _ in vertices)

def createEdgeIndex(

*index_keys)

Create automatic indices on all keys in index keys.

def createEdgeIndex(*index_keys):
    """
    Create key indices on edges for every key in index_keys.
    Default index parameters are (re)set first, built with buildFS and the
    directory "./lucene-index".
    """
    # NOTE(review): meaning of the numeric buildFS arguments is not visible here.
    params = text.OracleIndexParameters.buildFS(1, 4, 10000, 50000, True, "./lucene-index")
    opg.setDefaultIndexParameters(params)
    edge_class = JClass("com.tinkerpop.blueprints.Edge")
    opg.createKeyIndex(index_keys, edge_class)

def createVertexIndex(

*index_keys)

Create automatic indices on all keys in index keys.

def createVertexIndex(*index_keys):
    """
    Create key indices on vertices for every key in index_keys.
    Default index parameters are (re)set first, built with buildFS and the
    directory "./lucene-index".
    """
    # NOTE(review): meaning of the numeric buildFS arguments is not visible here.
    params = text.OracleIndexParameters.buildFS(1, 4, 10000, 50000, True, "./lucene-index")
    opg.setDefaultIndexParameters(params)
    vertex_class = JClass("com.tinkerpop.blueprints.Vertex")
    opg.createKeyIndex(index_keys, vertex_class)

def dropAllIndices(

)

Drops all indices on the graph.

def dropAllIndices():
    """
    Drop every index on the graph by calling opg.dropAllIndices().
    """
    graph = opg
    graph.dropAllIndices()

def dropAllIndicies(

)

Drops all existing indices

def dropAllIndicies():
    """
    Drop all existing indices by calling opg.dropAllIndices().
    The name is a misspelling of dropAllIndices, which this module also
    defines with the same behaviour.
    """
    graph = opg
    graph.dropAllIndices()

def dropEdgeIndex(

index_key)

Drops the specified index.

def dropEdgeIndex(index_key):
    """
    Drop the edge key index for index_key.
    """
    edge_class = JClass("com.tinkerpop.blueprints.Edge")
    opg.dropKeyIndex(index_key, edge_class)

def dropVertexIndex(

index_key)

Drops the specified index.

def dropVertexIndex(index_key):
    """
    Drop the vertex key index for index_key.
    """
    vertex_class = JClass("com.tinkerpop.blueprints.Vertex")
    opg.dropKeyIndex(index_key, vertex_class)

def exportFlatFiles(

v_filename, e_filename)

Write the current graph as vertex and edge files optimized for bulk loading

def exportFlatFiles(v_filename, e_filename):
    """
    Export the connected graph as a vertex file and an edge file suitable
    for bulk loading.
    _checkDBType() selects the utility class: loader for "nosql", hloader
    for "hbase"; for any other result nothing is done.
    """
    backend = _checkDBType()
    if backend == "nosql":
        utils = loader
    elif backend == "hbase":
        utils = hloader
    else:
        return
    # NOTE(review): meaning of the trailing False flag is not visible here.
    utils.exportFlatFiles(opg, v_filename, e_filename, False)

def exportGML(

filename)

Write the current graph as a GML file

def exportGML(filename):
    """
    Write the current graph as a GML file

    filename -- path of the GML file to write.

    The OraclePropertyGraphUtils class matching the connected database
    ("nosql" or "hbase", as reported by _checkDBType) does the export.
    The PrintStream handed to it is opened on /dev/null.
    Nothing is written for any other database type.
    """
    global opg
    dbtype = _checkDBType()
    # Resolve the utility class by name here: the module header binds
    # `loader` twice (the HBase class wins) and never binds `hloader`.
    if dbtype == "nosql":
        utils = JClass('oracle.pg.nosql.OraclePropertyGraphUtils')
    elif dbtype == "hbase":
        utils = JClass('oracle.pg.hbase.OraclePropertyGraphUtils')
    else:
        return
    utils.exportGML(opg, filename, pstream("/dev/null"))

def exportGraphML(

filename)

Write the current graph as a GraphML file

def exportGraphML(filename):
    """
    Write the current graph as a GraphML file

    filename -- path of the GraphML file to write.

    The OraclePropertyGraphUtils class matching the connected database
    ("nosql" or "hbase", as reported by _checkDBType) does the export.
    The PrintStream handed to it is opened on /dev/null.
    Nothing is written for any other database type.
    """
    global opg
    dbtype = _checkDBType()
    # Resolve the utility class by name here: the module header binds
    # `loader` twice (the HBase class wins) and never binds `hloader`.
    if dbtype == "nosql":
        utils = JClass('oracle.pg.nosql.OraclePropertyGraphUtils')
    elif dbtype == "hbase":
        utils = JClass('oracle.pg.hbase.OraclePropertyGraphUtils')
    else:
        return
    utils.exportGraphML(opg, filename, pstream("/dev/null"))

def exportGraphSON(

filename)

Write the current graph as a GraphSON file

def exportGraphSON(filename):
    """
    Write the current graph as a GraphSON file

    filename -- path of the GraphSON file to write.

    The OraclePropertyGraphUtils class matching the connected database
    ("nosql" or "hbase", as reported by _checkDBType) does the export.
    The PrintStream handed to it is opened on /dev/null.
    Nothing is written for any other database type.
    """
    global opg
    dbtype = _checkDBType()
    # Resolve the utility class by name here: the module header binds
    # `loader` twice (the HBase class wins) and never binds `hloader`.
    if dbtype == "nosql":
        utils = JClass('oracle.pg.nosql.OraclePropertyGraphUtils')
    elif dbtype == "hbase":
        utils = JClass('oracle.pg.hbase.OraclePropertyGraphUtils')
    else:
        return
    utils.exportGraphSON(opg, filename, pstream("/dev/null"))

def getAllEdgePropertyKeys(

getTypes=False, sample=0.1)

Return the set of all property keys defined on edges. Passing getTypes as True returns a set of tuples with the key and the value type.

def getAllEdgePropertyKeys(getTypes=False, sample=0.1):
    """
    Collect the property keys that appear on edges of the global graph.

    getTypes -- when False (default) every edge is visited and the result
                is a set of key names.  When True the result is a set of
                (key, type name) tuples, where the type name is the
                Python-side type name of the property value.
    sample   -- only used when getTypes is True: each edge is inspected
                only if a fresh random.random() draw is <= sample, so the
                typed result may be incomplete.
    """
    global opg
    found = set()
    for edge in OPGIterator(opg.getEdges().iterator()):
        if not getTypes:
            found = found.union(edge.getPropertyKeys())
            continue
        # Typed mode: one random draw per edge decides whether it is sampled.
        if random.random() <= sample:
            names = list(OPGIterator(edge.getPropertyKeys().iterator()))
            type_names = [type(edge.getProperty(name)).__name__ for name in names]
            found = found.union(zip(names, type_names))
    return found

def getAllVertexPropertyKeys(

getTypes=False, sample=0.1)

Return the set of all property keys defined on vertices.

def getAllVertexPropertyKeys(getTypes=False, sample=0.1):
    """
    Collect the property keys that appear on vertices of the global graph.

    getTypes -- when False (default) every vertex is visited and the
                result is a set of key names.  When True the result is a
                set of (key, type name) tuples, where the type name is
                the Python-side type name of the property value.
    sample   -- only used when getTypes is True: each vertex is inspected
                only if a fresh random.random() draw is <= sample, so the
                typed result may be incomplete.
    """
    global opg
    found = set()
    for vertex in OPGIterator(opg.getVertices().iterator()):
        if not getTypes:
            found = found.union(vertex.getPropertyKeys())
            continue
        # Typed mode: one random draw per vertex decides whether it is sampled.
        if random.random() <= sample:
            names = list(OPGIterator(vertex.getPropertyKeys().iterator()))
            type_names = [type(vertex.getProperty(name)).__name__ for name in names]
            found = found.union(zip(names, type_names))
    return found

def getEdgeByEndpoints(

edge_label, outv_label, inv_label)

Fetch an edge by its out and in-vertex names.

def getEdgeByEndpoints(edge_label, outv_label, inv_label):
    """
    Find an edge from the names of its two endpoint vertices.

    edge_label -- label the edge must carry.
    outv_label -- 'name' property of the out-vertex.
    inv_label  -- 'name' property of the in-vertex.

    Returns the first matching outgoing edge, or None when there is no
    match or when the lookup raises AttributeError (for instance when the
    out-vertex was not found and is therefore None).
    """
    global opg
    source = getVertexByName(outv_label)
    target = getVertexByName(inv_label)
    try:
        # The label is wrapped in a list to satisfy the Java method signature.
        outgoing = source.getEdges(blueprints.Direction.OUT, [edge_label])
        for edge in OPGIterator(outgoing.iterator()):
            if target == edge.getInVertex():
                return edge
    except AttributeError:
        pass
    return None

def getEdgeDict(

eid)

Get the properties of an edge as a dict.

def getEdgeDict(eid):
    """
    Get the properties of an edge as a dict.

    eid -- ID of the edge; validated with _checkIDType and converted
           with long() before the lookup.

    Returns a dict mapping each property key of the edge to its value.
    """
    global opg
    d = {}
    _checkIDType(eid)
    e = opg.getEdge(long(eid))
    # Read from the fetched edge `e`; the previous code referenced an
    # undefined name `v` here.
    for k in e.getPropertyKeys():
        d[k] = e.getProperty(k)
    return d

def getEdges(

*keys)

Gets all edges with a property corresponding to key

def getEdges(*keys):
    """
    Iterate over the edges selected by the given property keys.

    keys -- property key names; the tuple is handed to opg.getEdges
            as a single argument.

    Returns an OPGIterator over the result.
    """
    global opg
    selected = opg.getEdges(keys)
    return OPGIterator(selected.iterator())

def getGraphName(

)

Returns the graph name.

def getGraphName():
    """
    Return the name of the global graph, as reported by opg.getGraphName().
    """
    global opg
    graph_name = opg.getGraphName()
    return graph_name

def getVertexByName(

name)

Retrieve a vertex by the property 'name'

def getVertexByName(name):
    """
    Retrieve a vertex by the property 'name'

    name -- value the vertex's 'name' property must have.

    Returns the first vertex produced by the lookup, or None when no
    vertex matches or the lookup fails.
    """
    global opg
    try:
        return OPGIterator(opg.getVertices("name", name).iterator()).next()
    except Exception:
        # Best-effort lookup: an exhausted iterator (StopIteration) or any
        # lookup error yields None.  Unlike a bare `except:`, this lets
        # KeyboardInterrupt and SystemExit propagate.
        return None

def getVertexDict(

vid)

Get the properties of a vertex as a dict.

def getVertexDict(vid):
    """
    Return the properties of one vertex as a Python dict.

    vid -- ID of the vertex; validated with _checkIDType and passed to
           opg.getVertex as given.

    Returns a dict mapping each property key of the vertex to its value.
    """
    global opg
    _checkIDType(vid)
    vertex = opg.getVertex(vid)
    properties = {}
    for key in vertex.getPropertyKeys():
        properties[key] = vertex.getProperty(key)
    return properties

def getVertices(

*keys)

Gets all vertices with a property corresponding to one of the keys

def getVertices(*keys):
    """
    Iterate over the vertices selected by the given property keys.

    keys -- property key names; the tuple is handed to opg.getVertices
            as a single argument.

    Returns an OPGIterator over the result.
    """
    global opg
    selected = opg.getVertices(keys)
    return OPGIterator(selected.iterator())

def getVerticesByValue(

key, value, wildcard=False)

Return an iterable to all the vertices in the graph that have a particular key/value property

def getVerticesByValue(key, value, wildcard=False):
    """
    Iterate over the vertices whose property `key` matches `value`.

    key      -- property key to match on.
    value    -- value to look for.
    wildcard -- forwarded unchanged as the third argument of
                opg.getVertices (default False).

    Returns an OPGIterator over the matching vertices.
    """
    global opg
    matches = opg.getVertices(key, value, wildcard)
    return OPGIterator(matches.iterator())

def importFlatFiles(

v_filename, e_filename, dop=4)

Bulk load a graph using vertex and edge files. dop controls the number of JVM threads used for loading.

def importFlatFiles(v_filename, e_filename, dop=4):
    """
    Bulk load vertex and edge flat files into the global graph.

    v_filename -- path of the vertex file.
    e_filename -- path of the edge file.
    dop        -- controls the number of JVM threads used for loading.

    The OraclePropertyGraphDataLoader matching the connected database
    ("nosql" or "hbase", as reported by _checkDBType) is used.
    Nothing is loaded for any other database type.
    """
    global opg
    dbtype = _checkDBType()
    if dbtype == "nosql":
        loader_class = bulkloader
    elif dbtype == "hbase":
        loader_class = hbulkloader
    else:
        return
    data_loader = loader_class.getInstance()
    data_loader.loadData(opg, v_filename, e_filename, dop)

def importGML(

filename)

Load a GML file into the database

def importGML(filename):
    """
    Load a GML file into the database

    filename -- path of the GML file to read.

    The OraclePropertyGraphUtils class matching the connected database
    ("nosql" or "hbase", as reported by _checkDBType) does the import.
    The PrintStream handed to it is opened on /dev/null.
    Nothing is loaded for any other database type.
    """
    global opg
    dbtype = _checkDBType()
    # Resolve the utility class by name here: the module header binds
    # `loader` twice (the HBase class wins) and never binds `hloader`.
    if dbtype == "nosql":
        utils = JClass('oracle.pg.nosql.OraclePropertyGraphUtils')
    elif dbtype == "hbase":
        utils = JClass('oracle.pg.hbase.OraclePropertyGraphUtils')
    else:
        return
    utils.importGML(opg, filename, pstream("/dev/null"))

def importGraphML(

filename)

Load a GraphML file into the database

def importGraphML(filename):
    """
    Load a GraphML file into the database

    filename -- path of the GraphML file to read.

    The OraclePropertyGraphUtils class matching the connected database
    ("nosql" or "hbase", as reported by _checkDBType) does the import.
    The PrintStream handed to it is opened on /dev/null.
    Nothing is loaded for any other database type.
    """
    global opg
    dbtype = _checkDBType()
    # Resolve the utility class by name here: the module header binds
    # `loader` twice (the HBase class wins) and never binds `hloader`.
    if dbtype == "nosql":
        utils = JClass('oracle.pg.nosql.OraclePropertyGraphUtils')
    elif dbtype == "hbase":
        utils = JClass('oracle.pg.hbase.OraclePropertyGraphUtils')
    else:
        return
    utils.importGraphML(opg, filename, pstream("/dev/null"))

def importGraphSON(

filename)

Load a GraphSON file into the database

def importGraphSON(filename):
    """
    Load a GraphSON file into the database

    filename -- path of the GraphSON file to read.

    The OraclePropertyGraphUtils class matching the connected database
    ("nosql" or "hbase", as reported by _checkDBType) does the import.
    The PrintStream handed to it is opened on /dev/null.
    Nothing is loaded for any other database type.
    """
    global opg
    dbtype = _checkDBType()
    # Resolve the utility class by name here: the module header binds
    # `loader` twice (the HBase class wins) and never binds `hloader`.
    if dbtype == "nosql":
        utils = JClass('oracle.pg.nosql.OraclePropertyGraphUtils')
    elif dbtype == "hbase":
        utils = JClass('oracle.pg.hbase.OraclePropertyGraphUtils')
    else:
        return
    utils.importGraphSON(opg, filename, pstream("/dev/null"))

def saveAnalyst(

analyst_obj, filename, overwrite=True)

Save the current analyst graph as a GraphML file

def saveAnalyst(analyst_obj, filename, overwrite=True):
    """
    Store the graph held by an analyst object as a GraphML file.

    analyst_obj -- object exposing an `analyst_context` attribute that
                   provides getSessionId() and getGraphName().
    filename    -- destination path.
    overwrite   -- forwarded to storeGraphWithProperties (default True).
    """
    core = pgx.getCore()
    context = analyst_obj.analyst_context
    core.storeGraphWithProperties(
        context.getSessionId(),
        context.getGraphName(),
        pgx_config.Format.GRAPHML,
        filename,
        overwrite)

def searchEdgeIndex(

key, term)

Search the index of a given key for the term.

def searchEdgeIndex(key, term):
    """
    Look up edges whose property `key` matches `term`.

    Calls opg.getEdges(key, term, True) and returns an OPGIterator
    over the result.
    """
    global opg
    hits = opg.getEdges(key, term, True)
    return OPGIterator(hits.iterator())

def searchVertexIndex(

key, term)

Search the index of a given key for the term.

def searchVertexIndex(key, term):
    """
    Look up vertices whose property `key` matches `term`.

    Calls opg.getVertices(key, term, True) and returns an OPGIterator
    over the result.
    """
    global opg
    hits = opg.getVertices(key, term, True)
    return OPGIterator(hits.iterator())

def setEdgeProperty(

eid, label, value)

Convenience method for adding properties to edges.

def setEdgeProperty(eid, label, value):
    """
    Set one property on the edge with ID eid (convenience wrapper).

    eid   -- edge ID; validated with _checkIDType and converted with long().
    label -- property key to set.
    value -- property value to store.
    """
    global opg
    _checkIDType(eid)
    edge = opg.getEdge(long(eid))
    edge.setProperty(label, value, True)

def setVertexProperty(

vid, label, value)

Sets a property on the vertex with ID vid. Note that vertices can have properties added directly to them, this is a convenience method.

def setVertexProperty(vid, label, value):
    """
    Set one property on the vertex with ID vid.

    This is only a convenience wrapper: properties can also be set
    directly on a vertex object.

    vid   -- vertex ID; validated with _checkIDType and converted with long().
    label -- property key to set.
    value -- property value to store.
    """
    global opg
    _checkIDType(vid)
    vertex = opg.getVertex(long(vid))
    vertex.setProperty(label, value, True)

def shutdown(

)

Shutdown PGX

def shutdown():
    """
    Shut down the global graph instance.

    Calls opg.shutdown() when opg is set; does nothing when opg is None.
    """
    global opg
    if opg is None:
        return
    opg.shutdown()

def subgraph_from_filter(

analyst_obj, filter_exp)

Create a subgraph on this graph analyst using a PGX filter expression

def subgraph_from_filter(analyst_obj, filter_exp):
    """
    Create a subgraph on this graph analyst using a
    PGX filter expression

    analyst_obj -- analyst wrapper exposing `graph` and `analyst_context`.
    filter_exp  -- PGX filter expression passed to graph.filter().

    Returns a new Janalyst built from the filtered graph and the
    original analyst context.
    """
    # Use the analyst_obj parameter; the previous code referenced an
    # undefined name `a` here.
    subgraph = analyst_obj.graph.filter(filter_exp)
    return Janalyst(subgraph, analyst_obj.analyst_context)

def unparseMetric(

c)

Simple function for mapping iterators of Java Long IDs and metrics into Vertices and python floats.

def unparseMetric(c):
    """
    Convert one (ID, metric) entry into a (vertex, float) pair.

    c -- entry object exposing getKey() (a vertex ID) and getValue()
         (a metric whose toString() parses as a float).

    Returns a tuple of the vertex looked up in the global graph and the
    metric as a Python float.
    """
    global opg
    vertex_id, raw_metric = c.getKey(), c.getValue()
    return opg.getVertex(vertex_id), float(raw_metric.toString())

Classes

class OPGAnalystException

class OPGAnalystException(Exception):
    """Exception type carrying an arbitrary payload in `value`."""

    def __init__(self, value):
        """Remember the payload that describes the failure."""
        self.value = value

    def __str__(self):
        """Render the exception as the repr() of its payload."""
        rendered = repr(self.value)
        return rendered

Ancestors (in MRO)

Class variables

var args

var message

Instance variables

var value

Methods

def __init__(

self, value)

def __init__(self, value):
    """Store value on the instance as self.value."""
    self.value = value

class OPGDALException

class OPGDALException(Exception):
    """Exception type carrying an arbitrary payload in `value`."""

    def __init__(self, value):
        """Remember the payload that describes the failure."""
        self.value = value

    def __str__(self):
        """Render the exception as the repr() of its payload."""
        rendered = repr(self.value)
        return rendered

Ancestors (in MRO)

  • OPGDALException
  • exceptions.Exception
  • exceptions.BaseException
  • __builtin__.object

Class variables

var args

var message

Instance variables

var value

Methods

def __init__(

self, value)

def __init__(self, value):
    """Store value on the instance as self.value."""
    self.value = value

class OPGIterator

class OPGIterator(object):
    """
    Adapt a Java-style iterator to the Python iterator protocol.

    The wrapped object must provide hasNext() and next().  Both the
    Python 2 (`next`) and Python 3 (`__next__`) spellings are provided
    so instances work in for-loops, list() and similar on either version.
    """

    def __init__(self, jitter):
        """Wrap jitter, the Java-style iterator to read from."""
        self.jitter = jitter

    def __iter__(self):
        """Return self; the wrapper is its own iterator."""
        return self

    def next(self):
        """
        Return the next element of the wrapped iterator.

        Raises StopIteration once hasNext() reports no more elements.
        """
        if not self.jitter.hasNext():
            raise StopIteration
        return self.jitter.next()

    # Python 3 looks up __next__ instead of next.
    __next__ = next

Ancestors (in MRO)

Instance variables

var jitter

Methods

def __init__(

self, jitter)

def __init__(self, jitter):
    """Store jitter, the wrapped iterator object, on the instance."""
    self.jitter = jitter

def next(

self)

def next(self):
    """
    Return the next element from the wrapped iterator.

    Raises StopIteration when the wrapped iterator's hasNext() compares
    equal to 0 (i.e. reports no more elements).
    """
    if (self.jitter.hasNext() == 0):
        raise StopIteration
    return self.jitter.next()