core module
#! /usr/bin/env python from jpype import * import sys, os import uuid import random import csv """ pyopg core functions Copyright 2015, Oracle and/or its affiliates. All rights reserved. """ pyopg_cp = os.environ.get('OPG_CP', None) #start the JVM startJVM(getDefaultJVMPath(), "-ea", "-Djava.class.path="+pyopg_cp) #define specific Java classes HashMap = java.util.HashMap HBaseBytes = JClass('org.apache.hadoop.hbase.util.Bytes') pgxf = JClass('oracle.pgx.config.PgxConfig$Field') loader = JClass('oracle.pg.nosql.OraclePropertyGraphUtils') loader = JClass('oracle.pg.hbase.OraclePropertyGraphUtils') bulkloader = JClass('oracle.pg.nosql.OraclePropertyGraphDataLoader') hbulkloader = JClass('oracle.pg.hbase.OraclePropertyGraphDataLoader') hbasekeyfilter = JClass('oracle.pg.hbase.OracleKeyFilter') pstream = JClass('java.io.PrintStream') pgx = JClass('oracle.pgx.api.Pgx') analyst_class = JClass('oracle.pgx.api.Analyst') BAOS = JClass('java.io.ByteArrayOutputStream') #define Java packages common = JPackage('oracle.pg.common') nosql = JPackage('oracle.pg.nosql') nosql_index = JPackage('oracle.pg.nosql.index') hbase = JPackage('oracle.pg.hbase') hbase_index = JPackage('oracle.pg.hbase.index') text = JPackage('oracle.pg.text') kv = JPackage('oracle.kv') blueprints = JPackage('com.tinkerpop.blueprints') pgx_config = JPackage('oracle.pgx.config') pgx_types = JPackage('oracle.pgx.common.types') pgx_control = JPackage('oracle.pgx.api') fexp = JPackage('oracle.pgx.filter.expressions') jutil = JPackage('java.util') hadoop_conf = JPackage('org.apache.hadoop.conf') hadoop_hbase = JPackage('org.apache.hadoop.hbase') hbase_client = JPackage('org.apache.hadoop.hbase.client') hbase_filter = JPackage('org.apache.hadoop.hbase.filter') #from janalyst import * #set PGX Configuration -- TODO: make this readable from a config file ## do we need to do this in PGX 1.1? 
pgx_param_map = HashMap() pgx_param_map.put("ENABLE_GM_COMPILER", False) #thisPgxConfig = pgx_config.PgxConfig().init()#.parse(pgx_param_map, False, None) opg = None #this is the context object class OPGDALException(Exception): def __init__(self, value): self.value = value def __str__(self): return repr(self.value) class OPGAnalystException(Exception): def __init__(self, value): self.value = value def __str__(self): return repr(self.value) class OPGIterator: def __init__(self, jitter): self.jitter = jitter def __iter__(self): return self def next(self): if (self.jitter.hasNext() == 0): raise StopIteration return self.jitter.next() """ OPG DAL Commands begin here This is the base set of commands available when working with the pyopg layer. """ def _checkIDType(iid): """ Checks to see if an id is convertable to Java PG types """ if not isinstance(iid,int) and not isinstance(iid,long) and not isinstance(iid, java.lang.Long): raise OPGDALException("IDs must be ints or longs") return False return True def _checkDBType(): """ Checks opg context to determine if ONDB or HBase is the underlying store """ global opg opgclass = type(opg).__name__ if opgclass == "oracle.pg.nosql.OraclePropertyGraph": return "nosql" elif opgclass == "oracle.pg.hbase.OraclePropertyGraph": return "hbase" def connect(graph_name, store_name, hosts): if isinstance(hosts, list): kconfig = kv.KVStoreConfig(store_name, hosts) else: kconfig = kv.KVStoreConfig(store_name, [hosts]) global opg opg = nosql.OraclePropertyGraph.getInstance(kconfig, graph_name) return opg def connectONDB(graph_name, store_name, hosts): if isinstance(hosts, list): kconfig = kv.KVStoreConfig(store_name, hosts) else: kconfig = kv.KVStoreConfig(store_name, [hosts]) global opg opg = nosql.OraclePropertyGraph.getInstance(kconfig, graph_name) return opg def connectHBase(graph_name, zk_quorum, client_port="2181"): hbconf = hadoop_hbase.HBaseConfiguration.create(); hbconf.set("hbase.zookeeper.quorum", zk_quorum) 
hbconf.set("hbase.zookeeper.property.clientPort", client_port) hbconf.set("hbase.client.scanner.timeout.period", "1000000") hbconf.set("zookeeper.session.timeout", "1000000") hconn = hbase_client.HConnectionManager.createConnection(hbconf) global opg opg = hbase.OraclePropertyGraph.getInstance(hbconf, hconn, graph_name) return opg def getGraphName(): """ Returns the graph name. """ global opg return opg.getGraphName() def clearRepository(): """ This method removes all vertices and edges from this property graph instance. """ global opg opg.clearRepository() def dropAllIndicies(): """ Drops all existing indices """ global opg opg.dropAllIndices() def getEdges(*keys): """ Gets all edges with a property corresponding to key """ global opg return OPGIterator(opg.getEdges(keys).iterator()) def addVertex(vid): """ Add a vertex with ID vid to the graph """ global opg result = None if isinstance(vid, int): result = opg.addVertex(long(vid)) elif isinstance(vid, long): result = opg.addVertex(vid) else: raise OPGDALException("IDs must be ints or longs") return result def addVertexByName(name): """ Add a new vertex to the graph with a property 'name' """ global opg result = opg.addVertex(random.getrandbits(32)) result.setProperty("name", name, True) return result def getVertexByName(name): """ Retrieve a vertex by the property 'name' """ global opg try: result = OPGIterator(opg.getVertices("name", name).iterator()).next() return result except: return None def setVertexProperty(vid, label, value): """ Sets a property on the vertex with ID vid. Note that vertices can have properties added directly to them, this is a convenience method. """ global opg _checkIDType(vid) v = opg.getVertex(long(vid)) v.setProperty(label, value, True) def addEdge(eid, out_vid, in_vid, label): """ Add a labeled edge to the graph. 
""" global opg for i in [eid, out_vid, in_vid]: _checkIDType(i) outv = opg.getVertex(out_vid) inv = opg.getVertex(in_vid) result = opg.addEdge(eid, outv, inv, label) return result def addEdgeByName(ename, out_vname, in_vname): """ Add an edge to the graph by name. Takes names for the edge, out vertex and in vertex as input. """ global opg outv = getVertexByName(out_vname) inv = getVertexByName(in_vname) if not outv: outv = addVertexByName(out_vname) if not inv: inv = addVertexByName(in_vname) outvid = long(str(outv.getId())) invid = long(str(inv.getId())) e = addEdge(random.getrandbits(32),outvid, invid, ename) return e def getEdgeByEndpoints(edge_label, outv_label, inv_label): """ Fetch an edge by its out and in-vertex names. """ global opg outv = getVertexByName(outv_label) inv = getVertexByName(inv_label) try: #dumb hack to deal with Java method signature oedges = OPGIterator(outv.getEdges(blueprints.Direction.OUT, [edge_label]).iterator()) for e in oedges: if inv == e.getInVertex(): return e except AttributeError: return None return None def setEdgeProperty(eid, label, value): """ Convenience method for adding properties to edges. """ global opg _checkIDType(eid) e = opg.getEdge(long(eid)) e.setProperty(label, value, True) def getAllVertexPropertyKeys(getTypes=False, sample=0.1): """ Return the set of all property keys defined on vertices. """ global opg props = set() if not getTypes: for v in OPGIterator(opg.getVertices().iterator()): props = props.union(v.getPropertyKeys()) else: for v in OPGIterator(opg.getVertices().iterator()): rn = random.random() if rn <= sample: vkeys = list(OPGIterator(v.getPropertyKeys().iterator())) vtypes = [type(v.getProperty(k)).__name__ for k in vkeys] props = props.union(zip(vkeys, vtypes)) return props def getAllEdgePropertyKeys(getTypes=False, sample=0.1): """ Return the set of all property keys defined on edges. Passing getTypes as True returns a set of tuples with the key and the value type. 
""" global opg props = set() if not getTypes: for e in OPGIterator(opg.getEdges().iterator()): props = props.union(e.getPropertyKeys()) else: for e in OPGIterator(opg.getEdges().iterator()): rn = random.random() if rn <= sample: ekeys = list(OPGIterator(e.getPropertyKeys().iterator())) etypes = [type(e.getProperty(k)).__name__ for k in ekeys] props = props.union(zip(ekeys, etypes)) return props def getVertexDict(vid): """ Get the properties of a vertex as a dict. """ global opg d = {} _checkIDType(vid) v = opg.getVertex(vid) for k in v.getPropertyKeys(): d[k] = v.getProperty(k) return d def getEdgeDict(eid): """ Get the properties of a vertex as a dict. """ global opg d = {} _checkIDType(eid) e = opg.getEdge(long(eid)) for k in v.getPropertyKeys(): d[k] = v.getProperty(k) return d def getVertices(*keys): """ Gets all vertices with a property corresponding to one of the keys """ global opg return OPGIterator(opg.getVertices(keys).iterator()) def getVerticesByValue(key, value, wildcard=False): """ Return an iterable to all the vertices in the graph that have a particular key/value property """ global opg return OPGIterator(opg.getVertices(key, value, wildcard).iterator()) def V(vid): """ Shorthand to fetch a vertex by id """ global opg return opg.getVertex(vid) def E(eid): """ Shorthand to fetch an edge by id """ global opg return opg.getEdge(eid) def countV(*keys): """ Return the vertex count for the graph. """ global opg cv = 0 for v in OPGIterator(opg.getVertices(keys).iterator()): cv += 1 return cv def countE(*keys): """ Return the edge count for the graph. """ global opg ce = 0 for e in OPGIterator(opg.getEdges(keys).iterator()): ce += 1 return ce def createVertexIndex(*index_keys): """ Create automatic indices on all keys in index keys. 
""" global opg indexParams = text.OracleIndexParameters.buildFS(1, 4, 10000, 50000, True, "./lucene-index"); opg.setDefaultIndexParameters(indexParams) opg.createKeyIndex(index_keys, JClass("com.tinkerpop.blueprints.Vertex")) def createEdgeIndex(*index_keys): """ Create automatic indices on all keys in index keys. """ global opg indexParams = text.OracleIndexParameters.buildFS(1, 4, 10000, 50000, True, "./lucene-index"); opg.setDefaultIndexParameters(indexParams) opg.createKeyIndex(index_keys, JClass("com.tinkerpop.blueprints.Edge")) def searchVertexIndex(key, term): """ Search the index of a given key for the term. """ global opg return OPGIterator(opg.getVertices(key, term, True).iterator()) def searchEdgeIndex(key, term): """ Search the index of a given key for the term. """ global opg return OPGIterator(opg.getEdges(key, term, True).iterator()) def dropAllIndices(): """ Drops all indices on the graph. """ global opg opg.dropAllIndices() def dropVertexIndex(index_key): """ Drops the specified index. """ global opg opg.dropKeyIndex(index_key, JClass("com.tinkerpop.blueprints.Vertex")) def dropEdgeIndex(index_key): """ Drops the specified index. 
""" global opg opg.dropKeyIndex(index_key, JClass("com.tinkerpop.blueprints.Edge")) def importGML(filename): """ Load a GML file into the database """ global opg dbtype = _checkDBType() if dbtype == "nosql": loader.importGML(opg, filename, pstream("/dev/null")) elif dbtype == "hbase": hloader.importGML(opg, filename, pstream("/dev/null")) def importGraphSON(filename): """ Load a GraphSON file into the database """ global opg dbtype = _checkDBType() if dbtype == "nosql": loader.importGraphSON(opg, filename, pstream("/dev/null")) elif dbtype == "hbase": hloader.importGraphSON(opg, filename, pstream("/dev/null")) def importGraphML(filename): """ Load a GraphML file into the database """ global opg dbtype = _checkDBType() if dbtype == "nosql": loader.importGraphML(opg, filename, pstream("/dev/null")) elif dbtype == "hbase": hloader.importGraphML(opg, filename, pstream("/dev/null")) def exportGML(filename): """ Write the current graph as a GML file """ global opg dbtype = _checkDBType() if dbtype == "nosql": loader.exportGML(opg, filename, pstream("/dev/null")) elif dbtype == "hbase": hloader.exportGML(opg, filename, pstream("/dev/null")) def exportGraphSON(filename): """ Write the current graph as a GraphSON file """ global opg dbtype = _checkDBType() if dbtype == "nosql": loader.exportGraphSON(opg, filename, pstream("/dev/null")) elif dbtype == "hbase": hloader.exportGraphSON(opg, filename, pstream("/dev/null")) def exportGraphML(filename): """ Write the current graph as a GraphML file """ global opg dbtype = _checkDBType() if dbtype == "nosql": loader.exportGraphML(opg, filename, pstream("/dev/null")) elif dbtype == "hbase": hloader.exportGraphML(opg, filename, pstream("/dev/null")) def exportFlatFiles(v_filename, e_filename): """ Write the current graph as vertex and edge files optimized for bulk loading """ global opg dbtype = _checkDBType() if dbtype == "nosql": loader.exportFlatFiles(opg, v_filename, e_filename, False) elif dbtype == "hbase": 
hloader.exportFlatFiles(opg, v_filename, e_filename, False) def importFlatFiles(v_filename, e_filename, dop=4): """ Bulk load a graph using vertex and edge files dop controls the number of JVM threads used for loading """ global opg dbtype = _checkDBType() if dbtype == "nosql": bulkload = bulkloader.getInstance() bulkload.loadData(opg, v_filename, e_filename, dop) elif dbtype == "hbase": hload = hbulkloader.getInstance() hload.loadData(opg, v_filename, e_filename, dop) def unparseMetric(c): """ Simple function for mapping iterators of Java Long IDs and metrics into Vertices and python floats. """ global opg cid = c.getKey() metric = c.getValue() return opg.getVertex(cid),float(metric.toString()) def analyst_config(props = []): """ Create an in-memory analyst configuration on this graph for complex analyses """ global opg vertexProps = getAllVertexPropertyKeys(True) edgeProps = getAllEdgePropertyKeys(True) dbtype = _checkDBType() analyst_cfg = None if dbtype == "nosql": kvcfg = opg.kVStoreConfig analyst_cfg = pgx_config.PgNosqlGraphConfigBuilder().setName(opg.getGraphName()).setHosts(jutil.Arrays.asList(kvcfg.helperHosts)).setStoreName(kvcfg.storeName) #analyst_cfg = pgx_config.GraphConfigBuilder.forNosql().setName(opg.getGraphName()).setHosts(jutil.Arrays.asList(kvcfg.helperHosts)).setStoreName(kvcfg.storeName) elif dbtype == "hbase": quorum = opg.getConfiguration().get("hbase.zookeeper.quorum") clientPort = int(opg.getConfiguration().get("hbase.zookeeper.property.clientPort")) analyst_cfg = pgx_config.PgHbaseGraphConfigBuilder().setName(opg.getGraphName()).setZkQuorum(quorum).setZkClientPort(clientPort).setSplitsPerRegion(opg.numSplitsPerRegion).setZkSessionTimeout(1000000) for vp in vertexProps: ptype = pgx_types.PropertyType.STRING defaultval = vp[0] if vp[0] in props: if vp[1] == "java.lang.Integer": ptype = pgx_types.PropertyType.INTEGER defaultval = 0 elif vp[1] == "java.lang.Double": ptype = pgx_types.PropertyType.DOUBLE defaultval = 1.0 elif vp[1] == 
"java.lang.Float": ptype = pgx_types.PropertyType.FLOAT defaultval = 1.0 elif vp[1] == "java.lang.Long": ptype = pgx_types.PropertyType.LONG defaultval = 0L analyst_cfg.addNodeProperty(vp[0], ptype, defaultval) for ep in edgeProps: ptype = pgx_types.PropertyType.STRING defaultval = ep[0] if ep[0] in props: if ep[1] == "java.lang.Integer": ptype = pgx_types.PropertyType.INTEGER defaultval = 0 elif ep[1] == "java.lang.Double": ptype = pgx_types.PropertyType.DOUBLE defaultval = 1.0 elif ep[1] == "java.lang.Float": ptype = pgx_types.PropertyType.FLOAT defaultval = 0.0 elif ep[1] == "java.lang.Long": ptype = pgx_types.PropertyType.LONG defaultval = 0L analyst_cfg.addEdgeProperty(ep[0], ptype, defaultval) analyst_cfg = analyst_cfg.build() if dbtype == "nosql": opg = nosql.OraclePropertyGraph.getInstance(analyst_cfg) elif dbtype == "hbase": opg = hbase.OraclePropertyGraph.getInstance(analyst_cfg) return opg def analyst(url=None, sessionLabel=None, *properties): """ Creates an in-memory analyst for the graph. Properties loaded into the analyst must be named: e.g. 
a = analyst("label", "weight") """ global opg opg = analyst_config(properties) instance = None if url: instance = pgx.getInstance(url) else: instance = pgx.getInstance() if url == None: instance.startEngine() session = None if sessionLabel: session = instance.createSession(sessionLabel) else: session = instance.createSession("my-session") pgxGraph = session.readGraphWithProperties(opg.getConfig()) a = session.createAnalyst() a = Janalyst(pgxGraph,a) return a def subgraph_from_filter(analyst_obj, filter_exp): """ Create a subgraph on this grapha analyst using a PGX filter expression """ subgraph = a.graph.filter(filter_exp) return Janalyst(subgraph, a.analyst_context) #Janalyst(new_ac) def saveAnalyst(analyst_obj, filename, overwrite=True): """ Save the current analyst graph as a GraphML file """ core = pgx.getCore() ac = analyst_obj.analyst_context core.storeGraphWithProperties(ac.getSessionId(), ac.getGraphName(), pgx_config.Format.GRAPHML, filename, overwrite) def shutdown(): """ Shutdown PGX """ global opg if opg is not None: opg.shutdown() #shutdownJVM() if __name__ == "__main__": if len(sys.argv) > 1: if sys.argv[1] == "test" or sys.argv[1] == "testclean": print "entering test" connectONDB("marvel", "kvstore", ["localhost:5000"]) #connectHBase("marvel", "localhost") if sys.argv[1] == "testclean": print "entering testclean" dropVertexIndex("name") dropEdgeIndex("appearedWith") clearRepository() marvel_1 = csv.reader(open("hero-network.csv"), delimiter=',', quotechar='"') for hero in marvel_1: hedge = getEdgeByEndpoints("appearedWith", hero[0], hero[1]) if hedge == None: hedge = addEdgeByName("appearedWith", hero[0], hero[1]) hedge.setProperty("weight", 1.0) else: weight = hedge.getProperty("weight") weight = weight.floatValue() + 1.0 hedge.setProperty("weight", weight) opg.commit() createVertexIndex("name") createEdgeIndex("appearedWith") print "edges", countE() print "searching index" for v in searchVertexIndex("name", "IRON*"): print v a = analyst(None, 
"marvel-analysis", "name", "weight") #example against a jetty-hosted PGX instance #a = analyst("http://scott:tiger@localhost:8080/pgx", "marvel-analysis", "name", "weight") print a.countTriangles(), "triangles" print "in degree centrality" ind = a.inDegreeCentrality().topK(10) for ddist in ind: v = opg.getVertex(ddist.getKey().getId()) print v.getProperty("name"), ddist.getValue() print "10 largest PR values" for tk in a.pagerank().getTopKValues(10): v = opg.getVertex(tk.getKey().getId()) print v.getProperty("name"), tk.getValue() print "triangles before subgraph", a.countTriangles() a = subgraph_from_filter(a, fexp.EdgeFilter("edge.weight > 1.0")) print "triangles after subgraph", a.countTriangles() shutdown()
Module variables
var KEYWORDS
var opg
var pyopg_cp
Functions
def E(
eid)
Shorthand to fetch an edge by id
def E(eid):
    """
    Look up a single edge by its ID.

    Thin alias for ``opg.getEdge(eid)`` on the module-level graph handle;
    the ID is passed through unchanged.
    """
    edge = opg.getEdge(eid)
    return edge
def V(
vid)
Shorthand to fetch a vertex by id
def V(vid):
    """
    Look up a single vertex by its ID.

    Thin alias for ``opg.getVertex(vid)`` on the module-level graph handle;
    the ID is passed through unchanged.
    """
    vertex = opg.getVertex(vid)
    return vertex
def addEdge(
eid, out_vid, in_vid, label)
Add a labeled edge to the graph.
def addEdge(eid, out_vid, in_vid, label):
    """
    Create a labeled edge between two vertices looked up by ID.

    eid     -- ID for the new edge
    out_vid -- ID of the vertex the edge leaves
    in_vid  -- ID of the vertex the edge enters
    label   -- edge label

    All three IDs are validated with _checkIDType before the graph is
    touched. Returns whatever ``opg.addEdge`` returns.
    """
    for candidate in (eid, out_vid, in_vid):
        _checkIDType(candidate)
    tail = opg.getVertex(out_vid)
    head = opg.getVertex(in_vid)
    return opg.addEdge(eid, tail, head, label)
def addEdgeByName(
ename, out_vname, in_vname)
Add an edge to the graph by name. Takes names for the edge, out vertex and in vertex as input.
def addEdgeByName(ename, out_vname, in_vname):
    """
    Connect two vertices identified by their 'name' property.

    ename     -- label for the new edge
    out_vname -- name of the vertex the edge leaves
    in_vname  -- name of the vertex the edge enters

    Both names are looked up first; any vertex that is not found is then
    created with addVertexByName. The edge gets a random 32-bit ID.
    Returns the new edge.
    """
    source = getVertexByName(out_vname)
    target = getVertexByName(in_vname)
    if not source:
        source = addVertexByName(out_vname)
    if not target:
        target = addVertexByName(in_vname)
    # IDs are round-tripped through str() to obtain plain Python longs.
    source_id, target_id = [long(str(vtx.getId())) for vtx in (source, target)]
    return addEdge(random.getrandbits(32), source_id, target_id, ename)
def addVertex(
vid)
Add a vertex with ID vid to the graph
def addVertex(vid):
    """
    Create a vertex with the given ID.

    vid must be a Python int or long; ints are widened to long before the
    call. Any other type raises OPGDALException. Returns the vertex
    returned by ``opg.addVertex``.
    """
    if isinstance(vid, long):
        return opg.addVertex(vid)
    if isinstance(vid, int):
        return opg.addVertex(long(vid))
    raise OPGDALException("IDs must be ints or longs")
def addVertexByName(
name)
Add a new vertex to the graph with a property 'name'
def addVertexByName(name):
    """
    Create a vertex carrying a 'name' property.

    The vertex ID is a random 32-bit number. Returns the new vertex.
    """
    new_id = random.getrandbits(32)
    vertex = opg.addVertex(new_id)
    vertex.setProperty("name", name, True)
    return vertex
def analyst(
url=None, sessionLabel=None, *properties)
Creates an in-memory analyst for the graph. The url and sessionLabel arguments come first; the properties loaded into the analyst must be named after them, e.g. a = analyst(None, "my-session", "label", "weight")
def analyst(url=None, sessionLabel=None, *properties):
    """
    Create an in-memory PGX analyst for the connected graph.

    url          -- URL of a PGX instance to use; when it is None the
                    default instance is used and its engine is started
    sessionLabel -- label for the PGX session ("my-session" if omitted)
    properties   -- names of the vertex/edge properties to load with their
                    sampled numeric type (see analyst_config)

    Because url and sessionLabel come first positionally, property names
    must follow them, e.g.

        a = analyst(None, "my-session", "label", "weight")

    Rebinds the module-level ``opg`` to the graph returned by
    analyst_config and returns a Janalyst wrapping the loaded PGX graph.
    """
    global opg
    opg = analyst_config(properties)
    if url:
        instance = pgx.getInstance(url)
    else:
        instance = pgx.getInstance()
    # The engine is started only when no url was supplied at all.
    if url is None:
        instance.startEngine()
    session = instance.createSession(sessionLabel if sessionLabel else "my-session")
    pgxGraph = session.readGraphWithProperties(opg.getConfig())
    # NOTE(review): Janalyst is not defined or imported in this module (the
    # janalyst import is commented out) -- confirm where it comes from.
    return Janalyst(pgxGraph, session.createAnalyst())
def analyst_config(
props=[])
Create an in-memory analyst configuration on this graph for complex analyses
def _pgxPropertySpec(name, java_type, typed_names, float_default):
    """Return the (PropertyType, default value) pair used to register a property."""
    if name in typed_names:
        if java_type == "java.lang.Integer":
            return pgx_types.PropertyType.INTEGER, 0
        if java_type == "java.lang.Double":
            return pgx_types.PropertyType.DOUBLE, 1.0
        if java_type == "java.lang.Float":
            return pgx_types.PropertyType.FLOAT, float_default
        if java_type == "java.lang.Long":
            return pgx_types.PropertyType.LONG, long(0)
    # Unlisted or unrecognised properties are loaded as strings whose
    # default value is the property name itself.
    return pgx_types.PropertyType.STRING, name


def analyst_config(props=()):
    """
    Create an in-memory analyst configuration on this graph for complex
    analyses.

    props -- names of the properties to load with their sampled numeric
             type; every other discovered property is loaded as a string

    Property names and types come from getAllVertexPropertyKeys(True) and
    getAllEdgePropertyKeys(True). Rebinds the module-level ``opg`` to a
    graph instance built from the new configuration and returns it.

    Raises OPGDALException when ``opg`` is not an ONDB or HBase graph.
    """
    global opg
    dbtype = _checkDBType()
    if dbtype not in ("nosql", "hbase"):
        # Without this guard the code below would fail on a None builder.
        raise OPGDALException("analyst_config requires an ONDB or HBase connection")
    vertexProps = getAllVertexPropertyKeys(True)
    edgeProps = getAllEdgePropertyKeys(True)
    if dbtype == "nosql":
        kvcfg = opg.kVStoreConfig
        analyst_cfg = pgx_config.PgNosqlGraphConfigBuilder()
        analyst_cfg = analyst_cfg.setName(opg.getGraphName())
        analyst_cfg = analyst_cfg.setHosts(jutil.Arrays.asList(kvcfg.helperHosts))
        analyst_cfg = analyst_cfg.setStoreName(kvcfg.storeName)
        #analyst_cfg = pgx_config.GraphConfigBuilder.forNosql().setName(opg.getGraphName()).setHosts(jutil.Arrays.asList(kvcfg.helperHosts)).setStoreName(kvcfg.storeName)
    else:
        quorum = opg.getConfiguration().get("hbase.zookeeper.quorum")
        clientPort = int(opg.getConfiguration().get("hbase.zookeeper.property.clientPort"))
        analyst_cfg = pgx_config.PgHbaseGraphConfigBuilder()
        analyst_cfg = analyst_cfg.setName(opg.getGraphName())
        analyst_cfg = analyst_cfg.setZkQuorum(quorum)
        analyst_cfg = analyst_cfg.setZkClientPort(clientPort)
        analyst_cfg = analyst_cfg.setSplitsPerRegion(opg.numSplitsPerRegion)
        analyst_cfg = analyst_cfg.setZkSessionTimeout(1000000)
    # NOTE(review): FLOAT defaults differ between vertices (1.0) and edges
    # (0.0); preserved as found -- confirm this is intended.
    for vp in vertexProps:
        ptype, defaultval = _pgxPropertySpec(vp[0], vp[1], props, 1.0)
        analyst_cfg.addNodeProperty(vp[0], ptype, defaultval)
    for ep in edgeProps:
        ptype, defaultval = _pgxPropertySpec(ep[0], ep[1], props, 0.0)
        analyst_cfg.addEdgeProperty(ep[0], ptype, defaultval)
    analyst_cfg = analyst_cfg.build()
    if dbtype == "nosql":
        opg = nosql.OraclePropertyGraph.getInstance(analyst_cfg)
    else:
        opg = hbase.OraclePropertyGraph.getInstance(analyst_cfg)
    return opg
def clearRepository(
)
This method removes all vertices and edges from this property graph instance.
def clearRepository():
    """
    Empty the connected property graph.

    Delegates to ``opg.clearRepository()``, which removes all vertices and
    edges from this property graph instance.
    """
    graph = opg
    graph.clearRepository()
def connect(
graph_name, store_name, hosts)
def connect(graph_name, store_name, hosts):
    """
    Connect to a graph stored in Oracle NoSQL Database.

    graph_name -- name of the property graph
    store_name -- name of the KV store
    hosts      -- a host string or a list of host strings

    Same operation as connectONDB, to which this delegates so the two
    entry points cannot drift apart. Rebinds the module-level ``opg`` and
    returns it.
    """
    return connectONDB(graph_name, store_name, hosts)
def connectHBase(
graph_name, zk_quorum, client_port='2181')
def connectHBase(graph_name, zk_quorum, client_port="2181"):
    """
    Connect to a graph stored in HBase.

    graph_name  -- name of the property graph
    zk_quorum   -- value for hbase.zookeeper.quorum
    client_port -- ZooKeeper client port, as a string or an int

    The scanner and ZooKeeper session timeouts are both set to "1000000".
    Rebinds the module-level ``opg`` and returns it.
    """
    global opg
    hbconf = hadoop_hbase.HBaseConfiguration.create()
    hbconf.set("hbase.zookeeper.quorum", zk_quorum)
    # The configuration stores strings, so an integer port is converted.
    hbconf.set("hbase.zookeeper.property.clientPort", str(client_port))
    hbconf.set("hbase.client.scanner.timeout.period", "1000000")
    hbconf.set("zookeeper.session.timeout", "1000000")
    hconn = hbase_client.HConnectionManager.createConnection(hbconf)
    opg = hbase.OraclePropertyGraph.getInstance(hbconf, hconn, graph_name)
    return opg
def connectONDB(
graph_name, store_name, hosts)
def connectONDB(graph_name, store_name, hosts):
    """
    Connect to a graph stored in Oracle NoSQL Database.

    graph_name -- name of the property graph
    store_name -- name of the KV store
    hosts      -- a single host string, or a list/tuple of host strings

    Rebinds the module-level ``opg`` and returns it.
    """
    global opg
    # A tuple used to be wrapped whole as if it were one host entry.
    if isinstance(hosts, (list, tuple)):
        helper_hosts = list(hosts)
    else:
        helper_hosts = [hosts]
    kconfig = kv.KVStoreConfig(store_name, helper_hosts)
    opg = nosql.OraclePropertyGraph.getInstance(kconfig, graph_name)
    return opg
def countE(
*keys)
Return the edge count for the graph.
def countE(*keys):
    """
    Count edges by walking the iterator returned by ``opg.getEdges(keys)``.

    keys is forwarded as a tuple; with no keys it is the empty tuple.
    """
    matches = OPGIterator(opg.getEdges(keys).iterator())
    return sum(1 for _ in matches)
def countV(
*keys)
Return the vertex count for the graph.
def countV(*keys):
    """
    Count vertices by walking the iterator returned by
    ``opg.getVertices(keys)``.

    keys is forwarded as a tuple; with no keys it is the empty tuple.
    """
    matches = OPGIterator(opg.getVertices(keys).iterator())
    return sum(1 for _ in matches)
def createEdgeIndex(
*index_keys)
Create automatic indices on all keys in index keys.
def createEdgeIndex(*index_keys):
    """
    Create key indices on edges for every name in index_keys.

    The graph's default index parameters are first replaced with the fixed
    OracleIndexParameters.buildFS settings below (index location
    "./lucene-index").
    """
    params = text.OracleIndexParameters.buildFS(
        1, 4, 10000, 50000, True, "./lucene-index")
    opg.setDefaultIndexParameters(params)
    element_class = JClass("com.tinkerpop.blueprints.Edge")
    opg.createKeyIndex(index_keys, element_class)
def createVertexIndex(
*index_keys)
Create automatic indices on all keys in index keys.
def createVertexIndex(*index_keys):
    """
    Create key indices on vertices for every name in index_keys.

    The graph's default index parameters are first replaced with the fixed
    OracleIndexParameters.buildFS settings below (index location
    "./lucene-index").
    """
    params = text.OracleIndexParameters.buildFS(
        1, 4, 10000, 50000, True, "./lucene-index")
    opg.setDefaultIndexParameters(params)
    element_class = JClass("com.tinkerpop.blueprints.Vertex")
    opg.createKeyIndex(index_keys, element_class)
def dropAllIndices(
)
Drops all indices on the graph.
def dropAllIndices():
    """
    Remove every index from the connected graph.

    Delegates to ``opg.dropAllIndices()``.
    """
    graph = opg
    graph.dropAllIndices()
def dropAllIndicies(
)
Drops all existing indices
def dropAllIndicies():
    """
    Remove every index from the connected graph.

    Performs the same ``opg.dropAllIndices()`` call as dropAllIndices;
    only the spelling of the function name differs.
    """
    graph = opg
    graph.dropAllIndices()
def dropEdgeIndex(
index_key)
Drops the specified index.
def dropEdgeIndex(index_key):
    """
    Drop the edge key index for index_key.

    Delegates to ``opg.dropKeyIndex`` with the Blueprints Edge class.
    """
    element_class = JClass("com.tinkerpop.blueprints.Edge")
    opg.dropKeyIndex(index_key, element_class)
def dropVertexIndex(
index_key)
Drops the specified index.
def dropVertexIndex(index_key):
    """
    Drop the vertex key index for index_key.

    Delegates to ``opg.dropKeyIndex`` with the Blueprints Vertex class.
    """
    element_class = JClass("com.tinkerpop.blueprints.Vertex")
    opg.dropKeyIndex(index_key, element_class)
def exportFlatFiles(
v_filename, e_filename)
Write the current graph as vertex and edge files optimized for bulk loading
def exportFlatFiles(v_filename, e_filename):
    """
    Write the current graph as vertex and edge files optimized for bulk
    loading.

    v_filename -- destination for the vertex file
    e_filename -- destination for the edge file

    Raises OPGDALException when no ONDB or HBase graph is connected.
    """
    dbtype = _checkDBType()
    # The utilities class is resolved here because the module never defines
    # ``hloader`` and its ``loader`` name ends up bound to the HBase class.
    if dbtype == "nosql":
        utils = JClass('oracle.pg.nosql.OraclePropertyGraphUtils')
    elif dbtype == "hbase":
        utils = JClass('oracle.pg.hbase.OraclePropertyGraphUtils')
    else:
        raise OPGDALException("exportFlatFiles requires an ONDB or HBase connection")
    utils.exportFlatFiles(opg, v_filename, e_filename, False)
def exportGML(
filename)
Write the current graph as a GML file
def exportGML(filename):
    """
    Write the current graph as a GML file.

    The PrintStream the Java utility takes is pointed at the null device
    and closed afterwards.

    Raises OPGDALException when no ONDB or HBase graph is connected.
    """
    dbtype = _checkDBType()
    # The utilities class is resolved here because the module never defines
    # ``hloader`` and its ``loader`` name ends up bound to the HBase class.
    if dbtype == "nosql":
        utils = JClass('oracle.pg.nosql.OraclePropertyGraphUtils')
    elif dbtype == "hbase":
        utils = JClass('oracle.pg.hbase.OraclePropertyGraphUtils')
    else:
        raise OPGDALException("exportGML requires an ONDB or HBase connection")
    sink = pstream(os.devnull)
    try:
        utils.exportGML(opg, filename, sink)
    finally:
        sink.close()
def exportGraphML(
filename)
Write the current graph as a GraphML file
def exportGraphML(filename):
    """
    Write the current graph as a GraphML file.

    The PrintStream the Java utility takes is pointed at the null device
    and closed afterwards.

    Raises OPGDALException when no ONDB or HBase graph is connected.
    """
    dbtype = _checkDBType()
    # The utilities class is resolved here because the module never defines
    # ``hloader`` and its ``loader`` name ends up bound to the HBase class.
    if dbtype == "nosql":
        utils = JClass('oracle.pg.nosql.OraclePropertyGraphUtils')
    elif dbtype == "hbase":
        utils = JClass('oracle.pg.hbase.OraclePropertyGraphUtils')
    else:
        raise OPGDALException("exportGraphML requires an ONDB or HBase connection")
    sink = pstream(os.devnull)
    try:
        utils.exportGraphML(opg, filename, sink)
    finally:
        sink.close()
def exportGraphSON(
filename)
Write the current graph as a GraphSON file
def exportGraphSON(filename):
    """
    Write the current graph as a GraphSON file.

    The PrintStream the Java utility takes is pointed at the null device
    and closed afterwards.

    Raises OPGDALException when no ONDB or HBase graph is connected.
    """
    dbtype = _checkDBType()
    # The utilities class is resolved here because the module never defines
    # ``hloader`` and its ``loader`` name ends up bound to the HBase class.
    if dbtype == "nosql":
        utils = JClass('oracle.pg.nosql.OraclePropertyGraphUtils')
    elif dbtype == "hbase":
        utils = JClass('oracle.pg.hbase.OraclePropertyGraphUtils')
    else:
        raise OPGDALException("exportGraphSON requires an ONDB or HBase connection")
    sink = pstream(os.devnull)
    try:
        utils.exportGraphSON(opg, filename, sink)
    finally:
        sink.close()
def getAllEdgePropertyKeys(
getTypes=False, sample=0.1)
Return the set of all property keys defined on edges. Passing getTypes as True returns a set of tuples with the key and the value type.
def getAllEdgePropertyKeys(getTypes=False, sample=0.1):
    """
    Collect the property keys found on the graph's edges.

    getTypes -- when False, return a set of key names gathered from every
                edge; when True, return a set of (key, value type name)
                tuples gathered from a random sample of edges
    sample   -- probability with which each edge is inspected when
                getTypes is True
    """
    found = set()
    for edge in OPGIterator(opg.getEdges().iterator()):
        if not getTypes:
            found.update(edge.getPropertyKeys())
            continue
        if not random.random() <= sample:
            continue
        names = list(OPGIterator(edge.getPropertyKeys().iterator()))
        found.update(
            (key, type(edge.getProperty(key)).__name__) for key in names)
    return found
def getAllVertexPropertyKeys(
getTypes=False, sample=0.1)
Return the set of all property keys defined on vertices.
def getAllVertexPropertyKeys(getTypes=False, sample=0.1):
    """
    Collect the property keys found on the graph's vertices.

    getTypes -- when False, return a set of key names gathered from every
                vertex; when True, return a set of (key, value type name)
                tuples gathered from a random sample of vertices
    sample   -- probability with which each vertex is inspected when
                getTypes is True
    """
    found = set()
    for vertex in OPGIterator(opg.getVertices().iterator()):
        if not getTypes:
            found.update(vertex.getPropertyKeys())
            continue
        if not random.random() <= sample:
            continue
        names = list(OPGIterator(vertex.getPropertyKeys().iterator()))
        found.update(
            (key, type(vertex.getProperty(key)).__name__) for key in names)
    return found
def getEdgeByEndpoints(
edge_label, outv_label, inv_label)
Fetch an edge by its out and in-vertex names.
def getEdgeByEndpoints(edge_label, outv_label, inv_label):
    """
    Fetch an edge by its label and the names of its out and in vertices.

    edge_label -- label of the edge to find
    outv_label -- 'name' of the vertex the edge leaves
    inv_label  -- 'name' of the vertex the edge enters

    Returns the first matching outgoing edge, or None when either vertex
    is unknown or no such edge exists.
    """
    outv = getVertexByName(outv_label)
    inv = getVertexByName(inv_label)
    # Explicit guard instead of catching AttributeError around the whole
    # scan, which also hid unrelated attribute errors.
    if outv is None or inv is None:
        return None
    # The label is passed as a one-element list to satisfy the Java
    # method signature.
    oedges = OPGIterator(outv.getEdges(blueprints.Direction.OUT, [edge_label]).iterator())
    for e in oedges:
        if inv == e.getInVertex():
            return e
    return None
def getEdgeDict(
eid)
Get the properties of an edge as a dict.
def getEdgeDict(eid):
    """
    Get the properties of an edge as a dict.

    eid is validated with _checkIDType and converted to long before the
    lookup. Returns a dict mapping each property key of the edge to its
    value.
    """
    d = {}
    _checkIDType(eid)
    e = opg.getEdge(long(eid))
    # The loop previously read an undefined name ``v`` and always failed.
    for k in e.getPropertyKeys():
        d[k] = e.getProperty(k)
    return d
def getEdges(
*keys)
Gets all edges with a property corresponding to key
def getEdges(*keys):
    """
    Iterate over the edges returned by ``opg.getEdges(keys)``.

    keys is forwarded as a tuple. Returns an OPGIterator over the result.
    """
    java_iter = opg.getEdges(keys).iterator()
    return OPGIterator(java_iter)
def getGraphName(
)
Returns the graph name.
def getGraphName():
    """
    Report the name of the connected graph, as given by
    ``opg.getGraphName()``.
    """
    name = opg.getGraphName()
    return name
def getVertexByName(
name)
Retrieve a vertex by the property 'name'
def getVertexByName(name):
    """
    Retrieve a vertex by the property 'name'.

    Returns the first vertex whose 'name' property matches, or None when
    there is no such vertex.
    """
    try:
        return OPGIterator(opg.getVertices("name", name).iterator()).next()
    except StopIteration:
        # Only "no match" maps to None. The former bare ``except:`` also
        # swallowed unrelated errors and KeyboardInterrupt.
        return None
def getVertexDict(
vid)
Get the properties of a vertex as a dict.
def getVertexDict(vid):
    """
    Return a vertex's properties as a dict of key -> value.

    vid is validated with _checkIDType and then passed unchanged to
    ``opg.getVertex``.
    """
    _checkIDType(vid)
    vertex = opg.getVertex(vid)
    return {key: vertex.getProperty(key) for key in vertex.getPropertyKeys()}
def getVertices(
*keys)
Gets all vertices with a property corresponding to one of the keys
def getVertices(*keys):
    """
    Iterate over the vertices returned by ``opg.getVertices(keys)``.

    keys is forwarded as a tuple. Returns an OPGIterator over the result.
    """
    java_iter = opg.getVertices(keys).iterator()
    return OPGIterator(java_iter)
def getVerticesByValue(
key, value, wildcard=False)
Return an iterable to all the vertices in the graph that have a particular key/value property
def getVerticesByValue(key, value, wildcard=False):
    """
    Iterate over the vertices returned by
    ``opg.getVertices(key, value, wildcard)``.

    key      -- property name
    value    -- property value to look for
    wildcard -- forwarded unchanged as the third argument

    Returns an OPGIterator over the result.
    """
    java_iter = opg.getVertices(key, value, wildcard).iterator()
    return OPGIterator(java_iter)
def importFlatFiles(
v_filename, e_filename, dop=4)
Bulk load a graph using vertex and edge files. dop controls the number of JVM threads used for loading.
def importFlatFiles(v_filename, e_filename, dop=4):
    """
    Bulk load a graph from a vertex file and an edge file.

    dop controls the number of JVM threads used for loading. The data loader
    class is chosen from the store type reported by _checkDBType; when that
    is neither "nosql" nor "hbase" nothing is loaded.
    """
    global opg
    store_kind = _checkDBType()
    if store_kind == "nosql":
        loader_class = bulkloader
    elif store_kind == "hbase":
        loader_class = hbulkloader
    else:
        return
    loader_class.getInstance().loadData(opg, v_filename, e_filename, dop)
def importGML(
filename)
Load a GML file into the database
def importGML(filename):
    """
    Load a GML file into the database.

    The importer class is chosen from the store type reported by
    _checkDBType; when that is neither "nosql" nor "hbase" nothing is
    imported. The PrintStream handed to the importer is sent to /dev/null.
    """
    global opg
    dbtype = _checkDBType()
    # Resolve the utility class here instead of relying on module globals:
    # `hloader` is not defined in the module header, and `loader` is rebound
    # there to the HBase class, so neither branch could use them safely.
    if dbtype == "nosql":
        utils = JClass('oracle.pg.nosql.OraclePropertyGraphUtils')
    elif dbtype == "hbase":
        utils = JClass('oracle.pg.hbase.OraclePropertyGraphUtils')
    else:
        return
    sink = pstream("/dev/null")
    try:
        utils.importGML(opg, filename, sink)
    finally:
        # Release the stream opened for this call.
        sink.close()
def importGraphML(
filename)
Load a GraphML file into the database
def importGraphML(filename):
    """
    Load a GraphML file into the database.

    The importer class is chosen from the store type reported by
    _checkDBType; when that is neither "nosql" nor "hbase" nothing is
    imported. The PrintStream handed to the importer is sent to /dev/null.
    """
    global opg
    dbtype = _checkDBType()
    # Resolve the utility class here instead of relying on module globals:
    # `hloader` is not defined in the module header, and `loader` is rebound
    # there to the HBase class, so neither branch could use them safely.
    if dbtype == "nosql":
        utils = JClass('oracle.pg.nosql.OraclePropertyGraphUtils')
    elif dbtype == "hbase":
        utils = JClass('oracle.pg.hbase.OraclePropertyGraphUtils')
    else:
        return
    sink = pstream("/dev/null")
    try:
        utils.importGraphML(opg, filename, sink)
    finally:
        # Release the stream opened for this call.
        sink.close()
def importGraphSON(
filename)
Load a GraphSON file into the database
def importGraphSON(filename):
    """
    Load a GraphSON file into the database.

    The importer class is chosen from the store type reported by
    _checkDBType; when that is neither "nosql" nor "hbase" nothing is
    imported. The PrintStream handed to the importer is sent to /dev/null.
    """
    global opg
    dbtype = _checkDBType()
    # Resolve the utility class here instead of relying on module globals:
    # `hloader` is not defined in the module header, and `loader` is rebound
    # there to the HBase class, so neither branch could use them safely.
    if dbtype == "nosql":
        utils = JClass('oracle.pg.nosql.OraclePropertyGraphUtils')
    elif dbtype == "hbase":
        utils = JClass('oracle.pg.hbase.OraclePropertyGraphUtils')
    else:
        return
    sink = pstream("/dev/null")
    try:
        utils.importGraphSON(opg, filename, sink)
    finally:
        # Release the stream opened for this call.
        sink.close()
def saveAnalyst(
analyst_obj, filename, overwrite=True)
Save the current analyst graph as a GraphML file
def saveAnalyst(analyst_obj, filename, overwrite=True):
    """
    Save the current analyst graph as a GraphML file.

    The session id and graph name are read from analyst_obj.analyst_context
    and handed, together with filename and overwrite, to the PGX core's
    storeGraphWithProperties using the GRAPHML format.
    """
    pgx_core = pgx.getCore()
    context = analyst_obj.analyst_context
    session_id = context.getSessionId()
    graph_name = context.getGraphName()
    pgx_core.storeGraphWithProperties(
        session_id,
        graph_name,
        pgx_config.Format.GRAPHML,
        filename,
        overwrite,
    )
def searchEdgeIndex(
key, term)
Search the index of a given key for the term.
def searchEdgeIndex(key, term):
    """
    Search the edges for term under the given key.

    Calls opg.getEdges with its third argument set to True and wraps the
    result in an OPGIterator.
    """
    global opg
    hits = opg.getEdges(key, term, True)
    return OPGIterator(hits.iterator())
def searchVertexIndex(
key, term)
Search the index of a given key for the term.
def searchVertexIndex(key, term):
    """
    Search the vertices for term under the given key.

    Calls opg.getVertices with its third argument set to True and wraps the
    result in an OPGIterator.
    """
    global opg
    hits = opg.getVertices(key, term, True)
    return OPGIterator(hits.iterator())
def setEdgeProperty(
eid, label, value)
Convenience method for adding properties to edges.
def setEdgeProperty(eid, label, value):
    """
    Convenience method for setting a property on the edge with ID eid.

    eid is validated with _checkIDType and converted with long() before the
    edge is fetched; the property is then set with a trailing True flag.
    """
    global opg
    _checkIDType(eid)
    edge_id = long(eid)
    opg.getEdge(edge_id).setProperty(label, value, True)
def setVertexProperty(
vid, label, value)
Sets a property on the vertex with ID vid. Note that vertices can have properties added directly to them, this is a convenience method.
def setVertexProperty(vid, label, value):
    """
    Convenience method for setting a property on the vertex with ID vid.

    Vertices can also have properties added to them directly. vid is
    validated with _checkIDType and converted with long() before the vertex
    is fetched; the property is then set with a trailing True flag.
    """
    global opg
    _checkIDType(vid)
    vertex_id = long(vid)
    opg.getVertex(vertex_id).setProperty(label, value, True)
def shutdown(
)
Shut down the current graph context (opg), if one is set.
def shutdown():
    """
    Call shutdown() on the module-level graph context, if one is set.

    Does nothing when opg is None.
    """
    global opg
    if opg is None:
        return
    opg.shutdown()
def subgraph_from_filter(
analyst_obj, filter_exp)
Create a subgraph on this graph analyst using a PGX filter expression
def subgraph_from_filter(analyst_obj, filter_exp):
    """
    Create a subgraph on this graph analyst using a PGX filter expression.

    Filters analyst_obj.graph with filter_exp and returns a new Janalyst
    built from the filtered graph and the analyst context of analyst_obj.
    """
    # Use the analyst_obj parameter; the previous code referenced an
    # undefined name `a` and failed with NameError.
    subgraph = analyst_obj.graph.filter(filter_exp)
    # NOTE(review): Janalyst is not defined in the visible part of this
    # module (the janalyst import in the header is commented out) - confirm
    # it is in scope.
    return Janalyst(subgraph, analyst_obj.analyst_context)
def unparseMetric(
c)
Simple function for mapping iterators of Java Long IDs and metrics into Vertices and python floats.
def unparseMetric(c):
    """
    Turn a key/value entry of (vertex ID, metric) into (vertex, float).

    The key of c is looked up with opg.getVertex; the value is converted by
    taking its toString() and parsing that as a Python float.
    """
    global opg
    vertex_id, raw_metric = c.getKey(), c.getValue()
    return opg.getVertex(vertex_id), float(raw_metric.toString())
Classes
class OPGAnalystException
class OPGAnalystException(Exception):
    """
    Exception type for analyst-related errors; carries a payload in `value`.
    """

    def __init__(self, value):
        # Keep the payload so callers can inspect it after catching.
        self.value = value

    def __str__(self):
        # The printable form is the repr of the payload.
        payload = self.value
        return repr(payload)
Ancestors (in MRO)
- OPGAnalystException
- exceptions.Exception
- exceptions.BaseException
- __builtin__.object
Class variables
var args
var message
Instance variables
var value
Methods
def __init__(
self, value)
def __init__(self, value):
    """
    Store the error payload on the instance as `value`.
    """
    self.value = value
class OPGDALException
class OPGDALException(Exception):
    """
    Exception type raised by the DAL helpers (for example by _checkIDType);
    carries a payload in `value`.
    """

    def __init__(self, value):
        # Keep the payload so callers can inspect it after catching.
        self.value = value

    def __str__(self):
        # The printable form is the repr of the payload.
        payload = self.value
        return repr(payload)
Ancestors (in MRO)
- OPGDALException
- exceptions.Exception
- exceptions.BaseException
- __builtin__.object
Class variables
var args
var message
Instance variables
var value
Methods
def __init__(
self, value)
def __init__(self, value):
    """
    Store the error payload on the instance as `value`.
    """
    self.value = value
class OPGIterator
class OPGIterator:
    """
    Python-side iterator over an object exposing hasNext() and next().

    Iteration ends with StopIteration once hasNext() compares equal to 0.
    """

    def __init__(self, jitter):
        # The wrapped iterator; only its hasNext() and next() are used.
        self.jitter = jitter

    def __iter__(self):
        return self

    def next(self):
        source = self.jitter
        if source.hasNext() != 0:
            return source.next()
        raise StopIteration
Ancestors (in MRO)
Instance variables
var jitter
Methods
def __init__(
self, jitter)
def __init__(self, jitter):
    """
    Keep a reference to the wrapped iterator as `jitter`.
    """
    self.jitter = jitter
def next(
self)
def next(self):
    """
    Return the next element of the wrapped iterator.

    Raises StopIteration once the wrapped iterator's hasNext() compares
    equal to 0.
    """
    source = self.jitter
    if source.hasNext() != 0:
        return source.next()
    raise StopIteration