Stochastic block model
とある身内のご要望にお応えして、stochastic block model (SBM)を生成するコードを載せます。
- Poisson次数分布の、一番基本的なSBMを生成します。
- Affinity matrix (p_rs)の要素を陽に指定して、任意のブロック構造を作れます。
cin
とcout
は、rescaleされたaffinity matrixの値:cin = Ntot*pin
,cout = Ntot*pout
。- Self-loops, multi-edgesはありません。
function StructuredPoissonSBM
の部分では、グラフが非連結になっているかもしれないので、function LinksConnected
でつながっている部分を抽出します。- 出力はedgelistだけですが、
Nblock
で各ブロックの頂点数がわかります。(非連結成分を除いた後は、Nblockinput
から変わっているかもしれないので、overlapなどを測るときには必要。) - 元々自分で使うためだけのコードだったので、ツッコミどころがあるのはご勘弁。
#/////////////////////// # Generate Edgelist #/////////////////////// function StructuredPoissonSBM(Nblock,prs) Ntot = sum(Nblock) B = length(Nblock) Ktot = Ntot # Ntot is an offset, just for safety. Ktot is usually larger than the actual total degree without this offset already, because Ktot has nonzero prob. for diagonal elements. for r = 1:size(prs,1) for s = 1:size(prs,1) Ktot += round(Int64, prs[r,s]*Nblock[r]*Nblock[s]) end end links = zeros(Int64, Ktot,2) # intra-links offset = 0 linkoffset = 0 for k = 1:B offset = sum(Nblock[1:k-1]) Lktot = round(Int64,prs[k,k]*Nblock[k]^2) linkind = rand(1:Nblock[k]^2,Lktot) linksub = hcat(ind2sub((Nblock[k],Nblock[k]),linkind)[1],ind2sub((Nblock[k],Nblock[k]),linkind)[2]) boolean = trues(size(linksub,1)) for row = 1:size(linksub,1) linksub[row,1] >= linksub[row,2] ? boolean[row]=false : continue end utlinks = unique(linksub[boolean,:],1) + offset # Exclude multi-edges links[linkoffset+1:linkoffset+2*size(utlinks,1),:] = vcat(utlinks,hcat(utlinks[:,2],utlinks[:,1])) linkoffset += 2*size(utlinks,1) end # inter-links for k = 1:B-1 for l = k+1:B offset1 = sum(Nblock[1:k-1]) offset2 = sum(Nblock[1:l-1]) Lktot = round(Int64,prs[k,l]*Nblock[k]*Nblock[l]) linkind = rand(1:Nblock[k]*Nblock[l],Lktot) linksub = hcat(ind2sub((Nblock[k],Nblock[l]),linkind)[1]+offset1,ind2sub((Nblock[k],Nblock[l]),linkind)[2]+offset2) newlinks = unique(linksub,1) # Exclude multi-edges links[linkoffset+1:linkoffset+2*size(newlinks,1),:] = vcat(newlinks,hcat(newlinks[:,2],newlinks[:,1])) linkoffset += 2*size(newlinks,1) end end return links = links[1:linkoffset,:] end #//////////////////////////// # Extract Connected Component #//////////////////////////// function DFS(nb,root) visited = Int64[] stack = push!(Int64[],root) while !isempty(stack) node = pop!(stack) if node in visited continue else push!(visited,node) append!(stack,filter(x->!(x in visited), nb[node])) stack = unique(stack) end end return visited end function LinksConnected(links,Nblock,cc) B = length(Nblock) # Nblock of connected component ----- cc = sort(cc) ccNblock = zeros(Int64,B) t = 1 defects = Int64[] for k = 1:B ndef = 0 offset = sum(Nblock[1:k-1]) for i = 1:Nblock[k] if t <= length(cc) if offset + i == cc[t] t += 1 continue end end ndef += 1 push!(defects,offset+i) # ndef = # of defect nodes in Nblock[k] end ccNblock[k] = Nblock[k] - ndef end #--------------------------------------------------- # links of connected component ------------ boolean = trues(size(links,1)) for u = 1:size(links,1) links[u,1] in cc ? continue : boolean[u] = false end links = links[boolean,:] #---------------------------------------------------- for u = 1:size(links,1) links[u,1] -= countnz(defects.<links[u,1]) links[u,2] -= countnz(defects.<links[u,2]) end return (ccNblock,links) end #////////////////// # Main #////////////////// cm = 6 # Average degree Nblockinput = [50,50] # Block sizes Ntotinput = sum(Nblockinput) # Total number of nodes. (The actual graph has Ntot <= Ntotinput) Nthreshold = round(Ntotinput/2) # Connected component must be larger than this size. Bsbm = 2 # Number of SBM blocks epsilon = 0.15 # Strength of block structure: p_out/p_in samples = 1 cin = Bsbm*cm/(1 + (Bsbm-1)*epsilon) cout = epsilon*cin #= ##### partially bipartite structure ##### pin = cin/Ntotinput pout = cout/Ntotinput prs = pout*ones(Bsbm,Bsbm) prs[1,2] = pin prs[2,1] = pin prs[3,4] = pin prs[4,3] = pin ############################## ##### 4 blocks (hierarchical structure)#### cin1 = 40 cin2 = 20 cout = 4 pin1 = cin1/Ntotinput pin2 = cin2/Ntotinput pout = cout/Ntotinput prs = pout*ones(Bsbm,Bsbm) prs[1,1] = pin1 prs[2,2] = pin1 prs[3,3] = pin1 prs[4,4] = pin1 prs[1,2] = pin2 prs[2,1] = pin2 prs[3,4] = pin2 prs[4,3] = pin2 ################ =# ### pure SBM ######### pin = cin/Ntotinput pout = cout/Ntotinput prs = pout*ones(Bsbm,Bsbm) for r = 1:Bsbm prs[r,r] = pin end #################### Ntot = 0 Nblock = zeros(Bsbm) sm = 0 while sm < samples links = StructuredPoissonSBM(Nblockinput,prs) nb = [links[links[:,1].==lnode,2] for lnode = 1:Ntotinput] cc = DFS(nb,1) # Assume node 1 belongs to the connected component println("cc found...") (Nblock,links) = LinksConnected(links,Nblockinput,cc) Ntot = sum(Nblock) println("nodes & links updated... : Ntot = $(Ntot) L = $(size(links,1)/2)") println("Nblock = $(Nblock)") Ntot > Nthreshold ? sm += 1 : continue strfp = "edgelist_partialSBM_Nblock=$(Nblock)_cm=$(cm)_epsilon=$(epsilon)_$(sm).txt" fp = open(strfp,"w") for row = 1:size(links,1) write(fp, string(links[row,1])*" "*string(links[row,2])*"\n") end close(fp) end