MATLAB移民のためのJulia tips

MATLAB移民のためのJulia tips

何も考えずに速く計算できないのならば、何もやりたくない。

Stochastic block model

とある身内のご要望にお応えして、stochastic block model (SBM)を生成するコードを載せます。

  • Poisson次数分布の、一番基本的なSBMを生成します。
  • Affinity matrix (p_rs)の要素を陽に指定して、任意のブロック構造を作れます。
  • cincoutは、rescaleされたaffinity matrixの値:cin = Ntot*pin, cout = Ntot*pout
  • Self-loops, multi-edgesはありません。
  • function StructuredPoissonSBMの部分では、グラフが非連結になっているかもしれないので、function LinksConnectedでつながっている部分を抽出します。
  • 出力はedgelistだけですが、Nblockで各ブロックの頂点数がわかります。(非連結成分を除いた後は、Nblockinputから変わっているかもしれないので、overlapなどを測るときには必要。)
  • 元々自分で使うためだけのコードだったので、ツッコミどころがあるのはご勘弁。
#///////////////////////
# Generate Edgelist
#///////////////////////
function StructuredPoissonSBM(Nblock,prs)
    Ntot = sum(Nblock)
    B = length(Nblock)

    Ktot = Ntot #  Ntot is an offset, just for safety. Ktot is usually larger than the actual total degree without this offset already, because Ktot has nonzero prob. for diagonal elements.
    for r = 1:size(prs,1)
    for s = 1:size(prs,1)
            Ktot += round(Int64, prs[r,s]*Nblock[r]*Nblock[s])
    end
    end
    links = zeros(Int64, Ktot,2)
    
    # intra-links
    offset = 0
    linkoffset = 0
    for k = 1:B
        offset = sum(Nblock[1:k-1])
        Lktot = round(Int64,prs[k,k]*Nblock[k]^2)
        linkind = rand(1:Nblock[k]^2,Lktot)
        linksub = hcat(ind2sub((Nblock[k],Nblock[k]),linkind)[1],ind2sub((Nblock[k],Nblock[k]),linkind)[2])
        boolean = trues(size(linksub,1))
        for row = 1:size(linksub,1)
            linksub[row,1] >= linksub[row,2] ? boolean[row]=false : continue
        end
        utlinks = unique(linksub[boolean,:],1) + offset # Exclude multi-edges
        links[linkoffset+1:linkoffset+2*size(utlinks,1),:] = vcat(utlinks,hcat(utlinks[:,2],utlinks[:,1]))
        linkoffset += 2*size(utlinks,1)
    end
    # inter-links
    for k = 1:B-1
    for l = k+1:B
        offset1 = sum(Nblock[1:k-1])
        offset2 = sum(Nblock[1:l-1])
        Lktot = round(Int64,prs[k,l]*Nblock[k]*Nblock[l])
        linkind = rand(1:Nblock[k]*Nblock[l],Lktot)        
        linksub = hcat(ind2sub((Nblock[k],Nblock[l]),linkind)[1]+offset1,ind2sub((Nblock[k],Nblock[l]),linkind)[2]+offset2)
        newlinks = unique(linksub,1) # Exclude multi-edges
        links[linkoffset+1:linkoffset+2*size(newlinks,1),:] = vcat(newlinks,hcat(newlinks[:,2],newlinks[:,1]))
        linkoffset += 2*size(newlinks,1)
    end
    end
    
    return links = links[1:linkoffset,:]    
    
end



#////////////////////////////
# Extract Connected Component
#////////////////////////////
function DFS(nb,root)
    visited = Int64[]
    stack = push!(Int64[],root)
    while !isempty(stack)
        node = pop!(stack)
        if node in visited
            continue
        else
            push!(visited,node)
            append!(stack,filter(x->!(x in visited), nb[node]))
            stack = unique(stack)
        end
    end
    return visited
end

function LinksConnected(links,Nblock,cc)
    B = length(Nblock)
    # Nblock of connected component -----
    cc = sort(cc)
    ccNblock = zeros(Int64,B)
    t = 1
    defects = Int64[]
    for k = 1:B
        ndef = 0
        offset = sum(Nblock[1:k-1])
        for i = 1:Nblock[k]
            if t <= length(cc)
                if offset + i == cc[t]
                    t += 1
                    continue
                end
            end
            ndef += 1
            push!(defects,offset+i) # ndef = # of defect nodes in Nblock[k]
        end
        ccNblock[k] = Nblock[k] - ndef
    end
    #---------------------------------------------------
    
    # links of connected component ------------
    boolean = trues(size(links,1))
    for u = 1:size(links,1)
        links[u,1] in cc ? continue : boolean[u] = false
    end
    links = links[boolean,:]
    #----------------------------------------------------
    
    for u = 1:size(links,1)
        links[u,1] -= countnz(defects.<links[u,1])
        links[u,2] -= countnz(defects.<links[u,2])
    end
    
    return (ccNblock,links)
end




#//////////////////
# Main
#//////////////////

cm = 6 # Average degree
Nblockinput = [50,50] # Block sizes
Ntotinput = sum(Nblockinput) # Total number of nodes. (The actual graph has Ntot <= Ntotinput)
Nthreshold = round(Ntotinput/2) # Connected component must be larger than this size.
Bsbm = 2 # Number of SBM blocks
epsilon = 0.15 # Strength of block structure: p_out/p_in
samples = 1

cin = Bsbm*cm/(1 + (Bsbm-1)*epsilon)
cout = epsilon*cin

#=
##### partially bipartite structure #####
pin = cin/Ntotinput
pout = cout/Ntotinput
prs = pout*ones(Bsbm,Bsbm)
prs[1,2] = pin
prs[2,1] = pin
prs[3,4] = pin
prs[4,3] = pin
##############################
##### 4 blocks (hierarchical structure)####
cin1 = 40
cin2 = 20
cout = 4
pin1 = cin1/Ntotinput
pin2 = cin2/Ntotinput
pout = cout/Ntotinput
prs = pout*ones(Bsbm,Bsbm)
prs[1,1] = pin1
prs[2,2] = pin1
prs[3,3] = pin1
prs[4,4] = pin1
prs[1,2] = pin2
prs[2,1] = pin2
prs[3,4] = pin2
prs[4,3] = pin2
################
=#
### pure SBM #########
pin = cin/Ntotinput
pout = cout/Ntotinput
prs = pout*ones(Bsbm,Bsbm)
for r = 1:Bsbm
    prs[r,r] = pin
end
####################


Ntot = 0
Nblock = zeros(Bsbm)
sm = 0
while sm < samples
    links = StructuredPoissonSBM(Nblockinput,prs)
    nb = [links[links[:,1].==lnode,2] for lnode = 1:Ntotinput]
    cc = DFS(nb,1) # Assume node 1 belongs to the connected component
    println("cc found...")
    (Nblock,links) = LinksConnected(links,Nblockinput,cc)
    Ntot = sum(Nblock)
    println("nodes & links updated... : Ntot = $(Ntot) L = $(size(links,1)/2)")
    println("Nblock = $(Nblock)")
    Ntot > Nthreshold ? sm += 1 : continue

    strfp = "edgelist_partialSBM_Nblock=$(Nblock)_cm=$(cm)_epsilon=$(epsilon)_$(sm).txt"
    fp = open(strfp,"w")
    for row = 1:size(links,1)
        write(fp, string(links[row,1])*" "*string(links[row,2])*"\n")
    end
    close(fp)
end