diff --git a/src/Graphs.jl b/src/Graphs.jl
index c8d209cf5..66e2b2feb 100644
--- a/src/Graphs.jl
+++ b/src/Graphs.jl
@@ -35,7 +35,8 @@ using Random:
     seed!,
     shuffle,
     shuffle!
-using SparseArrays: SparseMatrixCSC, nonzeros, nzrange, rowvals
+using SparseArrays:
+    SparseMatrixCSC, nonzeros, nzrange, rowvals, spzeros, AbstractSparseMatrix
 import SparseArrays: blockdiag, sparse
 import Base:
     adjoint,
@@ -250,6 +251,7 @@ export
     desopo_pape_shortest_paths,
 
     # centrality
+    edge_betweenness_centrality,
     betweenness_centrality,
     closeness_centrality,
     degree_centrality,
@@ -504,6 +506,7 @@ include("operators.jl")
 include("persistence/common.jl")
 include("persistence/lg.jl")
 include("centrality/betweenness.jl")
+include("centrality/edge-betweenness.jl")
 include("centrality/closeness.jl")
 include("centrality/stress.jl")
 include("centrality/degree.jl")
diff --git a/src/centrality/edge-betweenness.jl b/src/centrality/edge-betweenness.jl
new file mode 100644
index 000000000..c9dd81fc6
--- /dev/null
+++ b/src/centrality/edge-betweenness.jl
@@ -0,0 +1,160 @@
+"""
+    edge_betweenness_centrality(g; [vs, distmx, normalize])
+    edge_betweenness_centrality(g, k; [distmx, normalize, rng])
+
+Compute the [edge betweenness centrality](https://en.wikipedia.org/wiki/Centrality#Betweenness_centrality)
+of every edge `e` in a graph `g`, or use a random subset of `k<|V|` vertices
+to get an estimate of the edge betweenness centrality. Including more vertices yields
+more accurate estimates.
+
+Return a sparse matrix whose entry `[u, v]` is the centrality calculated for the edge
+from `u` to `v` in `g`.
+
+Edge betweenness centrality is defined as the sum, over all pairs of vertices, of the
+fraction of shortest paths that pass through `e`:
+``
+bc(e) = \\sum_{s, t \\in V}
+\\frac{\\sigma_{st}(e)}{\\sigma_{st}}
+``
+
+where ``V`` is the set of vertices, ``\\sigma_{st}`` is the number of shortest paths
+from `s` to `t`, and ``\\sigma_{st}(e)`` is the number of those paths passing through `e`.
+
+### Optional Arguments
+- `normalize=true`: If `true`, the edge betweenness values are normalized by the number
+  of ordered pairs of distinct vertices, i.e. multiplied by ``1 / (|V|(|V|-1))``. For
+  undirected graphs the factor is ``2 / (|V|(|V|-1))`` instead.
+- `vs=vertices(g)`: The subset of vertices of `g` used as shortest-path sources. Including
+  more vertices in this subset yields a better estimate of the edge betweenness
+  centrality. The result is rescaled by ``|V| / length(vs)``.
+- `distmx=weights(g)`: The edge weights of `g`, given as a matrix, used as distances
+  when computing shortest paths.
+- `rng`: A random number generator used to select the `k` source vertices.
+
+### References
+- Brandes 2001 & Brandes 2008
+
+# Examples
+```jldoctest
+julia> using Graphs
+
+julia> Matrix(edge_betweenness_centrality(star_graph(5)))
+5×5 Matrix{Float64}:
+ 0.0  0.4  0.4  0.4  0.4
+ 0.4  0.0  0.0  0.0  0.0
+ 0.4  0.0  0.0  0.0  0.0
+ 0.4  0.0  0.0  0.0  0.0
+ 0.4  0.0  0.0  0.0  0.0
+
+julia> Matrix(edge_betweenness_centrality(path_digraph(6), normalize=false))
+6×6 Matrix{Float64}:
+ 0.0  5.0  0.0  0.0  0.0  0.0
+ 0.0  0.0  8.0  0.0  0.0  0.0
+ 0.0  0.0  0.0  9.0  0.0  0.0
+ 0.0  0.0  0.0  0.0  8.0  0.0
+ 0.0  0.0  0.0  0.0  0.0  5.0
+ 0.0  0.0  0.0  0.0  0.0  0.0
+
+julia> g = SimpleGraph(Edge.([(1, 2), (2, 3), (2, 5), (3, 4), (4, 5), (5, 6)]));
+
+julia> distmx = [
+           0.0 2.0 0.0 0.0 0.0 0.0
+           2.0 0.0 4.2 0.0 1.2 0.0
+           0.0 4.2 0.0 5.5 0.0 0.0
+           0.0 0.0 5.5 0.0 0.9 0.0
+           0.0 1.2 0.0 0.9 0.0 0.6
+           0.0 0.0 0.0 0.0 0.6 0.0
+       ];
+
+julia> Matrix(edge_betweenness_centrality(g; distmx=distmx, normalize=true))
+6×6 Matrix{Float64}:
+ 0.0       0.333333  …  0.0       0.0
+ 0.333333  0.0          0.533333  0.0
+ 0.0       0.266667     0.0       0.0
+ 0.0       0.0          0.266667  0.0
+ 0.0       0.533333     0.0       0.333333
+ 0.0       0.0       …  0.333333  0.0
+```
+"""
+function edge_betweenness_centrality(
+    g::AbstractGraph;
+    vs=vertices(g),
+    distmx::AbstractMatrix=weights(g),
+    normalize::Bool=true,
+)
+    k = length(vs)
+    edge_betweenness = spzeros(nv(g), nv(g))
+    # Accumulate the contribution of the shortest paths starting at each source.
+    for source in vs
+        state = dijkstra_shortest_paths(
+            g, source, distmx; allpaths=true, trackvertices=true
+        )
+        _accumulate_edges!(edge_betweenness, state)
+    end
+    _rescale_e!(edge_betweenness, nv(g), normalize, is_directed(g), k)
+
+    return edge_betweenness
+end
+
+function edge_betweenness_centrality(
+    g::AbstractGraph,
+    k::Integer;
+    distmx::AbstractMatrix=weights(g),
+    normalize::Bool=true,
+    rng::Union{Nothing,AbstractRNG}=nothing,
+)
+    return edge_betweenness_centrality(
+        g;
+        vs=sample(collect_if_not_vector(vertices(g)), k; rng=rng),
+        distmx=distmx,
+        normalize=normalize,
+    )
+end
+
+# Add to `edge_betweenness` the dependencies of the source vertex of `state` on every
+# edge lying on one of its shortest paths (accumulation step of Brandes' algorithm).
+function _accumulate_edges!(
+    edge_betweenness::AbstractSparseMatrix, state::Graphs.AbstractPathState
+)
+    σ = state.pathcounts
+    pred = state.predecessors
+    δ = zeros(Float64, length(σ))
+
+    # Walk the visited vertices from last to first without consuming the state.
+    # NOTE(review): assumes `closest_vertices` is ordered closest-to-farthest.
+    for w in Iterators.reverse(state.closest_vertices)
+        coeff = (1.0 + δ[w]) / σ[w]
+        for v in pred[w]
+            c = σ[v] * coeff
+            edge_betweenness[v, w] += c
+            δ[v] += c
+        end
+    end
+    return nothing
+end
+
+# Multiply `edge_betweenness` in place by `n / k` and, when `normalize` is set, by the
+# normalization factor `1 / (n * (n - 1))` (doubled for undirected graphs).
+function _rescale_e!(
+    edge_betweenness::AbstractSparseMatrix,
+    n::Integer,
+    normalize::Bool,
+    directed::Bool,
+    k::Integer,
+)
+    # With no source vertex nothing was accumulated, and `n / k` would be `Inf`
+    # (`0 * Inf` is `NaN`), so leave the all-zero matrix untouched.
+    k == 0 && return nothing
+
+    scale = n / k
+    if normalize
+        if n > 1
+            scale *= 1 / (n * (n - 1))
+        end
+        if !directed
+            scale *= 2
+        end
+    end
+    edge_betweenness .*= scale
+    return nothing
+end
diff --git a/test/centrality/edge-betweenness.jl b/test/centrality/edge-betweenness.jl
new file mode 100644
index 000000000..881893cca
--- /dev/null
+++ b/test/centrality/edge-betweenness.jl
@@ -0,0 +1,82 @@
+
+@testset "Edge Betweenness" begin
+    rng = StableRNG(1)
+    # self loops
+    s1 = GenericGraph(SimpleGraph(Edge.([(1, 2), (2, 3), (3, 3)])))
+    s2 = GenericDiGraph(SimpleDiGraph(Edge.([(1, 2), (2, 3), (3, 3)])))
+
+    g3 = GenericGraph(path_graph(5))
+
+    @test @inferred(edge_betweenness_centrality(s1)) ≈
+        sparse([1, 2, 3, 2], [2, 1, 2, 3], [2 / 3, 2 / 3, 2 / 3, 2 / 3], 3, 3)
+    @test @inferred(edge_betweenness_centrality(s2)) ≈
+        sparse([1, 2], [2, 3], [1 / 3, 1 / 3], 3, 3)
+
+    g = GenericGraph(path_graph(2))
+    z = @inferred(edge_betweenness_centrality(g; normalize=true))
+    @test z[1, 2] == z[2, 1] == 1.0
+    z2 = @inferred(edge_betweenness_centrality(g; vs=vertices(g)))
+    z3 = @inferred(edge_betweenness_centrality(g, nv(g); rng=rng))
+    @test z == z2 == z3
+    # an empty set of sources must give an all-zero matrix rather than `NaN`s
+    @test iszero(edge_betweenness_centrality(g; vs=Int[]))
+    z = @inferred(edge_betweenness_centrality(g3; normalize=false))
+    @test z[1, 2] == z[5, 4] == 4.0
+
+    ##
+    # Weighted Graph tests
+    g = GenericGraph(SimpleGraph(Edge.([(1, 2), (2, 3), (2, 5), (3, 4), (4, 5), (5, 6)])))
+
+    distmx = [
+        0.0 2.0 0.0 0.0 0.0 0.0
+        2.0 0.0 4.2 0.0 1.2 0.0
+        0.0 4.2 0.0 5.5 0.0 0.0
+        0.0 0.0 5.5 0.0 0.9 0.0
+        0.0 1.2 0.0 0.9 0.0 0.6
+        0.0 0.0 0.0 0.0 0.6 0.0
+    ]
+
+    @test isapprox(
+        nonzeros(
+            edge_betweenness_centrality(g; vs=vertices(g), distmx=distmx, normalize=false)
+        ),
+        [5.0, 5.0, 4.0, 8.0, 4.0, 1.0, 1.0, 4.0, 8.0, 4.0, 5.0, 5.0],
+    )
+
+    @test isapprox(
+        nonzeros(
+            edge_betweenness_centrality(g; vs=vertices(g), distmx=distmx, normalize=true)
+        ),
+        [5.0, 5.0, 4.0, 8.0, 4.0, 1.0, 1.0, 4.0, 8.0, 4.0, 5.0, 5.0] /
+        (nv(g) * (nv(g) - 1)) * 2,
+    )
+
+    adjmx2 = [0 1 0; 1 0 1; 1 1 0] # digraph
+    a2 = SimpleDiGraph(adjmx2)
+
+    for g in test_generic_graphs(a2)
+        distmx2 = [Inf 2.0 Inf; 3.2 Inf 4.2; 5.5 6.1 Inf]
+
+        @test isapprox(
+            nonzeros(
+                edge_betweenness_centrality(
+                    g; vs=vertices(g), distmx=distmx2, normalize=false
+                ),
+            ),
+            [1.0, 1.0, 2.0, 1.0, 2.0],
+        )
+
+        @test isapprox(
+            nonzeros(
+                edge_betweenness_centrality(
+                    g; vs=vertices(g), distmx=distmx2, normalize=true
+                ),
+            ),
+            [1.0, 1.0, 2.0, 1.0, 2.0] * (1 / 6),
+        )
+    end
+    # test #1405 / #1406
+    g = GenericGraph(grid([50, 50]))
+    z = edge_betweenness_centrality(g; normalize=false)
+    @test maximum(z) < nv(g) * (nv(g) - 1)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index cbb8763bb..4bf800cad 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -31,7 +31,9 @@ end
 @testset "Code quality (JET.jl)" begin
     if VERSION >= v"1.9"
         @assert get_pkg_version("JET") >= v"0.8.4"
-        JET.test_package(Graphs; target_defined_modules=true, ignore_missing_comparison=true)
+        JET.test_package(
+            Graphs; target_defined_modules=true, ignore_missing_comparison=true
+        )
     end
 end
 
@@ -70,7 +72,7 @@ function test_generic_graphs(g; eltypes=[UInt8, Int16], skip_if_too_large::Bool=
     SG = is_directed(g) ? SimpleDiGraph : SimpleGraph
     GG = is_directed(g) ? GenericDiGraph : GenericGraph
     result = GG[]
-    for T in eltypes 
+    for T in eltypes
         if skip_if_too_large && nv(g) > typemax(T)
             continue
         end
@@ -79,8 +81,11 @@ function test_generic_graphs(g; eltypes=[UInt8, Int16], skip_if_too_large::Bool=
     return result
 end
 
-test_large_generic_graphs(g; skip_if_too_large::Bool=false) = test_generic_graphs(g; eltypes=[UInt16, Int32], skip_if_too_large=skip_if_too_large)
-
+function test_large_generic_graphs(g; skip_if_too_large::Bool=false)
+    return test_generic_graphs(
+        g; eltypes=[UInt16, Int32], skip_if_too_large=skip_if_too_large
+    )
+end
 
 tests = [
     "simplegraphs/runtests",
@@ -124,6 +129,7 @@ tests = [
     "community/clique_percolation",
     "community/assortativity",
     "community/rich_club",
+    "centrality/edge-betweenness",
     "centrality/betweenness",
     "centrality/closeness",
     "centrality/degree",