Skip to content

Commit

Permalink
Fix temporal order of edges for directed networks
Browse files Browse the repository at this point in the history
In the course of PR se-sic#137, it turned out that the temporal order of
edges was broken in directed networks. The cause of this error is the
way in which all combinations of items in a key-value set are obtained:
The call 'combn(nodes, 2)' in the function
'construct.edge.list.from.key.value.list' does not respect the order of
the items, although the order matters for directed edges.

To fix this problem, we need to extract the edge list for each item in
an item combination separately and use the respective other item of the
combination as the receiver of the edge.

Additionally, the function 'construct.edge.list.from.key.value.list' is
adapted to match the coding conventions:
- access the vertex column via its name 'data.vertices',
- use 'seq_len' instead of the ':' operator, and
- add curly braces for a single-line if-statement.

Note: This temporarily breaks some tests due to igraph's normalization
of undirected edge lists, which (apparently) reorders some edges
randomly.

Signed-off-by: Claus Hunsen <[email protected]>
  • Loading branch information
clhunsen authored and fehnkera committed Sep 23, 2020
1 parent 7309bc7 commit 9feae29
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 17 deletions.
4 changes: 2 additions & 2 deletions tests/test-networks-author.R
Original file line number Diff line number Diff line change
Expand Up @@ -343,8 +343,8 @@ test_that("Network construction of the directed author-cochange network without
type = TYPE.AUTHOR)

## edge attributes
data = data.frame(comb.1. = c("Björn", "Björn", "Olaf", "Olaf", "Olaf", "Olaf", "Karl", "Karl"),
comb.2. = c("Olaf", "Olaf", "Karl", "Karl", "Thomas", "Thomas", "Thomas", "Thomas"),
data = data.frame(comb.1. = c("Björn", "Olaf", "Olaf", "Karl", "Olaf", "Thomas", "Karl", "Thomas"),
comb.2. = c("Olaf", "Björn", "Karl", "Olaf", "Thomas", "Olaf", "Thomas", "Karl"),
date = get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", "2016-07-12 16:05:41",
"2016-07-12 16:06:10", "2016-07-12 16:05:41", "2016-07-12 16:06:32",
"2016-07-12 16:06:10", "2016-07-12 16:06:32")),
Expand Down
39 changes: 24 additions & 15 deletions util-networks.R
Original file line number Diff line number Diff line change
Expand Up @@ -881,7 +881,7 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed

## for all subsets (sets), connect all items in there with the previous ones
edge.list.data = parallel::mclapply(list, function(set) {
number.edges = sum(0:(nrow(set) - 1))
number.edges = sum(seq_len(nrow(set)) - 1)
logging::logdebug("[%s/%s] Constructing edges for %s '%s': starting (%s edges to construct).",
match(attr(set, "group.name"), keys), keys.number,
attr(set, "group.type"), attr(set, "group.name"), number.edges)
Expand All @@ -898,20 +898,21 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed
nodes.processed.set = c()

## connect the current item to all previous ones
for (item.no in 1:nrow(set)) {
for (item.no in seq_len(nrow(set))) {
item = set[item.no, ]

## get vertex data
item.node = item[, 1]
item.node = item[["data.vertices"]]

## get edge attributes
cols.which = network.conf$get.value("edge.attributes") %in% colnames(item)
item.edge.attrs = item[, network.conf$get.value("edge.attributes")[cols.which], drop = FALSE]

## construct edges
combinations = expand.grid(item.node, nodes.processed.set, stringsAsFactors = default.stringsAsFactors())
if (nrow(combinations) > 0 & nrow(item.edge.attrs) == 1)
if (nrow(combinations) > 0 & nrow(item.edge.attrs) == 1) {
combinations = cbind(combinations, item.edge.attrs, row.names = NULL) # add edge attributes
}
edge.list.set = rbind(edge.list.set, combinations) # add to edge list

## mark current item as processed
Expand All @@ -933,7 +934,7 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed

## for all items in the sublists, construct the cartesian product
edge.list.data = parallel::mclapply(list, function(set) {
number.edges = sum(table(set[, 1]) * (dim(table(set[, 1])) - 1))
number.edges = sum(table(set[["data.vertices"]]) * (dim(table(set[["data.vertices"]])) - 1))
logging::logdebug("[%s/%s] Constructing edges for %s '%s': starting (%s edges to construct).",
match(attr(set, "group.name"), keys), keys.number,
attr(set, "group.type"), attr(set, "group.name"), number.edges)
Expand All @@ -946,7 +947,7 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed
}

## get vertex data
nodes = unique(set[, 1])
nodes = unique(set[["data.vertices"]])

## break if there is no author
if (length(nodes) < 1) {
Expand All @@ -965,18 +966,26 @@ construct.edge.list.from.key.value.list = function(list, network.conf, directed

## construct edge list
edges = apply(combinations, 2, function(comb) {
## basic edge data
edge = data.frame(comb[1], comb[2])

## get edge attibutes
edge.attrs = set[ set[, 1] %in% comb, ] # get data for current combination
cols.which = network.conf$get.value("edge.attributes") %in% colnames(edge.attrs)
edge.attrs = edge.attrs[, network.conf$get.value("edge.attributes")[cols.which], drop = FALSE]
## iterate over each item of the current combination
edges.by.comb.item = lapply(comb, function(comb.item) {
## basic edge data
edge = data.frame(comb.item, comb[comb != comb.item])

## get edge attibutes
edge.attrs = set[set[["data.vertices"]] %in% comb.item, ] # get data for current combination item
cols.which = network.conf$get.value("edge.attributes") %in% colnames(edge.attrs)
edge.attrs = edge.attrs[, network.conf$get.value("edge.attributes")[cols.which], drop = FALSE]

# add edge attributes to edge list
edgelist = cbind(edge, edge.attrs)
return(edgelist)
})

## add edge attributes to edge list
edgelist = cbind(edge, edge.attrs)
## union the edge lists for the combination items
edges.union = plyr::rbind.fill(edges.by.comb.item)
return(edges.union)

return(edgelist)
})
edges = plyr::rbind.fill(edges)

Expand Down

0 comments on commit 9feae29

Please sign in to comment.