Skip to content

Commit

Permalink
New element-wise trainer
Browse files Browse the repository at this point in the history
  • Loading branch information
guoyongzhi committed Jan 7, 2021
1 parent 042051f commit e2cd980
Show file tree
Hide file tree
Showing 11 changed files with 233 additions and 178 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "WordCloud"
uuid = "6385f0a0-cb03-45b6-9089-4e0acc74b26b"
authors = ["guoyongzhi <[email protected]>"]
version = "0.2.6"
version = "0.3.0"

[deps]
ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
Expand Down
22 changes: 11 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,6 @@
wordcloud in Julia
---

* [x] 排序 & 预放置
* [x] 基于四叉树碰撞检测
* [x] 根据局部灰度梯度位置调整(训练迭代)
* [x] 引入动量加速训练
* [x] 分代调整以优化性能
* [x] 控制字体大小和填充密度的策略
* [x] 重新放置和缩放的策略
* [x] 文字颜色和方向
* [ ] 并行计算

# Basic Usage
```julia
]add WordCloud
Expand All @@ -30,7 +20,7 @@ wc = wordcloud(
mask = loadmask("res/alice_mask.png", color="#faeef8"),
colors = (WordCloud.colorschemes[:Set1_5].colors..., ),
angles = (0, 90),
filling_rate = 0.6) |> generate!
fillingrate = 0.75) |> generate!
paint(wc, "alice.png", ratio=0.5)
```
![alice](res/alice.png)
Expand All @@ -41,6 +31,16 @@ paint(wc, "alice.png", ratio=0.5)
![compare](res/compare.png)
[Comparison of Obama's and Trump's inaugural address](./examples/compare.jl)

***
* [x] 排序 & 预放置
* [x] 基于四叉树碰撞检测
* [x] 根据局部灰度梯度位置调整(训练迭代)
* [x] 引入动量加速训练
* [x] 分代调整以优化性能
* [x] 控制字体大小和填充密度的策略
* [x] 重新放置和缩放的策略
* [x] 文字颜色和方向
* [ ] 并行计算
***
linux添加中文字体
> mv wqy-microhei.ttc ~/.fonts
Expand Down
2 changes: 1 addition & 1 deletion examples/animation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ weights = df[!, "Column3"]
wc = wordcloud(texts, weights,
# colors = ("red", 0.7, "#00ff00"),
mask=shape(box, 400, 300, 40, color=0.15),
filling_rate=0.65)
fillingrate=0.8)

gifdirectory = "guxiang_animation"
generate_animation!(wc, 100, optimiser=Momentum(η=1/4, ρ=0.5), patient=10, retry=2, outputdir=gifdirectory)
8 changes: 4 additions & 4 deletions examples/compare.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ stwords = ["us", "will"];
println("==Obama's==")
cs = WordCloud.randomscheme()
as = WordCloud.randomangles()
fr = 0.6 #not too high
fr = 0.65 #not too high
wca = wordcloud(
process(open("res/Barack Obama's First Inaugural Address.txt"), stopwords=WordCloud.stopwords_en ∪ stwords),
colors = cs,
angles = as,
filling_rate = fr) |> generate!
fillingrate = fr) |> generate!
println("==Trump's==")
tb, wb = process(open("res/Donald Trump's Inaugural Address.txt"), stopwords=WordCloud.stopwords_en ∪ stwords)
samemask = tb .∈ Ref(wca.words)
Expand All @@ -29,7 +29,7 @@ wcb = wordcloud(
mask = wca.mask,
colors = csb,
angles = asb,
filling_rate = fr)
fillingrate = fr)
for i in 1:length(tb)
if samemask[i]
ii = wainds[tb[i]]
Expand Down Expand Up @@ -62,4 +62,4 @@ WordCloud.ImageMagick.save("address_compare/compare.png", [ma space mb])
gif = WordCloud.GIF("address_compare")
record(wca, "Obama", gif)
record(wcb, "Trump", gif)
WordCloud.Render.generate(gif, framerate=0.5)
WordCloud.Render.generate(gif, framerate=1)
28 changes: 28 additions & 0 deletions examples/trainer_metric.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
using WordCloud
using Random

# Timing script: runs every trainer listed in `ts` on word clouds of increasing size
# and reports the wall time of each `generate!` call through `@time`.
println("This test may take several minutes")

"""
    randomcorpus(n)

Return `n` random words (1 to 8 characters each) together with a vector of `n`
random weights.
"""
function randomcorpus(n)
    texts = [Random.randstring(rand(1:8)) for _ in 1:n]
    scores = randexp(length(texts)) .* 2000 .+ rand(20:100, length(texts))
    return texts, scores
end

# 200 words on a 500x500 ellipse, with an explicit filling rate.
words, weights = randomcorpus(200)
wc1 = wordcloud(
    words, weights;
    mask=shape(ellipse, 500, 500, color=0.15), angles=(0, 90, 45), fillingrate=0.7,
)

# 500 words on the same kind of ellipse, with the default filling rate.
words, weights = randomcorpus(500)
wc2 = wordcloud(
    words, weights;
    mask=shape(ellipse, 500, 500, color=0.15), angles=(0, 90, 45),
)

# 5000 words on a 2000x2000 box.
words, weights = randomcorpus(5000)
wc3 = wordcloud(
    words, weights;
    mask=shape(box, 2000, 2000, 100, color=0.15), angles=(0, 90, 45),
)

# wc1 is listed twice on purpose: the first pass triggers compilation, so its timings
# should be disregarded (it is reported as round 0 below).
wcs = [wc1, wc1, wc2, wc3]
ts = [
    WordCloud.trainepoch_E!,
    WordCloud.trainepoch_EM!,
    WordCloud.trainepoch_EM2!,
    WordCloud.trainepoch_P!,
    WordCloud.trainepoch_P2!,
    WordCloud.trainepoch_level!,
]

for (round, cloud) in enumerate(wcs)
    banner = "*"^10
    println(
        "\n\n", banner,
        "wordcloud - $(length(cloud.words)) words on mask$(size(cloud.mask))",
        banner,
    )
    for (k, trainer) in enumerate(ts)
        println("\n", round - 1, "==== ", k, "/", length(ts), " ", nameof(trainer))
        # Reset the layout so that every trainer starts from a fresh placement.
        placement!(cloud)
        @time generate!(cloud, trainer=trainer, retry=1)
    end
end
37 changes: 23 additions & 14 deletions src/interface.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ function randommask(color, sz=800)
end
function randomangles()
a = rand((0, (0,90),(0,90,45),(0,-90),(0,-45,-90),-90:90))
println("angles: ", a)
println("angles = ", a)
a
end

Expand Down Expand Up @@ -80,7 +80,7 @@ end
* angles = (0, 90, 45) #choose entries randomly
* angles = 0:180 #choose entries randomly
* angles = [0, 22, 4, 1, 100, 10, ......] #use entries sequentially in cycle
* filling_rate = 0.5
* fillingrate = 0.5
* border = 1
### mask kargs
* mask = loadmask("res/heart.jpg", 256, 256) #see doc of `loadmask`
Expand All @@ -94,7 +94,7 @@ wordcloud(counter::AbstractVector{<:Pair}; kargs...) = wordcloud(first.(counter)

function wordcloud(words::AbstractVector{<:AbstractString}, weights::AbstractVector{<:Real};
colors=randomscheme(), angles=randomangles(), font="",
filling_rate=0.5, border=1, minfontsize=:auto, kargs...)
fillingrate=0.65, border=1, minfontsize=:auto, kargs...)

@assert length(words) == length(weights) > 0
# @show words,weights
Expand Down Expand Up @@ -147,11 +147,11 @@ function wordcloud(words::AbstractVector{<:AbstractString}, weights::AbstractVec
end
weights = weights ./ (sum(weights.^2 .* length.(words)) / length(weights))
params[:weights] = weights
scale = find_weight_scale(words, weights, groundoccupied, border=border, initial_scale=0,
filling_rate=filling_rate, max_iter=5, error=0.03, minfontsize=minfontsize)
scale = find_weight_scale(words, weights, groundoccupied, border=border, initialscale=0,
fillingrate=fillingrate, maxiter=5, error=0.03, minfontsize=minfontsize)
params[:scale] = scale
params[:filling_rate] = filling_rate
println("set filling_rate to $filling_rate, with scale=$scale")
params[:fillingrate] = fillingrate
println("set fillingrate to $fillingrate, with scale=$scale")
imgs, mimgs, qtrees = prepareforeground(words, weights * scale, colors, angles, groundsize,
bgcolor=(0, 0, 0, 0), border=border, font=font, minfontsize=minfontsize);
params[:mimgs] = mimgs
Expand Down Expand Up @@ -195,8 +195,16 @@ function record(wc::wordcloud, label::AbstractString, gif_callback=x->x)
gif_callback(resultpic)
end

function generate!(wc::wordcloud, nepoch::Number=100, args...; retry=3,
trainer=trainepoch_gen!, optimiser=Momentum(η=1/4, ρ=0.5), patient=10, krags...)
"""
# Positional Args
* wc: the wordcloud to train
* nepoch: training epoch nums
# Keyword Args
* retry: shrink & retrain times, default 3
* patient: number of epochs before teleporting & number of identical teleportation before giving up
* trainer: appoint a training engine
"""
function generate!(wc::wordcloud, args...; retry=3, krags...)
ep, nc = -1, -1
for r in 1:retry
# fr = feelingoccupied(wc.params[:mimgs])/wc.params[:groundoccupied]
Expand All @@ -205,8 +213,7 @@ function generate!(wc::wordcloud, nepoch::Number=100, args...; retry=3,
rescale!(wc, sc)
end
println("#$r. scale = $(wc.params[:scale])")
ep, nc = train_with_teleport!(wc.qtrees, wc.maskqtree, nepoch, args...;
trainer=trainer, optimiser=optimiser, patient=patient, krags...)
ep, nc = train!(wc.qtrees, wc.maskqtree, args...; krags...)
wc.params[:epoch] += ep
if nc == 0
break
Expand All @@ -215,15 +222,17 @@ function generate!(wc::wordcloud, nepoch::Number=100, args...; retry=3,
@show ep, nc
if nc == 0
wc.params[:completed] = true
else
wc.params[:completed] = false
else #check
colllist = first.(listcollision(wc.qtrees, wc.maskqtree))
get_text(i) = i>0 ? wc.words[i] : "#MASK#"
collwords = [(get_text(i), get_text(j)) for (i,j) in colllist]
if length(colllist) > 0
wc.params[:completed] = false
println("have $(length(colllist)) collision.",
" try setting a larger `nepoch` and `retry`, or lower `filling_rate` in `wordcloud` to fix that")
" try setting a larger `nepoch` and `retry`, or lower `fillingrate` in `wordcloud` to fix that")
println("$collwords")
else
wc.params[:completed] = true
end
end
wc
Expand Down
2 changes: 1 addition & 1 deletion src/qtree.jl
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ callefttop(t::ShiftedQtree, center) = center .- kernelsize(t) .÷ 2
setcenter!(t::ShiftedQtree, center) = setshift!(t, 1, callefttop(t, center))

function inbounds(bgqt::ShiftedQtree, qt::ShiftedQtree)
inbounds(bgqt[1], (getshift(qt[1]) .+ kernelsize(qt[1]) .÷ 2)...)
inbounds(bgqt[1], center(qt)...)
end
"""
    outofbounds(bgqt::ShiftedQtree, qts)

Return the positions (as counted by `enumerate`) of the elements of `qts` for which
`inbounds(bgqt, t)` is false.
"""
function outofbounds(bgqt::ShiftedQtree, qts)
    outside = Int[]
    for (idx, tree) in enumerate(qts)
        inbounds(bgqt, tree) || push!(outside, idx)
    end
    return outside
end
5 changes: 3 additions & 2 deletions src/qtreetools.jl
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,8 @@ function locate!(qts::AbstractVector, loctree::QtreeNode=LocQtreeInt((levelnum(q
end
loctree
end
function locate!(qts::AbstractVector, inds::AbstractVector{Int}, loctree::QtreeNode=LocQtreeInt((levelnum(qts[1]), 1, 1))) #must have same levelnum
function locate!(qts::AbstractVector, inds::Union{AbstractVector{Int}, AbstractSet{Int}},
loctree::QtreeNode=LocQtreeInt((levelnum(qts[1]), 1, 1))) #must have same levelnum
for i in inds
locate!(qts[i], loctree, label=i, newnode=LocQtreeInt)
end
Expand Down Expand Up @@ -353,7 +354,7 @@ function listcollision_qtree(qtrees::AbstractVector, mask::AbstractStackedQtree;
loctree = locate!(mask, loctree, label=0, newnode=LocQtreeInt)
listcollision_qtree(qtrees, mask, loctree; kargs...)
end
function listcollision_qtree(qtrees::AbstractVector, mask::AbstractStackedQtree, inds::AbstractVector{Int}; kargs...)
function listcollision_qtree(qtrees::AbstractVector, mask::AbstractStackedQtree, inds::Union{AbstractVector{Int}, AbstractSet{Int}}; kargs...)
loctree = locate!(qtrees, inds)
loctree = locate!(mask, loctree, label=0, newnode=LocQtreeInt)
listcollision_qtree(qtrees, mask, loctree; kargs...)
Expand Down
56 changes: 18 additions & 38 deletions src/strategy.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ function box_occupied(imgs::AbstractVector)
return sum(box_occupied, imgs)
end
function feelingoccupied(imgs)
m = length(imgs) ÷ 4
occupied(imgs[1:m]) + box_occupied(imgs[m+1:end]) #兼顾大字的内隙和小字的占据
m = length(imgs) ÷ 100
occupied(imgs[1:m])/4 + 3box_occupied(imgs[1:m])/4 + box_occupied(imgs[m+1:end]) #兼顾大字的内隙和小字的占据
end

function text_occupied(words, weights, scale; font="", border=0, minfontsize=0)
Expand Down Expand Up @@ -65,29 +65,29 @@ function prepareforeground(words, weights, colors, angles, groundsize; bgcolor=(
end

## weight_scale
function cal_weight_scale(words, weights, target; border=0, initial_scale=64, kargs...)
input = initial_scale
function cal_weight_scale(words, weights, target; border=0, initialscale=64, kargs...)
input = initialscale
output = text_occupied(words, weights, input; border=border, kargs...)
# @show input,output
return output, sqrt(target/output) * (input+2border) - 2border# 假设output=k*(input+2border)^2
end

function find_weight_scale(words, weights, ground_size; initial_scale=0, filling_rate=0.3, max_iter=5, error=0.05, kargs...)
if initial_scale <= 0
initial_scale = (ground_size/length(words))
function find_weight_scale(words, weights, ground_size; initialscale=0, fillingrate=0.3, maxiter=5, error=0.05, kargs...)
if initialscale <= 0
initialscale = (ground_size/length(words))
end
@assert sum(weights.^2 .* length.(words)) / length(weights) ≈ 1.0
target_lower = (filling_rate - error) * ground_size
target_upper = (filling_rate + error) * ground_size
target_lower = (fillingrate - error) * ground_size
target_upper = (fillingrate + error) * ground_size
step = 0
sc = initial_scale
sc = initialscale
while true
step = step + 1
if step > max_iter
@warn "find_weight_scale reach max_iter. This may be caused by too small background image or too many words or too big `minfontsize`."
if step > maxiter
@warn "find_weight_scale reach maxiter. This may be caused by too small background image or too many words or too big `minfontsize`."
break
end
tg, sc = cal_weight_scale(words, weights, filling_rate * ground_size, initial_scale=sc; kargs...)
tg, sc = cal_weight_scale(words, weights, fillingrate * ground_size, initialscale=sc; kargs...)
@show sc, tg, tg/ground_size
if target_lower <= tg <= target_upper
break
Expand All @@ -112,29 +112,6 @@ function max_collisional_index(qtrees, mask)
nothing
end

# function max_collisional_index_rand(qtrees, mask)
# l = length(qtrees)
# b = l - floor(Int, l / 8 * randexp()) #从末尾1/8起
# getqtree(i) = i==0 ? mask : qtrees[i]
# for i in b:-1:1
# for j in 0:i-1
# cp = collision(getqtree(i), getqtree(j))
# if cp[1] >= 0
# return i
# end
# end
# end
# for i in l:-1:b+1
# for j in 0:i-1
# cp = collision(getqtree(i), getqtree(j))
# if cp[1] >= 0
# return i
# end
# end
# end
# nothing
# end

function max_collisional_index_rand(qtrees, mask; collpool)
l = length(collpool)
b = l - floor(Int, l / 8 * randexp()) #从末尾1/8起
Expand All @@ -156,7 +133,8 @@ function max_collisional_index_rand(qtrees, mask; collpool)
end
return nothing
end
function collisional_indexes_rand(qtrees, mask; collpool)

function collisional_indexes_rand(qtrees, mask, collpool::Vector{Tuple{Int,Int}})
cinds = Vector{Int}()
l = length(collpool)
if l == 0
Expand All @@ -180,7 +158,9 @@ function collisional_indexes_rand(qtrees, mask; collpool)
end
return cinds
end

# Convenience method for a pool of full collision items: keep only the first component
# of every item and forward to the method of the same name.
# NOTE(review): presumably the first component is the `(i, j)` index pair, so the call
# lands on the `Vector{Tuple{Int,Int}}` method — confirm against `QTree.ColItemType`.
function collisional_indexes_rand(qtrees, mask, collpool::Vector{QTree.ColItemType})
    heads = first.(collpool)
    return collisional_indexes_rand(qtrees, mask, heads)
end
function rescale!(wc::wordcloud, scale::Real)
qts = wc.qtrees
centers = QTree.center.(qts)
Expand Down
Loading

0 comments on commit e2cd980

Please sign in to comment.