Skip to content

Commit 9a21669

Browse files
committed
docker setup update
1 parent d3d63d4 commit 9a21669

File tree

11 files changed

+332
-173
lines changed

11 files changed

+332
-173
lines changed

example/docker/cluster_setup.py

Lines changed: 76 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -1,117 +1,93 @@
1-
import paramiko
1+
import json
22
import argparse
3+
import paramiko
34
import traceback
5+
import zk
6+
import hdfs
7+
import tera
48

5-
ZK = 'zk'
6-
HDFS = 'hdfs'
9+
class SSH():
    """Thin wrapper around a paramiko SSHClient for one-shot remote commands."""

    def __init__(self):
        self.s = paramiko.SSHClient()
        self.s.load_system_host_keys()
        # Hosts in the cluster file may not be in known_hosts yet.
        self.s.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    def run_cmd(self, ip, cmd):
        """Run `cmd` on host `ip` over SSH.

        Returns the (stdin, stdout, stderr) file objects from exec_command,
        or (None, None, None) when the connection/execution failed.
        The previous version returned the unbound locals after a failure,
        which raised UnboundLocalError instead of reporting the error.
        """
        stdin = stdout = stderr = None
        try:
            self.s.connect(ip)
            stdin, stdout, stderr = self.s.exec_command(cmd)
        except Exception:
            # Best-effort: log the failure and keep launching the rest of
            # the cluster, matching the original intent.
            traceback.print_exc()
        finally:
            # Original closed unconditionally right after exec_command;
            # keep that behavior but guarantee it on the error path too.
            self.s.close()
        return stdin, stdout, stderr
23+
24+
def parse_input():
    """Parse the command line: the cluster description file plus component selectors."""
    parser = argparse.ArgumentParser()
    parser.add_argument('file', type=str, help='A file describes the zk cluster')
    parser.add_argument('--docker', type=str, required=True, help='ID of the docker image')
    parser.add_argument('--zk', action='store_true', help='Launch zk')
    parser.add_argument('--hdfs', action='store_true', help='Launch hdfs')
    parser.add_argument('--tera', action='store_true', help='Launch tera')
    return parser.parse_args()
33+
34+
def config(args):
    """Load the JSON cluster description and build the three cluster plans.

    Returns (zk_cluster, hdfs_cluster, tera_cluster); exits the process when
    the HDFS plan cannot be satisfied by the available IPs.
    Fixes: the local no longer shadows the function name `config`, the file
    handle is closed via `with`, and prints are function calls so the module
    also parses under Python 3.
    """
    with open(args.file, 'r') as fp:
        conf = json.load(fp)
    # 'ip' is a colon-separated host list, e.g. "10.0.0.1:10.0.0.2".
    ip_list = conf['ip'].split(':')
    log_prefix = conf['log_prefix']

    zk_cluster = zk.ZkCluster(ip_list, conf['zk'], log_prefix)
    zk_cluster.populate_zk_cluster()
    for z in zk_cluster.cluster:
        print(z.to_string())

    hdfs_cluster = hdfs.HdfsCluster(ip_list, conf['hdfs'], log_prefix)
    if hdfs_cluster.populate_hdfs_cluster() is False:
        exit(1)
    for h in hdfs_cluster.cluster:
        print(h.to_string())

    tera_cluster = tera.TeraCluster(ip_list, conf['tera'], log_prefix)
    tera_cluster.populate_tera_cluster()
    for t in tera_cluster.cluster:
        print(t.to_string())

    return zk_cluster, hdfs_cluster, tera_cluster
56+
57+
def start_zk(args, zk_cluster, s):
    """Launch one ZooKeeper container per planned instance via SSH.

    Skipped only when some other component (--hdfs/--tera) was requested
    without --zk; with no selector flags at all, everything launches.
    Fixed: print as a function call so the module also parses under Python 3.
    """
    if (args.hdfs or args.tera) and not args.zk:
        return
    for zk_instance in zk_cluster.cluster:
        cmd = zk_instance.to_cmd(' '.join(zk_cluster.ip_zk), args.docker)
        print(cmd)
        s.run_cmd(zk_instance.ip, cmd)
65+
66+
def start_hdfs(args, hdfs_cluster, s):
    """Launch one HDFS container per planned daemon via SSH.

    Skipped only when some other component (--zk/--tera) was requested
    without --hdfs; with no selector flags at all, everything launches.
    Fixed: print as a function call so the module also parses under Python 3.
    """
    if (args.zk or args.tera) and not args.hdfs:
        return
    for hdfs_instance in hdfs_cluster.cluster:
        cmd = hdfs_instance.to_cmd(args.docker, hdfs_cluster.master_ip,
                                   ' '.join(hdfs_cluster.slave_ip))
        print(cmd)
        s.run_cmd(hdfs_instance.ip, cmd)
9774

98-
for slave in slave_list:
99-
slave_ip = slave[0]
100-
slave_details = slave[1]
101-
cmd = 'docker run -t -d -v {dir}:/opt/share -p 9000:9000 -p 9001:9001 --net=host 67 /usr/bin/python /opt/hdfs_setup.py --masters {master} --slaves {slaves} --mode slave'.\
102-
format(dir=slave_details['path'], master=master_ip, slaves=ips)
75+
def start_tera(args, tera_cluster, zk_cluster, s):
    """Launch one tera container per planned daemon via SSH.

    Tera instances are pointed at the ZooKeeper quorum (comma-joined
    zk_cluster.ip_tera). Skipped only when some other component was requested
    without --tera; with no selector flags at all, everything launches.
    Fixed: print as a function call so the module also parses under Python 3.
    """
    if (args.zk or args.hdfs) and not args.tera:
        return
    for tera_instance in tera_cluster.cluster:
        cmd = tera_instance.to_cmd(args.docker, ','.join(zk_cluster.ip_tera))
        print(cmd)
        s.run_cmd(tera_instance.ip, cmd)
10983

11084
def main():
    """Entry point: read the cluster plan, then launch each selected component."""
    args = parse_input()
    zk_cluster, hdfs_cluster, tera_cluster = config(args)
    ssh = SSH()
    start_zk(args, zk_cluster, ssh)
    start_hdfs(args, hdfs_cluster, ssh)
    start_tera(args, tera_cluster, zk_cluster, ssh)
11591

11692
if __name__ == '__main__':
11793
main()

example/docker/dockerfile

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
FROM debian:jessie

# All system packages in a single layer: `apt-get update` cached separately
# from the installs is the classic stale-index pitfall, and one layer keeps
# the image smaller. The package set is unchanged from the original.
RUN apt-get update && apt-get install -y \
        build-essential \
        gcc \
        libssl-dev \
        make \
        openjdk-7-jre-headless \
        python \
        python-dev \
        wget \
    && rm -rf /var/lib/apt/lists/*

# ZooKeeper server plus the sample config; /tmp/zookeeper is its default dataDir.
RUN wget -q -O - http://apache.mirrors.pair.com/zookeeper/zookeeper-3.4.6/zookeeper-3.4.6.tar.gz | tar -xzf - -C /opt \
    && mv /opt/zookeeper-3.4.6 /opt/zookeeper \
    && cp /opt/zookeeper/conf/zoo_sample.cfg /opt/zookeeper/conf/zoo.cfg \
    && mkdir -p /tmp/zookeeper

ENV JAVA_HOME /usr/lib/jvm/java-7-openjdk-amd64

# ZooKeeper C client library, required by the zkpython binding below.
RUN cd /opt/zookeeper/src/c && ./configure && make && make install

ADD workspace /opt/workspace

# setuptools bootstrap plus the bundled Python dependencies and the zk setup script.
RUN wget https://bootstrap.pypa.io/ez_setup.py -O - | python \
    && cd /opt/workspace/argparse-1.3.0/ && python setup.py install \
    && cd /opt/workspace/zkpython-0.4/ && python setup.py install \
    && cp /opt/workspace/zk_setup.py /opt/ \
    && mkdir /opt/share

WORKDIR /opt/

ADD hadoop-1.2.1 /opt/hadoop-1.2.1
RUN cp /opt/hadoop-1.2.1/hdfs_setup.py /opt \
    && /opt/hadoop-1.2.1/bin/hadoop-config.sh

RUN cd /opt/workspace/zlib-1.2.8 && ./configure && make && make install

ENV PATH /usr/bin:/bin:/opt/hadoop-1.2.1/bin:/usr/local/sbin:/usr/sbin:/sbin
# Single authoritative LD_LIBRARY_PATH (the original set it twice; the second
# value superseded the first and already includes /usr/local/lib/).
ENV LD_LIBRARY_PATH $JAVA_HOME/jre/lib/amd64/server:$JAVA_HOME/jre/lib/amd64/:/opt/hadoop-1.2.1/c++/Linux-amd64-64/lib:/usr/local/lib/
RUN cp /opt/workspace/ifconfig /sbin/

ADD terawork /opt/tera
RUN mv /opt/tera/bin/tera_setup.py /opt

example/docker/hdfs.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import time
2+
import os
3+
4+
class Hdfs:
    """One HDFS daemon (master or slave) bound to a host IP, with its own log dir."""

    def __init__(self, ip, mode, log_prefix):
        self.ip = ip
        self.mode = mode
        # Creating the instance eagerly creates the log directory on disk.
        self.path = self.get_log_path(log_prefix)

    def get_log_path(self, log_prefix):
        """Create and return a timestamped log directory for this daemon."""
        stamp = time.strftime('%Y%m%d%H%M%S')
        path = '{pre}/hdfs/{ip}-{mode}-{time}'.format(
            pre=log_prefix, ip=self.ip, mode=self.mode, time=stamp)
        os.makedirs(path)
        return path

    def to_string(self):
        """One-line human-readable summary of this daemon."""
        return 'hdfs\t{ip}:\t{mode}\tlog:{log}'.format(
            ip=self.ip, mode=self.mode, log=self.path)

    def to_cmd(self, docker, masters, slaves):
        """Build the `docker run` command that launches this daemon on its host."""
        template = ('docker run -t -d -v {dir}:/opt/share -p 9000:9000 -p 9001:9001 '
                    '--net=host {docker} /usr/bin/python /opt/hdfs_setup.py '
                    '--masters {master} --slaves {slaves} --mode {mode}')
        return template.format(dir=self.path, docker=docker, master=masters,
                               slaves=slaves, mode=self.mode)
23+
24+
class HdfsCluster:
    """Plan of HDFS daemons: a master on ip_list[0] plus num_of_hdfs slaves.

    Note: slave allocation also starts at ip_list[0], so the first slave
    shares the master's host — preserved from the original behavior.
    """

    def __init__(self, ip_list, num_of_hdfs, log_prefix):
        self.ip_list = ip_list
        self.ip_index = 0          # next ip_list slot handed to a slave
        self.num_of_hdfs = num_of_hdfs
        self.cluster = []          # Hdfs instances, master first
        self.log_prefix = log_prefix
        self.master_ip = self.ip_list[0]
        self.slave_ip = []

    def add_hdfs(self):
        """Append one slave daemon on the next available IP."""
        node = Hdfs(self.ip_list[self.ip_index], 'slave', self.log_prefix)
        self.cluster.append(node)
        self.slave_ip.append(node.ip)
        self.ip_index += 1

    def populate_hdfs_cluster(self):
        """Build master + slaves.

        Returns False when there are not enough IPs, True on success.
        (The original fell off the end and returned None on success, relying
        on the caller testing `is False`; an explicit True is compatible and
        clearer. Print is a function call so the module parses on Python 3.)
        """
        if self.num_of_hdfs > len(self.ip_list):
            print('not enough ip address for hdfs!!')
            return False
        master = Hdfs(self.ip_list[0], 'master', self.log_prefix)
        self.cluster.append(master)
        for _ in range(self.num_of_hdfs):
            self.add_hdfs()
        return True

example/docker/hdfs_setup.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
import subprocess
22
import argparse
3+
import time
34
from xml.dom.minidom import Document
45

5-
conf_path_prefix = '/home/ubuntu/leiliyuan/hadoop-1.2.1/conf/'
6-
share_path_prefix = '/home/ubuntu/leiliyuan/share/'
6+
conf_path_prefix = '/opt/hadoop-1.2.1/conf/'
7+
share_path_prefix = '/opt/share/'
78

89
def parse_input():
910
parser = argparse.ArgumentParser()
@@ -54,7 +55,7 @@ def write_core_site_xml(args):
5455

5556
def write_hdfs_site_xml():
5657
doc, hdfssite = get_doc()
57-
prop = write_property(doc, 'dfs.data.dir', share_path_prefix + 'name', True)
58+
prop = write_property(doc, 'dfs.name.dir', share_path_prefix + 'name', True)
5859
hdfssite.appendChild(prop)
5960
prop = write_property(doc, 'dfs.data.dir', share_path_prefix + 'data', True)
6061
hdfssite.appendChild(prop)
@@ -91,7 +92,7 @@ def write_maters_slaves(args):
9192
f.close()
9293

9394
def start_hdfs(args):
94-
cmd_prefix = '/home/ubuntu/leiliyuan/hadoop-1.2.1/bin/hadoop-daemon.sh --config /home/ubuntu/leiliyuan/hadoop-1.2.1/'
95+
cmd_prefix = '/opt/hadoop-1.2.1/bin/hadoop-daemon.sh --config /opt/hadoop-1.2.1/'
9596
if args.mode == 'master':
9697
p = subprocess.Popen('/opt/hadoop-1.2.1/bin/hadoop namenode -format', stdout=subprocess.PIPE, shell=True)
9798
print p.stdout.read()
@@ -103,14 +104,19 @@ def start_hdfs(args):
103104
p = subprocess.Popen(cmd_prefix + 'conf start datanode', stdout=subprocess.PIPE, shell=True)
104105
print p.stdout.read()
105106

107+
def doing_nothing():
    """Sleep forever so the container's foreground process never exits."""
    while True:
        time.sleep(1000)
110+
106111
def main():
107112
args = parse_input()
108113
write_core_site_xml(args)
109114
write_hdfs_site_xml()
110115
#write_mapred_site_xml(args)
111116
write_hadoop_env()
112117
write_maters_slaves(args)
113-
#start_hdfs(args)
118+
start_hdfs(args)
119+
doing_nothing()
114120

115121
if __name__ == '__main__':
116122
main()

example/docker/master

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#!/bin/bash

# Launch the tera master inside the container.
# Fixed: hadoop jars live under /opt/hadoop-1.2.1 in this image (see the
# dockerfile's ADD); the old /home/ubuntu/leiliyuan path does not exist here.
export CLASSPATH=.:$CLASSPATH:$JAVA_HOME/lib:$JAVA_HOME/jre/lib:$JAVA_HOME/jre/lib/amd64/:/opt/hadoop-1.2.1/lib:$(hadoop classpath)
cd /opt/tera/bin
# Logs go to the shared volume so they survive the container.
nohup ./tera_main --flagfile=../conf/tera.flag --tera_role=master &> ../../share/log/master.stderr &

example/docker/tabletnode

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#!/bin/bash

# Launch a tera tabletnode inside the container.
# Fixed: hadoop jars live under /opt/hadoop-1.2.1 in this image (see the
# dockerfile's ADD); the old /home/ubuntu/leiliyuan path does not exist here.
export CLASSPATH=.:$CLASSPATH:$JAVA_HOME/lib:$JAVA_HOME/jre/lib:$JAVA_HOME/jre/lib/amd64/:/opt/hadoop-1.2.1/lib:$(hadoop classpath)
cd /opt/tera/bin
# Logs go to the shared volume so they survive the container.
nohup ./tera_main --flagfile=../conf/tera.flag --tera_role=tabletnode &> ../../share/log/tabletserver.stderr &

example/docker/tera.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import time
2+
import os
3+
4+
class Tera:
    """One tera daemon (master or tabletnode) at ip:port with its own log dir."""

    def __init__(self, ip, port, mode, log_prefix):
        self.ip = ip
        self.port = port
        self.mode = mode
        # Creating the instance eagerly creates the log directory on disk.
        self.path = self.get_log_path(log_prefix)

    def get_log_path(self, log_prefix):
        """Create and return a timestamped log directory for this daemon."""
        stamp = time.strftime('%Y%m%d%H%M%S')
        path = '{pre}/tera/{ip}:{port}-{mode}-{time}'.format(
            pre=log_prefix, ip=self.ip, port=self.port, mode=self.mode, time=stamp)
        os.makedirs(path)
        return path

    def to_string(self):
        """One-line human-readable summary of this daemon."""
        return 'tera\t{ip}:{port}\t{mode}\tlog:{log}'.format(
            ip=self.ip, port=self.port, mode=self.mode, log=self.path)

    def to_cmd(self, docker, zk):
        """Build the `docker run` command that launches this daemon on its host."""
        template = ('docker run -t -d -v {dir}:/opt/share -p {port}:{port} '
                    '--net=host {docker} /usr/bin/python /opt/tera_setup.py '
                    '--zk {zk} --port {port} --mode {mode}')
        return template.format(dir=self.path, port=self.port, zk=zk,
                               docker=docker, mode=self.mode)
return cmd
25+
26+
class TeraCluster():
    """Plan of tera daemons: tabletnodes round-robin over ip_list, then one master."""

    def __init__(self, ip_list, num_of_tera, log_prefix):
        self.ip_list = ip_list
        self.ip_index = 0      # next round-robin slot for a tabletnode
        self.port = 2200       # each tabletnode gets the next port
        self.num_of_tera = num_of_tera
        self.log_prefix = log_prefix
        self.cluster = []      # Tera instances, tabletnodes first

    def add_tera(self):
        """Append one tabletnode, wrapping around the ip list as needed."""
        host = self.ip_list[self.ip_index % len(self.ip_list)]
        node = Tera(host, str(self.port), 'tabletnode', self.log_prefix)
        self.cluster.append(node)
        self.ip_index += 1
        self.port += 1

    def populate_tera_cluster(self):
        """Build num_of_tera tabletnodes, then the master on ip_list[0]:1100."""
        for _ in range(self.num_of_tera):
            self.add_tera()
        self.cluster.append(Tera(self.ip_list[0], '1100', 'master', self.log_prefix))

0 commit comments

Comments
 (0)