[toc]

sge高性能集群的搭建与使用

集群环境的准备

Node1(master) CentOS7.4 iptables/selinux(off) IP:10.180.66.11 hostname:node1 ali yum源
Node2(slave) CentOS7.4 iptables/selinux(off) IP:10.180.66.12 hostname:node2 ali yum源
Node3(slave) CentOS7.4 iptables/selinux(off) IP:10.180.66.13 hostname:node3 ali yum源
Node4(slave) CentOS7.4 iptables/selinux(off) IP:10.180.66.14 hostname:node4 ali yum源
Node5(slave) CentOS7.4 iptables/selinux(off) IP:10.180.66.15 hostname:node5 ali yum源

master 节点安装

安装相关依赖包

# yum -y install jemalloc-devel openssl-devel ncurses-devel pam-devel libXmu-devel hwloc-devel hwloc hwloc-libs java-devel javacc ant-junit libdb-devel motif-devel csh ksh xterm db4-utils perl-XML-Simple perl-Env xorg-x11-fonts-ISO8859-1-100dpi xorg-x11-fonts-ISO8859-1-75dpi

新建sge管理员用户

# groupadd -g 490 sgeadmin
# useradd -u 495 -g 490 -r -m -d /home/sgeadmin -s /bin/bash -c "SGE Admin" sgeadmin
# sed -i '/^%wheel/a\%sgeadmin ALL=(ALL) NOPASSWD: ALL' /etc/sudoers

安装sge

sge 链接 密码:c7hy

# cd /usr/local/src/
# tar -xvf ge2011.11.tar.gz
# mkdir -pv /data
# cp -a ge2011.11 /data/sge
# chown sgeadmin.sgeadmin /data/sge

qmaster 安装自动应答脚本依赖软件包 expect,所有节点都需要先安装(yum -y install expect)。

# cd /data/sge/
# vim master.sh
#!/bin/bash
# master.sh - drive SGE's interactive ./install_qmaster with expect so that
# every ">>" prompt is answered automatically.
#
# Run it from the directory that contains install_qmaster; it requires
# /usr/bin/expect.
#
# Answer sequence, one answer per ">>" prompt (50 prompts in total):
#   prompt  1      Enter (accept the default)
#   prompt  2      y
#   prompts 3-18   Enter
#   prompt  19     n
#   prompts 20-50  Enter
# NOTE(review): which question each prompt corresponds to depends on the
# installer version - confirm against an interactive run before reuse.

# The heredoc delimiter is quoted so bash passes the Tcl code through
# verbatim instead of trying to expand $answer itself.
/usr/bin/expect <<'EOF'
spawn ./install_qmaster

# Ordered list of replies; an empty string means "just press Enter".
set answers [concat \
    [list "" "y"] \
    [lrepeat 16 ""] \
    [list "n"] \
    [lrepeat 31 ""]]

foreach answer $answers {
    expect "*>>"
    send "$answer\n"
}

# Let the installer run to completion before expect exits.
expect eof
EOF
# sh master.sh

修改主节点环境变量

# export SGE_ROOT=/data/sge
# echo 'export SGE_ROOT=/data/sge' >> ~/.bashrc
# echo 'PATH=$PATH:/data/sge/bin/linux-x64/:/data/sge/bin/' >> ~/.bashrc
# cp /data/sge/default/common/settings.sh /etc/profile.d/
# source /etc/profile.d/settings.sh
# source /etc/profile

添加节点

# qconf -ah node1
# qconf -ah node2
# qconf -ah node3
# qconf -ah node4
# qconf -ah node5

master 服务器搭建 nfs 服务

所有节点都需要安装

# yum -y install nfs-utils

master节点操作

# vim /etc/exports
/data/sge 10.180.66.0/24(rw,sync)
# systemctl restart nfs

slave 节点挂载

(node2,node3,node4,node5)执行

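注意:执行 chown 之前,该节点上必须已经存在 sgeadmin 用户和组(groupadd -g 490 sgeadmin;useradd -u 495 -g 490 -r -m -d /home/sgeadmin -s /bin/bash -c "SGE Admin" sgeadmin),否则 chown 会报 invalid user 错误。
# mkdir -pv /data/sge
# mount -t nfs 10.180.66.11:/data/sge /data/sge/
# chown sgeadmin.sgeadmin /data/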

slave 服务器安装sgeexecd

(node2,node3,node4,node5) 执行

# yum -y install hwloc-devel
# groupadd -g 490 sgeadmin
# useradd -u 495 -g 490 -r -m -d /home/sgeadmin -s /bin/bash -c "SGE Admin" sgeadmin
# sed -i '/^%wheel/a\%sgeadmin ALL=(ALL) NOPASSWD: ALL' /etc/sudoers

生效环境变量

# echo 'export SGE_ROOT=/data/sge' >> ~/.bashrc
# echo 'PATH=$PATH:/data/sge/bin/linux-x64/:/data/sge/bin/' >> ~/.bashrc
# echo 'export SGE_CELL=default' >> ~/.bashrc
# cp /data/sge/default/common/settings.sh /etc/profile.d/ -a
# source ~/.bashrc
# source /etc/profile

进行安装,所有节点都执行此脚本

# cd /data/sge/
# vim slave.sh
# cat slave.sh
#!/bin/bash
# slave.sh - drive SGE's interactive ./install_execd with expect, accepting
# the default (Enter) at each of its 17 ">>" prompts.
#
# Run it from the directory that contains install_execd; it requires
# /usr/bin/expect.
# NOTE(review): the prompt count depends on the installer version - confirm
# against an interactive run before reuse.

# The heredoc delimiter is quoted so bash passes the Tcl code through
# verbatim instead of trying to expand $i itself.
/usr/bin/expect <<'EOF'
spawn ./install_execd

# Press Enter at every prompt.
for {set i 0} {$i < 17} {incr i} {
    expect "*>>"
    send "\n"
}

# Let the installer run to completion before expect exits.
expect eof
EOF
# sh slave.sh

完成集群搭建

# qhost
HOSTNAME   ARCH        NCPU  LOAD  MEMTOT  MEMUSE  SWAPTO  SWAPUS
-------------------------------------------------------------------------------
global     -           -     -     -       -       -       -
node1      linux-x64   1     0.01  968.3M  193.0M  2.0G    64.0K
node2      linux-x64   1     0.01  976.3M  151.0M  2.0G    0.0
node3      linux-x64   1     0.02  978.3M  152.2M  2.0G    84.0K
node4      linux-x64   1     0.02  976.3M  155.4M  2.0G    0.0
node5      linux-x64   1     0.01  978.3M  148.5M  2.0G    84.0K

sge集群的使用

评论