抱歉,您的浏览器无法访问本站
本页面需要浏览器支持(启用)JavaScript
了解详情 >

[toc]

sge高性能集群的搭建与使用

集群环境的准备

Node1(master) CentOS7.4 iptables/selinux(off) IP:10.180.66.11 hostname:node1 ali yum源
Node2(slave) CentOS7.4 iptables/selinux(off) IP:10.180.66.12 hostname:node2 ali yum源
Node3(slave) CentOS7.4 iptables/selinux(off) IP:10.180.66.13 hostname:node3 ali yum源
Node4(slave) CentOS7.4 iptables/selinux(off) IP:10.180.66.14 hostname:node4 ali yum源
Node5(slave) CentOS7.4 iptables/selinux(off) IP:10.180.66.15 hostname:node5 ali yum源

master 节点安装

安装相关依赖包

# yum -y install jemalloc-devel openssl-devel ncurses-devel pam-devel libXmu-devel hwloc-devel hwloc hwloc-libs java-devel javacc ant-junit libdb-devel motif-devel csh ksh xterm db4-utils perl-XML-Simple perl-Env xorg-x11-fonts-ISO8859-1-100dpi xorg-x11-fonts-ISO8859-1-75dpi

新建sge管理员用户

#  groupadd -g 490 sgeadmin
# useradd -u 495 -g 490 -r -m  -d /home/sgeadmin -s /bin/bash -c "SGE Admin" sgeadmin
# sed -i '/^%wheel/a\%sgeadmin       ALL=(ALL)       NOPASSWD: ALL' /etc/sudoers

安装sge

sge 链接 密码:c7hy

# cd /usr/local/src/
# tar -xvf ge2011.11.tar.gz
# mkdir -pv /data
# cp -a ge2011.11 /data/sge
# chown sgeadmin.sgeadmin /data/sge

qmaster 安装自动应答脚本,依赖软件包 expect,所有节点都需要先安装(# yum -y install expect)

# cd /data/sge/
# vim master.sh
#!/bin/bash
# Unattended driver for the interactive ./install_qmaster installer.
# Run it from the directory that contains install_qmaster: the installer is
# spawned through a relative path.

# NOTE(review): this variable is not referenced anywhere else in the script.
user="sgeadmin"

# The here-document below is an Expect (Tcl) program. It answers 50 installer
# prompts in order; before each reply it waits for output matching "*>>".
#   prompt 2          -> "y" + newline
#   prompt 19         -> "n" + newline
#   every other one   -> a bare newline (presumably accepts the default)
# The delimiter is quoted so bash passes "$step" and "\n" through untouched
# and Tcl interprets them.
/usr/bin/expect <<-'EOF'
spawn ./install_qmaster
for {set step 1} {$step <= 50} {incr step} {
    expect "*>>"
    switch -- $step {
        2       { send "y\n" }
        19      { send "n\n" }
        default { send "\n" }
    }
}
expect eof
EOF
# sh  master.sh

修改主节点环境变量

# export SGE_ROOT=/data/sge
# echo 'export SGE_ROOT=/data/sge' >> ~/.bashrc
# echo 'PATH=$PATH:/data/sge/bin/linux-x64/:/data/sge/bin/' >> ~/.bashrc
# cp /data/sge/default/common/settings.sh  /etc/profile.d/
# source /etc/profile.d/settings.sh
# source  /etc/profile

添加节点(将各节点注册为管理主机)

# qconf -ah node1
# qconf -ah node2
# qconf -ah node3
# qconf -ah node4
# qconf -ah node5

master 服务器搭建 nfs 服务

所有节点都需要安装

# yum -y install nfs-utils

master节点操作

# vim /etc/exports
/data/sge 10.180.66.0/24(rw,sync)
# systemctl restart nfs

slave 节点挂载

(node2,node3,node4,node5)执行

# mkdir  /data/sge -pv
# mount -t nfs 10.180.66.11:/data/sge /data/sge/
# chown 495:490 /data/

slave 服务器安装sgeexecd

(node2,node3,node4,node5) 执行

# yum -y install hwloc-devel
# groupadd -g 490 sgeadmin
# useradd -u 495 -g 490 -r -m  -d /home/sgeadmin -s /bin/bash -c "SGE Admin" sgeadmin
# sed -i '/^%wheel/a\%sgeadmin       ALL=(ALL)       NOPASSWD: ALL' /etc/sudoers

生效环境变量

# echo 'export SGE_ROOT=/data/sge' >> ~/.bashrc
# echo 'PATH=$PATH:/data/sge/bin/linux-x64/:/data/sge/bin/' >> ~/.bashrc
# echo 'export SGE_CELL=default' >> ~/.bashrc
# cp /data/sge/default/common/settings.sh /etc/profile.d/ -a
# source ~/.bashrc
# source /etc/profile

进行安装:所有节点都在 /data/sge 目录下创建并执行此脚本(脚本通过相对路径 ./install_execd 调用安装程序)

# vim slave.sh
# cat slave.sh
#!/bin/bash
# Unattended driver for the interactive ./install_execd installer.
# Run it from the directory that contains install_execd: the installer is
# spawned through a relative path.

# NOTE(review): this variable is not referenced anywhere else in the script.
user="sgeadmin"

# The here-document below is an Expect (Tcl) program. It answers 17 installer
# prompts; before each reply it waits for output matching "*>>" and then
# sends a bare newline (presumably accepts the default).
# The delimiter is quoted so bash passes "$remaining" and "\n" through
# untouched and Tcl interprets them.
/usr/bin/expect <<-'EOF'
spawn ./install_execd
set remaining 17
while {$remaining > 0} {
    expect "*>>"
    send "\n"
    incr remaining -1
}
expect eof
EOF
# sh slave.sh

完成集群搭建

# qhost
HOSTNAME                ARCH         NCPU  LOAD  MEMTOT  MEMUSE  SWAPTO  SWAPUS
-------------------------------------------------------------------------------
global                  -               -     -       -       -       -       -
node1                   linux-x64       1  0.01  968.3M  193.0M    2.0G   64.0K
node2                   linux-x64       1  0.01  976.3M  151.0M    2.0G     0.0
node3                   linux-x64       1  0.02  978.3M  152.2M    2.0G   84.0K
node4                   linux-x64       1  0.02  976.3M  155.4M    2.0G     0.0
node5                   linux-x64       1  0.01  978.3M  148.5M    2.0G   84.0K

sge集群的使用

评论