30 from typing
import List, Dict, Any
31 from cortx.utils.conf_store
import Conf
32 from cortx.utils.cortx
import Const
34 MOTR_SERVER_SCRIPT_PATH =
"/usr/libexec/cortx-motr/motr-start" 35 MOTR_MKFS_SCRIPT_PATH =
"/usr/libexec/cortx-motr/motr-mkfs" 36 MOTR_FSM_SCRIPT_PATH =
"/usr/libexec/cortx-motr/motr-free-space-monitor" 37 MOTR_CONFIG_SCRIPT =
"/opt/seagate/cortx/motr/libexec/motr_cfg.sh" 38 LNET_CONF_FILE =
"/etc/modprobe.d/lnet.conf" 39 LIBFAB_CONF_FILE =
"/etc/libfab.conf" 40 SYS_CLASS_NET_DIR =
"/sys/class/net/" 41 MOTR_SYS_CFG =
"/etc/sysconfig/motr" 42 MOTR_WORKLOAD_DIR =
"/opt/seagate/cortx/motr/workload" 44 LOGFILE =
"/var/log/seagate/motr/mini_provisioner" 45 LOGDIR =
"/var/log/seagate/motr" 46 LOGGER =
"mini_provisioner" 47 IVT_DIR =
"/var/log/seagate/motr/ivt" 48 MOTR_LOG_DIR =
"/var/motr" 51 MOTR_LOG_DIRS = [LOGDIR, MOTR_LOG_DIR]
52 BE_LOG_SZ = 4*1024*1024*1024
53 BE_SEG0_SZ = 128 * 1024 *1024
55 MACHINE_ID_FILE =
"/etc/machine-id" 56 TEMP_FID_FILE=
"/opt/seagate/cortx/motr/conf/service_fid.yaml" 58 MEM_THRESHOLD = 4*1024*1024*1024
59 CVG_COUNT_KEY =
"num_cvg" 62 """ Generic Exception with error code and output """ 66 self.
_desc = message % (args)
69 return f
"error[{self._rc}]: {self._desc}" 72 verbose = False, retries = 1, stdin = None, logging=False):
73 ps = subprocess.Popen(cmd, stdin=subprocess.PIPE,
74 stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
77 ps.stdin.write(stdin.encode())
78 stdout, stderr = ps.communicate(timeout=timeout_secs);
79 stdout = str(stdout,
'utf-8')
82 if ps.returncode != 0:
83 raise MotrError(ps.returncode, f
"\"{cmd}\" command execution failed")
89 logger = logging.getLogger(
"console")
90 if not os.path.exists(LOGDIR):
92 os.makedirs(LOGDIR, exist_ok=
True)
93 with open(f
'{self.logfile}',
'w'):
pass 95 raise MotrError(errno.EINVAL, f
"{self.logfile} creation failed\n")
97 if not os.path.exists(self.logfile):
99 with open(f
'{self.logfile}',
'w'):
pass 101 raise MotrError(errno.EINVAL, f
"{self.logfile} creation failed\n")
102 logger.setLevel(logging.DEBUG)
104 fh = logging.FileHandler(self.logfile)
105 fh.setLevel(logging.DEBUG)
107 ch = logging.StreamHandler()
108 ch.setLevel(logging.INFO)
109 formatter = logging.Formatter(
'%(asctime)s - %(message)s')
110 fh.setFormatter(formatter)
111 ch.setFormatter(formatter)
112 logger.addHandler(fh)
113 logger.addHandler(ch)
114 logger.info(f
"executing command {command}")
116 process = subprocess.Popen(command, stdin=subprocess.PIPE,
117 stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
119 except Exception
as e:
120 logger.error(
"ERROR {} when running {} with exception {}".
format(sys.exc_info()[1],
124 stdout = process.stdout.readline()
125 if process.poll()
is not None:
128 logger.info(stdout.strip().
decode())
133 def execute_command(self, cmd, timeout_secs = TIMEOUT_SECS, verbose = False,
134 retries = 1, stdin = None, logging=True):
140 for i
in range(retries):
142 self.logger.info(f
"Retry: {i}. Executing cmd: '{cmd}'")
144 ps = subprocess.Popen(cmd, stdin=subprocess.PIPE,
145 stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
148 ps.stdin.write(stdin.encode())
149 stdout, stderr = ps.communicate(timeout=timeout_secs);
150 stdout = str(stdout,
'utf-8')
153 self.logger.info(f
"ret={ps.returncode}\n")
155 if (self._debug
or verbose)
and (logging ==
True):
156 self.logger.debug(f
"[CMD] {cmd}\n")
157 self.logger.debug(f
"[OUT]\n{stdout}\n")
158 self.logger.debug(f
"[RET] {ps.returncode}\n")
159 if ps.returncode == 0:
162 if ps.returncode != 0:
163 raise MotrError(ps.returncode, f
"\"{cmd}\" command execution failed")
164 return stdout, ps.returncode
168 def execute_command_verbose(self, cmd, timeout_secs = TIMEOUT_SECS, verbose = False, set_timeout=True, retry_count = CMD_RETRY_COUNT):
169 self.logger.info(f
"Executing cmd : '{cmd}' \n")
171 if set_timeout ==
False:
175 for cmd_retry_count
in range(retry_count):
176 ps = subprocess.run(cmd, stdin=subprocess.PIPE,
177 stdout=subprocess.PIPE, timeout=timeout_secs,
178 stderr=subprocess.PIPE, shell=
True)
179 self.logger.info(f
"ret={ps.returncode}")
180 self.logger.debug(f
"Executing {cmd_retry_count} time")
181 stdout = ps.stdout.decode(
'utf-8')
182 self.logger.debug(f
"[OUT]{stdout}")
183 self.logger.debug(f
"[ERR]{ps.stderr.decode('utf-8')}")
184 self.logger.debug(f
"[RET] {ps.returncode}")
185 if ps.returncode != 0:
186 time.sleep(cmd_retry_delay)
188 return stdout, ps.returncode
192 for i
in range(retries):
193 self.logger.info(f
"Retry: {i}. Executing cmd : '{cmd}'\n")
194 ps = subprocess.run(
list(cmd.split(
' ')), timeout=timeout_secs)
195 self.logger.info(f
"ret={ps.returncode}\n")
196 if ps.returncode == 0:
202 if not isinstance(var, vtype):
203 raise MotrError(errno.EINVAL, f
"Invalid {msg} type. Expected: {vtype}")
205 raise MotrError(errno.EINVAL, f
"Empty {msg}.")
209 self.machine_id = Conf.machine_id
210 if not os.path.exists(f
"{MACHINE_ID_FILE}"):
212 with open(f
"{MACHINE_ID_FILE}",
"w")
as fp:
213 fp.write(f
"{self.machine_id}\n")
215 op =
execute_command(self, f
"cat {MACHINE_ID_FILE}", logging=
False)[0].strip(
"\n")
216 if op != self.machine_id:
217 raise MotrError(errno.EINVAL,
"machine id does not match")
219 raise MotrError(errno.ENOENT,
"machine id not available in conf")
222 """Get current node name using machine-id.""" 224 machine_id = self.machine_id;
225 server_node = Conf.get(self._index,
'node')[machine_id]
227 raise MotrError(errno.EINVAL, f
"MACHINE_ID {machine_id} does not exist in ConfStore")
234 suffixes = [
'K',
'Ki',
'Kib',
'M',
'Mi',
'Mib',
'G',
'Gi',
'Gib']
236 "K": 1024,
"M": 1024*1024,
"G": 1024*1024*1024,
237 "Ki": 1024,
"Mi": 1024*1024,
"Gi": 1024*1024*1024,
238 "Kib": 1024,
"Mib": 1024*1024,
"Gib": 1024*1024*1024 }
244 num_sz = re.sub(
r'[^0-9]',
'', sz)
245 map_val = sz_map[suffix]
246 ret =
int(num_sz) *
int(map_val)
249 self.logger.
error(f
"Invalid format of mem limit: {sz}\n")
250 self.logger.
error(
"Please use valid format Ex: 1024, 1Ki, 1Mi, 1Gi etc..\n")
255 sevices_limits = Conf.get(self._index,
'cortx>motr>limits')[
'services']
258 self.setup_size =
"small" 262 if service
not in [
"ioservice",
"ios",
"io",
"all",
"confd"]:
263 self.setup_size =
"small" 264 self.logger.info(f
"service is {service}. So seting setup size to {self.setup_size}\n")
270 if service
in [
"io",
"ioservice"]:
274 for arr_elem
in sevices_limits:
276 if arr_elem[
'name'] == svc:
277 min_mem = arr_elem[
'memory'][
'min']
279 if min_mem.isnumeric():
284 self.logger.info(f
"mem limit in config is {min_mem} i.e. {sz}\n")
291 elif sz > MEM_THRESHOLD:
292 self.setup_size =
"large" 293 self.logger.info(f
"setup_size set to {self.setup_size}\n")
297 self.setup_size =
"small" 298 self.logger.info(f
"setup_size set to {self.setup_size}\n")
302 raise MotrError(errno.EINVAL, f
"Setup size is not set properly for service {service}." 303 f
"Please update valid mem limits for {service}")
305 self.logger.info(f
"service={service} and setup_size={self.setup_size}\n")
311 val = Conf.get(self._index, key)
313 raise MotrError(errno.EINVAL,
"{key} does not exist in ConfStore")
319 """Get logical_node_class.""" 321 logical_node_class = self.cluster[
'logical_node_class']
323 raise MotrError(errno.EINVAL, f
"{logical_node_class} does not exist in ConfStore")
324 check_type(logical_node_class, list,
"logical_node_class")
325 return logical_node_class
328 storage = self.node[
'storage']
333 for service
in services:
334 self.logger.info(f
"Restarting {service} service\n")
335 cmd = f
"systemctl stop {service}" 337 cmd = f
"systemctl start {service}" 339 cmd = f
"systemctl status {service}" 343 if not os.path.exists(file):
344 raise MotrError(errno.ENOENT, f
"{file} does not exist")
349 if not os.path.exists(file):
350 raise MotrError(errno.ENOENT, f
"{file} does not exist")
355 if not os.path.exists(entry):
356 cmd = f
"mkdir -p {entry}" 361 node_type = self.server_node[
'type']
363 raise MotrError(errno.EINVAL,
"node_type not found")
366 if node_type ==
"HW":
373 1. check m0tr.ko exists in current kernel modules 374 2. check /etc/sysconfig/motr 379 kernel_ver = op.replace(
'\n',
'')
382 kernel_module = f
"/lib/modules/{kernel_ver}/kernel/fs/motr/m0tr.ko" 383 self.logger.info(f
"Checking for {kernel_module}\n")
386 self.logger.info(f
"Checking for {MOTR_SYS_CFG}\n")
392 with open(f
"{MOTR_SYS_CFG}",
"r") as fp: 395 num_lines = len(lines)
396 self.logger.info(f
"Before update, in file {fname}, num_lines={num_lines}\n")
399 for (k, v)
in kv_list:
401 for lno
in range(num_lines):
403 if lines[lno].startswith(f
"{k}="):
404 lines[lno] = f
"{k}={v}\n" 409 lines.append(f
"{k}={v}\n")
412 num_lines = len(lines)
413 self.logger.info(f
"After update, in file {fname}, num_lines={num_lines}\n")
416 with open(f
"{MOTR_SYS_CFG}",
"w+")
as fp:
421 local_path = self.local_path
422 log_path = self.log_path
423 machine_id = self.machine_id
425 MOTR_M0D_DATA_DIR = f
"{local_path}/motr" 426 if not os.path.exists(MOTR_M0D_DATA_DIR):
428 MOTR_LOCAL_SYSCONFIG_DIR = f
"{MOTR_M0D_DATA_DIR}/sysconfig" 429 if not os.path.exists(MOTR_LOCAL_SYSCONFIG_DIR):
432 MOTR_M0D_CONF_DIR = f
"{MOTR_LOCAL_SYSCONFIG_DIR}/{machine_id}" 433 MOTR_M0D_CONF_XC = f
"{MOTR_M0D_CONF_DIR}/confd.xc" 434 MOTR_M0D_ADDB_STOB_DIR = f
"{log_path}/motr/{machine_id}/addb" 435 MOTR_M0D_TRACE_DIR = f
"{log_path}/motr/{machine_id}/trace" 437 dirs = [MOTR_M0D_DATA_DIR, MOTR_M0D_ADDB_STOB_DIR, MOTR_M0D_TRACE_DIR, MOTR_M0D_CONF_DIR]
441 config_kvs = [(
"MOTR_M0D_CONF_DIR", f
"{MOTR_M0D_CONF_DIR}"),
442 (
"MOTR_M0D_DATA_DIR", f
"{MOTR_M0D_DATA_DIR}"),
443 (
"MOTR_M0D_CONF_XC", f
"{MOTR_M0D_CONF_XC}"),
444 (
"MOTR_M0D_ADDB_STOB_DIR", f
"{MOTR_M0D_ADDB_STOB_DIR}"),
445 (
"MOTR_M0D_TRACE_DIR", f
"{MOTR_M0D_TRACE_DIR}")]
450 cmd = f
"cp {MOTR_SYS_CFG} {MOTR_M0D_CONF_DIR}" 460 cvg_count = node_info[
'storage'][CVG_COUNT_KEY]
461 cvg = node_info[
'storage'][
'cvg']
462 for i
in range(cvg_count):
464 if temp_cvg[
'devices'][
'metadata']:
465 md_disks_lists.append(temp_cvg[
'devices'][
'metadata'])
466 self.logger.info(f
"md_disks lists on node = {md_disks_lists}\n")
467 return md_disks_lists
477 md_len_outer = len(md_lists)
478 for i
in range(md_len_outer):
479 md_len_innner = len(md_lists[i])
480 for j
in range(md_len_innner):
481 md_disks.append(md_lists[i][j])
482 self.logger.info(f
"md_disks on node = {md_disks}\n")
487 ncvgs = len(md_disks)
488 for i
in range(ncvgs):
491 for j
in range(len_md):
493 self.logger.info(f
"setting key server>{machine_id}>cvg[{i}]>m0d[{j}]>md_seg1" 494 f
" with value {md_disk} in {url}")
495 Conf.set(index, f
"server>{machine_id}>cvg[{i}]>m0d[{j}]>md_seg1",f
"{md_disk}")
500 machines: Dict[str,Any] = self.nodes
501 storage_nodes: List[str] = []
502 services = Conf.search(self._index,
'node',
'services', Const.SERVICE_MOTR_IO.value)
503 for machine_id
in machines.keys():
504 result = [svc
for svc
in services
if machine_id
in svc]
507 storage_nodes.append(machine_id)
512 for machine_id
in self.storage_nodes:
513 node_info = nodes.get(machine_id)
515 update_to_file(self, self._index_motr_hare, self._url_motr_hare, machine_id, md_disks_lists)
519 raise MotrError(errno.EINVAL,
"libfabric is not up.")
521 if self.machine_id
not in self.storage_nodes:
529 if self.setup_size ==
"large":
530 cmd =
"{} {}".
format(MOTR_CONFIG_SCRIPT,
" -c")
546 transport_type = self.server_node[
'network'][
'data'][
'transport_type']
548 raise MotrError(errno.EINVAL,
"transport_type not found")
550 check_type(transport_type, str,
"transport_type")
552 if transport_type ==
"lnet":
554 raise MotrError(errno.EINVAL,
"lent is not up.")
555 elif transport_type ==
"libfabric":
557 raise MotrError(errno.EINVAL,
"libfabric is not up.")
561 self.logger.info(f
"Executing {MOTR_CONFIG_SCRIPT}")
565 """Wrapper function to detect lnet/libfabric transport.""" 567 transport_type = Conf.get(self._index,
'cortx>motr>transport_type')
569 raise MotrError(errno.EINVAL,
"transport_type not found")
571 check_type(transport_type, str,
"transport_type")
573 if transport_type ==
"lnet":
575 elif transport_type ==
"libfab":
578 raise MotrError(errno.EINVAL,
"Unknown data transport type\n")
582 Get iface and /etc/modprobe.d/lnet.conf params from 583 conf store. Configure lnet. Start lnet service 586 iface = self.server_node[
'network'][
'data'][
'private_interfaces'][0]
588 raise MotrError(errno.EINVAL,
"private_interfaces[0] not found\n")
590 self.logger.info(f
"Validate private_interfaces[0]: {iface}\n")
591 cmd = f
"ip addr show {iface}" 595 iface_type = self.server_node[
'network'][
'data'][
'interface_type']
597 raise MotrError(errno.EINVAL,
"interface_type not found\n")
599 lnet_config = (f
"options lnet networks={iface_type}({iface}) " 600 f
"config_on_load=1 lnet_peer_discovery_disabled=1\n")
601 self.logger.info(f
"lnet config: {lnet_config}")
603 with open(LNET_CONF_FILE,
"w")
as fp:
604 fp.write(lnet_config)
610 self.logger.info(
"Doing ping to nids\n")
613 raise MotrError(errno.EINVAL,
"lent self ping failed\n")
634 1. check swap entry found in /etc/fstab 635 2. if found, do nothing 636 3. if not found, add swap entry in /etc/fstab 638 swap_entry = f
"{dev_name} swap swap defaults 0 0\n" 643 with open(FSTAB,
"r") as fp: 644 lines = fp.readlines() 646 ret = line.find(dev_name)
649 self.logger.info(f
"Swap entry found: {swap_entry}\n")
652 raise MotrError(errno.EINVAL, f
"Cant read f{FSTAB}\n")
656 with open(FSTAB,
"a")
as fp:
658 self.logger.info(f
"Swap entry added: {swap_entry}\n")
660 raise MotrError(errno.EINVAL, f
"Cant append f{FSTAB}\n")
667 cmd = f
"sed -i '/{vg_name}/d' {FSTAB}" 673 self.logger.info(f
"Make swap of {swap_dev}\n")
674 cmd = f
"mkswap -f {swap_dev}" 677 self.logger.info(f
"Test {swap_dev} swap device\n")
678 cmd = f
"test -e {swap_dev}" 681 self.logger.info(f
"Adding {swap_dev} swap device to {FSTAB}\n")
687 1. validate /etc/fstab 688 2. validate metadata device file 689 3. check requested volume group exist 690 4. if exist, remove volume group and swap related with it. 691 because if user request same volume group with different device. 692 5. If not exist, create volume group and lvm 693 6. create swap from lvm 696 cmd = f
"fdisk -l {metadata_dev}2" 701 metadata_dev = f
"{metadata_dev}2" 704 cmd = f
"pvdisplay {metadata_dev}" 709 self.logger.warning(f
"Volumes are already created on {metadata_dev}\n{out[0]}\n")
713 node_name = self.server_node[
'name']
714 vg_name = f
"vg_{node_name}_md{index}" 715 lv_swap_name = f
"lv_main_swap{index}" 716 lv_md_name = f
"lv_raw_md{index}" 717 swap_dev = f
"/dev/{vg_name}/{lv_swap_name}" 719 self.logger.info(f
"metadata device: {metadata_dev}\n")
721 self.logger.info(f
"Checking for {FSTAB}\n")
724 self.logger.info(f
"Checking for {metadata_dev}\n")
727 cmd = f
"fdisk -l {metadata_dev}" 731 cmd = f
"vgs {vg_name}" 736 self.logger.info(f
"Removing {vg_name} volume group\n")
740 cmd = f
"vgchange -an {vg_name}" 743 cmd = f
"vgremove {vg_name} -ff" 746 self.logger.info(f
"Creating physical volume from {metadata_dev}\n")
747 cmd = f
"pvcreate {metadata_dev} --yes" 750 self.logger.info(f
"Creating {vg_name} volume group from {metadata_dev}\n")
751 cmd = f
"vgcreate {vg_name} {metadata_dev}" 754 self.logger.info(f
"Adding {node_name} tag to {vg_name} volume group\n")
755 cmd = f
"vgchange --addtag {node_name} {vg_name}" 758 self.logger.info(
"Scanning volume group\n")
759 cmd =
"vgscan --cache" 762 self.logger.info(f
"Creating {lv_swap_name} lvm from {vg_name}\n")
763 cmd = f
"lvcreate -n {lv_swap_name} {vg_name} -l 51%VG --yes" 766 self.logger.info(f
"Creating {lv_md_name} lvm from {vg_name}\n")
767 cmd = f
"lvcreate -n {lv_md_name} {vg_name} -l 100%FREE --yes" 770 swap_check_cmd =
"free -m | grep Swap | awk '{print $2}'" 772 allocated_swap_size_before =
int(
float(free_swap_op[0].strip(
' \n')))
775 allocated_swap_size_after =
int(
float(allocated_swap_op[0].strip(
' \n')))
776 if allocated_swap_size_before >= allocated_swap_size_after:
777 raise MotrError(errno.EINVAL, f
"swap size before allocation" 778 f
"({allocated_swap_size_before}M) must be less than " 779 f
"swap size after allocation({allocated_swap_size_after}M)\n")
781 self.logger.info(f
"swap size before allocation ={allocated_swap_size_before}M\n")
782 self.logger.info(f
"swap_size after allocation ={allocated_swap_size_after}M\n")
786 cmd = f
"lsblk --noheadings --bytes {lv_path} | " "awk '{print $4}'" 788 lv_size = res[0].rstrip(
"\n")
789 lv_size =
int(lv_size)
790 self.logger.info(f
"{lv_path} size = {lv_size} \n")
791 if lvm_min_size
is None:
792 lvm_min_size = lv_size
794 lvm_min_size =
min(lv_size, lvm_min_size)
799 cvg_cnt = self.server_node[
'storage'][CVG_COUNT_KEY]
801 raise MotrError(errno.EINVAL,
"cvg_cnt not found\n")
806 cvg = self.server_node[
'storage'][
'cvg']
808 raise MotrError(errno.EINVAL,
"cvg not found\n")
815 raise MotrError(errno.EINVAL,
"cvg is empty\n")
822 for key, val
in elem.items():
829 if key==
"metadata_devices":
834 check_type(val[i], str, f
"metadata_devices[{i}]")
835 if key==
"data_devices":
844 cvg = self.storage[
'cvg']
847 raise MotrError(errno.EINVAL,
"cvg not found\n")
853 return (
int(val/size) * size)
861 md_len = len(md_disks)
862 for i
in range(md_len):
866 self.logger.info(f
"setting MOTR_M0D_IOS_BESEG_SIZE to {lvm_min_size}\n")
867 cmd = f
'sed -i "/MOTR_M0D_IOS_BESEG_SIZE/s/.*/MOTR_M0D_IOS_BESEG_SIZE={lvm_min_size}/" {MOTR_SYS_CFG}' 877 for i
in range(
int(cvg_cnt)):
880 metadata_devices = cvg_item[
"metadata_devices"]
882 raise MotrError(errno.EINVAL,
"metadata devices not found\n")
883 check_type(metadata_devices, list,
"metadata_devices")
884 self.logger.info(f
"\nlvm metadata_devices: {metadata_devices}\n\n")
886 for device
in metadata_devices:
891 lv_md_name = f
"lv_raw_md{dev_count}" 892 cmd = f
"lvs -o lv_path | grep {lv_md_name}" 894 lv_path = res[0].rstrip(
"\n")
897 self.logger.info(f
"setting MOTR_M0D_IOS_BESEG_SIZE to {lvm_min_size}\n")
898 cmd = f
'sed -i "/MOTR_M0D_IOS_BESEG_SIZE/s/.*/MOTR_M0D_IOS_BESEG_SIZE={lvm_min_size}/" {MOTR_SYS_CFG}' 902 """Get lnet interface.""" 905 with open(LNET_CONF_FILE,
'r') as f: 907 for line
in f.readlines():
908 if len(line.strip()) <= 0:
continue 909 tokens = re.split(
r'\W+', line)
911 lnet_xface = tokens[4]
914 raise MotrError(errno.EINVAL, f
"Cant parse {LNET_CONF_FILE}")
916 if lnet_xface ==
None:
918 f
"Cant obtain iface details from {LNET_CONF_FILE}")
919 if lnet_xface
not in os.listdir(SYS_CLASS_NET_DIR):
921 f
"Invalid iface {lnet_xface} in lnet.conf")
925 """Check rpm packages.""" 928 cmd = f
"rpm -q {pkg}" 937 self.logger.info(f
"rpm found: {pkg}\n")
939 raise MotrError(errno.ENOENT, f
"Missing rpm: {pkg}")
942 """Get lnet nids of all available nodes in cluster.""" 944 myhostname = self.server_node[
"hostname"]
947 if (myhostname == node):
948 cmd =
"lctl list_nids" 954 nids.append(op[0].rstrip(
"\n"))
959 nodes_info = Conf.get(self._index,
'server_node')
961 for value
in nodes_info.values():
962 nodes.append(value[
"hostname"])
966 """Lnet lctl ping on all available nodes in cluster.""" 970 self.logger.info(
"lnet pinging on all nodes in cluster\n")
972 cmd = f
"lctl ping {nid}" 973 self.logger.info(f
"lctl ping on: {nid}\n")
979 2. validate lnet interface which was configured in init 980 3. ping on lnet interface 981 4. lctl ping on all nodes in cluster. motr_setup post_install and prepare 982 MUST be performed on all nodes before executing this step. 984 self.logger.info(
"post_install and prepare phases MUST be performed " 985 "on all nodes before executing test phase\n")
986 search_lnet_pkgs = [
"kmod-lustre-client",
"lustre-client"]
990 self.logger.info(f
"lnet interface found: {lnet_xface}\n")
992 cmd = f
"ip addr show {lnet_xface}" 997 ip_addr = ip_addr.split(
"inet ")[1].
split(
"/")[0]
998 self.logger.info(f
"lnet interface ip: {ip_addr}\n")
1000 raise MotrError(errno.EINVAL, f
"Cant parse {lnet_xface} ip addr")
1002 self.logger.info(f
"ping on: {ip_addr}\n")
1003 cmd = f
"ping -c 3 {ip_addr}" 1009 search_libfabric_pkgs = [
"libfabric"]
1015 for i
in range(
int(cvg_cnt)):
1018 metadata_devices = cvg_item[
"metadata_devices"]
1020 raise MotrError(errno.EINVAL,
"metadata devices not found\n")
1021 check_type(metadata_devices, list,
"metadata_devices")
1022 self.logger.info(f
"\nlvm metadata_devices: {metadata_devices}\n\n")
1024 for device
in metadata_devices:
1030 node_name = self.server_node[
'name']
1033 lv_list =
execute_command(self,
"lvdisplay | grep \"LV Path\" | awk \'{ print $3 }\'")[0].
split(
'\n')
1034 lv_list = lv_list[0:len(lv_list)-1]
1038 for i
in range(1, metadata_disks_count+1):
1039 md_lv_path = f
'/dev/vg_{node_name}_md{i}/lv_raw_md{i}' 1040 swap_lv_path = f
'/dev/vg_{node_name}_md{i}/lv_main_swap{i}' 1042 if md_lv_path
in lv_list:
1043 if swap_lv_path
in lv_list:
1046 self.logger.warning(f
"{swap_lv_path} does not exist. Need to create lvm\n")
1049 self.logger.warning(f
"{md_lv_path} does not exist. Need to create lvm\n")
1054 cmd =
'/usr/bin/hctl status' 1055 self.logger.info(f
"Executing cmd : '{cmd}'\n")
1063 cmd = f
'/usr/bin/yum list installed {pkg}' 1066 self.logger.info(f
"{pkg} is installed\n")
1069 self.logger.info(f
"{pkg} is not installed\n")
1073 mix_workload_path = f
"{MOTR_WORKLOAD_DIR}/mix_workload.yaml" 1074 m0worklaod_path = f
"{MOTR_WORKLOAD_DIR}/m0workload" 1075 m0crate_path = f
"{MOTR_WORKLOAD_DIR}/m0crate_workload_batch_1_file1.yaml" 1077 os.path.isfile(m0worklaod_path)
and 1078 os.path.isfile(mix_workload_path)
and 1079 os.path.isfile(m0crate_path)
1081 cmd = f
"{m0worklaod_path} -t {mix_workload_path}" 1083 self.logger.info(f
"{out[0]}\n")
1085 self.logger.
error(
"workload files are missing\n")
1093 logger = logging.getLogger(LOGGER)
1094 if not os.path.exists(LOGDIR):
1096 os.makedirs(LOGDIR, exist_ok=
True)
1097 with open(f
'{self.logfile}',
'w'):
pass 1099 raise MotrError(errno.EINVAL, f
"{self.logfile} creation failed\n")
1101 if not os.path.exists(self.logfile):
1103 with open(f
'{self.logfile}',
'w'):
pass 1105 raise MotrError(errno.EINVAL, f
"{self.logfile} creation failed\n")
1106 logger.setLevel(logging.DEBUG)
1108 fh = logging.FileHandler(self.logfile)
1109 fh.setLevel(logging.DEBUG)
1111 ch = logging.StreamHandler()
1112 ch.setLevel(logging.ERROR)
1113 formatter = logging.Formatter(
'%(asctime)s - %(levelname)s - %(message)s')
1114 fh.setFormatter(formatter)
1115 ch.setFormatter(formatter)
1116 logger.addHandler(fh)
1117 logger.addHandler(ch)
1121 if not os.path.exists(os.path.dirname(log_dir)):
1122 self.logger.warning(f
"{log_dir} does not exist")
1125 if len(patterns) == 0:
1126 self.logger.info(f
"Removing {log_dir}")
1130 for pattern
in patterns:
1137 search_pat =
"{}/{}*".
format(log_dir, pattern)
1138 for dname
in glob.glob(search_pat, recursive=
True):
1139 removed_dirs.append(dname)
1141 if len(removed_dirs) > 0:
1142 self.logger.info(f
"Removed below directories of pattern {pattern} from {log_dir}.\n{removed_dirs}")
1145 for log_dir
in MOTR_LOG_DIRS:
1146 if os.path.exists(log_dir):
1149 self.logger.warning(f
"{log_dir} does not exist")
1150 if os.path.exists(IVT_DIR):
1151 self.logger.info(f
"Removing {IVT_DIR}")
1155 for service
in services:
1156 self.logger.info(f
"Checking status of {service} service\n")
1157 cmd = f
"systemctl status {service}" 1165 self.logger.info(
"Doing ping to nids.\n")
1171 self.logger.info(
"lnet is not up. Restaring lnet.\n")
1173 self.logger.info(
"Doing ping to nids after 5 seconds.\n")
1175 self.logger.warning(
"lnet is up. Doing ping to nids after 5 seconds.\n")
1184 nids.append(op[0].strip(
"\n"))
1185 self.logger.info(f
"nids= {nids}\n")
1187 cmd = f
"lctl ping {nid}" 1188 self.logger.info(f
"lctl ping on: {nid}\n")
1195 hostname = self.server_node[
"hostname"]
1196 nodes_info = Conf.get(self._index,
'server_node')
1199 for value
in nodes_info.values():
1200 host = value[
"hostname"]
1201 cvg_count = value[
"storage"][CVG_COUNT_KEY]
1202 name = value[
"name"]
1203 self.logger.info(f
"update_motr_hare_keys for {host}\n")
1204 for i
in range(
int(cvg_count)):
1206 lv_md_name = f
"lv_raw_md{i + 1}" 1208 if (hostname == value[
"hostname"]):
1209 cmd = (
"lvs -o lv_path")
1211 r = re.compile(f
".*{lv_md_name}")
1214 lv_path = lvm_find[0].strip()
1215 except Exception
as e:
1216 self.logger.info(f
"exception pass {e}\n")
1218 cmd = (f
"ssh {host}" 1219 f
" \"lvs -o lv_path\"")
1220 for retry
in range(1, retry_count):
1221 self.logger.info(f
"Getting LVM data for {host}, attempt: {retry}\n")
1223 r = re.compile(f
".*{lv_md_name}")
1226 lv_path = lvm_find[0].strip()
1227 except Exception
as e:
1228 self.logger.info(f
"exception pass {e}\n")
1230 self.logger.info(f
"found lvm {lv_path} after {retry} count")
1233 time.sleep(retry_delay)
1235 raise MotrError(res[1], f
"[ERR] {lv_md_name} not found on {host}\n")
1236 self.logger.info(f
"setting key server>{name}>cvg[{i}]>m0d[0]>md_seg1" 1237 f
" with value {lv_path} in {self._motr_hare_conf}")
1238 Conf.set(self._index_motr_hare,f
"server>{name}>cvg[{i}]>m0d[0]>md_seg1",f
"{lv_path.strip()}")
1239 Conf.save(self._index_motr_hare)
1241 for value
in nodes_info.values():
1242 if (hostname == value[
"hostname"]):
1245 host = value[
"hostname"]
1246 cmd = (f
"scp {self._motr_hare_conf}" 1247 f
" {host}:{self._motr_hare_conf}")
1255 for i
in range(
int(cvg_cnt)):
1258 metadata_devices = cvg_item[
"metadata_devices"]
1260 raise MotrError(errno.EINVAL,
"metadata devices not found\n")
1261 check_type(metadata_devices, list,
"metadata_devices")
1262 self.logger.info(f
"lvm metadata_devices: {metadata_devices}")
1264 for device
in metadata_devices:
1265 cmd = f
"pvs | grep {device} " "| awk '{print $2}'" 1268 vol_grps.append(ret[0].strip())
1272 self.logger.info(
"Removing cortx lvms")
1274 if (len(vol_grps) == 0):
1275 self.logger.info(
"No cortx volume groups (e.g. vg_srvnode-1_md1) are found \n")
1277 self.logger.info(f
"Volume groups found: {vol_grps}")
1278 self.logger.info(
"Executing swapoff -a")
1280 self.logger.info(f
"Removing cortx LVM entries from {FSTAB}")
1283 cmd = f
"pvs|grep {vg} |" "awk '{print $1}'" 1285 cmd = f
"lvs|grep {vg} |" "awk '{print $1}'" 1290 lv_path = f
"/dev/{vg}/{lv}" 1291 self.logger.info(f
"Executing lvchange -an {lv_path}")
1293 self.logger.info(f
"Executing lvremove {lv_path}")
1295 if os.path.exists(lv_path):
1296 self.logger.info(
"Removing dmsetup entries using cmd " 1297 f
"\'dmsetup remove {lv_path}\'")
1301 self.logger.info(f
"Executing vgchange -an {vg}")
1303 self.logger.info(f
"Executing vgremove {vg}")
1308 self.logger.info(f
"Executing pvremove {pv}")
1310 self.logger.info(f
"Executing wipefs -a {pv}")
1318 cmd =
"ls -l /dev/vg_srvnode*/* | awk '{print $9}'" 1320 for lv_path
in lv_paths:
1324 if os.path.exists(lv_path):
1325 self.logger.info(f
"dmsetup remove {lv_path}")
1329 cmd = f
"fdisk -l {device} |" f
"grep {device}:" "| awk '{print $5}'" 1331 return ret[0].strip()
1334 fp = open(file,
"r") 1335 file_data = fp.read() 1337 for line
in file_data.splitlines():
1338 if line.startswith(
'#')
or (len(line.strip()) == 0):
1340 entry = line.split(
'=',1)
1341 config_dict[entry[0]] = entry[1]
1348 for i
in range(
int(cvg_cnt)):
1351 metadata_devices = cvg_item[
"metadata_devices"]
1353 raise MotrError(errno.EINVAL,
"metadata devices not found\n")
1354 check_type(metadata_devices, list,
"metadata_devices")
1355 self.logger.info(f
"\nlvm metadata_devices: {metadata_devices}\n\n")
1356 for device
in metadata_devices:
1366 fname = os.path.split(device)
1367 cmd = f
"lsblk -o name | grep -c {fname}" 1374 if total_parts == 0:
1375 self.logger.info(f
"No partitions found on {device}")
1377 self.logger.info(f
"No. of partitions={total_parts} on {device}")
1378 for i
in range(
int(total_parts)):
1382 self.logger.
error(f
"Deletion of partition({part_num}) failed on {device}")
1387 cmd = f
"fdisk {device}" 1388 stdin_str = str(
"d\n"+f
"{part_num}"+
"\n" +
"w\n")
1394 len_fids_list = len(fids)
1397 for i
in range(len_fids_list):
1398 if fids[i][
"name"] == service:
1399 fids_list.append(fids[i][
"fid"])
1401 num_fids = len(fids_list)
1408 return fids_list[idx]
1410 self.logger.
error(f
"Invalid index({idx}) of service({service})" 1411 f
"Valid index should be in range [0-{num_fids-1}]." 1415 self.logger.
error(f
"No fids for service({service}). Returning -1.")
1422 hare_lib_path = f
"{self.local_path}/hare/config/{self.machine_id}" 1423 cmd = f
"hctl fetch-fids --conf-dir {hare_lib_path}" 1425 self.logger.info(f
"Available fids:\n{out[0]}\n")
1426 fp = open(TEMP_FID_FILE,
"w")
1429 fp = open(TEMP_FID_FILE,
"r") 1430 fids = yaml.safe_load(fp) 1432 self.logger.
error(
"No fids returned by 'hctl fetch-fids'. Returning -1.\n")
1434 fid =
get_fid(self, fids, service, idx)
1439 Get list of running m0d process 1443 for proc
in psutil.process_iter():
1446 pinfo = proc.as_dict(attrs=[
'pid',
'name',
'username'])
1447 if pinfo.get(
'name') ==
"m0d":
1449 listOfProc.append(pinfo);
1450 except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
1456 cmd=f
"KILL -SIGTERM {proc.get('pid')}" 1464 self.logger.info(f
"service={service}\nidx={idx}\n")
1466 if service
in [
"fsm",
"client",
"motr_client"]:
1467 cmd = f
"{MOTR_FSM_SCRIPT_PATH}" 1473 confd_path = f
"{self.local_path}/motr/sysconfig/{self.machine_id}/confd.xc" 1476 cmd = f
"cp -f {confd_path} /etc/motr/" 1479 cmd = f
"cp -v {self.local_path}/motr/sysconfig/{self.machine_id}/motr /etc/sysconfig/" 1486 cmd =
"/opt/seagate/cortx/motr/libexec/m0trace_logrotate.sh &" 1488 cmd =
"/opt/seagate/cortx/motr/libexec/m0addb_logrotate.sh &" 1492 cmd = f
"{MOTR_SERVER_SCRIPT_PATH} m0d-{fid}"
def receiveSigTerm(signalNumber, frame)
def add_swap_fstab(self, dev_name)
def create_lvm(self, index, metadata_dev)
static void split(m0_bindex_t offset, int nr, bool commit)
static struct m0_list list
def update_config_file(self, fname, kv_list)
def verify_libfabric(self)
def get_disk_size(self, device)
def check_type(var, vtype, msg)
def get_fid(self, fids, service, idx)
def update_copy_motr_config_file(self)
def execute_command_console(self, command)
def update_bseg_size(self)
static void decode(struct m0_xcode_obj *obj)
def calc_lvm_min_size(self, lv_path, lvm_min_size)
def get_md_disks_lists(self, node_info)
def execute_command_without_exception(self, cmd, timeout_secs=TIMEOUT_SECS, retries=1)
def configure_machine_id(self, phase)
def restart_services(self, services)
def validate_storage_schema(storage)
def delete_parts(self, dev_count, device)
def check_pkgs(self, pkgs)
def execute_command_verbose(self, cmd, timeout_secs=TIMEOUT_SECS, verbose=False, set_timeout=True, retry_count=CMD_RETRY_COUNT)
def delete_part(self, device, part_num)
def validate_motr_rpm(self)
def create_dirs(self, dirs)
def get_cvg_cnt_and_cvg(self)
def __init__(self, rc, message, args)
def create_swap(self, swap_dev)
def remove_logs(self, patterns)
def get_metadata_disks_count(self)
def set_setup_size(self, service)
def validate_files(files)
def get_part_count(self, device)
def remove_dirs(self, log_dir, patterns)
def get_nids(self, nodes)
def fetch_fid(self, service, idx)
def update_motr_hare_keys_for_all_nodes(self)
static long long min(long long a, long long b)
def execute_command(self, cmd, timeout_secs=TIMEOUT_SECS, verbose=False, retries=1, stdin=None, logging=True)
def del_swap_fstab_by_vg_name(self, vg_name)
def execute_command_without_log(cmd, timeout_secs=TIMEOUT_SECS, verbose=False, retries=1, stdin=None, logging=False)
def pkg_installed(self, pkg)
def get_server_node(self)
def update_to_file(self, index, url, machine_id, md_disks)
def get_value(self, key, key_type)
def configure_libfabric(self)
def get_cvg_cnt_and_cvg_k8(self)
def start_service(self, service, idx)
def get_mdisks_from_list(self, md_lists)
def remove_dm_entries(self)
def check_services(self, services)
def get_logical_node_class(self)
def update_motr_hare_keys(self, nodes)
def getListOfm0dProcess()