diff --git a/imagestorebackupstorage/ansible/imagestorebackupstorage.py b/imagestorebackupstorage/ansible/imagestorebackupstorage.py index 67fc82ee1da8dba63cfe2efb81620a8a371d33e6..b0cc6966b893c29e8da053a07bdfca95d26186c2 100644 --- a/imagestorebackupstorage/ansible/imagestorebackupstorage.py +++ b/imagestorebackupstorage/ansible/imagestorebackupstorage.py @@ -33,6 +33,7 @@ remote_port = None host_uuid = None require_python_env = "false" skip_packages = "" +new_add = "false" # get parameter from shell parser = argparse.ArgumentParser(description='Deploy image backupstorage to host') @@ -104,7 +105,7 @@ else : zstacklib = ZstackLib(zstacklib_args) if host_info.distro in RPM_BASED_OS: - qemu_pkg = "fuse-sshfs nmap collectd tar net-tools" + qemu_pkg = "fuse-sshfs nmap collectd tar net-tools blktrace" qemu_pkg = qemu_pkg + ' python2-pyparted nettle' if releasever in kylin else qemu_pkg + ' pyparted' if not remote_bin_installed(host_post_info, "qemu-img", return_status=True): @@ -229,6 +230,11 @@ def load_nbd(): load_nbd() +if client == "false" and new_add == "false": + dst_dir = os.path.join(fs_rootpath, "registry") + if not file_dir_exist("path=" + dst_dir, host_post_info): + error("ERROR: registry directory is missing, imagestore metadata may have been lost. Check it immediately!") + # name: install zstack-store if client == "false": command = "bash %s %s %s" % (dest_pkg, fs_rootpath, max_capacity) diff --git a/kvmagent/ansible/kvm.py b/kvmagent/ansible/kvm.py index 480dc221ccb3ff1ebd8af02c4271ed4cacc13d2c..31115676218e650260757a70d3c5caa77b1fc4d0 100755 --- a/kvmagent/ansible/kvm.py +++ b/kvmagent/ansible/kvm.py @@ -43,7 +43,6 @@ zstack_lib_dir = "/var/lib/zstack" zstack_libvirt_nwfilter_dir = "%s/nwfilter" % zstack_lib_dir skipIpv6 = 'false' bridgeDisableIptables = 'false' -isMini = 'false' isBareMetal2Gateway='false' releasever = '' unsupported_iproute_list = ["nfs4"] @@ -247,12 +246,6 @@ def install_kvm_pkg(): # common kvmagent deps of x86 and arm that no need to update common_dep_list = "%s %s" % (common_dep_list, common_update_list) - # zstack mini needs higher version kernel etc. 
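# Note: the registry guard above only fires when an existing imagestore backup storage is being
# reconnected (client == "false" and new_add == "false"). A minimal stand-alone sketch of the same
# check, assuming fs_rootpath carries the ansible variable and using os.path.isdir in place of the
# file_dir_exist helper (verify_registry_dir is a hypothetical name):
import os

def verify_registry_dir(fs_rootpath, client, new_add):
    # Brand-new backup storage (new_add == "true") and client-only installs skip the check.
    if client != "false" or new_add != "false":
        return
    dst_dir = os.path.join(fs_rootpath, "registry")
    if not os.path.isdir(dst_dir):
        # Same intent as the error() call above: stop before reinstalling zstack-store over lost metadata.
        raise RuntimeError("registry directory %s is missing, imagestore metadata may have been lost" % dst_dir)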
- C76_KERNEL_OR_HIGHER = '3.10.0-957' in host_info.kernel_version - if isMini == 'true': - mini_dep_list = " drbd84-utils kmod-drbd84" if C76_KERNEL_OR_HIGHER and not IS_AARCH64 else "" - common_dep_list += mini_dep_list - if isCube: cube_dep_list = " lm_sensors" if releasever not in kylin: diff --git a/kvmagent/kvmagent/plugins/imagestore.py b/kvmagent/kvmagent/plugins/imagestore.py index 35573e0638fd94cabf977a3348a662cde1464dbb..ec2bb70a5e1f20f5e329de71d2a3941808ea26ed 100755 --- a/kvmagent/kvmagent/plugins/imagestore.py +++ b/kvmagent/kvmagent/plugins/imagestore.py @@ -100,7 +100,7 @@ class ImageStoreClient(object): with linux.ShowLibvirtErrorOnException(vm): cmdstr = '%s stopmirr -domain %s -delbitmap=%s -drive "%s"' % \ (self.ZSTORE_CLI_PATH, vm, complete, node) - shell.run(cmdstr) + shell.check_run(cmdstr) def query_mirror_volumes(self, vm): with linux.ShowLibvirtErrorOnException(vm): diff --git a/kvmagent/kvmagent/plugins/shared_block_plugin.py b/kvmagent/kvmagent/plugins/shared_block_plugin.py index d25681fc595f1d54526644040796e424d9802af3..f40895100c6e4e37527d8f92df0dd18d5288c3a3 100755 --- a/kvmagent/kvmagent/plugins/shared_block_plugin.py +++ b/kvmagent/kvmagent/plugins/shared_block_plugin.py @@ -288,7 +288,7 @@ class CheckDisk(object): r, o, e = bash.bash_roe(command, errorout=False) if r != 0 and e and re.search(r'VG(.*)lock failed', e): - lvm.check_stuck_vglk() + lvm.check_stuck_vglk_and_gllk() r, o, e = bash.bash_roe(command, errorout=True) logger.debug("resized pv %s (wwid: %s), return code: %s, stdout %s, stderr: %s" % (disk_name, self.identifier, r, o, e)) @@ -680,7 +680,7 @@ class SharedBlockPlugin(kvmagent.KvmAgent): retry_times_for_checking_vg_lockspace = get_retry_times_for_checking_vg_lockspace() - lvm.check_stuck_vglk() + lvm.check_stuck_vglk_and_gllk() logger.debug("starting vg %s lock..." 
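# Note: switching the stopmirr call in imagestore.py above from shell.run to shell.check_run makes a
# non-zero exit abort the caller instead of being silently ignored. A rough stand-alone approximation
# of that behaviour with subprocess (run_or_raise is illustrative, not the zstacklib implementation):
import subprocess

def run_or_raise(cmdstr):
    # Run a shell command and raise on failure rather than returning the exit code.
    p = subprocess.Popen(cmdstr, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = p.communicate()
    if p.returncode != 0:
        raise Exception("command failed: %s, rc=%s, stderr=%s" % (cmdstr, p.returncode, err))
    return out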
% cmd.vgUuid) lvm.start_vg_lock(cmd.vgUuid, retry_times_for_checking_vg_lockspace) @@ -1688,6 +1688,8 @@ class SharedBlockPlugin(kvmagent.KvmAgent): def check_vg_state(self, req): cmd = jsonobject.loads(req[http.REQUEST_BODY]) rsp = AgentRsp() + lvm.check_stuck_vglk_and_gllk() + if cmd.vgUuids is None or len(cmd.vgUuids) == 0: return jsonobject.dumps(rsp) diff --git a/kvmagent/kvmagent/plugins/vm_plugin.py b/kvmagent/kvmagent/plugins/vm_plugin.py index 626bc245e7bcbfd64a6b960a8b28c93376ff987a..06c65af7fbb973abb384999549cd81330a895c64 100644 --- a/kvmagent/kvmagent/plugins/vm_plugin.py +++ b/kvmagent/kvmagent/plugins/vm_plugin.py @@ -6017,9 +6017,6 @@ def get_vm_migration_caps(domain_id, cap_key): def check_mirror_jobs(domain_id, migrate_without_bitmaps): - if not get_vm_migration_caps(domain_id, "dirty-bitmaps"): - return - isc = ImageStoreClient() volumes = isc.query_mirror_volumes(domain_id) if volumes: @@ -6027,11 +6024,41 @@ def check_mirror_jobs(domain_id, migrate_without_bitmaps): logger.info("stop mirror for %s:%s" % (domain_id, v)) isc.stop_mirror(domain_id, False, v) + if not get_vm_migration_caps(domain_id, "dirty-bitmaps"): + return + if migrate_without_bitmaps: execute_qmp_command(domain_id, '{"execute": "migrate-set-capabilities","arguments":' '{"capabilities":[ {"capability": "dirty-bitmaps", "state":false}]}}') +def get_block_file_content_by_disk_name(domain_id, disk_name): + all_blocks = get_vm_blocks(domain_id) + no_prefix_name = disk_name.replace("drive-", "") + block = filter(lambda b: b.get('device') == disk_name or b.get('device') == no_prefix_name or + (b.get('inserted') and b['inserted'].get('node-name') == disk_name) or + (b.get('qdev') and no_prefix_name in b['qdev'].split("/")), all_blocks) + + if len(block) == 0: + raise kvmagent.KvmError("No blocks found[uuid:{}, disk_name:{}]".format(domain_id, disk_name)) + return block[0]['inserted']['file'] + + +def check_install_path_by_qmp(domain_id, disk_name, path): + file_content = get_block_file_content_by_disk_name(domain_id, disk_name) + logger.info("get %s file content from qmp: %s" % (disk_name, file_content)) + r_path = VolumeTO.get_volume_actual_installpath(path) + if r_path in file_content: + return True + + # ceph file example: "json:{\"driver\": \"raw\", \"file\": {\"pool\": \"11111\", \"image\": \"ca46af50ab8742b68e464e9b23b05598\"}" + if r_path.startswith("ceph://"): + strs = path.replace('ceph://', '').split('/') + return strs[0] in file_content and strs[1] in file_content + + return False + + class VmPlugin(kvmagent.KvmAgent): KVM_START_VM_PATH = "/vm/start" KVM_STOP_VM_PATH = "/vm/stop" @@ -7426,7 +7453,11 @@ class VmPlugin(kvmagent.KvmAgent): def check_volume(): vm = get_vm_by_uuid(vmUuid) - return vm._get_target_disk_by_path(task_spec.newVolume.installPath, is_exception=False) is not None + d, _ = vm._get_target_disk_by_path(task_spec.newVolume.installPath, is_exception=False) + if d is None: + return False + + return check_install_path_by_qmp(vmUuid, d.alias.name_, task_spec.newVolume.installPath) job_over = False @thread.AsyncThread @@ -8098,6 +8129,16 @@ host side snapshot files chian: currVolume = installPath.split(":/")[-1] volumeType = "qcow2" + try: + vm = get_vm_by_uuid(cmd.vmUuid) + states = vm.domain.jobStats() + if libvirt.VIR_DOMAIN_JOB_DATA_REMAINING in states and libvirt.VIR_DOMAIN_JOB_DATA_TOTAL in states: + rsp.error = "domain already has migrate job, cannot do drive mirror right now." 
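# Note: check_install_path_by_qmp above accepts a ceph volume when both the pool and the image name
# from the ceph:// install path appear in the QMP file string. A pure-function sketch of that matching
# rule (no QMP access; the real code resolves the path via VolumeTO.get_volume_actual_installpath first):
def install_path_matches(qmp_file_str, install_path):
    # Direct hit: the install path is embedded in the block node's file string.
    if install_path in qmp_file_str:
        return True
    # Ceph volumes are reported as json:{"driver": "raw", "file": {"pool": "...", "image": "..."}},
    # so compare pool and image separately.
    if install_path.startswith("ceph://"):
        pool, image = install_path.replace("ceph://", "").split("/", 1)
        return pool in qmp_file_str and image in qmp_file_str
    return False

# install_path_matches('json:{"file": {"pool": "11111", "image": "ca46af50ab8742b68e464e9b23b05598"}}',
#                      "ceph://11111/ca46af50ab8742b68e464e9b23b05598")  # -> True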
+ rsp.success = False + return jsonobject.dumps(rsp) + except libvirt.libvirtError: + pass + try: volumes = isc.query_mirror_volumes(cmd.vmUuid) if volumes is None: diff --git a/zstacklib/zstacklib/utils/ceph.py b/zstacklib/zstacklib/utils/ceph.py index 5bc748a5ecbef17cfee31473c8672e1845639463..245eae71ce8c3a7d2fc11e2395f9eacf9e502af3 100644 --- a/zstacklib/zstacklib/utils/ceph.py +++ b/zstacklib/zstacklib/utils/ceph.py @@ -12,6 +12,7 @@ from zstacklib.utils import linux from zstacklib.utils import remoteStorage from zstacklib.utils.bash import bash_r from zstacklib.utils.linux import get_fs_type, check_nbd +from zstacklib.utils.zstone import ZStoneCephPoolCapacityGetter logger = log.get_logger(__name__) @@ -52,12 +53,17 @@ def get_defer_deleting_options(cmd): return opts +def is_zstone(): + return os.path.exists("/opt/zstone/bin/zstnlet") + def get_ceph_manufacturer(): if is_xsky(): return "xsky" elif is_sandstone(): return "sandstone" + elif is_zstone(): + return "zstone" else: return "open-source" @@ -210,37 +216,7 @@ def get_pools_capacity(): osd_nodes.add(node.name) pool_capacity.crush_item_osds = sorted(osd_nodes) - # fill crush_item_osds_total_size, poolTotalSize - o = shell.call('ceph osd df -f json') - # In the open source Ceph 10 version, the value returned by executing 'ceph osd df -f json' might have '-nan', causing json parsing to fail. - o = o.replace("-nan", "\"\"") - manufacturer = get_ceph_manufacturer() - osds = jsonobject.loads(o) - if not osds.nodes: - return result - for pool_capacity in result: - if not pool_capacity.crush_item_osds: - continue - for osd_name in pool_capacity.crush_item_osds: - for osd in osds.nodes: - if osd.name != osd_name: - continue - pool_capacity.crush_item_osds_total_size = pool_capacity.crush_item_osds_total_size + osd.kb * 1024 - pool_capacity.available_capacity = pool_capacity.available_capacity + osd.kb_avail * 1024 - pool_capacity.used_capacity = pool_capacity.used_capacity + osd.kb_used * 1024 - if manufacturer == "open-source": - pool_capacity.related_osd_capacity.update({osd_name : CephOsdCapacity(osd.kb * 1024, osd.kb_avail * 1024, osd.kb_used * 1024)}) - - if not pool_capacity.disk_utilization: - continue - - if pool_capacity.crush_item_osds_total_size: - pool_capacity.pool_total_size = int(pool_capacity.crush_item_osds_total_size * pool_capacity.disk_utilization) - if pool_capacity.available_capacity: - pool_capacity.available_capacity = int(pool_capacity.available_capacity * pool_capacity.disk_utilization) - if pool_capacity.used_capacity: - pool_capacity.used_capacity = int(pool_capacity.used_capacity * pool_capacity.disk_utilization) - + pool_capacity_getter_mapping.get(get_ceph_manufacturer(), DefaultCephPoolCapacityGetter()).fill_pool_capacity(result) return result @@ -376,3 +352,39 @@ class NbdRemoteStorage(remoteStorage.RemoteStorage): if len(device_and_mount_path) != 0: shell.call('umount -f %s' % self.mount_path) shell.call("qemu-nbd -d %s" % self.volume_mounted_device) + +class DefaultCephPoolCapacityGetter: + def fill_pool_capacity(self, result): + # fill crush_item_osds_total_size, poolTotalSize + o = shell.call('ceph osd df -f json') + # In the open source Ceph 10 version, the value returned by executing 'ceph osd df -f json' might have '-nan', causing json parsing to fail. 
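# Note: the guard added to the drive-mirror path in vm_plugin.py above refuses to start a mirror while
# a migration job is in flight. A condensed probe against a libvirt domain, using the same jobStats()
# keys as the patch (has_active_migrate_job is a hypothetical helper name):
import libvirt

def has_active_migrate_job(dom):
    # dom is a libvirt.virDomain; jobStats() omits the data_* fields when no migration is running.
    try:
        stats = dom.jobStats()
    except libvirt.libvirtError:
        return False
    return (libvirt.VIR_DOMAIN_JOB_DATA_REMAINING in stats and
            libvirt.VIR_DOMAIN_JOB_DATA_TOTAL in stats)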
+ o = o.replace("-nan", "\"\"") + osds = jsonobject.loads(o) + if not osds.nodes: + return + + for pool_capacity in result: + if not pool_capacity.crush_item_osds: + continue + for osd_name in pool_capacity.crush_item_osds: + for osd in osds.nodes: + if osd.name != osd_name: + continue + pool_capacity.crush_item_osds_total_size = pool_capacity.crush_item_osds_total_size + osd.kb * 1024 + pool_capacity.available_capacity = pool_capacity.available_capacity + osd.kb_avail * 1024 + pool_capacity.used_capacity = pool_capacity.used_capacity + osd.kb_used * 1024 + pool_capacity.related_osd_capacity.update({osd_name : CephOsdCapacity(osd.kb * 1024, osd.kb_avail * 1024, osd.kb_used * 1024)}) + + if not pool_capacity.disk_utilization: + continue + + if pool_capacity.crush_item_osds_total_size: + pool_capacity.pool_total_size = int(pool_capacity.crush_item_osds_total_size * pool_capacity.disk_utilization) + if pool_capacity.available_capacity: + pool_capacity.available_capacity = int(pool_capacity.available_capacity * pool_capacity.disk_utilization) + if pool_capacity.used_capacity: + pool_capacity.used_capacity = int(pool_capacity.used_capacity * pool_capacity.disk_utilization) + +pool_capacity_getter_mapping = { + "zstone":ZStoneCephPoolCapacityGetter() +} diff --git a/zstacklib/zstacklib/utils/lvm.py b/zstacklib/zstacklib/utils/lvm.py index 0f7cb08f0645cff5393bb589370bc3bbf6788d7a..ea7a3d89288901025d80da9c53700746467125d1 100644 --- a/zstacklib/zstacklib/utils/lvm.py +++ b/zstacklib/zstacklib/utils/lvm.py @@ -1308,7 +1308,7 @@ def _need_retry_active_lv(arg, exception): lock = get_lock_hold_by_us() if lock is not None: logger.debug("find lv lock hold by us on lockspace but not on client, directly init lv[path:%s]" % path) - return sanlock.sanlock_direct_init_resource(lock) == 0 + return sanlock.direct_init_resource(lock) == 0 return False @@ -1767,18 +1767,18 @@ def examine_lockspace(lockspace): return r -def check_stuck_vglk(): +def check_stuck_vglk_and_gllk(): @linux.retry(3, 1) - def is_stuck_vglk(): - r, o, e = bash.bash_roe("sanlock client status | grep ':VGLK:'") + def is_stuck_vglk_or_gllk(): + r, o, e = bash.bash_roe("sanlock client status | grep -E ':VGLK:|:GLLK:'") if r != 0: return else: - raise RetryException("found sanlock vglk lock stuck") + raise RetryException("found sanlock vglk/gllk stuck") try: - is_stuck_vglk() + is_stuck_vglk_or_gllk() except Exception as e: - r, o, e = bash.bash_roe("sanlock client status | grep ':VGLK:'") + r, o, e = bash.bash_roe("sanlock client status | grep -E ':VGLK:|:GLLK:'") if r != 0: return if len(o.strip().splitlines()) == 0: @@ -1788,9 +1788,25 @@ def check_stuck_vglk(): continue cmd = "sanlock client release -%s" % stucked.replace(" p ", " -p ") r, o, e = bash.bash_roe(cmd) - logger.warn("find stuck vglk and already released, detail: [return_code: %s, stdout: %s, stderr: %s]" % + logger.warn("find stuck vglk/gllk and already released, detail: [return_code: %s, stdout: %s, stderr: %s]" % (r, o, e)) + check_lock = lock._get_lock("check-vglk-and-gllk") + if check_lock.acquire(False) is False: + logger.debug("other thread is checking vglk or gllk...") + return + + def release_lock(lck): + try: + lck.release() + except Exception: + return + try: + sanlock.check_stuck_vglk_and_gllk() + except Exception as e: + logger.debug("an exception was found on checking abnormal vglk/gllk: %s" % str(e)) + finally: + release_lock(check_lock) @bash.in_bash def fix_global_lock(): @@ -1858,7 +1874,7 @@ def vgck(vgUuid, timeout): def lvm_vgck(vgUuid, timeout): health, o, 
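# Note: check_stuck_vglk_and_gllk above takes a named zstacklib lock with acquire(False) so only one
# thread at a time runs the on-disk sanlock check while everyone else simply returns. The same
# try-acquire-or-skip pattern with the standard library (run_exclusive is an illustrative sketch):
import threading

_check_lock = threading.Lock()

def run_exclusive(do_check):
    # Non-blocking acquire: if another thread is already checking, skip instead of queueing up.
    if not _check_lock.acquire(False):
        return
    try:
        do_check()
    finally:
        _check_lock.release()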
e = vgck(vgUuid, 360 if timeout < 360 else timeout) - check_stuck_vglk() + check_stuck_vglk_and_gllk() if health != 0: s = "vgck %s failed, detail: [return_code: %s, stdout: %s, stderr: %s]" % (vgUuid, health, o, e) diff --git a/zstacklib/zstacklib/utils/qga.py b/zstacklib/zstacklib/utils/qga.py index 78357573bba30cdb7cb7b3dd6edb89e856da695a..3fe348c91bab56010af4db90fc29c905b2ef5cba 100644 --- a/zstacklib/zstacklib/utils/qga.py +++ b/zstacklib/zstacklib/utils/qga.py @@ -100,7 +100,7 @@ class VmQga(object): VM_OS_LINUX_REDHAT = "rhel" VM_OS_LINUX_DEBIAN = "debian" VM_OS_LINUX_FEDORA = "fedora" - VM_OS_LINUX_OPENEULER = "openEuler" + VM_OS_LINUX_OPENEULER = "openeuler" VM_OS_WINDOWS = "mswindows" ZS_TOOLS_PATN_WIN = "C:\Program Files\GuestTools\zs-tools\zs-tools.exe" diff --git a/zstacklib/zstacklib/utils/sanlock.py b/zstacklib/zstacklib/utils/sanlock.py index f59d3b73081f9f9c4324bd67b87ca9c09990f3fa..25e35dbf26eceeb06809b569b4ae03b38b753f17 100644 --- a/zstacklib/zstacklib/utils/sanlock.py +++ b/zstacklib/zstacklib/utils/sanlock.py @@ -1,9 +1,17 @@ -from zstacklib.utils import log +from zstacklib.utils import log, linux, thread import re +import random from string import whitespace from zstacklib.utils import bash +GLLK_BEGIN = 65 +VGLK_BEGIN = 66 +SMALL_ALIGN_SIZE = 1*1024**2 +BIG_ALIGN_SIZE = 8*1024**2 +RESOURCE_SIZE = 1*1024**2 + + logger = log.get_logger(__name__) class SanlockHostStatus(object): @@ -137,6 +145,227 @@ class SanlockClientStatusParser(object): return records -def sanlock_direct_init_resource(resource): +@bash.in_bash +def direct_init_resource(resource): cmd = "sanlock direct init -r %s" % resource - return bash.bash_r(cmd) \ No newline at end of file + return bash.bash_r(cmd) + + +def check_stuck_vglk_and_gllk(): + # 1. clear the vglk/gllk held by the dead host + # 2. 
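# Note: the constants above give the two candidate on-disk offsets probed by get_vglks()/get_gllks()
# further down: lease slot 65 (GLLK) or 66 (VGLK) multiplied by either the 1 MiB or the 8 MiB sanlock
# alignment. A quick illustration of the arithmetic (candidate_offsets is a hypothetical helper):
SMALL_ALIGN_SIZE = 1 * 1024 ** 2   # 1 MiB alignment
BIG_ALIGN_SIZE = 8 * 1024 ** 2     # 8 MiB alignment
GLLK_BEGIN, VGLK_BEGIN = 65, 66

def candidate_offsets(begin):
    return [begin * SMALL_ALIGN_SIZE, begin * BIG_ALIGN_SIZE]

# candidate_offsets(GLLK_BEGIN) -> [68157440, 545259520]   (65 MiB or 520 MiB)
# candidate_offsets(VGLK_BEGIN) -> [69206016, 553648128]   (66 MiB or 528 MiB)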
check stuck vglk/gllk + locks = get_vglks() + get_gllks() + logger.debug("start checking all vgs[%s] to see if the VGLK/GLLK on disk is normal" % map(lambda v: v.vg_name, locks)) + + abnormal_lcks = filter(lambda v: v.abnormal_held(), locks) + if len(abnormal_lcks) == 0: + logger.debug("no abnormal vglk or gllk found") + return + + logger.debug("found possible dirty vglk/gllk on disk: %s" % map(lambda v: v.vg_name, abnormal_lcks)) + results = {} + def check_stuck_lock(): + @thread.AsyncThread + def check(lck): + results.update({lck.vg_name: lck.stuck()}) + for lck in abnormal_lcks: + check(lck) + + def wait(_): + return len(results) == len(abnormal_lcks) + + check_stuck_lock() + linux.wait_callback_success(wait, timeout=60, interval=3) + for lck in filter(lambda v: results.get(v.vg_name) is True, abnormal_lcks): + lck.refresh() + if not lck.abnormal_held(): + continue + + if lck.host_id not in lck.owners: + live_min_host_id = get_hosts_state(lck.lockspace_name).get_live_min_hostid() + if int(lck.host_id) != live_min_host_id: + logger.debug("find dirty %s on vg %s, init it directly by host[hostId:%s] with min hostId" % (lck.resource_name, lck.vg_name, live_min_host_id)) + continue + + logger.debug("find dirty %s on vg %s, init it directly" % (lck.resource_name, lck.vg_name)) + direct_init_resource("{}:{}:/dev/mapper/{}-lvmlock:{}".format(lck.lockspace_name, lck.resource_name, lck.vg_name, lck.offset)) + + +class Resource(object): + def __init__(self, lines, host_id): + self.host_id = host_id + self.owners = [] + self.shared = None + self._update(lines) + + def _update(self, lines): + self.owners = [] + self.shared = None + for line in lines.strip().splitlines(): + line = line.strip() + if ' lvm_' in line: + self.offset, self.lockspace_name, self.resource_name, self.timestamp, own, self.gen, self.lver = line.split() + self.vg_name = self.lockspace_name.strip("lvm_") + if self.timestamp.strip("0") != '': + self.owners.append(str(int(own))) + elif ' SH' in line: + self.shared = True + self.owners.append(str(int(line.split()[0]))) + + @property + def lock_type(self): + if self.shared: + return 'sh' + elif len(self.owners) == 1: + return 'ex' + else: + return 'un' + + def refresh(self): + r, o, e = direct_dump_resource("/dev/mapper/%s-lvmlock" % self.vg_name, self.offset) + self._update(o) + + def in_use(self): + return bash.bash_r("sanlock client status | grep %s:%s" % (self.lockspace_name, self.resource_name)) == 0 + + # the current host holds the resource lock, but the process holding the lock cannot be found or held by a dead host + def abnormal_held(self): + if self.lock_type == 'un': + return False + # held by us + if self.host_id in self.owners: + return not self.in_use() + # held by dead host with ex mode + if self.lock_type != 'ex': + return False + host_state = get_hosts_state(self.lockspace_name) + if host_state is not None and host_state.is_host_dead(self.owners[0]): + return True + + return False + + def stuck(self): + if not self.abnormal_held(): + return False + + ori_lver = self.lver + ori_lock_type = self.lock_type + ori_time = linux.get_current_timestamp() + # the purpose of retrying is to repeatedly confirm that the lock on the resource has generated dirty data + # because the results of 'sanlock client status' and 'sanlock direct dump' may not necessarily be at the same time + @linux.retry(12, sleep_time=random.uniform(3, 4)) + def _stuck(): + self.refresh() + if not self.abnormal_held() or self.lock_type != ori_lock_type: + return + elif self.lock_type == 'ex' and self.lver 
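# Note: check_stuck_vglk_and_gllk above fans the per-lock stuck() probes out to async threads and polls
# a shared results dict until every probe has reported. The patch uses thread.AsyncThread and
# linux.wait_callback_success from zstacklib; a stand-alone sketch of the same fan-out-and-wait shape
# (check_concurrently is a hypothetical name):
import threading
import time

def check_concurrently(locks, probe, timeout=60, interval=3):
    results = {}

    def worker(lck):
        results[lck.vg_name] = probe(lck)

    for lck in locks:
        threading.Thread(target=worker, args=(lck,)).start()

    # Poll until every lock has a result or the timeout expires.
    deadline = time.time() + timeout
    while len(results) < len(locks) and time.time() < deadline:
        time.sleep(interval)
    return results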
== ori_lver: + raise RetryException("resource %s held by us, lock type: ex" % self.resource_name) + elif self.lock_type == 'sh': + raise RetryException("resource %s held by us, lock type: sh" % self.resource_name) + + try: + _stuck() + except RetryException as e: + logger.warn(str(e) + (" over %s seconds" % (linux.get_current_timestamp() - ori_time))) + return True + except Exception as e: + raise e + + return False + + +''' +s lvm_8e97627ab5ea4b0e8cb9f42c8345d728:7:/dev/mapper/8e97627ab5ea4b0e8cb9f42c8345d728-lvmlock:0 +h 7 gen 3 timestamp 3654034 LIVE +h 52 gen 2 timestamp 1815547 DEAD +h 58 gen 3 timestamp 1104848 DEAD +h 67 gen 5 timestamp 1824156 DEAD +h 100 gen 4 timestamp 1207551 LIVE +s lvm_675a67fb03b54acf9daac0a7ae966b74:70:/dev/mapper/675a67fb03b54acf9daac0a7ae966b74-lvmlock:0 +h 70 gen 2 timestamp 3654038 LIVE +h 100 gen 1 timestamp 1207549 LIVE +''' +class HostsState(object): + def __init__(self, lines, lockspace_name): + self.lockspace_name = lockspace_name + self.hosts = {} + self._update(lines) + + def _update(self, lines): + self.hosts = {} + find_lockspace = False + for line in lines.strip().splitlines(): + if line.strip().startswith('s %s' % self.lockspace_name): + find_lockspace = True + elif line.strip().startswith('h ') and find_lockspace: + host_id = line.split()[1] + host_state = line.split()[6] + self.hosts.update({host_id: host_state}) + elif find_lockspace and line.strip().startswith('s lvm_'): + break + logger.debug("get hosts state[%s] on lockspace %s" % (self.hosts, self.lockspace_name)) + + def is_host_live(self, host_id): + return self.hosts.get(host_id) == "LIVE" + + def is_host_dead(self, host_id): + return self.hosts.get(host_id) == "DEAD" + + def get_live_min_hostid(self): + return min([int(id) for id in self.hosts.keys() if self.is_host_live(id)]) + + +def get_hosts_state(lockspace_name): + r, o, e = bash.bash_roe("sanlock client gets -h 1") + if r == 0 and lockspace_name in o: + return HostsState(o, lockspace_name) + + +@bash.in_bash +def direct_dump(path, offset, length): + return bash.bash_roe("sanlock direct dump %s:%s:%s" % (path, offset, length)) + + +@bash.in_bash +def direct_dump_resource(path, offset): + return bash.bash_roe("sanlock direct dump %s:%s:%s" % (path, offset, RESOURCE_SIZE)) + + +def get_vglks(): + result = [] + for lockspace in get_lockspaces(): + path = lockspace.split(":")[2] + host_id = lockspace.split(":")[1] + r, o, e = direct_dump_resource(path, VGLK_BEGIN * SMALL_ALIGN_SIZE) + # vglk may be stored at 66M or 528M + if ' VGLK ' not in o: + r, o, e = direct_dump_resource(path, VGLK_BEGIN * BIG_ALIGN_SIZE) + if ' VGLK ' in o: + result.append(Resource(o, host_id)) + return result + + +def get_gllks(): + result = [] + for lockspace in get_lockspaces(): + path = lockspace.split(":")[2] + host_id = lockspace.split(":")[1] + r, o, e = direct_dump_resource(path, GLLK_BEGIN * SMALL_ALIGN_SIZE) + # gllk may be stored at 65M or 520M + if ' GLLK ' not in o and '_GLLK_disabled' not in o: + r, o, e = direct_dump_resource(path, GLLK_BEGIN * BIG_ALIGN_SIZE) + if ' GLLK ' in o: + result.append(Resource(o, host_id)) + return result + + +def get_lockspaces(): + result = [] + r, o, e = bash.bash_roe("sanlock client gets") + if r != 0 or o.strip() == '': + return result + return [line.split()[1].strip() for line in o.strip().splitlines() if 's lvm_' in line] + + +class RetryException(Exception): + pass \ No newline at end of file diff --git a/zstacklib/zstacklib/utils/zstone.py b/zstacklib/zstacklib/utils/zstone.py new file mode 100644 index 
0000000000000000000000000000000000000000..25fc502511738c700288ff01ece1f109276184a4
--- /dev/null
+++ b/zstacklib/zstacklib/utils/zstone.py
@@ -0,0 +1,16 @@
+import zstacklib.utils.jsonobject as jsonobject
+from zstacklib.utils import shell
+
+class ZStoneCephPoolCapacityGetter():
+    def fill_pool_capacity(self, result):
+        o = shell.call('ceph df detail -f json')
+        r = jsonobject.loads(o)
+        if not r.pools:
+            return
+        for pool in r.pools:
+            for pool_capacity in result:
+                if pool_capacity.pool_name == pool.name:
+                    pool_capacity.available_capacity = pool.stats.max_avail
+                    pool_capacity.used_capacity = pool.stats.stored
+                    pool_capacity.pool_total_size = pool_capacity.available_capacity + pool_capacity.used_capacity
+                    break
\ No newline at end of file
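# Note: with the new zstone.py module, pool capacity collection reduces to a strategy lookup on the
# detected manufacturer, as wired up in ceph.py via pool_capacity_getter_mapping. A condensed usage
# sketch against the patched modules (fill_pools_capacity is an illustrative wrapper name):
from zstacklib.utils.ceph import DefaultCephPoolCapacityGetter, get_ceph_manufacturer
from zstacklib.utils.zstone import ZStoneCephPoolCapacityGetter

def fill_pools_capacity(result):
    # ZStone pools are sized from 'ceph df detail'; every other vendor falls back to the osd-df getter.
    getters = {"zstone": ZStoneCephPoolCapacityGetter()}
    getters.get(get_ceph_manufacturer(), DefaultCephPoolCapacityGetter()).fill_pool_capacity(result)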