当前云主机的系统盘有两种存储模式:1)本地file存储,系统盘保存在对应宿主机的磁盘上;2)ceph存储,系统盘保存在对应的ceph pool中。某些上层用户(例如RDS)对系统盘的IO有较高的要求,因此系统盘会采用本地file存储的形式。在目前的网络条件下,从glance服务器上下载基础镜像到计算节点会花费较多的时间,所以nova中对于镜像的处理策略是:
镜像从glance服务器上下载到计算节点上的_base目录作为基础镜像的缓存,其命名格式为image_id的sha1 hash形式,同时会将基础镜像格式转换为raw。如果镜像支持CoW,那么就会将镜像变为实例配置的大小,如果不支持CoW,就直接将镜像拷贝到实例对应的目录中。
其中镜像缓存保存的目录是:
/data/nova/instances/_base/
镜像缓存名称示例生成代码:
#!/usr/bin/env python
"""Compute the file name under which a base image is cached in _base."""
import hashlib

# ID of the base image.
image_id = "67b67b36-0e07-4f6d-98d3-ed0057e9f87d"
# The cached base image is named after the SHA-1 hex digest of the image ID.
# hashlib only accepts bytes on Python 3, so encode the ID first; for an
# ASCII ID this yields the same digest on Python 2.
base_image_name = hashlib.sha1(image_id.encode("utf-8")).hexdigest()
转换镜像缓存格式为raw(如果基础镜像不是raw格式的话)
$ qemu-img convert -O raw 源文件 目标文件
nova中镜像缓存、格式转换代码逻辑
# nova.virt.images.py
def fetch_to_raw(context, image_href, path, user_id, project_id, max_size=0):
    """Fetch an image into the cache path, converting it to raw if required.

    :param context: request context, passed through to ``fetch``.
    :param image_href: ID of the image to fetch.
    :param path: final location of the cached image, e.g.
        /data/nova/instances/_base/01c57ef9f47a8642e2093cc007476a5a0bc849d2
    :param user_id: passed through to ``fetch``.
    :param project_id: passed through to ``fetch``.
    :param max_size: upper bound for the image's virtual size; a falsy
        value disables the size check.
    :raises exception.ImageUnacceptable: if the image format cannot be
        determined, the image has a backing file, or the converted file
        is not reported as raw.
    :raises exception.InstanceTypeDiskTooSmall: if the image's virtual
        size exceeds ``max_size``.
    """
    # The image is first downloaded from glance into a ".part" file.
    download_path = "%s.part" % path
    fetch(context, image_href, download_path, user_id, project_id,
          max_size=max_size)

    # NOTE(review): remove_path_on_error presumably deletes the given path
    # when the body raises -- confirm against fileutils.
    with fileutils.remove_path_on_error(download_path):
        # Image details are obtained by running "qemu-img info".
        info = qemu_img_info(download_path)

        fmt = info.file_format
        if fmt is None:
            raise exception.ImageUnacceptable(
                reason=_("'qemu-img info' parsing failed."),
                image_id=image_href)

        backing_file = info.backing_file
        if backing_file is not None:
            raise exception.ImageUnacceptable(
                image_id=image_href,
                reason=(_("fmt=%(fmt)s backed by: %(backing_file)s") %
                        {'fmt': fmt, 'backing_file': backing_file}))

        # We can't generally shrink incoming images, so disallow
        # images > size of the flavor we're booting.  Checking here avoids
        # an immediate DoS where we convert large qcow images to raw
        # (which may compress well but not be sparse).
        # TODO(p-draigbrady): loop through all flavor sizes, so that
        # we might continue here and not discard the download.
        # If we did that we'd have to do the higher level size checks
        # irrespective of whether the base image was prepared or not.
        virtual_size = info.virtual_size
        if max_size and max_size < virtual_size:
            msg = _('%(base)s virtual size %(disk_size)s '
                    'larger than flavor root disk size %(size)s')
            LOG.error(msg % {'base': path,
                             'disk_size': virtual_size,
                             'size': max_size})
            raise exception.InstanceTypeDiskTooSmall()

        # Nothing to convert: the download becomes the cache entry as-is.
        if fmt == "raw" or not CONF.force_raw_images:
            os.rename(download_path, path)
            return

        converted_path = "%s.converted" % path
        LOG.debug("%s was %s, converting to raw" % (image_href, fmt))
        with fileutils.remove_path_on_error(converted_path):
            # Convert to a raw image ("qemu-img convert"), then drop the
            # downloaded original.
            convert_image(download_path, converted_path, 'raw')
            os.unlink(download_path)

            # Re-inspect the result to make sure the conversion took effect.
            converted_info = qemu_img_info(converted_path)
            if converted_info.file_format != "raw":
                raise exception.ImageUnacceptable(
                    image_id=image_href,
                    reason=_("Converted to raw, but format is now %s") %
                    converted_info.file_format)

            os.rename(converted_path, path)
nova当中有一个定时任务(默认2400s执行一次)来管理上述缓存的镜像,清理长时间没有使用的镜像。
# nova.compute.manager.py
@periodic_task.periodic_task(spacing=CONF.image_cache_manager_interval,
                             external_process_ok=True)
def _run_image_cache_manager_pass(self, context):
    """Run a single pass of the image cache manager.

    Does nothing when the driver reports no image cache capability or
    when ``CONF.image_cache_manager_interval`` is 0.
    """
    cache_supported = self.driver.capabilities["has_imagecache"]
    if not cache_supported or CONF.image_cache_manager_interval == 0:
        return

    # Register this host as a user of the instances directory, then look up
    # every node using it (relevant when the directory is on shared storage).
    storage_users.register_storage_use(CONF.instances_path, CONF.host)
    nodes = storage_users.get_storage_users(CONF.instances_path)

    # Query the instances that live on those nodes.
    filters = dict(deleted=False, soft_deleted=True, host=nodes)
    filtered_instances = self.conductor_api.instance_get_all_by_filters(
        context, filters, columns_to_join=[])

    # The virt driver's manage_image_cache does the actual cache management.
    self.driver.manage_image_cache(context, filtered_instances)
结合目前我们集群的配置,可以简单地认为上述代码的作用是,查询出运行在本计算节点上的云主机,然后调用manage_image_cache方法,接着来看下manage_image_cache。
# nova.virt.libvirt.driver.py
class LibvirtDriver(driver.ComputeDriver):
    def manage_image_cache(self, context, all_instances):
        """Manage the local cache of images.

        Delegates to the image cache manager's ``verify_base_images``,
        handing it the request context and the given instances.
        """
        cache_manager = self.image_cache_manager
        cache_manager.verify_base_images(context, all_instances)
直接调用的是verify_base_images。
# nova.virt.libvirt.imagecache.py
class ImageCacheManager(object):
    def verify_base_images(self, context, all_instances):
        """Classify the cached base images and remove the unused ones.

        Lists the image cache files in the base directory, works out which
        of them are still in use by the given instances, logs the
        resulting lists and, when ``CONF.remove_unused_base_images`` is
        set, removes the files found to be removable.

        :param context: request context, passed to
            ``_list_running_instances``.
        :param all_instances: instances to inspect for image usage.
        """
        # Start from a clean slate.  The state being reset is:
        #   self.used_images = {}
        #   self.image_popularity = {}
        #   self.instance_names = set()
        #
        #   self.active_base_files = []
        #   self.corrupt_base_files = []
        #   self.originals = []
        #   self.removable_base_files = []
        #   self.unexplained_images = []
        self._reset_state()

        cache_dir = os.path.join(CONF.instances_path, CONF.base_dir_name)
        if not os.path.exists(cache_dir):
            LOG.debug(_('Skipping verification, no base directory at %s'),
                      cache_dir)
            return

        LOG.debug(_('Verify base images'))

        # List the valid cache files in the base directory.  Two kinds of
        # names are considered valid:
        #   1) names exactly as long as a sha1 hash;
        #   2) a sha1 hash followed by an underscore suffix (mainly the
        #      names images get during resize).
        # Both kinds end up in unexplained_images; kind 1) is additionally
        # recorded in originals.
        self._list_base_images(cache_dir)

        # Collect the running instances and record the images they use in
        # used_images.
        self._list_running_instances(context, all_instances)

        # Determine what images are on disk because they're in use
        for image_id in self.used_images:
            fingerprint = hashlib.sha1(image_id).hexdigest()
            LOG.debug(_('Image id %(id)s yields fingerprint %(fingerprint)s'),
                      {'id': image_id,
                       'fingerprint': fingerprint})

            # Look up the cache files matching this fingerprint (the sha1
            # hash); resized variants are included in the results.
            matches = self._find_base_file(cache_dir, fingerprint)
            for base_file, is_small, is_resized in matches:
                # _handle_base_image mainly does the following:
                #   1) removes base_file from unexplained_images;
                #   2) adds it to active_base_files if instances use it;
                #   3) verifies it, adding it to corrupt_base_files when
                #      verification fails;
                #   4) adds it to removable_base_files if it exists but is
                #      not in use.
                self._handle_base_image(image_id, base_file)

                if not (is_small or is_resized):
                    self.originals.append(base_file)

        # Cache files that still serve as the backing file of some
        # instance's system disk are in use as well; make sure they are
        # listed as active.
        # NOTE(review): the removal of these files from unexplained_images
        # presumably happens inside _list_backing_images -- confirm.
        for backing_path in self._list_backing_images():
            if backing_path not in self.active_base_files:
                self.active_base_files.append(backing_path)

        # Whatever is still unexplained at this point can be removed.
        for unknown_file in self.unexplained_images:
            LOG.warning(_('Unknown base file: %s'), unknown_file)
            self.removable_base_files.append(unknown_file)

        # Dump these lists
        if self.active_base_files:
            LOG.info(_('Active base files: %s'),
                     ' '.join(self.active_base_files))
        if self.corrupt_base_files:
            LOG.info(_('Corrupt base files: %s'),
                     ' '.join(self.corrupt_base_files))

        # Remove the cache files that are not in use.
        if self.removable_base_files:
            LOG.info(_('Removable base files: %s'),
                     ' '.join(self.removable_base_files))

            # remove_unused_base_images is a config option; per the
            # accompanying article it defaults to true.
            if CONF.remove_unused_base_images:
                for stale_file in self.removable_base_files:
                    self._remove_base_file(stale_file)

        # That's it
        LOG.debug(_('Verification complete'))
可以看到verify_base_images的核心逻辑是列出_base目录下名称合法的镜像缓存,过滤掉那些还在使用的镜像缓存,然后删除没有使用的镜像。具体的判断标准和流程可以参考代码中的注释。
本文来自网易实践者社区,经作者廖跃华授权发布。