nova中镜像缓存以及清理

nova镜像缓存机制

当前云主机的系统盘有两种存储模式:1)本地file存储,系统盘保存在对应宿主机的磁盘上;2)ceph存储,系统盘保存在对应的ceph pool中。某些上层用户(例如RDS)对系统盘的IO有较高的要求,因此系统盘会采用本地file存储的形式。在目前的网络条件下,从glance服务器上下载基础镜像到计算节点会花费较多的时间,所以nova中对于镜像的处理策略是:

镜像从glance服务器下载到计算节点的_base目录,作为基础镜像的缓存,缓存文件以image_id的sha1 hash值命名,同时会将基础镜像格式转换为raw。如果镜像支持CoW,那么就会将镜像变为实例配置的大小;如果不支持CoW,就直接将镜像拷贝到实例对应的目录中。

其中镜像缓存保存的目录是:

/data/nova/instances/_base/

镜像缓存名称示例生成代码:

#!/usr/bin/env python

import hashlib

# ID of the base image.
image_id = "67b67b36-0e07-4f6d-98d3-ed0057e9f87d"
# The cached base image is named after the SHA-1 hex digest of the image ID.
# hashlib only accepts bytes on Python 3, so encode the ID before hashing;
# for an ASCII ID this yields the same digest on Python 2.
base_image_name = hashlib.sha1(image_id.encode("utf-8")).hexdigest()

转换镜像缓存格式为raw(如果基础镜像不是raw格式的话)

$ qemu-img convert -O raw 源文件 目标文件

nova中镜像缓存、格式转换代码逻辑

# nova.virt.images.py
def fetch_to_raw(context, image_href, path, user_id, project_id, max_size=0):
    """Fetch an image to ``path``, converting it to raw when required.

    The image is first fetched to ``<path>.part``. It is then inspected with
    ``qemu_img_info`` and rejected if its format cannot be determined, if it
    has a backing file, or if its virtual size exceeds ``max_size``. When the
    format is not raw and ``CONF.force_raw_images`` is set, the image is
    converted into ``<path>.converted`` and renamed to ``path``; otherwise
    the downloaded file itself is renamed to ``path``.

    :param context: request context, passed through to ``fetch``
    :param image_href: image id
    :param path: where the cached image is stored, e.g.
        /data/nova/instances/_base/01c57ef9f47a8642e2093cc007476a5a0bc849d2
    :param user_id: passed through to ``fetch``
    :param project_id: passed through to ``fetch``
    :param max_size: when non-zero, the largest virtual size that is accepted
    :raises exception.ImageUnacceptable: the image format is unknown, the
        image has a backing file, or the converted file is not raw
    :raises exception.InstanceTypeDiskTooSmall: the image's virtual size is
        larger than ``max_size``
    """
    # Download the image from glance into a file with a ".part" suffix.
    path_tmp = "%s.part" % path
    fetch(context, image_href, path_tmp, user_id, project_id,
          max_size=max_size)

    # Any failure from here on removes the partial download.
    with fileutils.remove_path_on_error(path_tmp):
        # Collect the image details by running the "qemu-img info" command.
        data = qemu_img_info(path_tmp)

        fmt = data.file_format
        if fmt is None:
            raise exception.ImageUnacceptable(
                reason=_("'qemu-img info' parsing failed."),
                image_id=image_href)

        backing_file = data.backing_file
        if backing_file is not None:
            raise exception.ImageUnacceptable(image_id=image_href,
                reason=(_("fmt=%(fmt)s backed by: %(backing_file)s") %
                        {'fmt': fmt, 'backing_file': backing_file}))

        # We can't generally shrink incoming images, so disallow
        # images > size of the flavor we're booting.  Checking here avoids
        # an immediate DoS where we convert large qcow images to raw
        # (which may compress well but not be sparse).
        # TODO(p-draigbrady): loop through all flavor sizes, so that
        # we might continue here and not discard the download.
        # If we did that we'd have to do the higher level size checks
        # irrespective of whether the base image was prepared or not.
        disk_size = data.virtual_size
        if max_size and max_size < disk_size:
            msg = _('%(base)s virtual size %(disk_size)s '
                    'larger than flavor root disk size %(size)s')
            # Hand the arguments to the logger instead of formatting eagerly.
            LOG.error(msg, {'base': path,
                            'disk_size': disk_size,
                            'size': max_size})
            raise exception.InstanceTypeDiskTooSmall()

        if fmt != "raw" and CONF.force_raw_images:
            staged = "%s.converted" % path
            LOG.debug("%s was %s, converting to raw", image_href, fmt)
            with fileutils.remove_path_on_error(staged):
                # Convert to a raw image with the "qemu-img convert" command.
                convert_image(path_tmp, staged, 'raw')
                os.unlink(path_tmp)

                # Re-inspect the result to make sure it really is raw.
                data = qemu_img_info(staged)
                if data.file_format != "raw":
                    raise exception.ImageUnacceptable(image_id=image_href,
                        reason=_("Converted to raw, but format is now %s") %
                        data.file_format)

                os.rename(staged, path)
        else:
            os.rename(path_tmp, path)

nova镜像缓存过期清理机制

nova当中有一个定时任务(默认2400s执行一次)来管理上述缓存的镜像,清理长时间没有使用的镜像。

# nova.compute.manager.py
@periodic_task.periodic_task(spacing=CONF.image_cache_manager_interval,
                             external_process_ok=True)
def _run_image_cache_manager_pass(self, context):
    """Run one image cache manager pass for this compute host.

    Does nothing when the driver reports no image cache capability or when
    ``CONF.image_cache_manager_interval`` is 0. Otherwise the instances on
    every host registered as using ``CONF.instances_path`` are looked up and
    handed to the driver's ``manage_image_cache``.
    """
    cache_unsupported = not self.driver.capabilities["has_imagecache"]
    if cache_unsupported or CONF.image_cache_manager_interval == 0:
        return

    # Register this host as a user of the instances directory, then read back
    # every host using it (relevant when the directory is on shared storage).
    # NOTE: in the article author's cluster the result only contains the
    # current compute node.
    storage_users.register_storage_use(CONF.instances_path, CONF.host)
    hosts_on_storage = storage_users.get_storage_users(CONF.instances_path)

    # Look up the instances running on those hosts.
    search_filters = {
        'deleted': False,
        'soft_deleted': True,
        'host': hosts_on_storage,
    }
    candidate_instances = self.conductor_api.instance_get_all_by_filters(
        context, search_filters, columns_to_join=[])

    # The backend driver does the actual image cache management.
    self.driver.manage_image_cache(context, candidate_instances)

结合目前我们集群的配置,可以简单地认为上述代码的作用是:查询出运行在本计算节点上的云主机,然后调用manage_image_cache方法。接下来看manage_image_cache的实现。

# nova.virt.libvirt.driver.py
class LibvirtDriver(driver.ComputeDriver):
    def manage_image_cache(self, context, all_instances):
        """Manage the local cache of images.

        Delegates to ``self.image_cache_manager.verify_base_images``,
        passing both arguments through unchanged.

        :param context: request context forwarded to the image cache manager
        :param all_instances: instances forwarded to the image cache manager
        """
        self.image_cache_manager.verify_base_images(context, all_instances)

直接调用的是verify_base_images。

# nova.virt.libvirt.imagecache.py
class ImageCacheManager(object):
    def verify_base_images(self, context, all_instances):
        """Classify the cached base images and remove the unused ones.

        Base files found in the base directory are sorted into active,
        corrupt and removable lists. The removable ones are deleted when
        ``CONF.remove_unused_base_images`` is set.

        :param context: request context, passed to ``_list_running_instances``
        :param all_instances: instances whose images must be kept, passed to
            ``_list_running_instances``
        """
        # Start by resetting the bookkeeping state:
        #   self.used_images = {}
        #   self.image_popularity = {}
        #   self.instance_names = set()
        #
        #   self.active_base_files = []
        #   self.corrupt_base_files = []
        #   self.originals = []
        #   self.removable_base_files = []
        #   self.unexplained_images = []
        self._reset_state()

        base_dir = os.path.join(CONF.instances_path, CONF.base_dir_name)
        if not os.path.exists(base_dir):
            LOG.debug(_('Skipping verification, no base directory at %s'),
                      base_dir)
            return

        LOG.debug(_('Verify base images'))
        # List the valid cached images under the base directory. Two kinds of
        # names are considered valid:
        #   1) names whose length equals that of a sha1 hash
        #   2) sha1 hash names followed by an underscore suffix (mainly the
        #      names produced while resizing)
        # Both kinds are recorded in unexplained_images; kind 1) is also
        # recorded in originals.
        self._list_base_images(base_dir)
        # List the running instances and record the images they use in
        # used_images.
        self._list_running_instances(context, all_instances)

        # Determine what images are on disk because they're in use
        for img in self.used_images:
            # hashlib only accepts bytes on Python 3, so encode the image id
            # before hashing; an ASCII id gives the same digest on Python 2.
            fingerprint = hashlib.sha1(img.encode('utf-8')).hexdigest()
            LOG.debug(_('Image id %(id)s yields fingerprint %(fingerprint)s'),
                      {'id': img,
                       'fingerprint': fingerprint})
            # Look up the files in the base directory that match this
            # fingerprint (the sha1 hash); resized images are included.
            for result in self._find_base_file(base_dir, fingerprint):
                base_file, image_small, image_resized = result
                # _handle_base_image mainly does the following:
                #   1) removes base_file from unexplained_images
                #   2) adds base_file to active_base_files when an instance
                #      is using it
                #   3) verifies base_file and adds it to corrupt_base_files
                #      when the verification fails
                #   4) adds base_file to removable_base_files when it exists
                #      but is not in use
                self._handle_base_image(img, base_file)

                if not image_small and not image_resized:
                    self.originals.append(base_file)

        # Find the cached images that are still in use, i.e. that serve as
        # the backing file of some instance's disk, and make sure they are
        # listed in active_base_files.
        # NOTE(review): the original comment says these are also dropped from
        # unexplained_images; presumably _list_backing_images does that.
        inuse_backing_images = self._list_backing_images()
        for backing_path in inuse_backing_images:
            if backing_path not in self.active_base_files:
                self.active_base_files.append(backing_path)

        # Whatever is still in unexplained_images can be deleted, so move it
        # to removable_base_files.
        for img in self.unexplained_images:
            LOG.warning(_('Unknown base file: %s'), img)
            self.removable_base_files.append(img)

        # Dump these lists
        if self.active_base_files:
            LOG.info(_('Active base files: %s'),
                     ' '.join(self.active_base_files))
        if self.corrupt_base_files:
            LOG.info(_('Corrupt base files: %s'),
                     ' '.join(self.corrupt_base_files))

        # Delete the cached images that are no longer used.
        if self.removable_base_files:
            LOG.info(_('Removable base files: %s'),
                     ' '.join(self.removable_base_files))
            # Gated by the remove_unused_base_images option (default: true,
            # per the original comment).
            if CONF.remove_unused_base_images:
                for base_file in self.removable_base_files:
                    self._remove_base_file(base_file)

        # That's it
        LOG.debug(_('Verification complete'))

可以看到verify_base_images的核心逻辑是列出_base目录下名称合法的镜像缓存,过滤掉那些还在使用的镜像缓存,然后删除没有使用的镜像。具体的判断标准和流程可以参考代码中的注释。

本文来自网易实践者社区,经作者廖跃华授权发布。