Android Kernel | 在模拟器中的/data分区的来龙去脉

本文详细解析了/data分区在Android模拟器中如何通过virtio半虚拟化技术与块设备驱动virtio_blk进行绑定和初始化过程,涉及设备匹配、驱动加载及磁盘注册等关键步骤。

这篇文章用来记录/data分区在模拟器中是如何被初始化的。

首先我们需要知道/data的挂载信息,查看fstab挂载表:

# Android fstab file.
#<src>                                                  <mnt_point>         <type>    <mnt_flags and options>                              <fs_mgr_flags>
# The filesystem that contains the filesystem checker binary (typically /system) cannot
# specify MF_CHECK, and must come before any filesystems that do specify MF_CHECK
system   /system     ext4    ro,barrier=1     wait,logical,first_stage_mount
vendor   /vendor     ext4    ro,barrier=1     wait,logical,first_stage_mount
product  /product    ext4    ro,barrier=1     wait,logical,first_stage_mount
system_ext  /system_ext  ext4   ro,barrier=1   wait,logical,first_stage_mount
/dev/block/vdc   /data     ext4      noatime,nosuid,nodev,nomblk_io_submit,errors=panic   wait,check,quota,fileencryption=aes-256-xts:aes-256-cts,reservedsize=128M,fsverity,keydirectory=/metadata/vold/metadata_encryption,latemount
/dev/block/platform/a003c00.virtio_mmio/by-name/metadata    /metadata    ext4    noatime,nosuid,nodev    wait,formattable,first_stage_mount
/devices/*/block/vdf  auto  auto      defaults voldmanaged=sdcard:auto,encryptable=userdata

可以发现/data的挂载源是/dev/block/vdc这个块设备(即/dev/block/vdc被挂载到/data),然后我们在adb shell中执行:

cat /proc/partitions
major minor  #blocks  name
 253       32  104857600 vdc

发现vdc分区位于主设备号为253的设备上。

再执行:

cat /proc/devices
Block devices:
253 virtblk

发现253号设备是virtblk这样一个块设备。

然后在--show-kernel的条件下,发现在内核初始化时有这样一句话:

virtio_blk virtio3: [vdc] 209715200 512-byte logical blocks (107 GB/100 GiB)

搜索virtio,了解到:

virtio 是一种 I/O 半虚拟化解决方案,是一套通用 I/O 设备虚拟化的程序,是对半虚拟化 Hypervisor 中的一组通用 I/O 设备的抽象。提供了一套上层应用与各 Hypervisor 虚拟化设备(KVM,Xen,VMware等)之间的通信框架和编程接口,减少跨平台所带来的兼容性问题,大大提高驱动程序开发效率。

其实并不难理解:对于模拟器而言,并没有真正添加一块物理硬盘,而是模拟了一块硬盘,所以会使用到这样一个虚拟化方案。

关于这个virtio我也只是初步接触,这篇文章不会去介绍它,而是在我们之前已经了解的linux驱动模型和设备初始化学习的基础上,看看这个块设备是如何被添加的。

首先查找源码,看到了virtio定义了一个总线类型:

//common/drivers/virtio/virtio.c:311
/* Bus type describing the "virtio" bus and the callbacks used for its devices. */
static struct bus_type virtio_bus = {
  .name  = "virtio",
  .match = virtio_dev_match, // match callback: decides whether a driver fits a device
  .dev_groups = virtio_dev_groups, // device groups
  .uevent = virtio_uevent, // uevent callback for devices on this bus
  .probe = virtio_dev_probe, // device initialization (probe) callback for this bus
  .remove = virtio_dev_remove, // device removal callback for this bus
};

所以,当总线上发现了一个设备时,内核会遍历该总线上已注册的驱动列表,对每个驱动调用总线的match函数来寻找能够匹配的驱动。我们先看看match函数如何完成驱动和设备的匹配:

//common/drivers/virtio/virtio.c:82
/* This looks through all the IDs a driver claims to support.  If any of them
 * match, we return 1 and the kernel will call virtio_dev_probe(). */
static int virtio_dev_match(struct device *_dv, struct device_driver *_dr)
{
  unsigned int i;
  struct virtio_device *dev = dev_to_virtio(_dv);
  const struct virtio_device_id *ids;


  ids = drv_to_virtio(_dr)->id_table;
  for (i = 0; ids[i].device; i++)
    if (virtio_id_match(dev, &ids[i]))
      return 1;
  return 0;
}

可以发现对比过程就是将device的id与驱动中定义的id_table进行比较,那么,对于vdc既然是一个分区,它的驱动应该也和块设备有关,于是我们找到virtio_blk驱动:

//common/drivers/block/virtio_blk.c:1000
static struct virtio_driver virtio_blk = {
  .feature_table      = features,
  .feature_table_size    = ARRAY_SIZE(features),
  .feature_table_legacy    = features_legacy,
  .feature_table_size_legacy  = ARRAY_SIZE(features_legacy),
  .driver.name      = KBUILD_MODNAME,
  .driver.owner      = THIS_MODULE,
  .id_table      = id_table,
  .probe        = virtblk_probe,
  .remove        = virtblk_remove,
  .config_changed      = virtblk_config_changed,
#ifdef CONFIG_PM_SLEEP
  .freeze        = virtblk_freeze,
  .restore      = virtblk_restore,
#endif
};

可以看到这个驱动有关的东西就在这里,id_table被定义为:

/*
 * Device IDs the virtio block driver binds to.  virtio_dev_match() walks
 * this table until it reaches an entry whose .device field is 0, so the
 * trailing { 0 } entry acts as the terminator.
 */
static const struct virtio_device_id id_table[] = {
  { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
  { 0 },
};
其中宏定义: VIRTIO_ID_BLOCK = 2

所以,当有设备被接入到virtio总线,且设备id为2时,它将会与virtio_blk驱动绑定,然后先执行总线的初始化函数:

//common/drivers/virtio/virtio.c:205
/*
 * Bus-level probe for a virtio device that already has a driver attached
 * (dev->dev.driver).
 *
 * Adds the DRIVER status bit, computes dev->features as the intersection of
 * the features offered by the device and those supported by the driver
 * (using the driver's legacy table when the device does not offer
 * VIRTIO_F_VERSION_1), finalizes and optionally validates the features,
 * then calls the driver's own probe().  Afterwards it marks the device
 * ready if the driver's probe did not, runs the optional scan() hook and
 * calls virtio_config_enable().
 *
 * Returns 0 on success.  On any failure the FAILED status bit is added and
 * the error code of the failing step is returned.
 */
static int virtio_dev_probe(struct device *_d)
{
  int err, i;
  struct virtio_device *dev = dev_to_virtio(_d);
  struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
  u64 device_features;
  u64 driver_features;
  u64 driver_features_legacy;


  /* We have a driver! */
  virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);


  /* Figure out what features the device supports. */
  device_features = dev->config->get_features(dev);


  /* Figure out what features the driver supports. */
  driver_features = 0;
  for (i = 0; i < drv->feature_table_size; i++) {
    unsigned int f = drv->feature_table[i];
    /* Features are collected in a u64 bitmask, so bit numbers must be < 64. */
    BUG_ON(f >= 64);
    driver_features |= (1ULL << f);
  }


  /* Some drivers have a separate feature table for virtio v1.0 */
  if (drv->feature_table_legacy) {
    driver_features_legacy = 0;
    for (i = 0; i < drv->feature_table_size_legacy; i++) {
      unsigned int f = drv->feature_table_legacy[i];
      BUG_ON(f >= 64);
      driver_features_legacy |= (1ULL << f);
    }
  } else {
    /* No separate legacy table: reuse the regular feature set. */
    driver_features_legacy = driver_features;
  }


  /* Choose the regular or the legacy driver feature set based on what the device offers. */
  if (device_features & (1ULL << VIRTIO_F_VERSION_1))
    dev->features = driver_features & device_features;
  else
    dev->features = driver_features_legacy & device_features;


  /* Transport features always preserved to pass to finalize_features. */
  for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++)
    if (device_features & (1ULL << i))
      __virtio_set_bit(dev, i);


  err = dev->config->finalize_features(dev);
  if (err)
    goto err;


  /* Optional driver hook that may inspect and change dev->features. */
  if (drv->validate) {
    u64 features = dev->features;


    err = drv->validate(dev);
    if (err)
      goto err;


    /* Did validation change any features? Then write them again. */
    if (features != dev->features) {
      err = dev->config->finalize_features(dev);
      if (err)
        goto err;
    }
  }


  err = virtio_features_ok(dev);
  if (err)
    goto err;


  /* Hand over to the driver-specific probe (virtblk_probe for virtio_blk). */
  err = drv->probe(dev);
  if (err)
    goto err;


  /* If probe didn't do it, mark device DRIVER_OK ourselves. */
  if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK))
    virtio_device_ready(dev);


  if (drv->scan)
    drv->scan(dev);


  virtio_config_enable(dev);


  return 0;
/* Shared failure path: record the failure in the device status and propagate the error. */
err:
  virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
  return err;


}

经过一系列初始化后,执行驱动的probe函数(即上面代码中的 err = drv->probe(dev); 一句),对于virtio_blk驱动来说也就是这个函数:

//common/drivers/block/virtio_blk.c:698
/*
 * Driver-level probe for a virtio block device; this is the .probe callback
 * of the virtio_blk driver and is reached through drv->probe(dev) in
 * virtio_dev_probe().
 *
 * Steps, in order: reserve a disk index, read the segment limit from the
 * device config, allocate and initialize the per-device struct virtio_blk,
 * set up the virtqueues, fill in and allocate the blk-mq tag set, allocate
 * the disk, give it a "vd"-prefixed name, apply the limits the host
 * advertises (segment size, block size, topology, discard, write-zeroes),
 * update the capacity, mark the device ready and register the disk with
 * device_add_disk().
 *
 * Returns 0 on success or an error code (e.g. -EINVAL, -ENOMEM, or whatever
 * a failing helper returned).  On failure the out_* labels at the end
 * release what had been acquired so far, in reverse order of acquisition.
 */
static int virtblk_probe(struct virtio_device *vdev)
{
  struct virtio_blk *vblk;
  struct request_queue *q;
  int err, index;


  u32 v, blk_size, max_size, sg_elems, opt_io_size;
  u16 min_io_size;
  u8 physical_block_exp, alignment_offset;
  unsigned int queue_depth;


  /* The probe reads the device config space below, so a config getter is required. */
  if (!vdev->config->get) {
    dev_err(&vdev->dev, "%s failure: config access disabled\n",
      __func__);
    return -EINVAL;
  }


  /* Reserve an index for this disk; it is released at out_free_index on failure. */
  err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
           GFP_KERNEL);
  if (err < 0)
    goto out;
  index = err;


  /* We need to know how many segments before we allocate. */
  err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX,
           struct virtio_blk_config, seg_max,
           &sg_elems);


  /* We need at least one SG element, whatever they say. */
  if (err || !sg_elems)
    sg_elems = 1;


  /* Prevent integer overflows and honor max vq size */
  sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2);


  /* We need extra sg elements at head and tail. */
  sg_elems += 2;
  vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
  if (!vblk) {
    err = -ENOMEM;
    goto out_free_index;
  }


  /* This reference is dropped in virtblk_remove(). */
  refcount_set(&vblk->refs, 1);
  mutex_init(&vblk->vdev_mutex);


  vblk->vdev = vdev;
  vblk->sg_elems = sg_elems;


  INIT_WORK(&vblk->config_work, virtblk_config_changed_work);


  /* Set up the virtqueues (init_vq() is defined elsewhere in the driver). */
  err = init_vq(vblk);
  if (err)
    goto out_free_vblk;


  /* Default queue sizing is to fill the ring. */
  if (!virtblk_queue_depth) {
    queue_depth = vblk->vqs[0].vq->num_free;
    /* ... but without indirect descs, we use 2 descs per req */
    if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
      queue_depth /= 2;
  } else {
    queue_depth = virtblk_queue_depth;
  }


  /* Describe the blk-mq tag set: one hardware queue per virtqueue. */
  memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
  vblk->tag_set.ops = &virtio_mq_ops;
  vblk->tag_set.queue_depth = queue_depth;
  vblk->tag_set.numa_node = NUMA_NO_NODE;
  vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
  /* Per-request payload: the request header plus sg_elems scatterlist entries. */
  vblk->tag_set.cmd_size =
    sizeof(struct virtblk_req) +
    sizeof(struct scatterlist) * sg_elems;
  vblk->tag_set.driver_data = vblk;
  vblk->tag_set.nr_hw_queues = vblk->num_vqs;


  err = blk_mq_alloc_tag_set(&vblk->tag_set);
  if (err)
    goto out_free_vq;


  /* Allocate the disk together with its request queue. */
  vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, vblk);
  if (IS_ERR(vblk->disk)) {
    err = PTR_ERR(vblk->disk);
    goto out_free_tags;
  }
  q = vblk->disk->queue;


  /* Fill disk_name from the "vd" prefix and the index (helper defined elsewhere). */
  virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);


  vblk->disk->major = major;
  vblk->disk->first_minor = index_to_minor(index);
  vblk->disk->minors = 1 << PART_BITS;
  vblk->disk->private_data = vblk;
  vblk->disk->fops = &virtblk_fops;
  vblk->disk->flags |= GENHD_FL_EXT_DEVT;
  vblk->index = index;


  /* configure queue flush support */
  virtblk_update_cache_mode(vdev);


  /* If disk is read-only in the host, the guest should obey */
  if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
    set_disk_ro(vblk->disk, 1);


  /* We can handle whatever the host told us to handle. */
  /* The two head/tail elements added to sg_elems above are excluded here. */
  blk_queue_max_segments(q, vblk->sg_elems-2);


  /* No real sector limit. */
  blk_queue_max_hw_sectors(q, -1U);


  max_size = virtio_max_dma_size(vdev);


  /* Host can optionally specify maximum segment size and number of
   * segments. */
  err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX,
           struct virtio_blk_config, size_max, &v);
  if (!err)
    max_size = min(max_size, v);


  blk_queue_max_segment_size(q, max_size);


  /* Host can optionally specify the block size of the device */
  err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
           struct virtio_blk_config, blk_size,
           &blk_size);
  if (!err) {
    err = blk_validate_block_size(blk_size);
    if (err) {
      dev_err(&vdev->dev,
        "virtio_blk: invalid block size: 0x%x\n",
        blk_size);
      goto out_cleanup_disk;
    }


    blk_queue_logical_block_size(q, blk_size);
  } else
    /* Host gave no block size: fall back to the queue's current logical block size. */
    blk_size = queue_logical_block_size(q);


  /* Use topology information if available */
  err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
           struct virtio_blk_config, physical_block_exp,
           &physical_block_exp);
  if (!err && physical_block_exp)
    blk_queue_physical_block_size(q,
        blk_size * (1 << physical_block_exp));


  err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
           struct virtio_blk_config, alignment_offset,
           &alignment_offset);
  if (!err && alignment_offset)
    blk_queue_alignment_offset(q, blk_size * alignment_offset);


  err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
           struct virtio_blk_config, min_io_size,
           &min_io_size);
  if (!err && min_io_size)
    blk_queue_io_min(q, blk_size * min_io_size);


  err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
           struct virtio_blk_config, opt_io_size,
           &opt_io_size);
  if (!err && opt_io_size)
    blk_queue_io_opt(q, blk_size * opt_io_size);


  /* Discard limits are only configured when the feature was negotiated. */
  if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
    q->limits.discard_granularity = blk_size;


    virtio_cread(vdev, struct virtio_blk_config,
           discard_sector_alignment, &v);
    q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0;


    virtio_cread(vdev, struct virtio_blk_config,
           max_discard_sectors, &v);
    blk_queue_max_discard_sectors(q, v ? v : UINT_MAX);


    virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
           &v);


    /*
     * max_discard_seg == 0 is out of spec but we always
     * handled it.
     */
    if (!v)
      v = sg_elems - 2;
    blk_queue_max_discard_segments(q,
                 min(v, MAX_DISCARD_SEGMENTS));


    blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
  }


  if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
    virtio_cread(vdev, struct virtio_blk_config,
           max_write_zeroes_sectors, &v);
    blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX);
  }


  virtblk_update_capacity(vblk, false);
  virtio_device_ready(vdev);


  /* Register the disk with the block layer, with vdev->dev as its parent device. */
  err = device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups);
  if (err)
    goto out_cleanup_disk;


  return 0;


/* Error unwinding: each label releases one resource and falls through to the next. */
out_cleanup_disk:
  blk_cleanup_disk(vblk->disk);
out_free_tags:
  blk_mq_free_tag_set(&vblk->tag_set);
out_free_vq:
  vdev->config->del_vqs(vdev);
  kfree(vblk->vqs);
out_free_vblk:
  kfree(vblk);
out_free_index:
  ida_simple_remove(&vd_index_ida, index);
out:
  return err;
}

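我们需要关注的是 vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, vblk); 这一句,它执行了 blk_mq_alloc_disk 函数,最终调用了我们前面文章中介绍过的__alloc_disk_node函数,分配了通用磁盘(gendisk)结构;然后在函数末尾的 err = device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); 处,调用了device_add_disk函数,将通用磁盘添加到系统,并且可以推断它的parent-kobj为/sys/block,这里也会发送uevent消息到用户空间。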

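在 virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); 这一句中,所有virtio_blk设备的磁盘名被统一命名为以vd开头、后接由index换算出的字母后缀,所以vdc(index为2,与/proc/partitions中的次设备号32相对应)就是这样来的。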

执行完成后,设备完成和驱动的绑定并作为磁盘被添加到系统,然后在系统启动时被挂载到/data。

总结:

bus发现设备并匹配驱动 -> 调用bus的probe函数 -> 调用driver的probe函数 -> 包装通用的磁盘注册流程 -> 磁盘被添加到系统 -> 系统启动时根据fstab挂载

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值