?? block-qcow.c
字號:
s->l2_cache_counts[j] >>= 1; } } l2_table = s->l2_cache + (i << s->l2_bits); goto found; } }cache_miss: /* not found: load a new entry in the least used one */ min_index = 0; min_count = 0xffffffff; for (i = 0; i < L2_CACHE_SIZE; i++) { if (s->l2_cache_counts[i] < min_count) { min_count = s->l2_cache_counts[i]; min_index = i; } } l2_table = s->l2_cache + (min_index << s->l2_bits); /*If extent pre-allocated, read table from disk, *otherwise write new table to disk*/ if (new_l2_table) { /*Should we allocate the whole extent? Adjustable parameter.*/ if (s->cluster_alloc == s->l2_size) { cluster_offset = l2_offset + (s->l2_size * sizeof(uint64_t)); cluster_offset = (cluster_offset + s->cluster_size - 1) & ~(s->cluster_size - 1); if (qtruncate(s->fd, cluster_offset + (s->cluster_size * s->l2_size), s->sparse) != 0) { DPRINTF("ERROR truncating file\n"); return 0; } s->fd_end = cluster_offset + (s->cluster_size * s->l2_size); for (i = 0; i < s->l2_size; i++) { l2_table[i] = cpu_to_be64(cluster_offset + (i*s->cluster_size)); } } else memset(l2_table, 0, s->l2_size * sizeof(uint64_t)); lseek(s->fd, l2_offset, SEEK_SET); if (write(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) != s->l2_size * sizeof(uint64_t)) return 0; } else { lseek(s->fd, l2_offset, SEEK_SET); if (read(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) != s->l2_size * sizeof(uint64_t)) return 0; } /*Update the cache entries*/ s->l2_cache_offsets[min_index] = l2_offset; s->l2_cache_counts[min_index] = 1;found: /*The extent is split into 's->l2_size' blocks of *size 's->cluster_size'*/ l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1); cluster_offset = be64_to_cpu(l2_table[l2_index]); if (!cluster_offset || ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1) ) { if (!allocate) return 0; if ((cluster_offset & QCOW_OFLAG_COMPRESSED) && (n_end - n_start) < s->cluster_sectors) { /* cluster is already allocated but compressed, we must decompress it in the case it is not completely overwritten */ if (decompress_cluster(s, cluster_offset) < 0) cluster_offset = lseek(s->fd, s->fd_end, SEEK_SET); cluster_offset = (cluster_offset + s->cluster_size - 1) & ~(s->cluster_size - 1); /* write the cluster content - not asynchronous */ lseek(s->fd, cluster_offset, SEEK_SET); if (write(s->fd, s->cluster_cache, s->cluster_size) != s->cluster_size) return -1; } else { /* allocate a new cluster */ cluster_offset = lseek(s->fd, s->fd_end, SEEK_SET); if (allocate == 1) { /* round to cluster size */ cluster_offset = (cluster_offset + s->cluster_size - 1) & ~(s->cluster_size - 1); if (qtruncate(s->fd, cluster_offset + s->cluster_size, s->sparse)!=0) { DPRINTF("ERROR truncating file\n"); return 0; } s->fd_end = (cluster_offset + s->cluster_size); /* if encrypted, we must initialize the cluster content which won't be written */ if (s->crypt_method && (n_end - n_start) < s->cluster_sectors) { uint64_t start_sect; start_sect = (offset & ~(s->cluster_size - 1)) >> 9; memset(s->cluster_data + 512, 0xaa, 512); for (i = 0; i < s->cluster_sectors;i++) { if (i < n_start || i >= n_end) { encrypt_sectors(s, start_sect + i, s->cluster_data, s->cluster_data + 512, 1, 1, &s->aes_encrypt_key); lseek(s->fd, cluster_offset + i * 512, SEEK_SET); if (write(s->fd, s->cluster_data, 512) != 512) return -1; } } } } else { cluster_offset |= QCOW_OFLAG_COMPRESSED | (uint64_t)compressed_size << (63 - s->cluster_bits); } } /* update L2 table */ tmp = cpu_to_be64(cluster_offset); l2_table[l2_index] = tmp; /*For IO_DIRECT we write 4KByte blocks*/ l2_sector = (l2_index * sizeof(uint64_t)) >> 12; l2_ptr = (char *)l2_table + (l2_sector << 12); if (posix_memalign((void **)&tmp_ptr2, 4096, 4096) != 0) { DPRINTF("ERROR allocating memory for L1 table\n"); } memcpy(tmp_ptr2, l2_ptr, 4096); lseek(s->fd, l2_offset + (l2_sector << 12), SEEK_SET); if (write(s->fd, tmp_ptr2, 4096) != 4096) { free(tmp_ptr2); return -1; } free(tmp_ptr2); } return cluster_offset;}static void init_cluster_cache(struct disk_driver *dd){ struct td_state *bs = dd->td_state; struct tdqcow_state *s = (struct tdqcow_state *)dd->private; uint32_t count = 0; int i, cluster_entries; cluster_entries = s->cluster_size / 512; DPRINTF("Initialising Cluster cache, %d sectors per cluster (%d cluster size)\n", cluster_entries, s->cluster_size); for (i = 0; i < bs->size; i += cluster_entries) { if (get_cluster_offset(s, i << 9, 0, 0, 0, 1)) count++; if (count >= L2_CACHE_SIZE) return; } DPRINTF("Finished cluster initialisation, added %d entries\n", count); return;}static int qcow_is_allocated(struct tdqcow_state *s, int64_t sector_num, int nb_sectors, int *pnum){ int index_in_cluster, n; uint64_t cluster_offset; cluster_offset = get_cluster_offset(s, sector_num << 9, 0, 0, 0, 0); index_in_cluster = sector_num & (s->cluster_sectors - 1); n = s->cluster_sectors - index_in_cluster; if (n > nb_sectors) n = nb_sectors; *pnum = n; return (cluster_offset != 0);}static int decompress_buffer(uint8_t *out_buf, int out_buf_size, const uint8_t *buf, int buf_size){ z_stream strm1, *strm = &strm1; int ret, out_len; memset(strm, 0, sizeof(*strm)); strm->next_in = (uint8_t *)buf; strm->avail_in = buf_size; strm->next_out = out_buf; strm->avail_out = out_buf_size; ret = inflateInit2(strm, -12); if (ret != Z_OK) return -1; ret = inflate(strm, Z_FINISH); out_len = strm->next_out - out_buf; if ( (ret != Z_STREAM_END && ret != Z_BUF_ERROR) || (out_len != out_buf_size) ) { inflateEnd(strm); return -1; } inflateEnd(strm); return 0;} static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset){ int ret, csize; uint64_t coffset; coffset = cluster_offset & s->cluster_offset_mask; if (s->cluster_cache_offset != coffset) { csize = cluster_offset >> (63 - s->cluster_bits); csize &= (s->cluster_size - 1); lseek(s->fd, coffset, SEEK_SET); ret = read(s->fd, s->cluster_data, csize); if (ret != csize) return -1; if (decompress_buffer(s->cluster_cache, s->cluster_size, s->cluster_data, csize) < 0) { return -1; } s->cluster_cache_offset = coffset; } return 0;}static inline void init_fds(struct disk_driver *dd){ int i; struct tdqcow_state *s = (struct tdqcow_state *)dd->private; for(i = 0; i < MAX_IOFD; i++) dd->io_fd[i] = 0; dd->io_fd[0] = s->aio.aio_ctx.pollfd;}/* Open the disk file and initialize qcow state. */static int tdqcow_open (struct disk_driver *dd, const char *name, td_flag_t flags){ int fd, len, i, shift, ret, size, l1_table_size, o_flags; int max_aio_reqs; struct td_state *bs = dd->td_state; struct tdqcow_state *s = (struct tdqcow_state *)dd->private; char *buf; QCowHeader *header; QCowHeader_ext *exthdr; uint32_t cksum; uint64_t final_cluster = 0; DPRINTF("QCOW: Opening %s\n",name); o_flags = O_DIRECT | O_LARGEFILE | ((flags == TD_RDONLY) ? O_RDONLY : O_RDWR); fd = open(name, o_flags); if (fd < 0) { DPRINTF("Unable to open %s (%d)\n",name,0 - errno); return -1; } s->fd = fd; if (asprintf(&s->name,"%s", name) == -1) { close(fd); return -1; } ASSERT(sizeof(QCowHeader) + sizeof(QCowHeader_ext) < 512); ret = posix_memalign((void **)&buf, 512, 512); if (ret != 0) goto fail; if (read(fd, buf, 512) != 512) goto fail; header = (QCowHeader *)buf; be32_to_cpus(&header->magic); be32_to_cpus(&header->version); be64_to_cpus(&header->backing_file_offset); be32_to_cpus(&header->backing_file_size); be32_to_cpus(&header->mtime); be64_to_cpus(&header->size); be32_to_cpus(&header->crypt_method); be64_to_cpus(&header->l1_table_offset); if (header->magic != QCOW_MAGIC) goto fail; switch (header->version) { case QCOW_VERSION: break; case 2: close(fd); dd->drv = &tapdisk_qcow2; return dd->drv->td_open(dd, name, flags); default: goto fail; } if (header->size <= 1 || header->cluster_bits < 9) goto fail; if (header->crypt_method > QCOW_CRYPT_AES) goto fail; s->crypt_method_header = header->crypt_method; if (s->crypt_method_header) s->encrypted = 1; s->cluster_bits = header->cluster_bits; s->cluster_size = 1 << s->cluster_bits; s->cluster_sectors = 1 << (s->cluster_bits - 9); s->l2_bits = header->l2_bits; s->l2_size = 1 << s->l2_bits; s->cluster_alloc = s->l2_size; bs->size = header->size / 512; s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1; s->backing_file_offset = header->backing_file_offset; s->backing_file_size = header->backing_file_size; /* read the level 1 table */ shift = s->cluster_bits + s->l2_bits; s->l1_size = (header->size + (1LL << shift) - 1) >> shift; s->l1_table_offset = header->l1_table_offset; /*allocate a 4Kbyte multiple of memory*/ l1_table_size = s->l1_size * sizeof(uint64_t); if (l1_table_size % 4096 > 0) { l1_table_size = ((l1_table_size >> 12) + 1) << 12; } ret = posix_memalign((void **)&s->l1_table, 4096, l1_table_size); memset(s->l1_table, 0x00, l1_table_size); DPRINTF("L1 Table offset detected: %llu, size %d (%d)\n", (long long)s->l1_table_offset, (int) (s->l1_size * sizeof(uint64_t)), l1_table_size); lseek(fd, s->l1_table_offset, SEEK_SET); if (read(fd, s->l1_table, l1_table_size) != l1_table_size) goto fail; for(i = 0; i < s->l1_size; i++) { be64_to_cpus(&s->l1_table[i]); //DPRINTF("L1[%d] => %llu\n", i, s->l1_table[i]); if (s->l1_table[i] > final_cluster) final_cluster = s->l1_table[i]; } /* alloc L2 cache */ size = s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t); ret = posix_memalign((void **)&s->l2_cache, 4096, size); if(ret != 0) goto fail; size = s->cluster_size; ret = posix_memalign((void **)&s->cluster_cache, 4096, size); if(ret != 0) goto fail; ret = posix_memalign((void **)&s->cluster_data, 4096, size); if(ret != 0) goto fail; s->cluster_cache_offset = -1; if (s->backing_file_offset != 0) s->cluster_alloc = 1; /*Cannot use pre-alloc*/ bs->sector_size = 512; bs->info = 0; /*Detect min_cluster_alloc*/ s->min_cluster_alloc = 1; /*Default*/ if (s->backing_file_offset == 0 && s->l1_table_offset % 4096 == 0) { /*We test to see if the xen magic # exists*/ exthdr = (QCowHeader_ext *)(buf + sizeof(QCowHeader)); be32_to_cpus(&exthdr->xmagic); if(exthdr->xmagic != XEN_MAGIC) goto end_xenhdr; /* Try to detect old tapdisk images. They have to be fixed because * they don't use big endian but native endianess for the L1 table */ if ((exthdr->flags & EXTHDR_L1_BIG_ENDIAN) == 0) { /* The image is broken. Fix it. The L1 table has already been byte-swapped, so we can write it to the image file as it is currently in memory. Then swap it back to native endianess for operation. */ DPRINTF("qcow: Converting image to big endian L1 table\n"); lseek(fd, s->l1_table_offset, SEEK_SET); if (write(fd, s->l1_table, l1_table_size) != l1_table_size) { DPRINTF("qcow: Failed to write new L1 table\n"); goto fail; } for(i = 0;i < s->l1_size; i++) { cpu_to_be64s(&s->l1_table[i]); } /* Write the big endian flag to the extended header */ exthdr->flags |= EXTHDR_L1_BIG_ENDIAN; if (write(fd, buf, 512) != 512) { DPRINTF("qcow: Failed to write extended header\n"); goto fail; } } /*Finally check the L1 table cksum*/ be32_to_cpus(&exthdr->cksum); cksum = gen_cksum((char *)s->l1_table, s->l1_size * sizeof(uint64_t)); if(exthdr->cksum != cksum) goto end_xenhdr; be32_to_cpus(&exthdr->min_cluster_alloc); be32_to_cpus(&exthdr->flags); s->sparse = (exthdr->flags & SPARSE_FILE); s->min_cluster_alloc = exthdr->min_cluster_alloc; } end_xenhdr: /* A segment (i.e. a page) can span multiple clusters */ max_aio_reqs = ((getpagesize() / s->cluster_size) + 1) * MAX_SEGMENTS_PER_REQ * MAX_REQUESTS; if (tap_aio_init(&s->aio, bs->size, max_aio_reqs)!=0) { DPRINTF("Unable to initialise AIO state\n"); tap_aio_free(&s->aio); goto fail; } init_fds(dd); if (!final_cluster) s->fd_end = s->l1_table_offset + l1_table_size; else { s->fd_end = lseek(fd, 0, SEEK_END); if (s->fd_end == (off_t)-1) goto fail; } return 0; fail: DPRINTF("QCOW Open failed\n"); tap_aio_free(&s->aio); free(s->l1_table); free(s->l2_cache); free(s->cluster_cache); free(s->cluster_data); close(fd); return -1;}static int tdqcow_queue_read(struct disk_driver *dd, uint64_t sector, int nb_sectors, char *buf, td_callback_t cb, int id, void *private){ struct tdqcow_state *s = (struct tdqcow_state *)dd->private; int ret = 0, index_in_cluster, n, i, rsp = 0; uint64_t cluster_offset, sec, nr_secs; sec = sector; nr_secs = nb_sectors; /*Check we can get a lock*/ for (i = 0; i < nb_sectors; i++) if (!tap_aio_can_lock(&s->aio, sector + i)) return cb(dd, -EBUSY, sector, nb_sectors, id, private);
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -