?? block-qcow.c
字號:
/* block-qcow.c
 *
 * Asynchronous Qemu copy-on-write disk implementation.
 * Code based on the Qemu implementation
 * (see copyright notice below)
 *
 * (c) 2006 Andrew Warfield and Julian Chesterfield
 *
 */
/*
 * Block driver for the QCOW format
 *
 * Copyright (c) 2004 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files(the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/statvfs.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <string.h>
#include <zlib.h>
#include <inttypes.h>
#include <libaio.h>
#include "bswap.h"
#include "aes.h"
#include "tapdisk.h"
#include "tapaio.h"
#include "blk.h"

/* *BSD has no O_LARGEFILE */
#ifndef O_LARGEFILE
#define O_LARGEFILE 0
#endif

#if 1
/* NOTE(review): on failure this deliberately dereferences a null pointer
 * (*(int*)0=0) to force an immediate crash/core dump after logging. */
#define ASSERT(_p) \
	if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s", #_p , \
	__LINE__, __FILE__); *(int*)0=0; }
#else
#define ASSERT(_p) ((void)0)
#endif

/* Round l up to the next multiple of s (evaluated as uint64_t). */
#define ROUNDUP(l, s) \
({ \
	(uint64_t)( \
	(l + (s - 1)) - ((l + (s - 1)) % s)); \
})

#undef IOCB_IDX
/* Index of iocb _io within state _s's iocb array (pointer arithmetic). */
#define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)

#define ZERO_TEST(_b) (_b | 0x00)

/**************************************************************/
/* QEMU COW block driver with compression and encryption support */

/* On-disk magic numbers: "QFI\xfb" for stock qcow, "XEN\xfb" for the
 * Xen extended header (QCowHeader_ext below). */
#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
#define XEN_MAGIC (('X' << 24) | ('E' << 16) | ('N' << 8) | 0xfb)
#define QCOW_VERSION 1

#define QCOW_CRYPT_NONE 0x00
#define QCOW_CRYPT_AES 0x01

/* Top bit of an L2 entry marks a compressed cluster. */
#define QCOW_OFLAG_COMPRESSED (1LL << 63)

/* QCowHeader_ext.flags bits */
#define SPARSE_FILE 0x01
#define EXTHDR_L1_BIG_ENDIAN 0x02

#ifndef O_BINARY
#define O_BINARY 0
#endif

/* On-disk qcow (version 1) file header; multi-byte fields are stored
 * big-endian on disk. */
typedef struct QCowHeader {
	uint32_t magic;
	uint32_t version;
	uint64_t backing_file_offset;
	uint32_t backing_file_size;
	uint32_t mtime;
	uint64_t size; /* in bytes */
	uint8_t cluster_bits;
	uint8_t l2_bits;
	uint32_t crypt_method;
	uint64_t l1_table_offset;
} QCowHeader;

/*Extended header for Xen enhancements*/
typedef struct QCowHeader_ext {
	uint32_t xmagic;            /* XEN_MAGIC when the extension is present */
	uint32_t cksum;             /* checksum of the L1 table (see gen_cksum) */
	uint32_t min_cluster_alloc; /* historical blktap extent allocation unit */
	uint32_t flags;             /* SPARSE_FILE / EXTHDR_L1_BIG_ENDIAN */
} QCowHeader_ext;

#define L2_CACHE_SIZE 16 /*Fixed allocation in Qemu*/

/* Per-open-image driver state. */
struct tdqcow_state {
	int fd;                        /*Main Qcow file descriptor */
	uint64_t fd_end;               /*Store a local record of file length */
	char *name;                    /*Record of the filename*/
	uint32_t backing_file_size;
	uint64_t backing_file_offset;
	int encrypted;                 /*File contents are encrypted or plain*/
	int cluster_bits;              /*Determines length of cluster as
					*indicated by file hdr*/
	int cluster_size;              /*Length of cluster*/
	int cluster_sectors;           /*Number of sectors per cluster*/
	int cluster_alloc;             /*Blktap fix for allocating full
					*extents*/
	int min_cluster_alloc;         /*Blktap historical extent alloc*/
	int sparse;                    /*Indicates whether to preserve sparseness*/
	int l2_bits;                   /*Size of L2 table entry*/
	int l2_size;                   /*Full table size*/
	int l1_size;                   /*L1 table size*/
	uint64_t cluster_offset_mask;
	uint64_t l1_table_offset;      /*L1 table offset from beginning of
					*file*/
	uint64_t *l1_table;            /*L1 table entries*/
	uint64_t *l2_cache;            /*We maintain a cache of size
					*L2_CACHE_SIZE of most read entries*/
	uint64_t l2_cache_offsets[L2_CACHE_SIZE];     /*L2 cache entries*/
	uint32_t l2_cache_counts[L2_CACHE_SIZE];      /*Cache access record*/
	uint8_t *cluster_cache;
	uint8_t *cluster_data;
	uint64_t cluster_cache_offset; /**/
	uint32_t crypt_method;         /*current crypt method, 0 if no
					*key yet */
	uint32_t crypt_method_header;  /**/
	AES_KEY aes_encrypt_key;       /*AES key*/
	AES_KEY aes_decrypt_key;       /*AES key*/

	/* libaio state */
	tap_aio_context_t aio;
};

static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset);

#ifdef USE_GCRYPT

#include <gcrypt.h>

/*
 * Compute a 32-bit checksum of the L1 table: MD5 over the table with all
 * entries byte-swapped to big endian, returning the first 32 bits of the
 * digest.  The buffer at ptr is swapped in place and restored before
 * returning, so it is unchanged on exit.  (gcrypt variant.)
 */
static uint32_t gen_cksum(char *ptr, int len)
{
	int i;
	uint32_t md[4];

	/* Convert L1 table to big endian */
	for(i = 0; i < len / sizeof(uint64_t); i++) {
		cpu_to_be64s(&((uint64_t*) ptr)[i]);
	}

	/* Generate checksum */
	gcry_md_hash_buffer(GCRY_MD_MD5, md, ptr, len);

	/* Convert L1 table back to native endianess */
	for(i = 0; i < len / sizeof(uint64_t); i++) {
		be64_to_cpus(&((uint64_t*) ptr)[i]);
	}

	return md[0];
}

#else /* use libcrypto */

#include <openssl/md5.h>

/*
 * Same contract as the gcrypt variant above, implemented with OpenSSL MD5.
 * NOTE(review): returns 0 both when malloc fails and when MD5 fails, so a
 * caller cannot distinguish "error" from a genuine checksum of 0.
 */
static uint32_t gen_cksum(char *ptr, int len)
{
	int i;
	unsigned char *md;
	uint32_t ret;

	md = malloc(MD5_DIGEST_LENGTH);
	if(!md) return 0;

	/* Convert L1 table to big endian */
	for(i = 0; i < len / sizeof(uint64_t); i++) {
		cpu_to_be64s(&((uint64_t*) ptr)[i]);
	}

	/* Generate checksum */
	if (MD5((unsigned char *)ptr, len, md) != md)
		ret = 0;
	else
		memcpy(&ret, md, sizeof(uint32_t));

	/* Convert L1 table back to native endianess */
	for(i = 0; i < len / sizeof(uint64_t); i++) {
		be64_to_cpus(&((uint64_t*) ptr)[i]);
	}

	free(md);
	return ret;
}
#endif

/*
 * Determine the size in sectors of the image backing 'filename' and store
 * it in *size.  If the file starts with a qcow header the virtual disk
 * size from that header is used; otherwise, for block devices the kernel
 * is queried via blk_getimagesize(), and for regular files st->st_size is
 * used.  Returns 0 on success, -1 on error.
 * NOTE(review): read() returns ssize_t; comparing it with the unsigned
 * sizeof(header) promotes -1 to a huge unsigned value — a read error
 * (-1) would not be caught by the '<' test on some platforms.  Verify
 * against the upstream tree before relying on the error path.
 */
static int get_filesize(char *filename, uint64_t *size, struct stat *st)
{
	int fd;
	QCowHeader header;

	/*Set to the backing file size*/
	fd = open(filename, O_RDONLY);
	if (fd < 0)
		return -1;
	if (read(fd, &header, sizeof(header)) < sizeof(header)) {
		close(fd);
		return -1;
	}
	close(fd);
	be32_to_cpus(&header.magic);
	be64_to_cpus(&header.size);
	if (header.magic == QCOW_MAGIC) {
		*size = header.size >> SECTOR_SHIFT;
		return 0;
	}

	if(S_ISBLK(st->st_mode)) {
		fd = open(filename, O_RDONLY);
		if (fd < 0)
			return -1;
		if (blk_getimagesize(fd, size) != 0) {
			close(fd);
			return -1;
		}
		close(fd);
	} else *size = (st->st_size >> SECTOR_SHIFT);
	return 0;
}

/*
 * Derive the AES encryption/decryption keys from the passphrase 'key'
 * (truncated/zero-padded to 16 bytes) and activate the crypt method
 * recorded in the file header.  Returns 0 on success, -1 if AES key
 * setup fails.
 */
static int qcow_set_key(struct tdqcow_state *s, const char *key)
{
	uint8_t keybuf[16];
	int len, i;

	memset(keybuf, 0, 16);
	len = strlen(key);
	if (len > 16)
		len = 16;
	/* XXX: we could compress the chars to 7 bits to increase entropy */
	for (i = 0; i < len; i++) {
		keybuf[i] = key[i];
	}
	s->crypt_method = s->crypt_method_header;

	if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
		return -1;
	if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
		return -1;
#if 0
	/* test */
	{
		uint8_t in[16];
		uint8_t out[16];
		uint8_t tmp[16];
		for (i=0; i<16; i++)
			in[i] = i;
		AES_encrypt(in, tmp, &s->aes_encrypt_key);
		AES_decrypt(tmp, out, &s->aes_decrypt_key);
		for (i = 0; i < 16; i++)
			DPRINTF(" %02x", tmp[i]);
		DPRINTF("\n");
		for (i = 0; i < 16; i++)
			DPRINTF(" %02x", out[i]);
		DPRINTF("\n");
	}
#endif
	return 0;
}

/*
 * The crypt function is compatible with the linux cryptoloop
 * algorithm for < 4 GB images. NOTE: out_buf == in_buf is
 * supported .
 *
 * Encrypt ('enc' true) or decrypt nb_sectors 512-byte sectors from in_buf
 * to out_buf with AES-CBC, using the little-endian sector number as the
 * per-sector IV.  'key' must match the direction (encrypt vs decrypt key).
 */
static void encrypt_sectors(struct tdqcow_state *s, int64_t sector_num,
                            uint8_t *out_buf, const uint8_t *in_buf,
                            int nb_sectors, int enc,
                            const AES_KEY *key)
{
	union {
		uint64_t ll[2];
		uint8_t b[16];
	} ivec;
	int i;

	for (i = 0; i < nb_sectors; i++) {
		/* IV = sector number (LE) in the low quadword, 0 in the high */
		ivec.ll[0] = cpu_to_le64(sector_num);
		ivec.ll[1] = 0;
		AES_cbc_encrypt(in_buf, out_buf, 512, key,
				ivec.b, enc);
		sector_num++;
		in_buf += 512;
		out_buf += 512;
	}
}

/*
 * Resize the file behind 'fd' to 'length' bytes, rounded up to whole
 * DEFAULT_SECTOR_SIZE sectors.  Growing is done by synchronously writing
 * zero sectors at EOF (to encourage contiguous extent allocation);
 * shrinking is done with ftruncate() only when 'sparse' is set.  Block
 * devices are left untouched.  Returns 0 on success, -1 on error.
 */
static int qtruncate(int fd, off_t length, int sparse)
{
	int ret, i;
	int current = 0, rem = 0;
	uint64_t sectors;
	struct stat st;
	char *buf;

	/* If length is greater than the current file len
	 * we synchronously write zeroes to the end of the
	 * file, otherwise we truncate the length down
	 */
	ret = fstat(fd, &st);
	if (ret == -1)
		return -1;
	if (S_ISBLK(st.st_mode))
		return 0;

	sectors = (length + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
	current = (st.st_size + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
	rem = st.st_size % DEFAULT_SECTOR_SIZE;

	/* If we are extending this file, we write zeros to the end --
	 * this tries to ensure that the extents allocated wind up being
	 * contiguous on disk.
	 */
	if(st.st_size < sectors * DEFAULT_SECTOR_SIZE) {
		/*We are extending the file*/
		if ((ret = posix_memalign((void **)&buf,
					  512, DEFAULT_SECTOR_SIZE))) {
			DPRINTF("posix_memalign failed: %d\n", ret);
			return -1;
		}
		memset(buf, 0x00, DEFAULT_SECTOR_SIZE);
		if (lseek(fd, 0, SEEK_END)==-1) {
			DPRINTF("Lseek EOF failed (%d), internal error\n",
				errno);
			free(buf);
			return -1;
		}
		/* First pad the current partial tail sector out to a
		 * sector boundary, then append full zero sectors. */
		if (rem) {
			ret = write(fd, buf, rem);
			if (ret != rem) {
				DPRINTF("write failed: ret = %d, err = %s\n",
					ret, strerror(errno));
				free(buf);
				return -1;
			}
		}
		for (i = current; i < sectors; i++ ) {
			ret = write(fd, buf, DEFAULT_SECTOR_SIZE);
			if (ret != DEFAULT_SECTOR_SIZE) {
				DPRINTF("write failed: ret = %d, err = %s\n",
					ret, strerror(errno));
				free(buf);
				return -1;
			}
		}
		free(buf);
	} else if(sparse && (st.st_size > sectors * DEFAULT_SECTOR_SIZE))
		if (ftruncate(fd, (off_t)sectors * DEFAULT_SECTOR_SIZE)==-1) {
			DPRINTF("Ftruncate failed (%s)\n", strerror(errno));
			return -1;
		}
	return 0;
}

/* 'allocate' is:
 *
 * 0 to not allocate.
 *
 * 1 to allocate a normal cluster (for sector indexes 'n_start' to
 * 'n_end')
 *
 * 2 to allocate a compressed cluster of size
 * 'compressed_size'. 'compressed_size' must be > 0 and <
 * cluster_size
 *
 * return 0 if not allocated.
 *
 * Maps the virtual byte 'offset' to its cluster offset in the qcow file
 * via the two-level L1/L2 lookup, optionally allocating the L2 table
 * and/or the data cluster on the way.
 */
static uint64_t get_cluster_offset(struct tdqcow_state *s,
                                   uint64_t offset, int allocate,
                                   int compressed_size,
                                   int n_start, int n_end)
{
	int min_index, i, j, l1_index, l2_index, l2_sector, l1_sector;
	char *tmp_ptr2, *l2_ptr, *l1_ptr;
	uint64_t *tmp_ptr;
	uint64_t l2_offset, *l2_table, cluster_offset, tmp;
	uint32_t min_count;
	int new_l2_table;

	/*Check L1 table for the extent offset*/
	l1_index = offset >> (s->l2_bits + s->cluster_bits);
	l2_offset = s->l1_table[l1_index];
	new_l2_table = 0;
	if (!l2_offset) {
		if (!allocate)
			return 0;
		/*
		 * allocating a new l2 entry + extent
		 * at the end of the file, we must also
		 * update the L1 entry safely.
		 */
		l2_offset = s->fd_end;

		/* round to cluster size */
		l2_offset = (l2_offset + s->cluster_size - 1)
			& ~(s->cluster_size - 1);

		/* update the L1 entry */
		s->l1_table[l1_index] = l2_offset;
		tmp = cpu_to_be64(l2_offset);

		/*Truncate file for L2 table
		 *(initialised to zero in case we crash)*/
		if (qtruncate(s->fd,
			      l2_offset + (s->l2_size * sizeof(uint64_t)),
			      s->sparse) != 0) {
			DPRINTF("ERROR truncating file\n");
			return 0;
		}
		s->fd_end = l2_offset + (s->l2_size * sizeof(uint64_t));

		/*Update the L1 table entry on disk
		 * (for O_DIRECT we write 4KByte blocks)*/
		l1_sector = (l1_index * sizeof(uint64_t)) >> 12;
		l1_ptr = (char *)s->l1_table + (l1_sector << 12);

		/* NOTE(review): allocation failure is logged but tmp_ptr is
		 * still used below — verify against the upstream tree. */
		if (posix_memalign((void **)&tmp_ptr, 4096, 4096) != 0) {
			DPRINTF("ERROR allocating memory for L1 table\n");
		}
		memcpy(tmp_ptr, l1_ptr, 4096);

		/* Convert block to write to big endian */
		for(i = 0; i < 4096 / sizeof(uint64_t); i++) {
			cpu_to_be64s(&tmp_ptr[i]);
		}

		/*
		 * Issue non-asynchronous L1 write.
		 * For safety, we must ensure that
		 * entry is written before blocks.
		 */
		lseek(s->fd, s->l1_table_offset + (l1_sector << 12), SEEK_SET);
		if (write(s->fd, tmp_ptr, 4096) != 4096) {
			free(tmp_ptr);
			return 0;
		}
		free(tmp_ptr);

		new_l2_table = 1;
		goto cache_miss;
	} else if (s->min_cluster_alloc == s->l2_size) {
		/*Fast-track the request*/
		cluster_offset = l2_offset + (s->l2_size * sizeof(uint64_t));
		l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
		return cluster_offset + (l2_index * s->cluster_size);
	}

	/*Check to see if L2 entry is already cached*/
	for (i = 0; i < L2_CACHE_SIZE; i++) {
		if (l2_offset == s->l2_cache_offsets[i]) {
			/* increment the hit count */
			if (++s->l2_cache_counts[i] == 0xffffffff) {
				for (j = 0; j < L2_CACHE_SIZE; j++) {
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -