?? excelparser.php
字號:
$sdc_start_block = $dir->getLong( $root_entry_index * 0x80 + 0x74 );
$small_data_chain = $this->get_blocks_chain($sdc_start_block);
$this->max_sblocks = count($small_data_chain) * 8;
$schain = $this->get_blocks_chain($small_block);
for( $i = 0; $i < count( $schain ); $i++ ) {
$sfatbuf = $dp->get( $schain[$i] * 0x200, 0x200 );
$sfat = new DataProvider( $sfatbuf, DP_STRING_SOURCE );
//$this->dbglog->dump( strlen($sfatbuf), "strlen(\$sftabuf)");
//$this->dbglog->dump( $sfat, "\$sfat");
if( $sfat->getSize() < 0x200 ) {
/*DBG*/ $this->dbglog->debug("parse_workbook() function found (strlen($sfat) < 0x200) returns 6");
return 6;
}
for( $j=0; $j<0x80; $j++ )
$this->sfat[] = $sfat->getLong( $j * 4 );
$sfat->close();
unset( $sfatbuf, $sfat );
}
unset( $schain );
$sfcbuf = $dp->ReadFromFat( $small_data_chain );
$sdp = new DataProvider( $sfcbuf, DP_STRING_SOURCE );
unset( $sfcbuf, $small_data_chain );
}
$workbook_index = $this->find_stream( $dir, 'Workbook' );
if( $workbook_index<0 ) {
$workbook_index = $this->find_stream( $dir, 'Book' );
if( $workbook_index<0 ){
/*DBG*/ $this->dbglog->debug("parse_workbook() function workbook index not found returns 7");
return 7;
}
}
$workbook_start_block = $dir->getLong( $workbook_index * 0x80 + 0x74 );
$workbook_length = $dir->getLong( $workbook_index * 0x80 + 0x78 );
$wb = '';
if( $workbook_length > 0 ) {
if( $workbook_length >= 0x1000 ) {
$chain = $this->get_blocks_chain($workbook_start_block);
$wb = $dp->ReadFromFat( $chain );
} else {
$chain = $this->get_blocks_chain($workbook_start_block,true);
$wb = $sdp->ReadFromFat( $chain, 0x40 );
unset( $sdp );
}
$wb = substr($wb,0,$workbook_length);
if( strlen($wb) != $workbook_length )
return 6;
unset( $chain );
}
// Unset fat arrays
unset( $this->fat, $this->sfat );
if( strlen($wb) <= 0 ) {
/*DBG*/ $this->dbglog->debug("parse_workbook() function workbook found (strlen($wb) <= 0) returns 7");
return 7;
}
if( strlen($wb) < 4 ) {
/*DBG*/ $this->dbglog->debug("parse_workbook() function workbook found (strlen($wb) < 4) returns 6");
return 6;
}
// parse workbook header
if( strlen($wb) < 256*ord($wb[3])+ord($wb[2]) ){
/*DBG*/ $this->dbglog->debug("parse_workbook() function workbook found (strlen($wb) < 256*ord($wb[3])+ord($wb[2])) < 4) returns 6");
return 6;
}
if( ord($wb[0]) != 0x09 ){
/*DBG*/ $this->dbglog->debug("parse_workbook() function workbook found (ord($wb[0]) != 0x09) returns 6");
return 6;
}
$vers = ord($wb[1]);
if( ($vers!=0) && ($vers!=2) && ($vers!=4) && ($vers!=8) ){
return 8;
}
if( $vers!=8 )
$biff_ver = ($ver+4)/2;
else {
if (strlen($wb) < 12) return 6;
switch (ord($wb[4])+256*ord($wb[5]) )
{
case 0x0500:
if( ord($wb[0x0a])+256*ord($wb[0x0b]) < 1994 )
$biff_ver = 5;
else {
switch(ord( $wb[8])+256*ord($wb[9]) ) {
case 2412:
case 3218:
case 3321:
$biff_ver = 5;
break;
default:
$biff_ver = 7;
break;
}
}
break;
case 0x0600:
$biff_ver = 8;
break;
default:
return 8;
}
}
if( $biff_ver < 5 ) return 8;
$ptr = 0;
$this->worksheet['offset'] = array();
$this->worksheet['options'] = array();
$this->worksheet['unicode'] = array();
$this->worksheet['name'] = array();
$this->worksheet['data'] = array();
$this->format = $this->populateFormat();
$this->fonts = array();
$this->fonts[0] = ExcelFont::basicFontRecord();
$this->xf = array();
$this->xf['format'] = array();
$this->xf['font'] = array();
$this->xf['type_prot'] = array();
$this->xf['alignment'] = array();
$this->xf['decoration'] = array();
$xf_cnt=0;
$this->sst['unicode'] = array();
$this->sst['data'] = array();
$opcode = 0;
$sst_defined = false;
$wblen = strlen($wb);
while( (ord($wb[$ptr])!=0x0a) && ($ptr<$wblen) )
{
$oc = ord($wb[$ptr])+256*ord($wb[$ptr+1]);
if( $oc != 0x3c )
$opcode = $oc;
switch ($opcode)
{
case 0x0085:
$ofs = str2long(substr($wb,$ptr+4,4));
$this->worksheet['offset'][] = $ofs;
$this->worksheet['options'][] = ord($wb[$ptr+8])+256*ord($wb[$ptr+9]);
if( $biff_ver==8 ) {
$len = ord($wb[$ptr+10]);
if( (ord($wb[$ptr+11]) & 1) > 0 ) {
$this->worksheet['unicode'][] = true;
$len = $len*2;
} else {
$this->worksheet['unicode'][] = false;
}
$this->worksheet['name'][] = substr($wb,$ptr+12,$len);
} else {
$this->worksheet['unicode'][] = false;
$len = ord($wb[$ptr+10]);
$this->worksheet['name'][] = substr($wb,$ptr+11,$len);
}
$pws = $this->parse_worksheet(substr($wb,$ofs));
if( is_array($pws) )
$this->worksheet['data'][] = $pws;
else
return $pws;
break;
// Format
case 0x041e:
$fidx = ord($wb[$ptr+4])+256*ord($wb[$ptr+5]);
if($fidx<0x31 ||$fidx==0x31 )
break;
elseif($biff_ver>7)
$this->format[$fidx] = $this->getUnicodeString($wb,$ptr+6);
// echo $wb."---".$this->format[$fidx]."*****<br>";
// FONT 0x31
case EXCEL_FONT_RID:
$rec = ExcelFont::getFontRecord($wb,$ptr+4);
$this->fonts[count($this->fonts)] = $rec;
/*echo str_replace("\n","<br>\n",ExcelFont::toString($rec,count($this->fonts)-1));
echo "FontRecord<br>" */;
break;
// XF
case 0x00e0:
$this->xf['font'][$xf_cnt] = ord($wb[$ptr+4])+256*ord($wb[$ptr+5]);
$this->xf['format'][$xf_cnt] = ord($wb[$ptr+6])+256*ord($wb[$ptr+7]);
$this->xf['type'][$xf_cnt] = "1";
$this->xf['bitmask'][$xf_cnt] = "1";
$xf_cnt++;
break;
// SST
case 0x00fc:
if( $biff_ver < 8 ) break;
$sbuflen = ord($wb[$ptr+2]) + 256*ord($wb[$ptr+3]);
if( $oc != 0x3c ) {
if( $sst_defined ) return 6;
$snum = str2long(substr($wb,$ptr+8,4));
$sptr = $ptr+12;
$sst_defined = true;
} else {
if( $rslen > $slen ) {
$sptr = $ptr+4;
$rslen -= $slen;
$slen = $rslen;
if( (ord($wb[$sptr]) & 1) > 0 ) {
if( $char_bytes == 1 ) {
$sstr = '';
for( $i=0; $i<strlen($str); $i++ )
$sstr .= $str[$i].chr(0);
$str = $sstr;
$char_bytes=2;
}
$schar_bytes = 2;
} else {
$schar_bytes = 1;
}
if( $sptr+$slen*$schar_bytes > $ptr+4+$sbuflen )
$slen = ($ptr+$sbuflen-$sptr+3)/$schar_bytes;
$sstr = substr($wb,$sptr+1,$slen*$schar_bytes);
if( ($char_bytes == 2) && ($schar_bytes == 1) )
{
$sstr2 = '';
for( $i=0; $i<strlen($sstr); $i++ )
$sstr2 .= $sstr[$i].chr(0);
$sstr = $sstr2;
}
$str .= $sstr;
$sptr += $slen*$schar_bytes+1+4*$rt+$fesz;
if( $slen < $rslen ) {
if( ($sptr >= strlen($wb)) ||
($sptr < $ptr+4+$sbuflen) ||
(ord($wb[$sptr]) != 0x3c) )
{
return 6;
}
break;
} else {
if( $char_bytes == 2 ) {
$this->sst['unicode'][] = true;
} else {
$this->sst['unicode'][] = false;
}
$this->sst['data'][] = $str;
$snum--;
}
} else {
$sptr = $ptr+4;
if( $sptr > $ptr ) $sptr += 4*$rt+$fesz;
}
}
while( ($sptr < $ptr+4+$sbuflen) &&
($sptr < strlen($wb)) &&
($snum > 0) )
{
$rslen = ord($wb[$sptr])+256*ord($wb[$sptr+1]);
$slen = $rslen;
if( (ord($wb[$sptr+2]) & 1) > 0 ) {
$char_bytes = 2;
} else {
$char_bytes = 1;
}
$rt = 0;
$fesz = 0;
switch (ord($wb[$sptr+2]) & 0x0c) {
// Rich-Text with Far-East
case 0x0c:
$rt = ord($wb[$sptr+3])+256*(ord($wb[$sptr+4]));
$fesz = str2long(substr($wb,$sptr+5,4));
if( $sptr+9+$slen*$char_bytes > $ptr+4+$sbuflen )
$slen = ($ptr+$sbuflen-$sptr-5)/$char_bytes;
$str = substr($wb,$sptr+9,$slen*$char_bytes);
$sptr += $slen*$char_bytes+9;
break;
// Rich-Text
case 8:
$rt = ord($wb[$sptr+3])+256*(ord($wb[$sptr+4]));
if( $sptr+5+$slen*$char_bytes > $ptr+4+$sbuflen )
$slen = ($ptr+$sbuflen-$sptr-1)/$char_bytes;
$str = substr($wb,$sptr+5,$slen*$char_bytes);
$sptr += $slen*$char_bytes+5;
break;
// Far-East
case 4:
$fesz = str2long(substr($wb,$sptr+3,4));
if( $sptr+7+$slen*$char_bytes > $ptr+4+$sbuflen )
$slen = ($ptr+$sbuflen-$sptr-3)/$char_bytes;
$str = substr($wb,$sptr+7,$slen*$char_bytes);
$sptr += $slen*$char_bytes+7;
break;
// Compressed or uncompressed unicode
case 0:
if( $sptr+3+$slen*$char_bytes > $ptr+4+$sbuflen )
$slen = ($ptr+$sbuflen-$sptr+1)/$char_bytes;
$str = substr($wb,$sptr+3,$slen*$char_bytes);
$sptr += $slen*$char_bytes+3;
break;
}
if( $slen < $rslen ) {
if( ($sptr >= strlen($wb)) ||
($sptr < $ptr+4+$sbuflen) ||
(ord($wb[$sptr]) != 0x3c) ) return 6;
} else {
if( $char_bytes == 2 ) {
$this->sst['unicode'][] = true;
} else {
$this->sst['unicode'][] = false;
}
$sptr += 4*$rt+$fesz;
$this->sst['data'][] = $str;
$snum--;
}
} // switch
break;
} // switch
// !!! Optimization:
// $this->wsb[] = substr($wb,$ptr,4+256*ord($wb[$ptr+3])+ord($wb[$ptr+2]));
$ptr += 4+256*ord($wb[$ptr+3])+ord($wb[$ptr+2]);
} // while
// !!! Optimization:
// $this->workbook = $wb;
$this->biff_version = $biff_ver;
/*DBG*/ $this->dbglog->debug("parse_workbook() function returns 0");
return 0;
}
/**
 * ParseFromString
 *
 * Parses an Excel workbook whose complete contents are already held in a
 * PHP string. The string is wrapped in a string-backed DataProvider, which
 * is stored in $this->dp, and the actual work is delegated to InitParser().
 *
 * Author's note: this entry point is not optimized for memory usage, but
 * parsing completes faster. Use it when parsing speed is critical; for huge
 * files the PHP memory limit can be exceeded and the script will terminate.
 *
 * @param string $contents Raw contents of the Excel file.
 *
 * @return int Status code as produced by InitParser():
 *    0 - success
 *    1 - can't open file
 *    2 - file too small to be an Excel file
 *    3 - error reading header
 *    4 - error reading file
 *    5 - This is not an Excel file or file stored in < Excel 5.0
 *    6 - file corrupted
 *    7 - data not found
 *    8 - Unsupported file version
 */
function ParseFromString( $contents )
{
    $this->dbglog->info("ParseFromString() enter.");

    // Back the parser with an in-memory data source.
    $this->dp = new DataProvider( $contents, DP_STRING_SOURCE );

    $status = $this->InitParser();
    return $status;
}
/**
 * ParseFromFile
 *
 * Parses an Excel workbook stored on disk. The file name is handed to a
 * file-backed DataProvider, which is stored in $this->dp, and the actual
 * work is delegated to InitParser().
 *
 * Author's note: this entry point is optimized for memory usage, but takes
 * longer to execute and parse the data. Use it with large files.
 *
 * @param string $filename Name of the Excel file to parse.
 *
 * @return mixed Status code as produced by InitParser().
 */
function ParseFromFile( $filename )
{
    $this->dbglog->info("ParseFromFile() enter.");

    // Back the parser with a file data source.
    $this->dp = new DataProvider( $filename, DP_FILE_SOURCE );

    $status = $this->InitParser();
    return $status;
}
/**
 * Validates the data source held in $this->dp and starts workbook parsing.
 *
 * Steps, in order:
 *  - fail with 1 if the data provider reports itself invalid;
 *  - fail with 2 if the source is not larger than 0x200 bytes;
 *  - record the provider's block count in $this->max_blocks;
 *  - read the first 0x200 bytes and fail with 3 if fewer were returned;
 *  - fail with 5 if those bytes do not begin with the 8-byte signature
 *    D0 CF 11 E0 A1 B1 1A E1;
 *  - otherwise pass the header and the provider to parse_workbook().
 *
 * After parse_workbook() returns, $this->dp, $this->max_blocks and
 * $this->max_sblocks are unset. They are NOT unset on the early failure
 * returns above.
 *
 * @return mixed 1, 2, 3 or 5 for the failures detected here, otherwise
 *               whatever parse_workbook() returned.
 */
function InitParser()
{
    $this->dbglog->info("InitParser() enter.");

    if( !$this->dp->isValid() ) {
        $this->dbglog->error("InitParser() Failed to open file.");
        $this->dbglog->error("InitParser() function returns 1");
        return 1;
    }

    if( $this->dp->getSize() <= 0x200 ) {
        $this->dbglog->error("InitParser() File too small to be an Excel file.");
        $this->dbglog->error("InitParser() function returns 2");
        return 2;
    }

    $this->max_blocks = $this->dp->getBlocks();

    // Read the file header: the first 0x200 bytes of the source.
    $header_bytes = $this->dp->get( 0, 0x200 );
    if( strlen( $header_bytes ) < 0x200 ) {
        $this->dbglog->error("InitParser() Error reading header.");
        $this->dbglog->error("InitParser() function returns 3");
        return 3;
    }

    // Check the file header: it must start with the expected 8-byte signature.
    $signature = "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1";
    if( strncmp( $header_bytes, $signature, strlen( $signature ) ) != 0 ) {
        /*DBG*/ $this->dbglog->error("InitParser() function founds invalid header");
        /*DBG*/ $this->dbglog->error("InitParser() function returns 5");
        return 5;
    }

    $f_header = new DataProvider( $header_bytes, DP_STRING_SOURCE );
    unset( $header_bytes, $signature );

    // Base offset equals the header size just read; presumably this makes
    // later block reads skip the header -- confirm against DataProvider.
    $this->dp->_baseOfs = 0x200;

    $result = $this->parse_workbook( $f_header, $this->dp );

    // Release the header wrapper and the per-parse state.
    unset( $f_header );
    unset( $this->dp, $this->max_blocks, $this->max_sblocks );

    return $result;
}
}
/**
 * Make 32-bit integer from dump
 *
 * Combines the first four bytes of $str into an integer, least significant
 * byte first (byte 0 is bits 0-7, byte 3 is bits 24-31). Bytes beyond the
 * fourth are ignored.
 *
 * Input shorter than four bytes (including an empty string, or the false
 * that substr() can return on old PHP versions) is treated as if padded
 * with zero bytes. The numeric result matches what the unpadded code
 * produced, but no "Uninitialized string offset" diagnostics are raised.
 *
 * @param mixed $str Binary string holding at least four bytes.
 * @return int
 */
function str2long($str)
{
    // Normalise to exactly four bytes so every offset below exists.
    $bytes = str_pad( substr( (string) $str, 0, 4 ), 4, "\0" );

    return ord($bytes[0])
        | (ord($bytes[1]) << 8)
        | (ord($bytes[2]) << 16)
        | (ord($bytes[3]) << 24);
}
?>
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -