?? html.c
字號:
/*                                                                   HTML.c
**      SIMPLE HTML PARSER WITHOUT ANY PRESENTATION CODE
**
**      (c) COPYRIGHT MIT 1995.
**      Please first read the full copyright statement in the file COPYRIGH.
**      @(#) $Id: HTML.c,v 1.81 2000/08/09 10:43:08 kahan Exp $
**
**      This generates a hypertext object. It converts from the
**      structured stream interface of HTML events into the style-
**      oriented interface of the HText interface.
**
** HISTORY:
**       8 Jul 94  FM   Insulate free() from _free structure element.
*/

/* Library include files */
#include "wwwsys.h"
#include "WWWUtil.h"
#include "WWWCore.h"
#include "WWWHTML.h"
#include "HTML.h"
#include "HTextImp.h"

/*
** Shorthands that forward to the method table of the downstream target
** stream of a structured object `t`.
*/
#define PUTC(t,c)       (*(t)->target->isa->put_character)((t)->target, (c))
#define PUTS(t,s)       (*(t)->target->isa->put_string)((t)->target, (s))
/* The first parameter is named `t` so that the expansion refers to the
** macro argument instead of whatever `t` happens to be in caller scope. */
#define PUTB(t,b,l)     (*(t)->target->isa->put_block)((t)->target, (b), (l))
#define FLUSH_TARGET(t) (*(t)->target->isa->flush)((t)->target)
#define FREE_TARGET(t)  (*(t)->target->isa->_free)((t)->target)
/* NOTE(review): `e` is not a macro parameter; the expansion picks up a
** variable named `e` from the scope where the macro is used. */
#define ABORT_TARGET(t) (*(t)->target->isa->abort)((t)->target, e)

#define MAX_NESTING     40      /* Number of slots in _HTStructured.stack */

struct _HTStream {
    const HTStreamClass *       isa;
    /* .... */
};

struct _HTStructured {
    const HTStructuredClass *   isa;
    HTRequest *                 request;
    HTParentAnchor *            node_anchor;
    HTextImp *                  text;
    HTStream *                  target;
    HTChunk *                   title;
    BOOL                        in_word;
    SGML_dtd *                  dtd;
    char *                      comment_start;  /* for literate programming */
    char *                      comment_end;
    BOOL                        started;  /* Set once HTEXT_BEGIN was sent */
    int                         overflow;
    int *                       sp;       /* sp[0] is read as the current element */
    int                         stack[MAX_NESTING];
};

/*
**      Entity values -- for ISO Latin 1 local representation
**      This MUST match exactly the table referred to in the DTD!
*/
static char * ISO_Latin1[HTML_ENTITIES] = {
/* 00 */
    "\306",     /* capital AE diphthong (ligature) */
    "\301",     /* capital A, acute accent */
    "\302",     /* capital A, circumflex accent */
    "\300",     /* capital A, grave accent */
    "\305",     /* capital A, ring */
    "\303",     /* capital A, tilde */
    "\304",     /* capital A, dieresis or umlaut mark */
    "\307",     /* capital C, cedilla */
    "\320",     /* capital Eth, Icelandic */
    "\311",     /* capital E, acute accent */
/* 10 */
    "\312",     /* capital E, circumflex accent */
    "\310",     /* capital E, grave accent */
    "\313",     /* capital E, dieresis or umlaut mark */
    "\315",     /* capital I, acute accent */
    "\316",     /* capital I, circumflex accent */
    "\314",     /* capital I, grave accent */
    "\317",     /* capital I, dieresis or umlaut mark */
    "\321",     /* capital N, tilde */
    "\323",     /* capital O, acute accent */
    "\324",     /* capital O, circumflex accent */
/* 20 */
    "\322",     /* capital O, grave accent */
    "\330",     /* capital O, slash */
    "\325",     /* capital O, tilde */
    "\326",     /* capital O, dieresis or umlaut mark */
    "\336",     /* capital THORN, Icelandic */
    "\332",     /* capital U, acute accent */
    "\333",     /* capital U, circumflex accent */
    "\331",     /* capital U, grave accent */
    "\334",     /* capital U, dieresis or umlaut mark */
    "\335",     /* capital Y, acute accent */
/* 30 */
    "\341",     /* small a, acute accent */
    "\342",     /* small a, circumflex accent */
    "\264",     /* acute accent */
    "\346",     /* small ae diphthong (ligature) */
    "\340",     /* small a, grave accent */
    "\046",     /* ampersand */
    "\345",     /* small a, ring */
    "\343",     /* small a, tilde */
    "\344",     /* small a, dieresis or umlaut mark */
    "\246",     /* broken vertical bar */
/* 40 */
    "\347",     /* small c, cedilla */
    "\270",     /* cedilla */
    "\242",     /* cent sign */
    "\251",     /* copyright */
    "\244",     /* general currency sign */
    "\260",     /* degree sign */
    "\367",     /* division sign */
    "\351",     /* small e, acute accent */
    "\352",     /* small e, circumflex accent */
    "\350",     /* small e, grave accent */
/* 50 */
    "\360",     /* small eth, Icelandic */
    "\353",     /* small e, dieresis or umlaut mark */
    "\275",     /* fraction one-half */
    "\274",     /* fraction one-fourth */
    "\276",     /* fraction three-fourth */
    "\076",     /* greater than */
    "\355",     /* small i, acute accent */
    "\356",     /* small i, circumflex accent */
    "\241",     /* inverted exclamation */
    "\354",     /* small i, grave accent */
/* 60 */
    "\277",     /* inverted question mark */
    "\357",     /* small i, dieresis or umlaut mark */
    "\253",     /* left angle quote */
    "\074",     /* less than */
    "\257",     /* macron accent */
    "\265",     /* micro sign (greek mu) */
    "\267",     /* middle dot */
    /* NOTE(review): this is a plain space (octal 040); ISO Latin 1 places
    ** the non-breaking space at octal 240 -- confirm this is intentional. */
    "\040",     /* non-breaking space */
    "\254",     /* not sign */
    "\361",     /* small n, tilde */
/* 70 */
    "\363",     /* small o, acute accent */
    "\364",     /* small o, circumflex accent */
    "\362",     /* small o, grave accent */
    "\252",     /* feminine ordinal */
    "\272",     /* masculine ordinal */
    "\370",     /* small o, slash */
    "\365",     /* small o, tilde */
    "\366",     /* small o, dieresis or umlaut mark */
    "\266",     /* paragraph sign */
    "\261",     /* plus or minus */
/* 80 */
    "\243",     /* pound sign */
    "\042",     /* double quote sign - June 94 */
    "\273",     /* right angle quote */
    "\256",     /* registered trademark */
    "\247",     /* section sign */
    "\255",     /* soft hyphen */
    "\271",     /* superscript 1 */
    "\262",     /* superscript 2 */
    "\263",     /* superscript 3 */
    "\337",     /* small sharp s, German (sz ligature) */
/* 90 */
    "\376",     /* small thorn, Icelandic */
    "\327",     /* multiply sign */
    "\372",     /* small u, acute accent */
    "\373",     /* small u, circumflex accent */
    "\371",     /* small u, grave accent */
    "\250",     /* dieresis or umlaut mark */
    "\374",     /* small u, dieresis or umlaut mark */
    "\375",     /* small y, acute accent */
    "\245",     /* yen sign */
    "\377"      /* small y, dieresis or umlaut mark */
/* 100 */
};

/* Entity table currently selected; starts out as the ISO Latin 1 table */
PRIVATE char ** CurrentEntityValues = ISO_Latin1;

/*
**      Select the character set used for entity values.
**      Only HTML_ISO_LATIN1 is accepted: it (re)selects the ISO_Latin1
**      table and returns YES. Any other value leaves the current selection
**      untouched, emits a trace message and returns NO.
*/
PUBLIC BOOL HTMLUseCharacterSet (HTMLCharacterSet i)
{
    if (i != HTML_ISO_LATIN1) {
        HTTRACE(SGML_TRACE, "HTML Parser. Doesn't support this character set\n");
        return NO;
    }
    CurrentEntityValues = ISO_Latin1;
    return YES;
}

/*
**      Hand a block of `l` bytes starting at `b` to the HText object.
**      The first call on a given object announces HTEXT_BEGIN through
**      HTextImp_build() and marks the object as started. While the element
**      in me->sp[0] is HTML_TITLE the bytes are also appended to me->title.
**      Always returns HT_OK.
*/
PRIVATE int HTML_write (HTStructured * me, const char * b, int l)
{
    if (!me->started) {
        HTextImp_build(me->text, HTEXT_BEGIN);
        me->started = YES;
    }

    /* Title text is collected in the title chunk AND passed on as text */
    if (me->sp[0] == HTML_TITLE)
        HTChunk_putb(me->title, b, l);
    HTextImp_addText(me->text, b, l);

    return HT_OK;
}

/*
**      Write the single character `c`; see HTML_write().
*/
PRIVATE int HTML_put_character (HTStructured * me, char c)
{
    return HTML_write(me, &c, sizeof(char));
}

/*
**      Write the NUL-terminated string `s`; see HTML_write().
**      A NULL string is ignored and reported as HT_OK, because passing it
**      to strlen() would be undefined behaviour.
*/
PRIVATE int HTML_put_string (HTStructured * me, const char* s)
{
    if (s == NULL)
        return HT_OK;
    return HTML_write(me, s, (int) strlen(s));
}

/* NOTE(review): the definition below continues on the following lines and is cut off in this copy of the file; its text is kept exactly as found. */
PRIVATE void HTML_start_element (HTStructured * me, int element_number, const BOOL * present, const char ** value){ HTChildAnchor * address = NULL; if (!me->started) { HTextImp_build(me->text, HTEXT_BEGIN); me->started = YES; } /* Look at what element was started */ switch (element_number) { case HTML_A: if (present[HTML_A_HREF] && value[HTML_A_HREF]) { address = HTAnchor_findChildAndLink( me->node_anchor, /* parent */ present[HTML_A_NAME] ? value[HTML_A_NAME] : NULL, /* Tag */ value[HTML_A_HREF], /* Addresss */ present[HTML_A_REL] && value[HTML_A_REL] ? (HTLinkType) HTAtom_caseFor(value[HTML_A_REL]) : NULL); if (present[HTML_A_TITLE] && value[HTML_A_TITLE]) { HTLink * link = HTAnchor_mainLink((HTAnchor *) address); HTParentAnchor * dest = HTAnchor_parent(HTLink_destination(link)); if (!HTAnchor_title(dest)) HTAnchor_setTitle(dest, value[HTML_A_TITLE]); } HTextImp_foundLink(me->text, element_number, HTML_A_HREF, address, present, value); HTTRACE(SGML_TRACE, "HTML Parser. 
Anchor `%s\'\n" _ value[HTML_A_HREF]); } break; case HTML_AREA: if (present[HTML_AREA_HREF] && value[HTML_AREA_HREF]) { address = HTAnchor_findChildAndLink(me->node_anchor, NULL, value[HTML_AREA_HREF], NULL); HTextImp_foundLink(me->text, element_number, HTML_AREA_HREF, address, present, value); HTTRACE(SGML_TRACE, "HTML Parser. Image map area `%s\'\n" _ value[HTML_AREA_HREF]); } break; case HTML_BASE: if (present[HTML_BASE_HREF] && value[HTML_BASE_HREF]) { HTAnchor_setBase(me->node_anchor, (char *) value[HTML_BASE_HREF]); HTTRACE(SGML_TRACE, "HTML Parser. New base `%s\'\n" _ value[HTML_BASE_HREF]); } break; case HTML_BODY: if (present[HTML_BODY_BACKGROUND] && value[HTML_BODY_BACKGROUND]) { address = HTAnchor_findChildAndLink(me->node_anchor, NULL, value[HTML_BODY_BACKGROUND], NULL); HTextImp_foundLink(me->text, element_number, HTML_BODY_BACKGROUND, address, present, value); HTTRACE(SGML_TRACE, "HTML Parser. Background `%s\'\n" _ value[HTML_BODY_BACKGROUND]); } break; case HTML_FORM: if (present[HTML_FORM_ACTION] && value[HTML_FORM_ACTION]) { address = HTAnchor_findChildAndLink(me->node_anchor, NULL, value[HTML_FORM_ACTION], NULL); HTextImp_foundLink(me->text, element_number, HTML_FORM_ACTION, address, present, value); } break; case HTML_FRAME: if (present[HTML_FRAME_SRC] && value[HTML_FRAME_SRC]) { address = HTAnchor_findChildAndLink(me->node_anchor, NULL, value[HTML_FRAME_SRC], NULL); HTextImp_foundLink(me->text, element_number, HTML_FRAME_SRC, address, present, value); HTTRACE(SGML_TRACE, "HTML Parser. 
Frame `%s\'\n" _ value[HTML_FRAME_SRC]); } break; case HTML_INPUT: if (present[HTML_INPUT_SRC] && value[HTML_INPUT_SRC]) { address = HTAnchor_findChildAndLink(me->node_anchor, NULL, value[HTML_INPUT_SRC], NULL); HTextImp_foundLink(me->text, element_number, HTML_INPUT_SRC, address, present, value); } break; case HTML_IMG: if (present[HTML_IMG_SRC] && value[HTML_IMG_SRC]) { address = HTAnchor_findChildAndLink(me->node_anchor, NULL, value[HTML_IMG_SRC], NULL); HTextImp_foundLink(me->text, element_number, HTML_IMG_SRC, address, present, value); } break; case HTML_ISINDEX: HTAnchor_setIndex(me->node_anchor); break; case HTML_LINK: if (present[HTML_LINK_HREF] && value[HTML_LINK_HREF]) { HTParentAnchor * dest = NULL; address = HTAnchor_findChildAndLink( me->node_anchor, /* parent */ present[HTML_A_NAME] ? value[HTML_A_NAME] : NULL, /* Tag */ present[HTML_A_HREF] ? value[HTML_A_HREF] : NULL, /* Addresss */ NULL); /* Rels */ dest = HTAnchor_parent(HTAnchor_followMainLink((HTAnchor *) address)); /* If forward reference */ if ((present[HTML_LINK_REL] && value[HTML_LINK_REL])) { char * strval = NULL;
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -