?? gopherenum-depth.c
字號:
char *gopher_port = NULL; int y; if (url_in_db(up->url)) { /* Have we been here? */ Debug(43, 1, ("Already Visited URL: %s\n", up->url)); return (NULL); } if ((y = filter_selection(up))) { /* Match the URL based on REs */ Debug(43, 1, ("Removing Candidate: [%s] %s\n", Filter_Type_Name[y], up->url)); return (NULL); } if (!visit_server(up)) { /* Can we visit this server? */ Debug(43, 1, ("Disallowed to Visit Server: %s\n", up->url)); return (NULL); } if (!RobotsTxtCheck(up)) { Debug(43, 1, ("Disallowed by robots.txt: %s\n", up->url)); return (NULL); } if (url_retrieve(up)) { /* Grab the URL; success? */ Debug(43, 1, ("Cannot Retrieve URL: %s\n", up->url));#ifdef DONT_RETRY_FAILS mark_failed(up);#endif return (NULL); } if (up->md5 && md5_in_db(up->md5)) { /* Have we been here? */ Debug(43, 1, ("Already Visited MD5: %s\n", up->url)); return (NULL); } /* Remember that we've been here before */ if (up->md5 != NULL) mark_retrieved(up); if (up->gophertype == 0) return (NULL); /* * For each pointer, convert it to a URL, and add it to * the list of URLs to return. */ if ((fp = fopen(up->filename, "r")) == NULL) { log_errno2(__FILE__, __LINE__, up->filename); return (NULL); } Tail = &head; while (fgets(buf, BUFSIZ, fp)) { if (buf[0] == '.' || buf[0] == '\n') break; urlbuf = xstrdup(buf); if ((q = strrchr(buf, '\n'))) *q = (char) '\0'; p = urlbuf; if ((q = strchr(p, '\t')) == NULL) { errorlog("Illegal Gopher format: No Name: %s\n", buf); goto gopher_enum_cont; } *q = (char) '\0'; gopher_name = xstrdup(p); p = q + 1; if ((q = strchr(p, '\t')) == NULL) { errorlog("Illegal Gopher format: No Path: %s\n", buf); goto gopher_enum_cont; } *q = (char) '\0'; gopher_path = xstrdup(rfc1738_escape(p)); p = q + 1; if ((q = strchr(p, '\t')) == NULL) { errorlog("Illegal Gopher format: No Host: %s\n", buf); goto gopher_enum_cont; } *q = (char) '\0'; gopher_host = xstrdup(p); p = q + 1; if ((q = strchr(p, '\n')) == NULL) { errorlog("Illegal Gopher format: No Port: %s\n", buf); goto gopher_enum_cont; } *q = (char) '\0'; gopher_port = xstrdup(p); /* Fix for wierd cross-site Gopher links - wessels */ if (!strncasecmp(gopher_path, "ftp%3a", 6)) goto gopher_enum_cont; if (!strncasecmp(gopher_path, "ftp:", 4)) goto gopher_enum_cont; if (!strncasecmp(gopher_path, "exec%3a", 7)) goto gopher_enum_cont; if (!strncasecmp(gopher_path, "exec:", 5)) goto gopher_enum_cont; sprintf(newurl, "gopher://%s:%d/%c%s\n", gopher_host, atoi(gopher_port), gopher_name[0], gopher_path); l = (list_t *) xmalloc(sizeof(list_t)); l->ptr = (void *) xstrdup(newurl); l->next = (list_t *) NULL; *Tail = l; Tail = &(l->next); gopher_enum_cont: if (gopher_name) xfree(gopher_name); gopher_name = NULL; if (gopher_path) xfree(gopher_path); gopher_path = NULL; if (gopher_host) xfree(gopher_host); gopher_host = NULL; if (gopher_port) xfree(gopher_port); gopher_port = NULL; if (urlbuf) xfree(urlbuf); gopher_name = NULL; } fclose(fp); return (head);}/* * process_url() - Retrieves the given URL, computes an MD5, * and extracts the list of menu pointers within the documents. */static void process_url(up, depth) URL *up; int depth;{ list_t *head = 0; list_t *l = 0; list_t *next_l = 0; char *url; URL *tup; if (max_depth > 0 && depth > max_depth) { Debug(43, 1, ("Maximum Depth of %d Reached: %s\n", max_depth, up->url)); url_close(up); return; } Debug(43, 1, ("Processing: [%2d] %s\n", depth, up->url)); if ((head = gopher_enum(up)) == NULL) { url_close(up); return; } url_close(up); /* * Now, for each URL in the list, call process_url() if * the URL is a Gopher url and it is on the same host */ for (l = head; l; l = next_l) { next_l = l->next; url = (char *) l->ptr; if (url == (char *) NULL) goto free_list_entry; if ((tup = url_open(url)) == NULL) goto free_list_entry; if ((tup->type != URL_GOPHER)) { url_close(tup); goto free_list_entry; } if (tup->gophertype >= 2) { /* ignore everything 2 or higher */ url_close(tup); goto free_list_entry; } process_url(tup, depth + 1); /* should be a 1 - menu */ free_list_entry: xfree(l->ptr); xfree(l); }}/* ---------------------------------------------------------------------- *//* * initialize() - Basic init routines */static void initialize(){ char *s; FILE *logfp = NULL;#ifdef USE_HOST_CACHE host_cache_init();#endif max_depth = url_max = host_max = 0; if ((s = getenv("HARVEST_URL_MAX")) != NULL) url_max = atoi(s); if ((s = getenv("HARVEST_HOST_MAX")) != NULL) host_max = atoi(s); if ((s = getenv("HARVEST_DEPTH_MAX")) != NULL) max_depth = atoi(s); if ((s = getenv("HARVEST_DEPTH_CUR")) != NULL) start_depth = atoi(s); Debug(43, 9, ("HARVEST_DEPTH_CUR=%d\n", s ? s : "NULL")); if (url_max < 1) url_max = 250; /* hard-coded maximum */ if (host_max < 1) host_max = 1; /* hard-coded maximum */ if (max_depth < 1) max_depth = 0; /* hard-coded maximum */ host_filterfile = getenv("HARVEST_HOST_FILTER"); url_filterfile = getenv("HARVEST_URL_FILTER"); if (getenv("HARVEST_GATHERER_LOGFILE") != (char *) NULL) logfp = fopen(getenv("HARVEST_GATHERER_LOGFILE"), "a+"); if (logfp == (FILE *) NULL) logfp = stderr; init_log3("gopherenum-depth", logfp, stderr); init_url(); filter_initialize(); /* Open GDBM databases to keep track of where we've been */ urldb_filename = xstrdup(tempnam(NULL, "Gurl")); urldbf = gdbm_open(urldb_filename, 0, GDBM_NEWDB, 0644, NULL); if (urldbf == NULL) { log_errno(urldb_filename); fatal("gdbm_open: %s: %s", urldb_filename, gdbm_strerror(gdbm_errno)); } hostdb_filename = xstrdup(tempnam(NULL, "Ghost")); hostdbf = gdbm_open(hostdb_filename, 0, GDBM_NEWDB, 0644, NULL); if (hostdbf == NULL) { log_errno(hostdb_filename); fatal("gdbm_open: %s: %s", hostdb_filename, gdbm_strerror(gdbm_errno)); } md5db_filename = xstrdup(tempnam(NULL, "Gmd5")); md5dbf = gdbm_open(md5db_filename, 0, GDBM_NEWDB, 0644, NULL); if (md5dbf == NULL) { log_errno(md5db_filename); fatal("gdbm_open: %s: %s", md5db_filename, gdbm_strerror(gdbm_errno)); }}/* Die gracefully */static void sigdie(){ if (urldbf != NULL) gdbm_close(urldbf); if (hostdbf != NULL) gdbm_close(hostdbf); if (md5dbf != NULL) gdbm_close(md5dbf); /* * (void) unlink(urldb_filename); * (void) unlink(hostdb_filename); * (void) unlink(md5db_filename); */ crremove(urldb_filename); crremove(hostdb_filename); crremove(md5db_filename); exit(0);}/* ---------------------------------------------------------------------- */static void usage(){ fprintf(stderr, "Usage: gopherenum-depth gopher-URL\n"); exit(1);}int main(argc, argv) int argc; char **argv;{ URL *up; debug_init(); for (argc--, argv++; argc > 0 && **argv == '-'; argc--, argv++) { if (strncmp(*argv, "-D", 2) == 0) { debug_flag(*argv); } } if (argc != 1) usage(); signal(SIGTERM, sigdie); /* Die gracefully */ signal(SIGINT, sigdie); signal(SIGPIPE, sigdie); /* Clean up on broken pipe */ initialize(); /* Initialize */ /* Grab the RootNode URL from the command line */ if ((up = url_open(*argv)) == NULL || up->type != URL_GOPHER) usage(); /* Mark the RootNode */ tree_root = xstrdup(up->url); printf("%s\n", up->url); /* Print tree root */ process_url(up, start_depth); /* Do the Enumeration recursively */ url_close(up); /* Clean up */ finish_url(); sigdie(); /* NOTREACHED */}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -