?? sort.c
字號:
/* sort - sort lines of text (with all kinds of options).
   Copyright (C) 88, 1991-2002 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

   Written December 1988 by Mike Haertel.
   The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
   or (US mail) as Mike Haertel c/o Free Software Foundation.

   Ørn E. Hansen added NLS support in 1997.  */

#include <config.h>

#include <getopt.h>
#include <sys/types.h>
#include <signal.h>
#include <stdio.h>
#include <assert.h>
#include "system.h"
#include "long-options.h"
#include "error.h"
#include "hard-locale.h"
#include "inttostr.h"
#include "physmem.h"
#include "posixver.h"
#include "stdio-safer.h"
#include "xmemcoll.h"
#include "xstrtol.h"

#if HAVE_SYS_RESOURCE_H
# include <sys/resource.h>
#endif
/* Fallback for systems without RLIMIT_DATA: getrlimit always fails.  */
#ifndef RLIMIT_DATA
struct rlimit { size_t rlim_cur; };
# define getrlimit(Resource, Rlp) (-1)
#endif

/* The official name of this program (e.g., no `g' prefix).  */
#define PROGRAM_NAME "sort"

#define AUTHORS N_ ("Mike Haertel and Paul Eggert")

#if HAVE_LANGINFO_CODESET
# include <langinfo.h>
#endif

/* Without SA_NOCLDSTOP, signal masking is compiled out entirely.  */
#ifndef SA_NOCLDSTOP
# define sigprocmask(How, Set, Oset) /* empty */
# define sigset_t int
#endif

#ifndef STDC_HEADERS
double strtod ();
#endif

/* Undefine, to avoid warning about redefinition on some systems.  */
/* FIXME: Remove these: use MIN/MAX from sys2.h.  */
#undef min
#define min(a, b) ((a) < (b) ? (a) : (b))
#undef max
#define max(a, b) ((a) > (b) ? (a) : (b))

/* Number of distinct unsigned char values; size of the per-byte tables.  */
#define UCHAR_LIM (UCHAR_MAX + 1)
#define UCHAR(c) ((unsigned char) (c))

#ifndef DEFAULT_TMPDIR
# define DEFAULT_TMPDIR "/tmp"
#endif

/* Use this as exit status in case of error, not EXIT_FAILURE.  This
   is necessary because EXIT_FAILURE is usually 1 and POSIX requires
   that sort exit with status 1 IFF invoked with -c and the input is
   not properly sorted.  Any other irregular exit must exit with a
   status code greater than 1.  */
#define SORT_FAILURE 2
#define SORT_OUT_OF_ORDER 1

#define C_DECIMAL_POINT '.'
#define NEGATION_SIGN   '-'
#define NUMERIC_ZERO    '0'

#if HAVE_SETLOCALE

static char decimal_point;
static int th_sep; /* if CHAR_MAX + 1, then there is no thousands separator */

/* Nonzero if the corresponding locales are hard.  */
static int hard_LC_COLLATE;
# if HAVE_NL_LANGINFO
static int hard_LC_TIME;
# endif

# define IS_THOUSANDS_SEP(x) ((x) == th_sep)

#else

# define decimal_point C_DECIMAL_POINT
# define IS_THOUSANDS_SEP(x) 0

#endif

/* Yield 1 if X is nonzero, else 0.  The argument is parenthesized so
   that an operand such as `a & b' is tested as a whole.  */
#define NONZERO(x) ((x) != 0)

/* The kind of blanks for '-b' to skip in various options.  */
enum blanktype { bl_start, bl_end, bl_both };

/* The character marking end of line.  Default to \n.  */
static int eolchar = '\n';

/* Lines are held in core as counted strings.  */
struct line
{
  char *text;                   /* Text of the line.  */
  size_t length;                /* Length including final newline.  */
  char *keybeg;                 /* Start of first key.  */
  char *keylim;                 /* Limit of first key.  */
};

/* Input buffers.  */
struct buffer
{
  char *buf;                    /* Dynamically allocated buffer,
                                   partitioned into 3 regions:
                                   - input data;
                                   - unused area;
                                   - an array of lines, in reverse order.  */
  size_t used;                  /* Number of bytes used for input data.  */
  size_t nlines;                /* Number of lines in the line array.  */
  size_t alloc;                 /* Number of bytes allocated.  */
  size_t left;                  /* Number of bytes left from previous reads.  */
  size_t line_bytes;            /* Number of bytes to reserve for each line.  */
  int eof;                      /* An EOF has been read.  */
};

/* One key field comparison.  Instances are chained through NEXT.  */
struct keyfield
{
  size_t sword;                 /* Zero-origin 'word' to start at.  */
  size_t schar;                 /* Additional characters to skip.  */
  int skipsblanks;              /* Skip leading white space at start.  */
  size_t eword;                 /* Zero-origin first word after field.  */
  size_t echar;                 /* Additional characters in field.  */
  int skipeblanks;              /* Skip trailing white space at finish.  */
  int *ignore;                  /* Boolean array of characters to ignore.  */
  char *translate;              /* Translation applied to characters.  */
  int numeric;                  /* Flag for numeric comparison.  Handle
                                   strings of digits with optional decimal
                                   point, but no exponential notation.  */
  int general_numeric;          /* Flag for general, numeric comparison.
                                   Handle numbers in exponential notation.  */
  int month;                    /* Flag for comparison by month name.  */
  int reverse;                  /* Reverse the sense of comparison.  */
  struct keyfield *next;        /* Next keyfield to try.  */
};

/* A month name paired with its integer value (see monthtab).  */
struct month
{
  char *name;
  int val;
};

/* The name this program was run with.  */
char *program_name;

/* FIXME: None of these tables work with multibyte character sets.
   Also, there are many other bugs when handling multibyte characters,
   or even unibyte encodings where line boundaries are not in the
   initial shift state.  One way to fix this is to rewrite `sort' to
   use wide characters internally, but doing this with good
   performance is a bit tricky.  */

/* Table of white space.  */
static int blanks[UCHAR_LIM];

/* Table of non-printing characters.  */
static int nonprinting[UCHAR_LIM];

/* Table of non-dictionary characters (not letters, digits, or blanks).  */
static int nondictionary[UCHAR_LIM];

/* Translation table folding lower case to upper.  */
static char fold_toupper[UCHAR_LIM];

#define MONTHS_PER_YEAR 12

/* Table mapping month names to integers.
   Alphabetic order allows binary search.  */
static struct month monthtab[] =
{
  {"APR", 4},
  {"AUG", 8},
  {"DEC", 12},
  {"FEB", 2},
  {"JAN", 1},
  {"JUL", 7},
  {"JUN", 6},
  {"MAR", 3},
  {"MAY", 5},
  {"NOV", 11},
  {"OCT", 10},
  {"SEP", 9}
};

/* During the merge phase, the number of files to merge at once.  */
#define NMERGE 16

/* Minimum size for a merge or check buffer.
*/#define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))/* Minimum sort size; the code might not work with smaller sizes. */#define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE)/* The number of bytes needed for a merge or check buffer, which can function relatively efficiently even if it holds only one line. If a longer line is seen, this value is increased. */static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);/* The approximate maximum number of bytes of main memory to use, as specified by the user. Zero if the user has not specified a size. */static size_t sort_size;/* The guessed size for non-regular files. */#define INPUT_FILE_SIZE_GUESS (1024 * 1024)/* Array of directory names in which any temporary files are to be created. */static char const **temp_dirs;/* Number of temporary directory names used. */static size_t temp_dir_count;/* Number of allocated slots in temp_dirs. */static size_t temp_dir_alloc;/* Flag to reverse the order of all comparisons. */static int reverse;/* Flag for stable sort. This turns off the last ditch bytewise comparison of lines, and instead leaves lines in the same order they were read if all keys compare equal. */static int stable;/* Tab character separating fields. If NUL, then fields are separated by the empty string between a non-whitespace character and a whitespace character. */static char tab;/* Flag to remove consecutive duplicate lines from the output. Only the last of a sequence of equal lines will be output. */static int unique;/* Nonzero if any of the input files are the standard input. */static int have_read_stdin;/* List of key field comparisons to be tried. */static struct keyfield *keylist;voidusage (int status){ if (status != 0) fprintf (stderr, _("Try `%s --help' for more information.\n"), program_name); else { printf (_("\Usage: %s [OPTION]... 
[FILE]...\n\"), program_name); fputs (_("\Write sorted concatenation of all FILE(s) to standard output.\n\\n\Ordering options:\n\\n\"), stdout); fputs (_("\Mandatory arguments to long options are mandatory for short options too.\n\"), stdout); fputs (_("\ -b, --ignore-leading-blanks ignore leading blanks\n\ -d, --dictionary-order consider only blanks and alphanumeric characters\n\ -f, --ignore-case fold lower case to upper case characters\n\"), stdout); fputs (_("\ -g, --general-numeric-sort compare according to general numerical value\n\ -i, --ignore-nonprinting consider only printable characters\n\ -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\ -n, --numeric-sort compare according to string numerical value\n\ -r, --reverse reverse the result of comparisons\n\\n\"), stdout); fputs (_("\Other options:\n\\n\ -c, --check check whether input is sorted; do not sort\n\ -k, --key=POS1[,POS2] start a key at POS1, end it at POS 2 (origin 1)\n\ -m, --merge merge already sorted files; do not sort\n\ -o, --output=FILE write result to FILE instead of standard output\n\ -s, --stable stabilize sort by disabling last-resort comparison\n\ -S, --buffer-size=SIZE use SIZE for main memory buffer\n\"), stdout); printf (_("\ -t, --field-separator=SEP use SEP instead of non- to whitespace transition\n\ -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s\n\ multiple options specify multiple directories\n\ -u, --unique with -c: check for strict ordering\n\ otherwise: output only the first of an equal run\n\"), DEFAULT_TMPDIR); fputs (_("\ -z, --zero-terminated end lines with 0 byte, not newline\n\"), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); fputs (VERSION_OPTION_DESCRIPTION, stdout); fputs (_("\\n\POS is F[.C][OPTS], where F is the field number and C the character position\n\in the field. OPTS is one or more single-letter ordering options, which\n\override global ordering options for that key. 
If no key is given, use the\n\entire line as the key.\n\\n\SIZE may be followed by the following multiplicative suffixes:\n\"), stdout); fputs (_("\% 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\\n\With no FILE, or when FILE is -, read standard input.\n\\n\*** WARNING ***\n\The locale specified by the environment affects sort order.\n\Set LC_ALL=C to get the traditional sort order that uses\n\native byte values.\n\"), stdout ); printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); } /* Don't use EXIT_FAILURE here in case it is defined to be 1. POSIX requires that sort return 1 IFF invoked with -c and the input is not properly sorted. */ assert (status == 0 || status == SORT_FAILURE); exit (status);}#define COMMON_SHORT_OPTIONS "-bcdfgik:mMno:rsS:t:T:uz"static struct option const long_options[] ={ {"ignore-leading-blanks", no_argument, NULL, 'b'}, {"check", no_argument, NULL, 'c'}, {"dictionary-order", no_argument, NULL, 'd'}, {"ignore-case", no_argument, NULL, 'f'}, {"general-numeric-sort", no_argument, NULL, 'g'}, {"ignore-nonprinting", no_argument, NULL, 'i'}, {"key", required_argument, NULL, 'k'}, {"merge", no_argument, NULL, 'm'}, {"month-sort", no_argument, NULL, 'M'}, {"numeric-sort", no_argument, NULL, 'n'}, {"output", required_argument, NULL, 'o'}, {"reverse", no_argument, NULL, 'r'}, {"stable", no_argument, NULL, 's'}, {"buffer-size", required_argument, NULL, 'S'}, {"field-separator", required_argument, NULL, 't'}, {"temporary-directory", required_argument, NULL, 'T'}, {"unique", no_argument, NULL, 'u'}, {"zero-terminated", no_argument, NULL, 'z'}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {0, 0, 0, 0},};/* The set of signals that are caught. */static sigset_t caught_signals;/* The list of temporary files. */struct tempnode{ struct tempnode *volatile next; char name[1]; /* Actual size is 1 + file name length. */};static struct tempnode *volatile temphead;/* Clean up any remaining temporary files. 
*/static voidcleanup (void){ struct tempnode *node; for (node = temphead; node; node = node->next) unlink (node->name);}/* Report MESSAGE for FILE, then clean up and exit. */static void die (char const *, char const *) ATTRIBUTE_NORETURN;static voiddie (char const *message, char const *file){ error (0, errno, "%s: %s", message, file); exit (SORT_FAILURE);}/* Create a new temporary file, returning its newly allocated name. Store into *PFP a stream open for writing. */static char *create_temp_file (FILE **pfp){ static char const slashbase[] = "/sortXXXXXX"; static size_t temp_dir_index; sigset_t oldset; int fd; int saved_errno; char const *temp_dir = temp_dirs[temp_dir_index]; size_t len = strlen (temp_dir); struct tempnode *node = (struct tempnode *) xmalloc (sizeof node->next + len + sizeof slashbase); char *file = node->name; memcpy (file, temp_dir, len); memcpy (file + len, slashbase, sizeof slashbase); node->next = temphead; if (++temp_dir_index == temp_dir_count) temp_dir_index = 0; /* Create the temporary file in a critical section, to avoid races. */ sigprocmask (SIG_BLOCK, &caught_signals, &oldset); fd = mkstemp (file); if (0 <= fd) temphead = node; saved_errno = errno; sigprocmask (SIG_SETMASK, &oldset, NULL); errno = saved_errno; if (fd < 0 || (*pfp = fdopen (fd, "w")) == NULL) die (_("cannot create temporary file"), file); return file;}static FILE *xfopen (const char *file, const char *how){ FILE *fp; if (STREQ (file, "-")) { if (*how == 'r') { have_read_stdin = 1; fp = stdin; } else fp = stdout; } else { if ((fp = fopen_safer (file, how)) == NULL) die (_("open failed"), file); } return fp;}/* Close FP, whose name is FILE, and report any errors. */static voidxfclose (FILE *fp, char const *file){ if (fp == stdin) { /* Allow reading stdin from tty more than once. 
*/ if (feof (fp)) clearerr (fp); } else { if (fclose (fp) != 0) die (_("close failed"), file); }}static voidwrite_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file){ if (fwrite (buf, 1, n_bytes, fp) != n_bytes) die (_("write failed"), output_file);}/* Append DIR to the array of temporary directory names. */static voidadd_temp_dir (char const *dir)
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -