##
#	SWISH++
#	swish++.conf
#
#	This is a template configuration file.  It contains every configuration
#	variable, lists which executable(s) it's used with, and gives a brief
#	explanation.  Use this as a starting point for your own configuration
#	file.
#
#	Notes:
#	------
#
#	1. The values in a set are separated by whitespace.
#
#	2. The same variable may be specified on more than one line.  For sets,
#	   the value is the union of all the values; for all others, the value
#	   is the last value specified.
#
#	3. There is a special "infinity" numeric value.
#
#	4. Boolean values are one of: true, t, false, f, yes, y, no, n.  Case
#	   is irrelevant.
#
#	5. COMMENTS ARE TREATED AS SUCH ONLY IF THEY ARE ON LINES BY
#	   THEMSELVES!
##

AssociateMeta		yes
#
# used by: index; when "no", same as the -A option.
#
#	Associate words with meta names during indexing.

#ExcludeClass		no_index
#
# used by: index; same as the -C option.
#
#	For HTML and XHTML files only (as specified by IncludeFile), a set of
#	class names of HTML and XHTML elements whose content text is not to be
#	indexed.

ExcludeFile		*.gif *.jpg
#
# used by: index, extract; same as the -E option.
#
#	A set of filename patterns of files not to index or extract.  Case is
#	significant.  You normally specify either IncludeFile or ExtractFile
#	-OR- ExcludeFile (whichever is easier to specify), but not both.

# Mail (RFC 822) and News
# -----------------------
IncludeMeta		Bcc Cc Comments Content-Description From Keywords
IncludeMeta		Newsgroups Resent-To Subject To

# vCard
# -----
#IncludeMeta		adr=address categories class label=address fn=name
#IncludeMeta		nickname note org role title

# Unix manual pages
# -----------------
IncludeMeta		author bugs caveats description diagnostics environment
IncludeMeta		errors examples exit-status files history name notes
IncludeMeta		options return-value see-also synopsis warnings

#ExcludeMeta		meta1 meta2 metaN
#
# used by: index; same as the -m or -M option, respectively
#
#	For HTML and XHTML files, the values of meta NAME attributes for which
#	the words in the values of the associated CONTENT attribute should be
#	indexed or not indexed, respectively.
#
#	For mail and news files, the header names for which the words in the
#	associated values of the headers should be indexed or not indexed,
#	respectively.
#
#	For mail files only, the values of the vCard fields for which the words
#	in the associated values of the fields should be indexed or not
#	indexed, respectively.
#
#	For manual page files, the values of the section headings for which the
#	words in the associated sections should be indexed or not indexed,
#	respectively.
#
#	Case is irrelevant.  You normally specify either IncludeMeta or
#	ExcludeMeta (whichever is easier to specify), but not both.
#	
#	By default, for IncludeMeta, words for all meta names are indexed.
#	Specifying at least one meta name via IncludeMeta changes that so that
#	only the words associated with a member of the set of meta names
#	explicitly specified are indexed.
#
#	If you're indexing mail or news files, it is recommended that you use
#	the IncludeMeta values as given above.
#
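#	Additionally, meta names can be reassigned using the form "name=newname",
#	e.g., "adr=address" in the vCard example above indexes the words of the
#	"adr" field under the meta name "address".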

ExtractExtension	txt
#
# used by: extract; same as the -x option.
#
#	The extension to append to filenames during extraction.

#ExtractFile		*.doc *.ppt *.xls
#
# used by: extract; same as the -e option.
#
#	A set of filename patterns of files to extract.  Case is significant.
#	Filename patterns specified here MUST NOT also be specified in
#	ExcludeFile above.
#
#	For increased performance, you should modify the set to include only
#	those patterns that you are actually using.

#ExtractFilter		no
#
# used by: extract; when "yes", same as the -f option.
#
#	When "yes", extract a single file to standard output.

#FilesGrow		100
#
# used by: index; same as the -g option.
#
#	The number of files to grow reserved space for when incrementally
#	indexing.  The number may be specified as either an absolute number or
#	a percentage (when a trailing % is present).

#FilesReserve		1000
#
# used by: index; same as the -F option.
#
#	The initial number of files to reserve space for.  During indexing,
#	this can be exceeded without any problem, but there will be a slight
#	performance penalty.
#
#	If you know approximately how many files you have, modify the above
#	value!

#FilterAttachment application/*word	extract -f %f > @%F.txt
FilterAttachment application/pdf	pdftotext %f @%F.txt
#FilterAttachment application/postscript	pstotext %f > @%F.txt
#
# used by: index; no option equivalent
#
#	Filter e-mail attachments having certain MIME types prior to indexing.
#	MIME type patterns MUST be specified entirely in lower case.
#
#	See http://www.research.compaq.com/SRC/virtualpaper/pstotext.html for
#	information about the pstotext program.

FilterFile *.bz2		bunzip2 -c %f > @%F
FilterFile *.gz		gunzip -c %f > @%F
FilterFile *.Z		uncompress -c %f > @%F
FilterFile *.pdf	pdftotext %f @%F.txt
#FilterFile *.ps	pstotext %f > @%F.txt
#
# used by: index, extract; no option equivalent.
#
#	Filter files having certain extensions prior to either indexing or
#	extraction.
#
#	See http://www.research.compaq.com/SRC/virtualpaper/pstotext.html for
#	information about the pstotext program.

#FollowLinks		no
#
# used by: index, extract; same as the -l option.
#
#	Follow symbolic links during indexing or extraction.

#IncludeFile	text	*.txt
#IncludeFile	HTML	*.asp *.*htm* *.jsp
#IncludeFile	Mail	*.m
#IncludeFile	RTF	*.rtf
#
# used by: index; same as the -e option.
#
#	A set of filename patterns of files to index and the modules they map
#	to.  Case is irrelevant for the module name but significant for the
#	patterns.  Filename patterns specified here MUST NOT also be specified
#	in ExcludeFile.
#
#	For increased performance, you should modify the set to include only
#	those patterns that you are actually using.

#Incremental		no
#
# used by: index; when "yes", same as the -I option.
#
#	When "yes", incrementally index files and add them to an existing
#	index.

IndexFile		swish++.index
#
# used by: index, search; same as the -i option.
#
#	The name of the index file either generated or searched.

#PidFile			/var/run/search.pid
#
# used by: search; same as the -P option
#
#	If "search" is run as a daemon, record its process ID in this file.

#RecurseSubdirs		yes
#
# used by: index, extract; when "no", same as the -r option.
#
#	When "no", do not recursively index the files in subdirectories; that
#	is, when a directory is encountered, all files in that directory are
#	indexed (modulo the filename patterns specified via the IncludeFile,
#	ExcludeFile, or ExtractFile variables), but subdirectories encountered
#	are ignored and therefore the files contained in them are not indexed.
#	(This variable is most useful when specifying the directories and files
#	via standard input.)  The default is to index the files in
#	subdirectories recursively.

#ResultsMax		100
#
# used by: search; same as the -m option.
#
#	The maximum number of results to return overriding the compiled-in
#	default (which is usually 100).

#ResultSeparator	" "
#
# used by: search; same as the -R option
#
#	The string to separate the parts in a search result when ResultsFormat
#	is "classic".  Either single or double quotes can be used to preserve
#	whitespace.  Quotes are stripped only if they match.

#ResultsFormat		classic
#
# used by: search; same as the -F option
#
#	The output format of search results: either "classic" or "XML".

#SearchBackground	yes
#
# used by: search; when "no", same as the -B option.
#
#	When "yes" and SearchDaemon is not "none", automatically detach from
#	the terminal and run in the background.

#SearchDaemon		none
#
# used by: search; same as the -b option.
#
#	When not "none", run "search" as a daemon process listening to either a
#	Unix domain ("unix") or TCP socket ("tcp") or both ("both") for
#	requests.

#SocketAddress		*:1967
#
# used by: search; same as the -a option.
#
#	Default IP address and port of the TCP socket; used only when
#	SearchDaemon is either "tcp" or "both".

#SocketFile		/tmp/search.socket
#
# used by: search; same as the -u option.
#
#	Default name of the Unix domain socket file; used only when
#	SearchDaemon is either "unix" or "both".

#SocketQueueSize		511
#
# used by: search; same as the -q option.
#
#	Maximum number of queued connections for a socket; used only when
#	SearchDaemon is not "none".  The default 511 value is taken from
#	httpd.h in Apache:
#
#		It defaults to 511 instead of 512 because some systems store it
#		as an 8-bit datatype; 512 truncated to 8-bits is 0, while 511
#		is 255 when truncated.
#
#	If it's good enough for Apache, it's good enough for us.

#SocketTimeout		10
#
# used by: search; same as the -o option.
#
#	Number of seconds a client has to complete a search request before
#	being disconnected.  This is to prevent a client from connecting, not
#	completing a request, and causing the thread servicing the request to
#	wait forever.  This is used only when SearchDaemon is not "none".

#StemWords		no
#
# used by: search; when "yes", same as the -s option.
#
#	Perform stemming (suffix stripping) on words during searches.  Words
#	that end in the wildcard character are not stemmed.

#StopWordFile		custom_stop_word_file
#
# used by: index, extract; same as the -s option.
#
#	The name of a file containing the set of stop-words to use instead of
#	the built-in set.

#TempDirectory		/tmp
#
# used by: index
#
#	Directory to use for temporary files during indexing.  If your OS
#	mounts swap space on /tmp, as indexing progresses and more files get
#	created in /tmp, you will have less swap space, indexing will get
#	slower, and you may run out of memory.  If this is the case, you can
#	specify a directory on a real filesystem, i.e., one on a physical
#	disk.  The directory must exist.

#ThreadsMin		5
#ThreadsMax		100
#
# used by: search; same as the -t or -T option, respectively.
#
#	The minimum/maximum number of simultaneous threads, respectively; used
#	only when SearchDaemon is not "none".

#ThreadTimeout		30
#
# used by: search; same as the -O option.
#
#	Number of seconds until an idle spare thread times out and destroys
#	itself; used only when SearchDaemon is not "none".

#TitleLines		12
#
# used by: index; same as the -t option.
#
#	For HTML and XHTML files only, the maximum number of lines into a file
#	to look at for HTML and XHTML <TITLE> tags.  The default is 12.  Larger
#	numbers slow indexing.

#Verbosity		0
#
# used by: index, extract; same as the -v option.
#
#	Print additional information to standard output during indexing or
#	extraction.  The verbosity levels are 0-4; see index(1) or extract(1)
#	for details.

#WordFilesMax		infinity
#
# used by: index, search; same as the -f option.
#
#	The maximum number of files a word may occur in before it is discarded
#	as being too frequent.  The default is infinity.

#WordPercentMax		100
#
# used by: index, search; same as the -p option.
#
#	The maximum percentage of files a word may occur in before it is
#	discarded as being too frequent.  If you want to keep all words
#	regardless, specify 101.

# the end