# See http://www.robotstxt.org/wc/norobots.html for documentation on how to use the robots.txt file
#
# To ban all spiders from the entire site, uncomment the "Disallow: /" line below
# (the "User-Agent: *" line is already active):
User-Agent: *
# Disallow: /
Crawl-Delay: 5
# "more facets"
Disallow: /search/facet/
Disallow: /collections/facet/
Disallow: /institutions/facet/
# range-limit page normally only requested by AJAX for loading range limit info.
Disallow: /search/range_limit
Disallow: /collections/range_limit
Disallow: /institutions/range_limit
# "View larger" link for range limit.
Disallow: /search/range_limit_panel
Disallow: /collections/range_limit_panel
Disallow: /institutions/range_limit_panel
# Disallow search results that include facet limits, to try to prevent
# crawlers from tree-walking every possible facet limit combination.
# ('nofollow' on the links would be my first choice.)
# It is unclear whether the [ should be %-encoded here: the spec says yes,
# but Google's robots.txt validator suggests no, so both forms are listed.
Disallow: /search*f%5B
Disallow: /search*f[
Disallow: /collections*f%5B
Disallow: /collections*f[
# disallow download pages
Disallow: /downloads/
Disallow: /start_download/