You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

43 lines
1.2 KiB

  1. # $Id$
  2. # All crawlers, one group: skip CVS and .svn directories (403 Forbidden anyway)
  3. User-agent: *
  4. Disallow: /CVS/
  5. Disallow: /*/CVS/
  6. Disallow: /.svn/
  7. Disallow: /*/.svn/
  8. # Be gentle and prevent excessive search engine hits
  9. Crawl-delay: 5
  10. Disallow: /cgi-bin/trac.cgi
  11. Disallow: /log
  12. # Don’t crawl git repos
  13. Disallow: /git/*.git/*
  14. Disallow: /git/*.git.broken/*
  15. # "This robot collects content from the Internet for the sole purpose of
  16. # helping educational institutions prevent plagiarism. [...] we compare
  17. # student papers against the content we find on the Internet to see if we
  18. # can find similarities." (http://www.turnitin.com/robot/crawlerinfo.html)
  19. # --> Not wanted here: deny this crawler the entire site.
  20. User-Agent: TurnitinBot
  21. Disallow: /
  22. # "NameProtect engages in crawling activity in search of a wide range of
  23. # brand and other intellectual property violations that may be of interest
  24. # to our clients." (http://www.nameprotect.com/botinfo.html)
  25. # --> Not wanted here: deny this crawler the entire site.
  26. User-Agent: NPBot
  27. Disallow: /
  28. # "iThenticate® is a new service we have developed to combat the piracy
  29. # of intellectual property and ensure the originality of written work for
  30. # publishers, non-profit agencies, corporations, and newspapers."
  31. # (http://www.slysearch.com/)
  32. # --> Not wanted here: deny this crawler the entire site.
  33. User-Agent: SlySearch
  34. Disallow: /