Planet
navi: home | PPS | about | screenshots | download | development | forum

source: downloads/boost_1_34_1/tools/inspect/link_check.cpp @ 29

Last change on this file since 29 was 29, checked in by landauf, 16 years ago

updated boost from 1_33_1 to 1_34_1

File size: 5.8 KB
Line 
1//  link_check implementation  -----------------------------------------------//
2
3//  Copyright Beman Dawes 2002.
4//
5//  Distributed under the Boost Software License, Version 1.0.
6//  (See accompanying file LICENSE_1_0.txt or copy at
7//  http://www.boost.org/LICENSE_1_0.txt)
8
9#include "link_check.hpp"
10#include "boost/regex.hpp"
11#include "boost/filesystem/operations.hpp"
12
13namespace fs = boost::filesystem;
14
15namespace
16{
17  boost::regex url_regex(
18    "<\\s*[^>]*\\s+(?:HREF|SRC)" // HREF or SRC
19    "\\s*=\\s*\"([^\"]*)\"",
20    boost::regbase::normal | boost::regbase::icase);
21
22} // unnamed namespace
23
24namespace boost
25{
26  namespace inspect
27  {
28
29//  link_check constructor  --------------------------------------------------//
30
31   link_check::link_check()
32     : m_broken_errors(0), m_unlinked_errors(0), m_invalid_errors(0),
33       m_bookmark_errors(0)
34   {
35   }
36
37//  inspect (all)  -----------------------------------------------------------//
38
39   void link_check::inspect(
40      const string & /*library_name*/,
41      const path & full_path )
42    {
43      // keep track of paths already encountered to reduce disk activity
44      if ( !fs::is_directory( full_path ) )
45        m_paths[ relative_to( full_path, fs::initial_path() ) ] |= m_present;
46    }
47
48//  inspect ( .htm, .html )  -------------------------------------------------//
49
50   void link_check::inspect(
51      const string & library_name,
52      const path & full_path,   // example: c:/foo/boost/filesystem/path.hpp
53      const string & contents )     // contents of file to be inspected
54    {
55      if (contents.find( "boostinspect:" "nolink" ) != string::npos) return;
56
57      string::const_iterator start( contents.begin() );
58      string::const_iterator end( contents.end() );
59      boost::match_results< string::const_iterator > what;
60      boost::match_flag_type flags = boost::match_default;
61
62      while( boost::regex_search( start, end, what, url_regex, flags) )
63      {
64        // what[0] contains the whole string iterators.
65        // what[1] contains the URL iterators.
66        do_url( string( what[1].first, what[1].second ),
67          library_name, full_path );
68
69        start = what[0].second; // update search position
70        flags |= boost::match_prev_avail; // update flags
71        flags |= boost::match_not_bob;
72      }
73    }
74
75//  do_url  ------------------------------------------------------------------//
76
77    void link_check::do_url( const string & url, const string & library_name,
78      const path & source_path ) // precondition: source_path.is_complete()
79    {
80      if ( url[0] == '#'
81        || url.find( "mailto:" ) == 0
82        || url.find( "http:" ) == 0
83        || url.find( "https:" ) == 0
84        || url.find( "ftp:" ) == 0
85        || url.find( "news:" ) == 0
86        || url.find( "javascript:" ) == 0
87        ) return;
88
89      if ( url.find( "file:" ) == 0 )
90      {
91        ++m_invalid_errors;
92        error( library_name, source_path, string(name()) + " invalid URL (hardwired file): " + url );
93        return;
94      }
95
96      // detect characters banned by RFC2396:
97      if ( url.find_first_of( " <>\"{}|\\^[]'" ) != string::npos )
98      {
99        ++m_invalid_errors;
100        error( library_name, source_path, string(name()) + " invalid character in URL: " + url );
101      }
102
103      // strip url of bookmarks
104      string plain_url( url );
105      string::size_type pos( plain_url.find( '#' ) );
106      if ( pos != string::npos )
107      {
108        plain_url.erase( pos );
109        // detect characters banned by RFC2396 in bookmark:
110        if ( url.find( '#', pos+1 ) != string::npos )
111        {
112          ++m_bookmark_errors;
113          error( library_name, source_path, string(name()) + " invalid bookmark: " + url );
114        }
115      }
116
117      // strip url of references to current dir
118      if ( plain_url[0]=='.' && plain_url[1]=='/' ) plain_url.erase( 0, 2 );
119
120      // url is relative source_path.branch()
121      // convert to target_path, which is_complete()
122      path target_path;
123      try { target_path = source_path.branch_path() /= path( plain_url, fs::no_check ); }
124      catch ( const fs::filesystem_error & )
125      {
126        ++m_invalid_errors;
127        error( library_name, source_path, string(name()) + " invalid URL: " + url );
128        return;
129      }
130
131      // create a m_paths entry if necessary
132      std::pair< const string, int > entry(
133        relative_to( target_path, fs::initial_path() ), 0 );
134      m_path_map::iterator itr( m_paths.find( entry.first ) );
135      if ( itr == m_paths.end() )
136      {
137        if ( fs::exists( target_path ) ) entry.second = m_present;
138        itr = m_paths.insert( entry ).first;
139      }
140
141      // itr now points to the m_paths entry
142      itr->second |= m_linked_to;
143
144      // if target isn't present, the link is broken
145      if ( (itr->second & m_present) == 0 )
146      {
147        ++m_broken_errors;
148        error( library_name, source_path, string(name()) + " broken link: " + url );
149      }
150    }
151
152//  close  -------------------------------------------------------------------//
153
154   void link_check::close()
155   {
156     for ( m_path_map::const_iterator itr = m_paths.begin();
157       itr != m_paths.end(); ++itr )
158     {
159// std::clog << itr->first << " " << itr->second << "\n";
160       if ( (itr->second & m_linked_to) != m_linked_to
161         && (itr->first.rfind( ".html" ) == itr->first.size()-5
162          || itr->first.rfind( ".htm" ) == itr->first.size()-4)
163         // because they may be redirectors, it is OK if these are unlinked:
164         && itr->first.rfind( "index.html" ) == string::npos
165         && itr->first.rfind( "index.htm" ) == string::npos )
166       {
167         ++m_unlinked_errors;
168         path full_path( fs::initial_path() / path(itr->first, fs::no_check) );
169         error( impute_library( full_path ), full_path, string(name()) + " unlinked file" );
170       }
171     }
172   }
173
174  } // namespace inspect
175} // namespace boost
176
Note: See TracBrowser for help on using the repository browser.