Planet

navi

home

PPS

about

screenshots

download

development

forum

Context Navigation

source: downloads/boost_1_34_1/tools/inspect/link_check.cpp @ 35

Last change on this file since 35 was 29, checked in by landauf, 16 years ago
updated boost from 1_33_1 to 1_34_1
File size: 5.8 KB

Line
1	// link_check implementation -----------------------------------------------//
2
3	// Copyright Beman Dawes 2002.
4	//
5	// Distributed under the Boost Software License, Version 1.0.
6	// (See accompanying file LICENSE_1_0.txt or copy at
7	// http://www.boost.org/LICENSE_1_0.txt)
8
9	#include "link_check.hpp"
10	#include "boost/regex.hpp"
11	#include "boost/filesystem/operations.hpp"
12
13	namespace fs = boost::filesystem;
14
15	namespace
16	{
17	boost::regex url_regex(
18	"<\\s[^>]\\s+(?:HREF\|SRC)" // HREF or SRC
19	"\\s=\\s\"([^\"]*)\"",
20	boost::regbase::normal \| boost::regbase::icase);
21
22	} // unnamed namespace
23
24	namespace boost
25	{
26	namespace inspect
27	{
28
29	// link_check constructor --------------------------------------------------//
30
31	link_check::link_check()
32	: m_broken_errors(0), m_unlinked_errors(0), m_invalid_errors(0),
33	m_bookmark_errors(0)
34	{
35	}
36
37	// inspect (all) -----------------------------------------------------------//
38
39	void link_check::inspect(
40	const string & /library_name/,
41	const path & full_path )
42	{
43	// keep track of paths already encountered to reduce disk activity
44	if ( !fs::is_directory( full_path ) )
45	m_paths[ relative_to( full_path, fs::initial_path() ) ] \|= m_present;
46	}
47
48	// inspect ( .htm, .html ) -------------------------------------------------//
49
50	void link_check::inspect(
51	const string & library_name,
52	const path & full_path, // example: c:/foo/boost/filesystem/path.hpp
53	const string & contents ) // contents of file to be inspected
54	{
55	if (contents.find( "boostinspect:" "nolink" ) != string::npos) return;
56
57	string::const_iterator start( contents.begin() );
58	string::const_iterator end( contents.end() );
59	boost::match_results< string::const_iterator > what;
60	boost::match_flag_type flags = boost::match_default;
61
62	while( boost::regex_search( start, end, what, url_regex, flags) )
63	{
64	// what[0] contains the whole string iterators.
65	// what[1] contains the URL iterators.
66	do_url( string( what[1].first, what[1].second ),
67	library_name, full_path );
68
69	start = what[0].second; // update search position
70	flags \|= boost::match_prev_avail; // update flags
71	flags \|= boost::match_not_bob;
72	}
73	}
74
75	// do_url ------------------------------------------------------------------//
76
77	void link_check::do_url( const string & url, const string & library_name,
78	const path & source_path ) // precondition: source_path.is_complete()
79	{
80	if ( url[0] == '#'
81	\|\| url.find( "mailto:" ) == 0
82	\|\| url.find( "http:" ) == 0
83	\|\| url.find( "https:" ) == 0
84	\|\| url.find( "ftp:" ) == 0
85	\|\| url.find( "news:" ) == 0
86	\|\| url.find( "javascript:" ) == 0
87	) return;
88
89	if ( url.find( "file:" ) == 0 )
90	{
91	++m_invalid_errors;
92	error( library_name, source_path, string(name()) + " invalid URL (hardwired file): " + url );
93	return;
94	}
95
96	// detect characters banned by RFC2396:
97	if ( url.find_first_of( " <>\"{}\|\\^[]'" ) != string::npos )
98	{
99	++m_invalid_errors;
100	error( library_name, source_path, string(name()) + " invalid character in URL: " + url );
101	}
102
103	// strip url of bookmarks
104	string plain_url( url );
105	string::size_type pos( plain_url.find( '#' ) );
106	if ( pos != string::npos )
107	{
108	plain_url.erase( pos );
109	// detect characters banned by RFC2396 in bookmark:
110	if ( url.find( '#', pos+1 ) != string::npos )
111	{
112	++m_bookmark_errors;
113	error( library_name, source_path, string(name()) + " invalid bookmark: " + url );
114	}
115	}
116
117	// strip url of references to current dir
118	if ( plain_url[0]=='.' && plain_url[1]=='/' ) plain_url.erase( 0, 2 );
119
120	// url is relative source_path.branch()
121	// convert to target_path, which is_complete()
122	path target_path;
123	try { target_path = source_path.branch_path() /= path( plain_url, fs::no_check ); }
124	catch ( const fs::filesystem_error & )
125	{
126	++m_invalid_errors;
127	error( library_name, source_path, string(name()) + " invalid URL: " + url );
128	return;
129	}
130
131	// create a m_paths entry if necessary
132	std::pair< const string, int > entry(
133	relative_to( target_path, fs::initial_path() ), 0 );
134	m_path_map::iterator itr( m_paths.find( entry.first ) );
135	if ( itr == m_paths.end() )
136	{
137	if ( fs::exists( target_path ) ) entry.second = m_present;
138	itr = m_paths.insert( entry ).first;
139	}
140
141	// itr now points to the m_paths entry
142	itr->second \|= m_linked_to;
143
144	// if target isn't present, the link is broken
145	if ( (itr->second & m_present) == 0 )
146	{
147	++m_broken_errors;
148	error( library_name, source_path, string(name()) + " broken link: " + url );
149	}
150	}
151
152	// close -------------------------------------------------------------------//
153
154	void link_check::close()
155	{
156	for ( m_path_map::const_iterator itr = m_paths.begin();
157	itr != m_paths.end(); ++itr )
158	{
159	// std::clog << itr->first << " " << itr->second << "\n";
160	if ( (itr->second & m_linked_to) != m_linked_to
161	&& (itr->first.rfind( ".html" ) == itr->first.size()-5
162	\|\| itr->first.rfind( ".htm" ) == itr->first.size()-4)
163	// because they may be redirectors, it is OK if these are unlinked:
164	&& itr->first.rfind( "index.html" ) == string::npos
165	&& itr->first.rfind( "index.htm" ) == string::npos )
166	{
167	++m_unlinked_errors;
168	path full_path( fs::initial_path() / path(itr->first, fs::no_check) );
169	error( impute_library( full_path ), full_path, string(name()) + " unlinked file" );
170	}
171	}
172	}
173
174	} // namespace inspect
175	} // namespace boost
176

Note: See TracBrowser for help on using the repository browser.

Download in other formats: