This is Part 4 of the 7-part Exercise 6-0. Click here to see the other parts.
Exercise 6-0 (Part 4 / 7)
Relating to section 6.1.3 of the textbook (pages 105-109). Implement and test the “Finding URLs” program. The program scans a line of input text, and automatically detects and displays all the URL addresses within that line of text.
The Project
This section summarises the partitioned program in the form of C++ source and header files.
Source File List
Header File List
Source Files
main.cpp
#include <iostream> // cin, cout, endl #include <string> // string #include <vector> // vector #include "find_urls.h" // find_url #include "vcout.h" // vcout using std::cin; using std::cout; using std::endl; using std::string; using std::vector; int main() { cout << "Enter a line. This program automatically find URLs..." << endl; // Read a line of input, then find and display URLs. string line; vector<string> urls; while (getline(cin, line)) { vector<string> urls = find_urls(line); vcout(urls); } return 0; }
find_urls.cpp
#include <string> // string #include <vector> // vector #include "url_beg.h" // url_beg #include "url_end.h" // url_end using std::string; using std::vector; vector<string> find_urls(const string& s) { vector<string> ret; typedef string::const_iterator iter; iter b = s.begin(), e = s.end(); // look through the entire input while (b != e) { // look for one or more letters followed by :// b = url_beg(b, e); // if we found it if (b != e) { // get the rest of the URL iter after = url_end(b, e); // remember the URL ret.push_back(string(b, after)); // advance b and check for more URLs on this line b = after; } } return ret; }
not_url_char.cpp
#include <string>      // string
#include <algorithm>   // find
#include <cctype>      // isalnum

using std::string;

// Reports whether c is a character that cannot appear in a URL.
//
// Returns false for alphanumerics and for the punctuation characters listed
// in url_ch below; returns true for everything else.
bool not_url_char(char c)
{
    // characters, in addition to alphanumerics, that can appear in a URL
    static const string url_ch = "~;/?:@=&$-_.+!*'(),";

    // <cctype> classifiers require a value representable as unsigned char
    // (or EOF); passing a negative plain char is undefined behaviour.
    const bool alnum = std::isalnum(static_cast<unsigned char>(c)) != 0;

    // see whether c can appear in a URL and return the negative
    return !(alnum ||
             std::find(url_ch.begin(), url_ch.end(), c) != url_ch.end());
}
url_beg.cpp
#include <string> // string, isalpha #include <algorithm> // search #include "not_url_char.h" // not_url_char using std::string; string::const_iterator url_beg(string::const_iterator b, string::const_iterator e) { static const string sep = "://"; typedef string::const_iterator iter; // i marks where the separator was found iter i = b; while ((i = search(i, e, sep.begin(), sep.end() )) != e) { // make sure the separator isn't at the beginning or end of the line if (i != b && i + sep.size() != e) { // beg marks the beginning of the protocol-name iter beg = i; while (beg != b && isalpha(beg[-1])) --beg; // is there at least one appropriate character before and after the separator? if (beg != i && !not_url_char(i[sep.size()])) return beg; } // the separator we found wasn't part of a URL; advance i past this separator i += sep.size(); } return e; }
url_end.cpp
#include <string> // string #include <vector> // vector #include <algorithm> // find_if #include "not_url_char.h" // not_url_char using std::string; string::const_iterator url_end(string::const_iterator b, string::const_iterator e) { return find_if(b, e, not_url_char); }
vcout.cpp
#include <iostream>   // cout
#include <string>     // string
#include <vector>     // vector

using std::cout;
using std::endl;
using std::string;
using std::vector;

// Writes every element of v to standard output, one element per line.
// Always returns 0.
int vcout(const vector<string>& v)
{
    // '\n' instead of endl: endl would flush the stream after every element.
    for (vector<string>::const_iterator i = v.begin(); i != v.end(); ++i)
        cout << *i << '\n';

    // Flush once so the output is visible before the caller blocks on input.
    cout.flush();

    return 0;
}
Header Files
find_urls.h
#ifndef GUARD_FIND_URLS_H
#define GUARD_FIND_URLS_H

#include <vector>
#include <string>

// Returns the URLs found in s, in order of appearance.
std::vector<std::string> find_urls(const std::string& s);

#endif // GUARD_FIND_URLS_H
not_url_char.h
#ifndef GUARD_NOT_URL_CHAR_H
#define GUARD_NOT_URL_CHAR_H

// Returns true when c is a character that cannot appear in a URL.
bool not_url_char(char c);

#endif // GUARD_NOT_URL_CHAR_H
url_beg.h
#ifndef GUARD_URL_BEG_H
#define GUARD_URL_BEG_H

// Included so this header is self-contained: the declaration below names
// std::string and must not depend on the includer's include order.
#include <string>

// Returns an iterator to the start of the first URL in [b, e), or e when
// the range contains none.
std::string::const_iterator url_beg(std::string::const_iterator b,
                                    std::string::const_iterator e);

#endif // GUARD_URL_BEG_H
url_end.h
#ifndef GUARD_URL_END_H
#define GUARD_URL_END_H

#include <string>

// Returns an iterator to the first character in [b, e) that cannot appear
// in a URL, or e when there is no such character.
std::string::const_iterator url_end(std::string::const_iterator b,
                                    std::string::const_iterator e);

#endif // GUARD_URL_END_H
vcout.h
#ifndef GUARD_VCOUT_H
#define GUARD_VCOUT_H

#include <string>
#include <vector>

// Writes each element of v to standard output on its own line; returns 0.
int vcout(const std::vector<std::string>& v);

#endif // GUARD_VCOUT_H
Test program
Let’s do some simple tests:
- Submitting the line “Have you tried http://google.co.uk and http://bbc.co.uk ?”, the program should return: “http://google.co.uk” and “http://bbc.co.uk”.
- Submitting the line “Have you tried http:// or google.co.uk or http://google.co.uk ?”, the program should return: “http://google.co.uk”.
- Submitting the line “Have you tried http or :// or http://bbc.co.uk ?”, the program should return: “http://bbc.co.uk”.