forked from HTTPArchive/legacy.httparchive.org
-
Notifications
You must be signed in to change notification settings - Fork 0
/
addsite.php
127 lines (109 loc) · 3.8 KB
/
addsite.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
<?php
/*
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
require_once("ui.inc");
require_once("urls.inc");
// Title used for both the <title> element and the page header.
$gTitle = "Add a Site";

// The requested URL arrives in the "rurl" parameter and is lower-cased before any checks.
$gRurl = strtolower(getParam("rurl", ""));

// Stays false when no URL was supplied; otherwise it holds the preg_match() result.
$is_valid_url = false;

if ( $gRurl ) {
	// Basic shape check only: http/https scheme, dotted hostname, optional port, optional slash.
	// The pattern is anchored at the start but not at the end.
	$is_valid_url = preg_match("/^(http|https):\/\/([A-Z0-9][A-Z0-9_-]*(?:\.[A-Z0-9][A-Z0-9_-]*)+):?(\d+)?\/?/i", $gRurl);

	if ( $is_valid_url ) {
		// Searching from offset 10 skips the "//" of the scheme, so a miss means
		// there is no slash after the hostname and one is appended.
		$slashAfterHost = strpos($gRurl, "/", 10);
		if ( $slashAfterHost === FALSE ) {
			$gRurl = $gRurl . "/";
		}
	}
}
?>
<!doctype html>
<html>
<head>
<title><?php /* $gTitle is the fixed page title assigned at the top of this script. */ echo $gTitle ?></title>
<meta charset="UTF-8">
<?php /* headfirst() is declared outside this file; whatever it returns is emitted verbatim inside <head>. */ echo headfirst() ?>
<link type="text/css" rel="stylesheet" href="style.css" />
</head>
<body>
<?php /* uiHeader() is declared outside this file; it receives the page title and its return value is echoed. */ echo uiHeader($gTitle); ?>
<?php
/*
 * Result panel: runs only when a URL was submitted and prints exactly one
 * status message.
 *
 *   - invalid URL                          -> warning, nothing queued
 *   - URL known and opted out              -> warning, nothing queued
 *   - URL known, not flagged "other"       -> queued via addSite()
 *   - URL known, flagged "other"           -> "already in the list" (+ link to latest page if any)
 *   - URL unknown, root of host is known   -> refused (one page per hostname)
 *   - URL unknown, root of host unknown    -> queued via addSite()
 *
 * Every value that originates from the request or the database is passed
 * through htmlspecialchars() before being echoed: the validation pattern only
 * checks the beginning of the URL, so the remainder may contain markup.
 */
if ( $gRurl ) {
	// HTML-safe copy of the submitted URL, used in every message below.
	$rurlHtml = htmlspecialchars($gRurl, ENT_QUOTES, 'UTF-8');

	if ( ! $is_valid_url ) {
		echo "<p class=warning>The URL entered is invalid: $rurlHtml</p>\n";
	}
	else {
		$urlObj = getUrl($gRurl, true);
		$bAdd = false;

		if ( $urlObj ) {
			if ( $urlObj['optout'] ) {
				echo "<p class=warning>The owner of $rurlHtml has opted out of the HTTP Archive.</p>\n";
			}
			else if ( ! $urlObj['other'] ) {
				// If it exists but it's not marked "other" - then add it and set other=true;
				$bAdd = true;
			}
			else {
				// Look up the most recent page recorded for the URL as it is stored.
				// The original code interpolated an undefined $existingUrl here, so the
				// lookup never matched anything.
				// NOTE(review): addslashes() is a stop-gap; doSimpleQuery() is declared
				// elsewhere, so the driver's own escaping or a prepared statement should
				// replace it once that API is confirmed.
				$existingUrl = addslashes($urlObj['url']);
				$query = "select max(pageid) as pageid from $gPagesTable where url='$existingUrl';";
				$pageid = doSimpleQuery($query);
				if ( $pageid ) {
					echo "<p class=warning>$rurlHtml is already in the list of URLs. See the <a href='viewsite.php?pageid=$pageid'>latest results</a>.</p>\n";
				}
				else {
					echo "<p class=warning>$rurlHtml is already in the list of URLs but doesn't have any data yet. It will be included in the next crawl.</p>\n";
				}
			}
		}
		else {
			// We get A LOT of requests to add deep pages (eg, "http://www.youtube.com/blahblah").
			// But we only allow one page per hostname if the URL is not in the list of top sites.
			// Here we check if this is a deep URL.
			// Everything before the first slash after the hostname (offset 10 skips the scheme's "//").
			$rooturl = substr($gRurl, 0, strpos($gRurl, "/", 10));
			$rooturlObj = getUrl($rooturl, true);
			if ( $rooturlObj ) {
				$rootHtml = htmlspecialchars($rooturlObj['url'], ENT_QUOTES, 'UTF-8');
				echo "<p class=warning>$rurlHtml will not be added because <a href='$rootHtml'>$rootHtml</a> is already in the crawl. If a URL is not in the list of <a href='about.php#listofurls'>top URLs</a> it can only be added if there are no other URLs with the same hostname already in the crawl.</p>\n";
			}
			else {
				$bAdd = true;
			}
		}

		if ( $bAdd ) {
			addSite($gRurl); // queue it for adding
			echo "<p class=warning>$rurlHtml will be added within five business days and will be included in the next crawl after that.</p>\n";
		}
	}
}
?>
<h1><?php echo $gTitle ?></h1>
<script type="text/javascript">
/**
 * onsubmit handler for the add-site form.
 *
 * Reads the value of the "rurl" field. With no value it alerts the user and
 * blocks the submit; otherwise it asks for confirmation and lets the submit
 * proceed only when the user accepts.
 *
 * @returns {boolean} true to submit the form, false to cancel it.
 */
function confirmAdd() {
	var requestedUrl = document.getElementById("rurl").value;

	// Nothing entered: tell the user and block the submit.
	if ( ! requestedUrl ) {
		alert("Please select a URL.");
		return false;
	}

	var question = "This will add " + requestedUrl + " to the HTTP Archive crawls and record data about the site. Do you want to continue?";
	return ( confirm(question) ? true : false );
}
</script>
<p>
Enter the URL you want to add to the HTTP Archive.
</p>
<?php
/*
 * Add-site form. It submits back to this same script with the URL in "rurl";
 * no method attribute is set on the form.
 *
 * - The action is written with the full PHP open tag, because the short form
 *   is only parsed when short_open_tag is enabled and would otherwise be sent
 *   to the browser as literal text.
 * - PHP_SELF can contain client-supplied path info, so it is HTML-escaped
 *   before being placed in the attribute.
 * - The example-URL placeholder belongs on the text field; a placeholder on a
 *   submit button has no effect.
 */
?>
<form name=addsite action="<?php echo htmlspecialchars($_SERVER['PHP_SELF'], ENT_QUOTES, 'UTF-8'); ?>" onsubmit="return confirmAdd()">
URL:
<span class="ui-widget" style="font-size: 1em;"> <input id="rurl" name="rurl" placeholder="http://www.example.com" style="margin: 0;" size=35 /> </span>
<input type="submit" value="Add" name="submit" style="margin: 0; margin-left: 1em;" />
</form>
<?php echo uiFooter() ?>
</body>
</html>