1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
20 * Tom Rathborne - tomr@uunet.ca - http://www.uunet.ca/~tomr/
21 * UUNET Canada, April 16, 1995
23 * Rewritten by David Robinson. (drtr ast.cam.ac.uk)
25 * Usage: logresolve [-s filename] [-c] < access_log > new_log
28 * -s filename name of a file to record statistics
29 * -c check the DNS for a matching A record for the host.
33 * To generate meaningful statistics from an HTTPD log file, it's good
34 * to have the domain name of each machine that accessed your site, but
35 * doing this on the fly can slow HTTPD down.
37 * Compiling NCSA HTTPD with the -DMINIMAL_DNS flag turns IP#->hostname
38 * resolution off. Before running your stats program, just run your log
39 * file through this program (logresolve) and all of your IP numbers will
40 * be resolved into hostnames (where possible).
42 * logresolve takes an HTTPD access log (in the COMMON log file format,
43 * or any other format that has the IP number/domain name as the first
44 * field for that matter), and outputs the same file with all of the
45 * domain names looked up. Where no domain name can be found, the IP number is left in.
48 * To minimize impact on your nameserver, logresolve has its very own
49 * internal hash-table cache. This means that each IP number will only
50 * be looked up the first time it is found in the log file.
52 * The -c option causes logresolve to apply the same check as httpd
53 * compiled with -DMAXIMUM_DNS; after finding the hostname from the IP
54 * address, it looks up the IP addresses for the hostname and checks
55 * that one of these matches the original address.
71 #if APR_HAVE_NETINET_IN_H
72 #include <netinet/in.h>
77 #if APR_HAVE_SYS_SOCKET_H
78 #include <sys/socket.h>
80 #if APR_HAVE_ARPA_INET_H
81 #include <arpa/inet.h>
/* Forward declarations of the file-local helpers defined further down. */
84 static void cgethost(struct in_addr ipnum, char *string, int check);
85 static int get_line(char *s, int n);
86 static void stats(FILE *output);
/*
 * Make NO_ADDRESS an alias of NO_DATA.
 * NOTE(review): any preprocessor condition guarding this define is not
 * visible in this listing -- confirm which platforms it applies to.
 */
89 #define NO_ADDRESS NO_DATA
93 /* maximum line length */
98 /* maximum length of a domain name */
103 /* number of buckets in cache hash table */
/*
 * strdup - build a malloc'ed copy of str in dup.
 *
 * NOTE(review): the local declaration, the statement executed when malloc
 * fails, and the final return are not visible in this listing; only the
 * allocation and the copy are documented here.
 */
107 char *strdup (const char *str)
/* Reserve strlen(str) + 1 bytes so the terminating NUL fits as well. */
111 if (!(dup = (char *) malloc(strlen(str) + 1)))
/* strcpy returns its destination, so this assignment leaves dup unchanged. */
113 dup = strcpy(dup, str);
120 * struct nsrec - record of nameservice for cache linked list
122 * ipnum - IP number; hostname - hostname; noname - nonzero if IP number has no
123 * hostname, i.e. hostname=IP number
/*
 * Address this cache record describes; cgethost compares its s_addr with
 * the address being looked up while walking a bucket's chain.
 */
127 struct in_addr ipnum;
134 * statistics - obvious
/*
 * NOTE(review): the two h_errno declarations below presumably sit in
 * alternative preprocessor branches that are not visible in this listing.
 */
139 extern __declspec(dllimport) int h_errno;
141 extern int h_errno; /* some machines don't have this in their headers */
145 /* largest value for h_errno */
147 #define MAX_ERR (NO_ADDRESS)
/* Slot used by cgethost to count h_errno values greater than MAX_ERR. */
148 #define UNKNOWN_ERR (MAX_ERR+1)
/*
 * Slot and noname marker used by cgethost when the forward lookup of a
 * resolved name does not yield the original address.
 */
149 #define NO_REVERSE (MAX_ERR+2)
/*
 * Counters printed by stats() under the labels "Cache hits", "Cache size",
 * "Entries", "Resolves" and "With name" respectively.
 */
151 static int cachehits = 0;
152 static int cachesize = 0;
153 static int entries = 0;
154 static int resolves = 0;
155 static int withname = 0;
/*
 * Failure tallies: indices 0..MAX_ERR are h_errno codes, followed by the
 * UNKNOWN_ERR and NO_REVERSE slots, hence MAX_ERR + 3 elements.
 */
156 static int errors[MAX_ERR + 3];
159 * cgethost - gets hostname by IP address, caching, and adding unresolvable
160 * IP numbers with their IP number as hostname, setting noname flag
/*
 * cgethost - look ipnum up in the nscache hash table, resolving it on a
 * miss, and copy the cached host name into string.
 *
 *   ipnum  - address to translate
 *   string - output buffer; must hold MAXDNAME + 1 bytes
 *   check  - presumably enables the forward-lookup verification below
 *            (the line that tests it is not visible here -- TODO confirm)
 *
 * NOTE(review): many lines of this function are absent from this listing
 * (the embedded numbering skips), including the error exits and the
 * else/brace structure. The comments describe the visible statements only.
 */
163 static void cgethost (struct in_addr ipnum, char *string, int check)
/*
 * current addresses a link (a bucket head or some record's next field),
 * not a record, so the search below ends on the slot holding the match or
 * on the NULL link at the end of the chain.
 */
165 struct nsrec **current, *new;
166 struct hostent *hostdata;
/* Bucket index: the address plus its 8/16/24-bit right shifts, mod BUCKETS. */
169 current = &nscache[((ipnum.s_addr + (ipnum.s_addr >> 8) +
170 (ipnum.s_addr >> 16) + (ipnum.s_addr >> 24)) % BUCKETS)];
/* Follow the chain until the address matches or the chain runs out. */
172 while (*current != NULL && ipnum.s_addr != (*current)->ipnum.s_addr)
173 current = &(*current)->next;
/* Cache miss: build a new record for this address. */
175 if (*current == NULL) {
177 new = (struct nsrec *) malloc(sizeof(struct nsrec));
180 fprintf(stderr, "Insufficient memory\n");
/* Reverse lookup: address -> host entry. */
188 hostdata = gethostbyaddr((const char *) &ipnum, sizeof(struct in_addr),
190 if (hostdata == NULL) {
/*
 * Lookup failed. Codes above MAX_ERR are tallied under UNKNOWN_ERR.
 * NOTE(review): only the upper bound is tested in the visible code;
 * confirm how a negative h_errno would be handled.
 */
191 if (h_errno > MAX_ERR)
192 errors[UNKNOWN_ERR]++;
/* Remember the failure code and use the dotted-quad text as the name. */
195 new->noname = h_errno;
196 name = strdup(inet_ntoa(ipnum));
/* Lookup succeeded: take a private copy of the resolved name. */
200 name = strdup(hostdata->h_name);
204 fprintf(stderr, "Insufficient memory\n");
/*
 * Forward lookup of the resolved name, then scan its address list for an
 * entry equal to ipnum.
 */
207 hostdata = gethostbyname(name);
208 if (hostdata != NULL) {
211 for (hptr = hostdata->h_addr_list; *hptr != NULL; hptr++)
212 if (((struct in_addr *) (*hptr))->s_addr == ipnum.s_addr)
/*
 * hostdata == NULL here means the forward lookup did not confirm the
 * address: report it, mark the record NO_REVERSE, fall back to the
 * dotted-quad text as the name, and count the failure.
 */
217 if (hostdata == NULL) {
218 fprintf(stderr, "Bad host: %s != %s\n", name,
220 new->noname = NO_REVERSE;
222 name = strdup(inet_ntoa(ipnum));
223 errors[NO_REVERSE]++;
/* Store the name; a NULL here means one of the strdup calls failed. */
227 new->hostname = name;
228 if (new->hostname == NULL) {
230 fprintf(stderr, "Insufficient memory\n");
/*
 * Hand the cached name back. strncpy copies at most MAXDNAME bytes and
 * does not terminate a longer source, so the NUL is written explicitly.
 */
237 /* size of string == MAXDNAME +1 */
238 strncpy(string, (*current)->hostname, MAXDNAME);
239 string[MAXDNAME] = '\0';
243 * prints various statistics to output
/*
 * stats - write the run's counters, the per-error tallies and a dump of
 * every cache record to output.
 *
 *   output - stream that receives the report
 *
 * NOTE(review): some lines of this function (braces, local declarations,
 * continuation lines) are absent from this listing.
 */
246 static void stats (FILE *output)
250 struct nsrec *current;
/* One message per errors[] slot, indexed the same way as errors[]. */
251 char *errstring[MAX_ERR + 3];
/* Default every slot, then overwrite the codes that have a specific text. */
253 for (i = 0; i < MAX_ERR + 3; i++)
254 errstring[i] = "Unknown error";
255 errstring[HOST_NOT_FOUND] = "Host not found";
256 errstring[TRY_AGAIN] = "Try again";
257 errstring[NO_RECOVERY] = "Non recoverable error";
/*
 * If NO_ADDRESS and NO_DATA are the same value (an alias for this is
 * defined elsewhere in the file), the second assignment wins.
 */
258 errstring[NO_DATA] = "No data record";
259 errstring[NO_ADDRESS] = "No address";
260 errstring[NO_REVERSE] = "No reverse entry";
/* Summary counters. */
262 fprintf(output, "logresolve Statistics:\n");
264 fprintf(output, "Entries: %d\n", entries);
265 fprintf(output, " With name : %d\n", withname);
266 fprintf(output, " Resolves : %d\n", resolves);
/*
 * Per-error tallies; the visible guards print a line only when its count
 * is nonzero.
 * NOTE(review): no visible line reports errors[NO_RECOVERY] -- confirm
 * whether that omission is intended.
 */
267 if (errors[HOST_NOT_FOUND])
268 fprintf(output, " - Not found : %d\n", errors[HOST_NOT_FOUND]);
269 if (errors[TRY_AGAIN])
270 fprintf(output, " - Try again : %d\n", errors[TRY_AGAIN]);
272 fprintf(output, " - No data : %d\n", errors[NO_DATA]);
273 if (errors[NO_ADDRESS])
274 fprintf(output, " - No address: %d\n", errors[NO_ADDRESS]);
275 if (errors[NO_REVERSE])
276 fprintf(output, " - No reverse: %d\n", errors[NO_REVERSE]);
277 fprintf(output, "Cache hits : %d\n", cachehits);
278 fprintf(output, "Cache size : %d\n", cachesize);
279 fprintf(output, "Cache buckets : IP number * hostname\n");
/* Dump the cache: every bucket, and every record chained in it. */
281 for (i = 0; i < BUCKETS; i++)
282 for (current = nscache[i]; current != NULL; current = current->next) {
283 ipstring = inet_ntoa(current->ipnum);
/* noname == 0: the record is printed with its host name. */
284 if (current->noname == 0)
285 fprintf(output, " %3d %15s - %s\n", i, ipstring,
/*
 * Codes beyond the errstring table get a fixed message; everything else
 * indexes errstring directly.
 * NOTE(review): only the upper bound is checked before indexing; confirm
 * that noname (set from h_errno in cgethost) can never be negative.
 */
288 if (current->noname > MAX_ERR + 2)
289 fprintf(output, " %3d %15s : Unknown error\n", i,
292 fprintf(output, " %3d %15s : %s\n", i, ipstring,
293 errstring[current->noname]);
300 * gets a line from stdin
/*
 * get_line - read one line from stdin into s.
 *
 *   s - destination buffer
 *   n - size limit handed to fgets
 *
 * main() calls this as a loop condition, so a nonzero result means a line
 * was read.
 * NOTE(review): the return statements and the statement that uses cp are
 * not visible in this listing.
 */
303 static int get_line (char *s, int n)
/* fgets yields NULL at end of input or on a read error. */
307 if (!fgets(s, n, stdin))
/* Find the newline fgets kept, if any (presumably to strip it -- confirm). */
309 cp = strchr(s, '\n');
315 int main (int argc, char *argv[])
317 struct in_addr ipnum;
318 char *bar, hoststring[MAXDNAME + 1], line[MAXLINE], *statfile;
321 #if defined(WIN32) || defined(NETWARE)
322 /* If we apr'ify this code, apr_pool_create/apr_pool_destroy
323 * should perform the WSAStartup/WSACleanup for us.
326 WSAStartup(MAKEWORD(2, 0), &wsaData);
331 for (i = 1; i < argc; i++) {
332 if (strcmp(argv[i], "-c") == 0)
334 else if (strcmp(argv[i], "-s") == 0) {
336 fprintf(stderr, "logresolve: missing filename to -s\n");
343 fprintf(stderr, "Usage: logresolve [-s statfile] [-c] < input > output\n");
348 for (i = 0; i < BUCKETS; i++)
350 for (i = 0; i < MAX_ERR + 2; i++)
353 while (get_line(line, MAXLINE)) {
357 if (!apr_isdigit(line[0])) { /* short cut */
362 bar = strchr(line, ' ');
365 ipnum.s_addr = inet_addr(line);
366 if (ipnum.s_addr == 0xffffffffu) {
376 cgethost(ipnum, hoststring, check);
378 printf("%s %s\n", hoststring, bar + 1);
383 #if defined(WIN32) || defined(NETWARE)
387 if (statfile != NULL) {
389 fp = fopen(statfile, "w");
391 fprintf(stderr, "logresolve: could not open statistics file '%s'\n"