Skip to content

Commit

Permalink
chore: change the logic to not break jobs when a domain has a network…
Browse files Browse the repository at this point in the history
… issue (temporary or permanent)
  • Loading branch information
sneko committed Feb 22, 2024
1 parent 286cef4 commit c75bab2
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 6 deletions.
54 changes: 50 additions & 4 deletions src/features/domain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import { PeerCertificate, TLSSocket } from 'tls';
import z from 'zod';

import { downloadFile } from '@etabli/src/common';
import { getWebsiteData, guessWebsiteNameFromPageTitles } from '@etabli/src/features/website';
import { getWebsiteData, getWebsiteDataResponse, guessWebsiteNameFromPageTitles } from '@etabli/src/features/website';
import { LitePeerCertificateSchema } from '@etabli/src/models/entities/certificate';
import { BusinessDomainError, unexpectedDomainRedirectionError } from '@etabli/src/models/entities/errors';
import { LiteRawDomainSchema, LiteRawDomainSchemaType } from '@etabli/src/models/entities/raw-domain';
Expand Down Expand Up @@ -313,7 +313,20 @@ export async function updateRobotsTxtOnDomains() {
try {
const rootUrl = new URL(`https://${rawDomain.name}`);
const robotsUrl = `${rootUrl.toString()}robots.txt`;
const result = await fetch(robotsUrl);

let result: Response;
try {
result = await fetch(robotsUrl);
} catch (error) {
if (error instanceof Error) {
handleReachabilityError(error);

// Skip this one to perform other domains
continue;
} else {
throw error;
}
}

// We want to prevent redirection on another domain to keep integrity but we let pathname redirection pass, so looking at domain only
const resultingUrl = new URL(result.url);
Expand Down Expand Up @@ -424,6 +437,7 @@ export async function updateWildcardCertificateOnDomains() {
`try to process SSL certificate for domain ${rawDomain.name} (${rawDomain.id}) ${formatArrayProgress(rawDomainIndex, rawDomains.length)}`
);

let toSkip: boolean = false;
const certificate = await new Promise<PeerCertificate | null>((resolve, reject) => {
const request = https.request(
{
Expand All @@ -445,6 +459,8 @@ export async function updateWildcardCertificateOnDomains() {
if (error instanceof Error) {
handleReachabilityError(error);

// Skip this one to perform other domains
toSkip = true;
resolve(null);
} else {
throw error;
Expand All @@ -457,6 +473,10 @@ export async function updateWildcardCertificateOnDomains() {
request.end();
});

if (toSkip) {
continue;
}

// The content has no defined structure; we keep the library's format as-is so the main processed values can be debugged if needed
let certificateContent: object | null = null;
if (certificate) {
Expand Down Expand Up @@ -497,7 +517,20 @@ export async function updateWebsiteDataOnDomains() {

try {
const url = new URL(`https://${rawDomain.name}`);
const websiteData = await getWebsiteData(url.toString());

let websiteData: getWebsiteDataResponse;
try {
websiteData = await getWebsiteData(url.toString());
} catch (error) {
if (error instanceof Error) {
handleReachabilityError(error);

// Skip this one to perform other domains
continue;
} else {
throw error;
}
}

if (websiteData.status >= 200 && websiteData.status < 300) {
if (containsHtml(websiteData.html)) {
Expand Down Expand Up @@ -560,7 +593,20 @@ export async function updateWebsiteDataOnDomains() {
// Wait a bit to not flood this website (tiny delay in this loop because it's just the second request to this domain in this iteration)
await sleep(50);

const anotherPageData = await getWebsiteData(anotherPageUrl);
let anotherPageData: getWebsiteDataResponse;
try {
anotherPageData = await getWebsiteData(url.toString());
} catch (error) {
if (error instanceof Error) {
handleReachabilityError(error);

// Skip this one to perform other domains
continue;
} else {
throw error;
}
}

anotherPageTitle = anotherPageData.title;

break;
Expand Down
13 changes: 11 additions & 2 deletions src/utils/request.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
// When it's an error related to the remote server or the connection we skip it since we have no control over it
import * as Sentry from '@sentry/nextjs';

// This should be used close to network calls because it silences errors
// In our case of long-running jobs we want the loop to continue despite network errors because the domain will be fetched again next time
// TODO: in the future we could register the error in the database for specific domains so we can tell the list source to consider removing them if appropriate
export function handleReachabilityError(error: Error) {
if (
![
Expand All @@ -15,8 +19,13 @@ export function handleReachabilityError(error: Error) {
// Hostname/IP does not match certificate's altnames (which makes the certificate invalid)
'ERR_TLS_CERT_ALTNAME_INVALID',
'DEPTH_ZERO_SELF_SIGNED_CERT',
'UNABLE_TO_VERIFY_LEAF_SIGNATURE',
'CERT_HAS_EXPIRED',
].includes((error.cause as any)?.code || '')
) {
throw error;
// Since we do not throw the error, we log it for the record and also notify Sentry so we can keep the list above up to date with the appropriate codes
console.error(error);

Sentry.captureException(error);
}
}

0 comments on commit c75bab2

Please sign in to comment.