Skip to content

Commit

Permalink
Separate sanitization and add correctable concern
Browse files Browse the repository at this point in the history
This is so that we can explicitly declare email addresses for companies
that have been manually verified against privacy policies.
  • Loading branch information
nshki committed Jun 11, 2024
1 parent ace2e8f commit 9e2b53b
Show file tree
Hide file tree
Showing 6 changed files with 294 additions and 20 deletions.
2 changes: 1 addition & 1 deletion app/mailers/deletion_request_mailer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def deletion_request(serialized_deletion_request)
password: @deletion_request.smtp_config.password,
address: @deletion_request.smtp_config.address,
port: @deletion_request.smtp_config.port,
authentication: :login
authentication: @deletion_request.smtp_config.authentication
}

mail \
Expand Down
20 changes: 2 additions & 18 deletions app/models/company.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
class Company < ApplicationRecord
include CaliforniaDataBrokersRequestable
include DataBrokersWatchRequestable
include Sanitizable
include EmailCorrectable # Important to be after `Sanitizable`

CATEGORIES = {
california_data_broker: "california_data_broker",
Expand All @@ -29,24 +31,6 @@ class Company < ApplicationRecord
validates :name, presence: true
validates :website, presence: true, uniqueness: true

before_save :domainify_website!
before_save :downcase_email!

# Normalizes `website` down to a bare, lowercase domain (no scheme, path, or leading
# "www.") to better enforce uniqueness.
#
# Scheme-less values such as "www.example.com/about" are re-parsed with a leading "//"
# so the host can still be extracted. Values that cannot be parsed as a URI are kept
# (trimmed and downcased) instead of raising in the middle of a save. A nil website is
# left untouched.
#
# @return [void]
def domainify_website!
  return if website.nil?

  raw_website = website.strip
  host =
    begin
      # Without a scheme the parser sees only a path and `host` is nil; prefixing "//"
      # makes it treat the value as an authority.
      URI(raw_website).host || URI("//#{raw_website}").host
    rescue URI::InvalidURIError
      nil
    end

  # Hosts are case-insensitive, so downcase before removing a leading "www.".
  # `\A` anchors to the start of the string, whereas `^` also matches after a newline.
  self.website = (host || raw_website).downcase.sub(/\Awww\./, "")
end

# Normalizes the email to lowercase before saving.
#
# Assigns a downcased copy rather than mutating in place, so a frozen string does not
# raise. A nil email is left untouched.
#
# @return [void]
def downcase_email!
  return if email.nil?

  self.email = email.downcase
end

# Generates a human-readable source from which this company was gathered.
#
# @return [String]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,17 @@ module Company::DataBrokersWatchRequestable
end

class_methods do
# Attempts to fetch and update all companies available via the DataBrokersWatch API.
#
# @return [void]
def update_data_brokers_watch_companies
response = Net::HTTP.get(API_URI)
response_json = JSON.parse(response)
companies = response_json.dig("DataBrokers")

companies.each do |company|
email = company.dig("Emails").split(";").first
emails = company.dig("Emails").split(";")
email = Company.most_likely_email(emails)
name = company.dig("Company Name") || company.dig("Domain")
website = company.dig("Domain")
next if email.blank? || name.blank? || website.blank?
Expand Down
228 changes: 228 additions & 0 deletions app/models/concerns/company/email_correctable.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
module Company::EmailCorrectable
extend ActiveSupport::Concern

# Maps a company's website domain to the contact email that has been manually verified
# for it. When an email's domain differs from its website key, it is because the website
# resolved to a different domain.
VERIFIED_EMAILS_BY_WEBSITE = {
# Bounce list. The previous email for each of these sites from sources were invalid,
# so we're using the emails listed on each site's privacy policy.
"0ptimus.com" => "[email protected]", # https://0ptimus.com/privacy-policy/
"180bytwo.com" => "[email protected]", # https://anteriad.com/privacy-policy
"adttribution.com" => "[email protected]", # https://adttribution.com/privacy-policy.html
"alc.com" => "[email protected]", # https://adstradata.com/privacy-policy/
"altisource.com" => "[email protected]", # https://www.altisource.com/Privacy-Policy/
"biscred.com" => "[email protected]", # https://www.biscred.com/privacy-policy
"carevoyance.com" => "[email protected]", # https://www.definitivehc.com/privacy-center/notices
"catalina.com" => "[email protected]", # https://www.catalina.com/legal
"cbcinnovis.com" => "[email protected]", # https://www.factualdata.com/privacy/privacy-policy
"compactlists.com" => "[email protected]", # https://deepsync.com/privacy-policy/
"dataverify.com" => "[email protected]", # https://info.dataverify.com/privacy
"digicenter.com" => "[email protected]", # https://www.collectivedata.io/website-visitor-privacy-policy/
"dotin.us" => "[email protected]", # https://www.opensesame.com/privacy
"easybackgrounds.com" => "[email protected]", # https://ghrr.com/privacy-policy
"enginemediaexchange.com" => "[email protected]", # https://cadent.tv/website-privacy-policy/
"entelo.com" => "[email protected]", # https://www.entelo.com/legal/privacy
"infortal.com" => "[email protected]", # https://infortal.com/privacy-policy/
"kbmg.com" => "[email protected]", # https://www.vml.com/privacy-policy
"killi.io" => "[email protected]", # https://www.reklaimyours.com/privacy-policy
"localpages.com" => "[email protected]", # https://localpages.com/privacy
"lscmarketinggroup.com" => "[email protected]", # https://lscmarketinggroup.com/privacy-policy/
"monocl.com" => "[email protected]", # https://www.definitivehc.com/privacy-center/notices
"nielsen.com" => "[email protected]", # https://www.nielsen.com/legal/privacy-principles/
"paramountdirectmarketing.com" => "[email protected]", # https://paramountdirectmarketing.com/privacy-policy.php
"paramountlists.com" => "[email protected]", # https://paramountdirectmarketing.com/privacy-policy.php
"possiblenow.com" => "[email protected]", # https://www.possiblenow.com/privacy-statement
"proximaplatform.com" => "[email protected]", # https://www.proximaplatform.com/ccpa
"quorum.us" => "[email protected]", # https://www.quorum.us/privacy-policy/
"rpmleader.com" => "[email protected]", # https://rpmleader.com/privacy-policy
"simiocloud.com" => "[email protected]", # https://simiocloud.com/simiocloud-products-services-privacy-policy/
"smarteinc.com" => "[email protected]", # https://www.smarte.pro/privacy-policy/english
"sterling.ai" => "[email protected]", # https://sterling.ai/privacy-policy/
"sterlingstrategies.co" => "[email protected]", # https://sterling.ai/privacy-policy/
"superpages.com" => "[email protected]", # https://www.thryv.com/privacy/
"valassis.com" => "[email protected]", # https://www.vericast.com/privacy-policy-2023/
"yobi.ai" => "[email protected]", # https://yobi.ai/privacy-policy/

# General list. The previous email for each of these sites pointed toward a specific
# individual working at these companies, but those aren't reliable especially if
# there's employee churn. This, again, uses the emails listed on each site's privacy
# policy.
"4-eyes.ai" => "[email protected]", # https://4-eyes.ai
"accucomcorp.com" => "[email protected]", # https://www.infopay.com/privacy
"achcoop.com" => "[email protected]", # https://www.achcoop.com/privacy-policy
"acxiom.com" => "[email protected]", # https://www.acxiom.com/privacy/privacy-policy-www-acxiom-com/
"advantagesolutions.net" => "[email protected]", # https://advantagesolutions.net/privacy-policy/
"agrgroupinc.com" => "[email protected]", # https://www.agrgroupinc.com/
"altairdata.com" => "[email protected]", # https://www.altairdata.com/altair-privacy-policy/
"anteriad.com" => "[email protected]", # https://anteriad.com/privacy-policy
"applecart.co" => "[email protected]", # https://www.applecart.co/privacy
"arrakis.ai" => "[email protected]", # https://www.arrakis.ai/arrakis-privacy-policy
"astoriacompany.com" => "[email protected]", # https://astoriacompany.com/privacy-policy/
"attomdata.com" => "[email protected]", # https://www.attomdata.com/privacy/
"audigent.com" => "[email protected]", # https://audigent.com/privacypolicy/
"autoweb.com" => "[email protected]", # https://www.autoweb.com/
"backgroundchecks.com" => "[email protected]", # https://www.backgroundchecks.com/privacy-policy
"backgroundsonline.com" => "[email protected]", # https://clients.backgroundsonline.com/policies/privacy
"bigdbm.com" => "[email protected]", # https://bigdbm.com/privacy-policy/
"bookyourdata.com" => "[email protected]", # https://www.bookyourdata.com/privacy-policy
"brightcheck.com" => "[email protected]", # https://brightcheck.com/privacy-policy/
"catalist.us" => "[email protected]", # https://catalist.us/privacy-policy/
"censia.com" => "[email protected]", # https://www.censia.com/privacy-policy/
"cicredit.com" => "[email protected]", # https://www.ciccredit.com/information-security-policy-and-guideline/
"claritas.com" => "[email protected]", # https://claritas.com/privacy-legal/
"clickagy.com" => "[email protected]", # https://www.clickagy.com/privacy/
"coleinformation.com" => "[email protected]", # https://coleinformation.com/contact-us/
"compile.com" => "[email protected]", # https://www.compile.com/privacy/
"completemailinglists.com" => "[email protected]", # https://completemailinglists.com/privacy-policy
"completemedicallists.com" => "[email protected]", # https://completemedicallists.com/privacy.php
"connectedinvestors.com" => "[email protected]", # https://connectedinvestors.com/content/privacy-policy
"connextdigital.com" => "[email protected]", # https://connextdigital.com/privacy-policy/
"contentgine.com" => "[email protected]", # https://legal.pharosiq.com/privacy-policy
"costar.com" => "[email protected]", # https://www.costar.com/about/privacy-notice
"crosspixel.net" => "[email protected]", # https://crosspixel.net/privacy-policy/
"dataaxlenonprofit.com" => "[email protected]", # https://www.dataaxlenonprofit.com/privacy-policy/
"datadelivers.com" => "[email protected]", # https://datadelivers.com/privacy-policy/
"datadirectmarketing.com" => "[email protected]", # https://datadirectmarketing.com/
"datasys.com" => "[email protected]", # https://datasys.com/privacy-policy
"deloitte.com" => "[email protected]", # https://www2.deloitte.com/us/en/footerlinks1/privacy.html
"deluxe.com" => "[email protected]", # https://www.deluxe.com/policy/privacy/
"digdevdirect.com" => "[email protected]", # https://www.digdevdirect.com/privacy-policy/
"disqus.com" => "[email protected]", # https://disqus.com/privacy-policy/
"emerges.com" => "[email protected]", # https://www.emerges.com/Privacy-Policy_ep_42-1.html
"evorra.com" => "[email protected]", # https://evorra.com/privacy-data-policies/privacy-policy/
"fetcher.ai" => "[email protected]", # https://fetcher.ai/privacy
"findem.ai" => "[email protected]", # https://www.findem.ai/privacy-policy
"finthrive.com" => "[email protected]", # https://finthrive.com/privacy-policy
"firstorion.com" => "[email protected]", # https://firstorion.com/first-orion-global-privacy-and-compliance-dashboard/
"fourthwall.tv" => "[email protected]", # https://www.fourthwall.tv/privacy-fourthwall
"fusedleads.com" => "[email protected]", # https://www.fusedleads.com/contact-us/
"fushiamedia.com" => "[email protected]", # https://fushiamedia.com/privacy
"gladiknow.com" => "[email protected]", # https://gladiknow.com/privacy-policy
"harmonresearch.com" => "[email protected]", # https://www.harmonresearch.com/privacy-policy
"healthcare.com" => "[email protected]", # https://www.healthcare.com/privacy-policy
"healthwisedata.com" => "[email protected]", # https://www.healthwisedata.com/privacy
"homeownersmarketingservices.com" => "[email protected]", # https://homeownersmarketingservices.com/privacy-policy/
"hunter.io" => "[email protected]", # https://hunter.io/privacy-policy
"idengine.com" => "[email protected]", # https://idengine.com/privacy-policy/
"inmarket.com" => "[email protected]", # https://inmarket.com/privacy/
"intentgine.com" => "[email protected]", # https://intentgine.com/privacy-policy/
"intentiq.com" => "[email protected]", # https://www.intentiq.com/technology-privacy-policy/
"intentmacro.com" => "[email protected]", # https://intentmacro.com/privacy-policy/
"internetbrands.com" => "[email protected]", # https://www.internetbrands.com/privacy/privacy-main
"irys.us" => "[email protected]", # https://irys.us/privacy-policy
"jdmlistservices.com" => "[email protected]", # https://jdmlistservices.com/privacy-policy
"jkidconsulting.com" => "[email protected]", # https://jkidconsulting.com/
"kalibrate.com" => "[email protected]", # https://kalibrate.com/privacy-policy/
"keono.com" => "[email protected]", # https://keono.com/privacy-policy/
"komodohealth.com" => "[email protected]", # https://www.komodohealth.com/privacy-notice
"l2-data.com" => "[email protected]", # https://l2-data.com/l2-privacy-policy/
"lciinc.com" => "[email protected]", # https://g2risksolutions.com/privacy-notice/
"lead.co" => "[email protected]", # https://lead.co/privacy
"lead411.com" => "[email protected]", # https://www.lead411.com/privacy-policy/
"lighthouselist.com" => "[email protected]", # https://www.lighthouselist.com/privacy-policy
"lionsharemarketing.com" => "[email protected]", # https://www.lionsharemarketing.com/privacy-policy/
"listservicedirect.com" => "[email protected]", # https://listservicedirect.com/privacy-policy/
"locatesmarter.com" => "[email protected]", # https://locatesmarter.com/privacy-policy/
"lsmapps.com" => "[email protected]", # https://www.lsmapps.com/privacy-policy
"m1-data.com" => "[email protected]", # https://m1-data.com/privacy-policy/
"marketforcecorp.com" => "[email protected]", # https://marketforcecorp.com/privacy-policy/
"mchdata.com" => "[email protected]", # https://www.mchdata.com/about/privacy-policy
"mediasourcesolutions.com" => "[email protected]", # https://www.mediasourcesolutions.com/contact-us/
"medprosystems.com" => "[email protected]", # https://www.medprosystems.com/privacy-policy/
"minervadata.xyz" => "[email protected]", # https://realtors.minervadata.xyz/privacy-policy
"mobilewalla.com" => "[email protected]", # https://www.mobilewalla.com/website-privacy
"modfxlabs.com" => "[email protected]", # https://modfxlabs.com/
"mrginc.com" => "[email protected]", # https://www.mrginc.com/privacy-policy
"multimedialists.com" => "[email protected]", # https://multimedialists.com/privacy-policy/
"myfico.com" => "[email protected]", # https://www.myfico.com/policy/privacy-policy
"newfrontierdata.com" => "[email protected]", # https://newfrontierdata.com/privacy-policy/
"nextwavemarketingstrategies.com" => "[email protected]", # https://agedleadstore.com/privacy-policy/
"owneriq.com" => "[email protected]", # https://www.owneriq.com/privacy-notice
"pacificeast.com" => "[email protected]", # https://www.pacificeast.com/privacy-policy/
"partnerscredit.com" => "[email protected]", # https://partnerscredit.com/privacy-policy.html
"popacta.com" => "[email protected]", # https://popacta.com/privacy-policy/
"porchgroupmedia.com" => "[email protected]", # https://porchgroupmedia.com/privacy-policy/
"quantcast.com" => "[email protected]", # https://legal.quantcast.com/#products-and-services-privacy-policy
"quinstreet.com" => "[email protected]", # https://www.quinstreet.com/privacy-notice/
"radaris.com" => "[email protected]", # https://radaris.com/page/privacy
"realeflow.com" => "[email protected]", # https://cdn.realeflow.com/privacy-policy.pdf
"realsourcedata.com" => "[email protected]", # https://www.realsourcedata.com/privacy-policy
"refinition.com" => "[email protected]", # https://refinition.com/privacy-policy/
"resonate.com" => "[email protected]", # https://www.resonate.com/privacy-policy/
"revealmobile.com" => "[email protected]", # https://revealmobile.com/privacy/
"sheerid.com" => "[email protected]", # https://www.sheerid.com/global-privacy-policy/
"siteimpact.com" => "[email protected]", # https://siteimpact.com/privacypolicy.php
"slashdotmedia.com" => "[email protected]", # https://slashdotmedia.com/privacy-statement/
"slintel.com" => "[email protected]", # https://6sense.com/privacy-policy/
"smartmove.us" => "[email protected]", # https://www.smartmove.us/company/privacy
"socialgist.com" => "[email protected]", # https://socialgist.com/privacy-policy/
"statlistics.com" => "[email protected]", # https://statlistics.com/privacy-policy/
"studentclearinghouse.org" => "[email protected]", # https://www.studentclearinghouse.org/privacy-policy/
"telephonelists.biz" => "[email protected]", # https://www.telephonelists.biz/privacy-policy-of-telephonelists-biz/
"thebridgecorp.com" => "[email protected]", # https://www.thebridgecorp.com/privacy-policy/
"thedma.org" => "[email protected]", # https://www.ana.net/privacy.html
"towerdata.com" => "[email protected]", # https://atdata.com/privacy-policy/
"traversedata.com" => "[email protected]", # https://www.traversedata.com/index.html%3Fp=123.html
"verisk.com" => "[email protected]", # https://www.verisk.com/company/contact/
"viantinc.com" => "[email protected]", # https://www.viantinc.com/wp-content/uploads/2024/01/Viant-Privacy-Policy-Platform-1.29.2024-FINAL.pdf
"visualvisitor.com" => "[email protected]", # https://files.elfsightcdn.com/a89e75f3-d14f-4297-bccb-e21fd3d60d03/f5dc35df-4c04-4421-8611-f07678e579bf/Privacy-Policy-v-3-2023-B.pdf
"wisdommediagroupllc.com" => "[email protected]" # https://wisdommediagroupllc.com/privacy.php
}.freeze

# Stack-ranked list of keywords that indicate the most relevant email address for a
# list of emails. Order matters: `most_likely_email` walks this list from top to bottom
# and returns the first email that matches a keyword, so earlier entries take priority.
#
# NOTE(review): keywords are matched as substrings of the part before the "@", so any
# address containing "dataprivacy" is already caught by "privacy" above it. The
# "dataprivacy" entry looks redundant — confirm before removing.
LIKELY_EMAIL_KEYWORDS = %w[
privacy
dataprivacy
dpo
legal
compliance
consumer
support
ccpa
data
help
contact
info
admin
hello
].freeze

included do
  # Companies whose website has an entry in VERIFIED_EMAILS_BY_WEBSITE.
  scope :with_corrected_email, -> { where(website: VERIFIED_EMAILS_BY_WEBSITE.keys) }

  # Swap in the verified email, when one is known, on every save.
  before_save :correct_email_if_known!
end

class_methods do
# Out of a list of emails, returns the one that is most likely to be the correct
# one, based on the stack-ranked LIKELY_EMAIL_KEYWORDS.
#
# Candidates are stripped of surrounding whitespace and blank entries are dropped,
# because the list may come straight from splitting a ";"-separated string. Keywords
# are matched case-insensitively against the part of the address before the "@".
#
# @param emails [Array<String>, nil]
# @return [String, nil] the best-ranked match, the first candidate when no keyword
#   matches, or nil when there are no candidates
def most_likely_email(emails)
  candidates = Array(emails).map { |email| email.to_s.strip }.reject(&:empty?)

  LIKELY_EMAIL_KEYWORDS.each do |keyword|
    # Case-insensitive because emails are not necessarily downcased yet at this point.
    pattern = /#{Regexp.escape(keyword)}.*@/i
    match = candidates.find { |email| email.match?(pattern) }
    return match if match
  end

  candidates.first
end
end

# Overrides the company's email with the verified address for its website, when one
# exists in VERIFIED_EMAILS_BY_WEBSITE. This guards against saving records whose
# email is:
#
# - Not valid
# - Not the right point person
# - Not the right department
#
# Websites without a verified entry keep whatever email they already have.
#
# @return [void]
def correct_email_if_known!
  known_email = VERIFIED_EMAILS_BY_WEBSITE[website]
  return unless known_email.present?

  self.email = known_email
end
end
31 changes: 31 additions & 0 deletions app/models/concerns/company/sanitizable.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
module Company::Sanitizable
extend ActiveSupport::Concern

included do
  # Registered in a single call; the callbacks still run in the order listed.
  before_save :strip_name!, :domainify_website!, :downcase_email!
end

# Removes leading and trailing whitespace from the name.
#
# Assigns a stripped copy rather than mutating in place, so a frozen string does not
# raise. A nil name is left untouched.
#
# @return [void]
def strip_name!
  return if name.nil?

  self.name = name.strip
end

# Normalizes `website` down to a bare, lowercase domain (no scheme, path, or leading
# "www.") to better enforce uniqueness.
#
# Scheme-less values such as "www.example.com/about" are re-parsed with a leading "//"
# so the host can still be extracted. Values that cannot be parsed as a URI are kept
# (trimmed and downcased) instead of raising in the middle of a save. A nil website is
# left untouched.
#
# @return [void]
def domainify_website!
  return if website.nil?

  raw_website = website.strip
  host =
    begin
      # Without a scheme the parser sees only a path and `host` is nil; prefixing "//"
      # makes it treat the value as an authority.
      URI(raw_website).host || URI("//#{raw_website}").host
    rescue URI::InvalidURIError
      nil
    end

  # Hosts are case-insensitive, so downcase before removing a leading "www.".
  # `\A` anchors to the start of the string, whereas `^` also matches after a newline.
  self.website = (host || raw_website).downcase.sub(/\Awww\./, "")
end

# Normalizes the email to lowercase before saving.
#
# Assigns a downcased copy rather than mutating in place, so a frozen string does not
# raise. A nil email is left untouched.
#
# @return [void]
def downcase_email!
  return if email.nil?

  self.email = email.downcase
end
end
Loading

0 comments on commit 9e2b53b

Please sign in to comment.