Skip to content

Commit

Permalink
uploaded files
Browse files Browse the repository at this point in the history
  • Loading branch information
itsekiri-tatashe committed Sep 7, 2022
0 parents commit fd3eced
Show file tree
Hide file tree
Showing 78 changed files with 366,695 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.vscode/
__pycache__/
263 changes: 263 additions & 0 deletions 1 Web Scraper.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,263 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "WOsmoXE7p9nm",
"metadata": {
"id": "WOsmoXE7p9nm"
},
"source": [
"# Web Scraper file\n",
"\n",
"The data is extracted from the real estate website PropertyPro.NG.\n",
"\n",
"After exploring the website, I identified a set of features that most houses/listings share.\n",
"\n",
"We first collect the data for these locations: Ikeja, Ikoyi, Lekki and Victoria Island.\n",
"\n",
"Then, for each location, we collect the following information:\n",
"* Title of the home/listing, from which we can derive:\n",
"    * whether the property is a duplex, apartment, land or flat\n",
"___\n",
"* Location of the home, from which we can derive:\n",
"    * the city and the neighborhood/area where the home is located\n",
"___\n",
"* Price of the home: scraped as a string, to be converted into an integer\n",
"___\n",
"* If home is Newly Built (True/ False)\n",
"* If home is Serviced (True/ False)\n",
"* If home is Furnished (True/ False)\n",
"___\n",
"* No of Bedrooms (String convert to integer)\n",
"___\n",
"* No of Bathrooms (String convert to integer)\n",
"___\n",
"* No of Toilets (String convert to integer)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "lkuqHn6kp9nv",
"metadata": {
"id": "lkuqHn6kp9nv"
},
"outputs": [],
"source": [
"# %pip install beautifulsoup4 lxml requests --quiet"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "0Qy4A-1Yp9nx",
"metadata": {
"id": "0Qy4A-1Yp9nx"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\EMEH\\Anaconda3\\lib\\site-packages\\requests\\__init__.py:91: RequestsDependencyWarning: urllib3 (1.26.8) or chardet (3.0.4) doesn't match a supported version!\n",
" RequestsDependencyWarning)\n"
]
}
],
"source": [
"# Import the required libraries\n",
"from bs4 import BeautifulSoup\n",
"import requests\n",
"import csv"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "zZkBJYC7p9ny",
"metadata": {
"id": "zZkBJYC7p9ny"
},
"outputs": [],
"source": [
"from requests.adapters import HTTPAdapter\n",
"from requests.packages.urllib3.util.retry import Retry"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "QpPNtiFMp9nz",
"metadata": {
"id": "QpPNtiFMp9nz"
},
"outputs": [],
"source": [
"# Use a single Session whose adapter retries failed connection attempts\n",
"# (up to 3 times, with a backoff between tries), so a brief network drop\n",
"# does not abort the scrape.\n",
"session = requests.Session()\n",
"retry = Retry(connect=3, backoff_factor=0.5)\n",
"adapter = HTTPAdapter(max_retries=retry)\n",
"session.mount('http://', adapter)\n",
"session.mount('https://', adapter)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "wPSVrDGvp9nz",
"metadata": {
"id": "wPSVrDGvp9nz"
},
"outputs": [],
"source": [
"# Save in this file\n",
"# newline=\"\" lets the csv module control line endings (otherwise every row is\n",
"# followed by a blank line on Windows); utf-8 keeps non-ASCII listing text\n",
"# from raising under a locale-default encoding.\n",
"csv_file = open(\"ikoyi.csv\", \"w\", newline=\"\", encoding=\"utf-8\")"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "1na0vgB0p9n0",
"metadata": {
"id": "1na0vgB0p9n0"
},
"outputs": [],
"source": [
"csv_writer = csv.writer(csv_file)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "6l9nahUbp9n1",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "6l9nahUbp9n1",
"outputId": "f174246d-9639-4be6-e70e-a4622271f444"
},
"outputs": [
{
"data": {
"text/plain": [
"89"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"csv_writer.writerow([\"Title\", \"Location\", \"Price\",\"Serviced\", \"Newly Built\", \"Furnished\", \"Bedrooms\", \"Bathrooms\", \"Toilets\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "Vig6nktLp9n3",
"metadata": {
"id": "Vig6nktLp9n3"
},
"outputs": [],
"source": [
"# Get Location data: walk every results page and write one CSV row per listing.\n",
"for page in range(0, 484):\n",
"    # Replace the ikoyi with the city you want e.g. surulere, ajah, ikeja, lekki\n",
"    url = f\"https://www.propertypro.ng/property-for-sale/in/lagos/ikoyi?page={page}\"\n",
"    try:\n",
"        # The timeout keeps one stalled connection from hanging the whole run.\n",
"        response = session.get(url, timeout=30)\n",
"    except requests.RequestException as error:\n",
"        # Report and move on so one bad page does not discard the rest of the scrape.\n",
"        print(f\"Skipping page {page}: {error}\")\n",
"        continue\n",
"    if not response.ok:\n",
"        # Do not parse an error page as if it were a results page.\n",
"        print(f\"Skipping page {page}: HTTP {response.status_code}\")\n",
"        continue\n",
"    soup = BeautifulSoup(response.text, \"lxml\")\n",
"\n",
"    for listing in soup.find_all(\"div\", class_=\"single-room-sale listings-property\"):\n",
"        title_tag = listing.find(\"h3\", class_=\"listings-property-title2\")\n",
"        headings = listing.find_all(\"h4\")\n",
"        price_info = listing.find(\"h3\", class_=\"listings-price\")\n",
"        price_spans = price_info.find_all(\"span\") if price_info else []\n",
"        if title_tag is None or len(headings) < 2 or len(price_spans) < 2:\n",
"            # Card is missing its title, location or price: skip it instead of crashing.\n",
"            continue\n",
"\n",
"        title = title_tag.text\n",
"        location = headings[1].text  # the second <h4> holds the location text\n",
"        price = price_spans[1].text  # the second <span> holds the amount\n",
"\n",
"        # Each flag is 1 when the listing links to the matching filter page, else 0.\n",
"        is_new = int(listing.find(\"a\", href=\"/property-for-sale/is-new\") is not None)\n",
"        serviced = int(listing.find(\"a\", href=\"/property-for-sale/is-serviced\") is not None)\n",
"        furnished = int(listing.find(\"a\", href=\"/property-for-sale/is-furnished\") is not None)\n",
"\n",
"        more_info = listing.find(\"div\", class_=\"fur-areea\")\n",
"        room_counts = [span.text for span in more_info.find_all(\"span\")] if more_info else []\n",
"        # Pad with empty strings so a listing with fewer than three spans still yields a full row.\n",
"        bed, bathroom, toilet = (room_counts + [\"\", \"\", \"\"])[:3]\n",
"\n",
"        csv_writer.writerow([title, location, price, serviced, is_new, furnished, bed, bathroom, toilet])\n",
"print(\"Done\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "HdZmwO_dp9n5",
"metadata": {
"id": "HdZmwO_dp9n5"
},
"outputs": [],
"source": [
"csv_file.close()"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "1 House Web Scraper.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3.9.12 ('base')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"vscode": {
"interpreter": {
"hash": "397fa5c989aefacd265b089cd9fa90a007575229207dba4eead3a1aa550249df"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading

0 comments on commit fd3eced

Please sign in to comment.