# File-viewer header (not program code): 104 lines, 3.9 KiB, Python
# import webdriver
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


# Cities to search; each one is combined with `state` to form the location
# part of the search URL.
Cities = [
    "Dallas",
    "Fort Worth",
    "Arlington",
    "Plano",
    "Irving",
    "Denton",
    "Richardson",
    "Grapevine",
]
state = "tx"
baseurl = "https://www.apartments.com/"  # base URL (ends with a slash)
search = "/2-bedrooms-under-1500"  # hard-coded search terms (starts with a slash)

# Search filters. For the numeric entries a value of 0 means "not set".
search_dict = {
    "min_rent": 0,
    "max_rent": 1500,
    "min_bed": 0,
    "max_bed": 2,
    # Property types to include.
    "apartments": True,
    "houses": False,
    "condos": False,
    "townhomes": False,
    "mid": "",  # move-in date
    "bathroom": 1,  # 0-3
    "new": True,
}


def genSearch(city, state, search_dict):
    """Build the relative search path for one city from the filter settings.

    The path has the shape ``[types/]city-state[/filters][/new/]``. It does
    not include the site's base URL; the caller has to prepend it.

    Args:
        city: City name. Whitespace is replaced by hyphens and the name is
            lower-cased, so "Fort Worth" becomes "fort-worth".
        state: State code, used as given.
        search_dict: Filter settings. Keys read (all optional; a missing or
            falsy value means "not set"): ``apartments``, ``houses``,
            ``condos``, ``townhomes``, ``min_bed``, ``max_bed``,
            ``bathroom``, ``min_rent``, ``max_rent``, ``new``.

    Returns:
        The search path as a string. It ends with ``/new/`` when
        ``search_dict['new']`` is truthy and has no trailing slash otherwise.
    """
    # Property types: a type prefix is only emitted when some, but not all,
    # of the four types are selected.
    type_keys = ("apartments", "houses", "condos", "townhomes")
    selected = [key for key in type_keys if search_dict.get(key)]
    prefix = ""
    if selected and len(selected) < len(type_keys):
        prefix = "-".join(selected) + "/"

    # Location: a raw space is not valid inside a URL path, so multi-word
    # city names are hyphenated; lower-casing keeps the slug consistent with
    # the lower-case state code.
    city_slug = "-".join(city.lower().split())
    path = f"{prefix}{city_slug}-{state}"

    filters = []

    # Bedrooms
    min_bed = search_dict.get("min_bed")
    max_bed = search_dict.get("max_bed")
    if min_bed and max_bed:
        filters.append(f"{min_bed}-to-{max_bed}-bedrooms")
    elif min_bed:
        filters.append(f"min-{min_bed}-bedrooms")
    elif max_bed:
        filters.append(f"max-{max_bed}-bedrooms")

    # Bathrooms
    bathroom = search_dict.get("bathroom")
    if bathroom:
        filters.append(f"{bathroom}-bathrooms")

    # Price range
    min_rent = search_dict.get("min_rent")
    max_rent = search_dict.get("max_rent")
    if min_rent and max_rent:
        filters.append(f"{min_rent}-to-{max_rent}")
    elif min_rent:
        filters.append(f"over-{min_rent}")
    elif max_rent:
        filters.append(f"under-{max_rent}")

    # All filters share one hyphen-joined path segment.
    if filters:
        path = f"{path}/{'-'.join(filters)}"

    return f"{path}/new/" if search_dict.get("new") else path


def _scrape_city(driver, url):
    """Load *url* in *driver* and return (name, address, link) per listing.

    The three element lists are matched up by position, so the result is as
    long as the shortest of them.
    """
    driver.get(url)
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    titles = soup.find_all('span', class_='js-placardTitle title')
    addresses = soup.find_all('div', class_='property-address js-url')
    # Each property-information div is searched for its first link.
    info_blocks = soup.find_all('div', class_="property-information")

    links = []
    for block in info_blocks:
        anchor = block.find('a', href=True)
        # Keep a placeholder when a block has no link so the lists stay
        # aligned instead of raising IndexError/KeyError.
        links.append(anchor['href'] if anchor is not None else "")

    return [
        (title.get_text(strip=True), address.get_text(strip=True), link)
        for title, address, link in zip(titles, addresses, links)
    ]


def main():
    """Search every city in ``Cities`` and print name, address and link."""
    # Configure the WebDriver (optional Chrome settings, currently disabled)
    #chrome_options = Options()
    #chrome_options.add_argument("disable-extensions")
    #chrome_options.add_argument("disable-gpu")
    #chrome_options.add_argument("headless")

    driver = webdriver.Chrome()
    try:
        for city in Cities:
            # genSearch returns a relative path; the browser needs the
            # absolute URL.
            url = urljoin(baseurl, genSearch(city, state, search_dict))
            for name, addr, link in _scrape_city(driver, url):
                print(name + ", ", addr + ", ", link)
    finally:
        # Always shut the browser down, even if a page fails to load.
        driver.quit()


if __name__ == "__main__":
    main()