dawn_top30_headlines.csv — 30 rows × 3 cols (Headline, URL, Category)
psx_indices.csv + psx_mainboard.csv
qs_top50.csv + qs_by_country_top15.csv + qs_by_region.csv
daraz_iphone15_listings.csv — 20 rows × 6 cols
goodreads_books.csv + goodreads_books_partial.csv
# task1_dawn.py — Dawn top 30 headlines
# Scrape the Dawn homepage and save the first 30 story headlines to CSV.
import csv
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup

url = "https://www.dawn.com"
resp = requests.get(url, timeout=30)
resp.raise_for_status()  # fail loudly instead of parsing an error page
soup = BeautifulSoup(resp.text, "html.parser")
headlines = soup.select("article.story h2 a")[:30]

with open("dawn_top30_headlines.csv", "w", newline="", encoding="utf-8") as f:
    w = csv.writer(f)
    w.writerow(["Headline", "URL", "Category"])
    for h in headlines:
        href = h.get("href", "")
        # BUG FIX: the header declares 3 columns but the original row wrote
        # only 2 (Headline, URL). Derive Category from the first path segment
        # of the story URL (e.g. /news/..., /business/...) — presumably the
        # section slug; empty string when the URL has no path segment.
        segments = [s for s in urlparse(href).path.split("/") if s]
        category = segments[0] if segments else ""
        w.writerow([h.get_text(strip=True), href, category])
# task2_PSX.py — PSX indices & mainboard
# Scrape the PSX market-summary page and persist the indices table to CSV.
import csv
import json  # kept: presumably used by code beyond this excerpt — verify

import requests
from bs4 import BeautifulSoup

url = "https://www.psx.com.pk/market-summary/"
resp = requests.get(url, timeout=30)
resp.raise_for_status()  # don't parse an error page as market data
soup = BeautifulSoup(resp.text, "html.parser")

rows = soup.select("table.indices tr")
data = []
for row in rows[1:]:  # rows[0] is the header row
    cols = [td.get_text(strip=True) for td in row.select("td")]
    if cols:
        data.append(cols)

# BUG FIX: the scraped rows were collected but never written anywhere;
# persist them to the file the project layout promises (psx_indices.csv).
header = [th.get_text(strip=True) for th in rows[0].select("th")] if rows else []
with open("psx_indices.csv", "w", newline="", encoding="utf-8") as f:
    w = csv.writer(f)
    if header:
        w.writerow(header)
    w.writerows(data)
# task3_QSWR.py — QS World Rankings top 50
# Load the QS rankings page with Selenium and collect the ranking rows.
import csv

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
try:
    driver.get("https://www.topuniversities.com/world-university-rankings")
    # BUG FIX: a fixed time.sleep(3) is both flaky (page may need longer)
    # and wasteful; wait explicitly for the JS-rendered rows to appear.
    WebDriverWait(driver, 15).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "div.uni-row"))
    )
    rows = driver.find_elements(By.CSS_SELECTOR, "div.uni-row")
    # Capture the text now: WebElements go stale once the driver quits.
    row_texts = [r.text for r in rows]
finally:
    # BUG FIX: the original never closed the browser, leaking a Chrome
    # process on every run (and on every exception).
    driver.quit()
# task4_Daraz.py — iPhone 15 listings scraper
# Load the Daraz iPhone 15 search page, collect product cards, and start
# the listings CSV with its header row.
import csv
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
try:
    driver.get("https://www.daraz.pk/iphone-15/")
    time.sleep(3)  # allow the JS-rendered product grid to load
    products = driver.find_elements(By.CSS_SELECTOR, "div[data-qa-locator]")
    # Capture per-card text before the driver is closed below —
    # WebElements become stale after quit().
    product_texts = [p.text for p in products]
finally:
    # BUG FIX: the original never closed the browser, leaking a Chrome
    # process on every run (and on every exception).
    driver.quit()

# BUG FIX: csv.writer requires the file opened with newline="" (per the csv
# module docs) or every row gets a doubled line ending on Windows; also pin
# the encoding.
with open("daraz_iphone15_listings.csv", "w", newline="", encoding="utf-8") as f:
    w = csv.writer(f)
    w.writerow(["Title","Price","Seller","Ratings","DeliveryOptions","ProductURL"])
    # TODO(review): per-field extraction (Title/Price/Seller/...) is not
    # implemented in this excerpt; as in the original, only the header row
    # is emitted.
# task5_GoodReads.py — top books scraper
# Fetch the Goodreads "Best Books Ever" list page and select every table
# row carrying an itemtype attribute (one row per book).
import requests, csv
from bs4 import BeautifulSoup

url = "https://www.goodreads.com/list/show/1.Best_Books_Ever"
# Browser-like User-Agent: Goodreads serves a different (or blocked) page
# to the default python-requests agent.
headers = {"User-Agent": "Mozilla/5.0"}
resp = requests.get(url, headers=headers, timeout=30)
resp.raise_for_status()  # BUG FIX: don't silently parse a 4xx/5xx body
soup = BeautifulSoup(resp.text, "html.parser")
books = soup.select("tr[itemtype]")
Web-Scraping-Python-Project/
├── task1_dawn/
│ ├── task1_dawn.py
│ ├── export_task1_reports.py
│ ├── dawn_top30_headlines.csv
│ ├── dawn_top30_headlines.json
│ └── reports/ ← 2 PDF + 2 DOCX exports
├── task2_psx/
│ ├── task2_PSX.py
│ ├── export_task2_reports.py
│ ├── psx_indices.csv + psx_mainboard.csv
│ └── reports/ ← 4 PDF + 4 DOCX exports
├── task3_qswr/
│ ├── task3_QSWR.py
│ ├── export_task3_reports.py
│ ├── qs_top50.csv + qs_by_country_top15.csv + qs_by_region.csv
│ └── reports/ ← 5 PDF exports
├── task4_daraz/
│ ├── task4_Daraz.py
│ ├── export_task4_reports.py
│ ├── daraz_iphone15_listings.csv
│ └── reports/ ← 3 PDF exports
├── task5_goodreads/
│ ├── task5_GoodReads.py
│ ├── export_task5_reports.py
│ ├── goodreads_books.csv + goodreads_books_partial.csv
│ └── reports/ ← 1 PDF export
├── main.py
├── requirements.txt
└── Intro to the Project