From 602196be137a46dabe876850345125f9c850876b Mon Sep 17 00:00:00 2001
From: Abdussamet Kocak
Date: Mon, 20 Jan 2020 07:36:40 +0300
Subject: [PATCH] Add diyanet.gov.tr scraper

---
Note: core/diyanet.py was revised after this patch was generated, so the
blob id on its "index" line no longer matches the content below.

 .gitignore       |  2 ++
 app/__init__.py  |  0
 app/app.py       |  0
 core/__init__.py |  0
 core/diyanet.py  | 76 ++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 78 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 app/__init__.py
 create mode 100644 app/app.py
 create mode 100644 core/__init__.py
 create mode 100644 core/diyanet.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..82195aa
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+venv
+.idea
\ No newline at end of file
diff --git a/app/__init__.py b/app/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/app.py b/app/app.py
new file mode 100644
index 0000000..e69de29
diff --git a/core/__init__.py b/core/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/core/diyanet.py b/core/diyanet.py
new file mode 100644
index 0000000..14d08c2
--- /dev/null
+++ b/core/diyanet.py
@@ -0,0 +1,76 @@
+from datetime import datetime
+from typing import List
+
+import httpx
+from bs4 import BeautifulSoup
+from pydantic import BaseModel
+
+# Shared async HTTP client, reused across calls so connections are pooled.
+# Close it with ``await _http.aclose()`` when the program shuts down.
+_http = httpx.AsyncClient()
+
+
+class PrayerTimes(BaseModel):
+    """One scraped table row: a date plus six string columns.
+
+    Field order matters: ``parse_prayer_times`` zips these names against the
+    table cells from left to right.
+    """
+
+    date: datetime
+    fajr: str
+    sun: str
+    dhuhr: str
+    asr: str
+    maghrib: str
+    isha: str
+
+
+async def parse_prayer_times(url: str) -> List[PrayerTimes]:
+    """Fetch *url* and parse the rows of its prayer-time table.
+
+    Rows are read from ``#tab-1 .vakit-table tbody tr``. Rows with fewer
+    cells than ``PrayerTimes`` has fields are skipped.
+
+    Raises an httpx error if the request fails or returns a 4xx/5xx status,
+    and ``ValueError`` if a date cell is not in ``DD.MM.YYYY`` form.
+    """
+    res = await _http.get(url)
+    # Fail loudly instead of scraping an error page into an empty list.
+    res.raise_for_status()
+    soup = BeautifulSoup(res.text, 'html.parser')
+
+    # The field names are fixed, so compute them once rather than per row.
+    headers = list(PrayerTimes.__fields__)
+
+    items = []
+    for row in soup.select('#tab-1 .vakit-table tbody tr'):
+        cell_texts = [c.text.strip() for c in row.select('td')]
+        if len(cell_texts) < len(headers):
+            # Not a complete data row; 'date' could be missing entirely.
+            continue
+        parsed = dict(zip(headers, cell_texts))
+        parsed['date'] = datetime.strptime(parsed['date'], '%d.%m.%Y')
+        items.append(PrayerTimes(**parsed))
+    return items
+
+
+async def _main() -> None:
+    """Scrape the English Diyanet page, print the rows, close the client."""
+    import pprint
+
+    url = 'https://namazvakitleri.diyanet.gov.tr/en-US'
+    try:
+        results = await parse_prayer_times(url)
+    finally:
+        # Release pooled connections even if the request or parsing fails.
+        await _http.aclose()
+    pprint.pprint(results)
+
+
+if __name__ == '__main__':
+    import asyncio
+
+    # asyncio.run() creates and closes its own event loop; calling
+    # get_event_loop() with no running loop is deprecated.
+    asyncio.run(_main())