In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

data = []

for year in range(2020, 2024):  # January 2020 to December 2020
    for month in range(1, 13):  # January to February
        for day in range(1, 32):
            url = f"https://altin.in/arsiv/{year}/{month:02d}/{day:02d}"
            response = requests.get(url)

            if response.status_code == 200:
                soup = BeautifulSoup(response.text, 'html.parser')

                alis_fiyati = soup.find('li', title='Gram Altın - Alış').text.strip()
                satis_fiyati = soup.find('li', title='Gram Altın - Satış').text.strip()

                # Extracting the date from the HTML
                tarih_tag = soup.find('li', class_='lrow')
                tarih_match = re.search(r'Tarih: <b>(.*?)</b>', str(tarih_tag))
                tarih = tarih_match.group(1) if tarih_match else None

                data.append({
                    'TarihSource': tarih,
                    'Tarih': f"{year}-{month:02d}-{day:02d}",
                    'Gram Altın Alış Fiyatı': alis_fiyati,
                    'Gram Altın Satış Fiyatı': satis_fiyati,
                    'Url': url
                })
            else:
                print(f"Hata! HTTP Hatası: {response.status_code} - Tarih: {year}-{month:02d}-{day:02d}")

df = pd.DataFrame(data)
df["TarihSource"] = df["TarihSource"].str.split(",", expand=True)[0]
df["TarihSource"] = df["TarihSource"].apply(lambda x: ' '.join(['0' + part if len(part) == 1 else part for part in x.split(" ", 2)]))
month_mapping = {
    'Ocak': '01',
    'Şubat': '02',
    'Mart': '03',
    'Nisan': '04',
    'Mayıs': '05',
    'Haziran': '06',
    'Temmuz': '07',
    'Ağustos': '08',
    'Eylül': '09',
    'Ekim': '10',
    'Kasım': '11',
    'Aralık': '12'
}

df["TarihSource"] = df["TarihSource"].replace(month_mapping, regex=True)
df["TarihSource"] = df["TarihSource"].str.replace(" ", "-")
df["TarihSource"] = pd.to_datetime(df["TarihSource"], format='%d-%m-%Y')
df["Tarih"] = pd.to_datetime(df["Tarih"], errors='coerce')
problematic_rows = df[df["Tarih"].isnull()]
df.drop(problematic_rows.index, inplace=True)

In [3]:
df.to_csv("altin.csv",index=False)

In [4]:
df

Unnamed: 0,TarihSource,Tarih,Gram Altın Alış Fiyatı,Gram Altın Satış Fiyatı,Url
0,2020-01-01,2020-01-01,287.8730,290.3410,https://altin.in/arsiv/2020/01/01
1,2020-01-02,2020-01-02,287.6260,290.0910,https://altin.in/arsiv/2020/01/02
2,2020-01-03,2020-01-03,290.1970,292.6850,https://altin.in/arsiv/2020/01/03
3,2020-01-04,2020-01-04,294.9960,297.5250,https://altin.in/arsiv/2020/01/04
4,2020-01-05,2020-01-05,294.9960,297.5250,https://altin.in/arsiv/2020/01/05
...,...,...,...,...,...
1483,2023-12-27,2023-12-27,1918.2470,1934.6920,https://altin.in/arsiv/2023/12/27
1484,2023-12-28,2023-12-28,1947.3040,1963.9980,https://altin.in/arsiv/2023/12/28
1485,2023-12-29,2023-12-29,1939.8500,1956.4810,https://altin.in/arsiv/2023/12/29
1486,2023-12-30,2023-12-30,1937.9070,1954.5210,https://altin.in/arsiv/2023/12/30
