In [2]:
import pandas as pd
import numpy as np

# Реальные средние температуры (примерные данные) для городов по сезонам
seasonal_temperatures = {
    "New York": {"winter": 0, "spring": 10, "summer": 25, "autumn": 15},
    "London": {"winter": 5, "spring": 11, "summer": 18, "autumn": 12},
    "Paris": {"winter": 4, "spring": 12, "summer": 20, "autumn": 13},
    "Tokyo": {"winter": 6, "spring": 15, "summer": 27, "autumn": 18},
    "Moscow": {"winter": -10, "spring": 5, "summer": 18, "autumn": 8},
    "Sydney": {"winter": 12, "spring": 18, "summer": 25, "autumn": 20},
    "Berlin": {"winter": 0, "spring": 10, "summer": 20, "autumn": 11},
    "Beijing": {"winter": -2, "spring": 13, "summer": 27, "autumn": 16},
    "Rio de Janeiro": {"winter": 20, "spring": 25, "summer": 30, "autumn": 25},
    "Dubai": {"winter": 20, "spring": 30, "summer": 40, "autumn": 30},
    "Los Angeles": {"winter": 15, "spring": 18, "summer": 25, "autumn": 20},
    "Singapore": {"winter": 27, "spring": 28, "summer": 28, "autumn": 27},
    "Mumbai": {"winter": 25, "spring": 30, "summer": 35, "autumn": 30},
    "Cairo": {"winter": 15, "spring": 25, "summer": 35, "autumn": 25},
    "Mexico City": {"winter": 12, "spring": 18, "summer": 20, "autumn": 15},
}

# Сопоставление месяцев с сезонами
month_to_season = {12: "winter", 1: "winter", 2: "winter",
                   3: "spring", 4: "spring", 5: "spring",
                   6: "summer", 7: "summer", 8: "summer",
                   9: "autumn", 10: "autumn", 11: "autumn"}

# Генерация данных о температуре
def generate_realistic_temperature_data(cities, num_years=10):
    dates = pd.date_range(start="2010-01-01", periods=365 * num_years, freq="D")
    data = []

    for city in cities:
        for date in dates:
            season = month_to_season[date.month]
            mean_temp = seasonal_temperatures[city][season]
            # Добавляем случайное отклонение
            temperature = np.random.normal(loc=mean_temp, scale=5)
            data.append({"city": city, "timestamp": date, "temperature": temperature})

    df = pd.DataFrame(data)
    df['season'] = df['timestamp'].dt.month.map(lambda x: month_to_season[x])
    return df

# Генерация данных
data = generate_realistic_temperature_data(list(seasonal_temperatures.keys()))
data.to_csv('temperature_data.csv', index=False)

## 1.Анализ исторических данных

In [178]:
import pandas as pd
import numpy as np
import time

In [216]:
df = pd.read_csv('temperature_data.csv')

In [217]:
df.describe()

Unnamed: 0,temperature
count,54750.0
mean,18.241583
std,11.018598
min,-25.37648
25%,11.185024
50%,18.751896
75%,25.996977
max,57.433844


In [218]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54750 entries, 0 to 54749
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   city         54750 non-null  object 
 1   timestamp    54750 non-null  object 
 2   temperature  54750 non-null  float64
 3   season       54750 non-null  object 
dtypes: float64(1), object(3)
memory usage: 1.7+ MB


In [219]:
df.head()

Unnamed: 0,city,timestamp,temperature,season
0,New York,2010-01-01,4.784128,winter
1,New York,2010-01-02,6.923097,winter
2,New York,2010-01-03,1.719448,winter
3,New York,2010-01-04,7.158969,winter
4,New York,2010-01-05,-2.162887,winter


In [220]:
def anomaly(temperature, mean_t, std_t):
    left_boundary = mean_t - 2*std_t
    rigrt_boundary = mean_t + 2*std_t
    if temperature > rigrt_boundary or temperature < left_boundary:
        return 1
    else:
        return 0
        
def processing_data(df):
    df["rolling_avg"] = df["temperature"].rolling(window=30).mean()
    df['rolling_std'] = df['rolling_avg'].std()
    df['mean_temp'] = df.groupby(['city','season'])['temperature'].transform('mean')
    df['std_temp'] = df.groupby(['city','season'])['temperature'].transform('std')
    df['anomaly'] = df.apply(lambda x: anomaly(x.temperature, x.mean_temp, x.std_temp), axis=1)

    df_stats = pd.DataFrame()
    df_stats = df.groupby(by=['city', 'season']).agg({'temperature':['mean','std', 'max', 'min']})
    return df_stats, df

In [221]:
start_time = time.time()
stats, df = processing_data(df)
print(stats)
processed_time = time.time() - start_time
print(f"Время последовательного выполнения: {processed_time:.4f} секунд")

                      temperature                                
                             mean       std        max        min
city           season                                            
Beijing        autumn   16.076133  4.946264  31.057521   0.403563
               spring   13.026696  5.063593  31.931823  -3.521780
               summer   26.987208  5.147958  45.844978  11.545709
               winter   -2.368063  4.798118  13.991464 -16.365497
Berlin         autumn   10.776010  4.977665  26.051909  -9.153468
               spring   10.080384  4.908314  27.577566  -7.207666
               summer   19.843060  4.995084  37.322776   5.311913
               winter    0.072255  4.959507  15.572642 -16.035860
Cairo          autumn   24.794886  5.060714  40.350314  10.860416
               spring   24.888802  4.772143  39.155375   6.220631
               summer   34.939769  5.055321  51.180871  18.369410
               winter   14.904753  4.915651  28.921676   1.779932
Dubai     

In [222]:
from multiprocessing import Pool

num_processes = 4
parts = np.array_split(df, num_processes)

with Pool(num_processes) as pool:
    start_time = time.time()
    results = pool.map(processing_data, parts)
    processed_time = time.time() - start_time

print(f"Время параллельного выполнения: {processed_time:.4f} секунд")

  return bound(*args, **kwds)


Время параллельного выполнения: 0.2561 секунд


В случае функции processing_data параллельное выполнение является более эффективным, чем последовательное

In [223]:
df

Unnamed: 0,city,timestamp,temperature,season,rolling_avg,rolling_std,mean_temp,std_temp,anomaly
0,New York,2010-01-01,4.784128,winter,,9.61764,-0.087885,5.040229,0
1,New York,2010-01-02,6.923097,winter,,9.61764,-0.087885,5.040229,0
2,New York,2010-01-03,1.719448,winter,,9.61764,-0.087885,5.040229,0
3,New York,2010-01-04,7.158969,winter,,9.61764,-0.087885,5.040229,0
4,New York,2010-01-05,-2.162887,winter,,9.61764,-0.087885,5.040229,0
...,...,...,...,...,...,...,...,...,...
54745,Mexico City,2019-12-25,15.803617,winter,13.040718,9.61764,11.745275,5.348906,0
54746,Mexico City,2019-12-26,2.551694,winter,12.562215,9.61764,11.745275,5.348906,0
54747,Mexico City,2019-12-27,12.014722,winter,12.173232,9.61764,11.745275,5.348906,0
54748,Mexico City,2019-12-28,3.001218,winter,11.786468,9.61764,11.745275,5.348906,0


In [224]:
from sklearn.linear_model import LinearRegression
import datetime as dt

def calculate_trend(df):
    df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda x: x.toordinal())
    X = df['timestamp'].values.reshape(-1, 1)
    y = df['temperature'].values
    model = LinearRegression()
    model.fit(X, y)
    trend = model.coef_[0]
    
    return trend

In [225]:
selected_city = 'New York'
trend = calculate_trend(df[df['city'] == selected_city])
if trend > 0:
    print(f'Позитивный тренд для города {selected_city}')
elif trend < 0:
    print(f'Негативный тренд для города {selected_city}')

Позитивный тренд для города New York


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['timestamp'] = pd.to_datetime(df['timestamp']).apply(lambda x: x.toordinal())


## 2. Мониторинг текущей температуры

In [226]:
import requests

API_KEY = ""
CITY = "Moscow"

url = f"http://api.openweathermap.org/data/2.5/weather?q={CITY}&appid={API_KEY}&units=metric"
response = requests.get(url)
if response.status_code == 200:
    data = response.json()
    print(f"Погода в {CITY}: {data['main']['temp']}°C")
else:
    print("Ошибка при запросе данных")

Ошибка при запросе данных


In [227]:
city = 'Moscow'
season = 'winter'

temp_city = data['main']['temp']

In [228]:
df_city = df[(df['city'] == city) & (df['season'] == season)]
mean_city = df_city['mean_temp'].mean()
std_city = df_city['std_temp'].mean()
result = anomaly(temp_city, mean_city, std_city)
if result == 0:
    print('Температура не аномальная')
else:
    print('Температура аномальная')

Температура аномальная


In [229]:
df_city

Unnamed: 0,city,timestamp,temperature,season,rolling_avg,rolling_std,mean_temp,std_temp,anomaly
14600,Moscow,2010-01-01,-9.050402,winter,4.815888,9.61764,-10.00844,4.696065,0
14601,Moscow,2010-01-02,-2.735505,winter,4.395418,9.61764,-10.00844,4.696065,0
14602,Moscow,2010-01-03,-13.512324,winter,4.098695,9.61764,-10.00844,4.696065,0
14603,Moscow,2010-01-04,-12.497852,winter,3.414472,9.61764,-10.00844,4.696065,0
14604,Moscow,2010-01-05,-7.995529,winter,3.035208,9.61764,-10.00844,4.696065,0
...,...,...,...,...,...,...,...,...,...
18245,Moscow,2019-12-25,-5.042670,winter,-7.917424,9.61764,-10.00844,4.696065,0
18246,Moscow,2019-12-26,-14.339857,winter,-8.730191,9.61764,-10.00844,4.696065,0
18247,Moscow,2019-12-27,-18.353209,winter,-9.249793,9.61764,-10.00844,4.696065,0
18248,Moscow,2019-12-28,-7.379280,winter,-9.824772,9.61764,-10.00844,4.696065,0


In [204]:
API_KEY = ""
CITY = "Berlin"

url = f"http://api.openweathermap.org/data/2.5/weather?q={CITY}&appid={API_KEY}&units=metric"
response = requests.get(url)
if response.status_code == 200:
    data = response.json()
    print(f"Погода в {CITY}: {data['main']['temp']}°C")
else:
    print("Ошибка при запросе данных")

city = 'Berlin'
season = 'winter'
temp_city = data['main']['temp']

df_city = df[(df['city'] == city) & (df['season'] == 'winter')]
mean_city = df_city['mean_temp'].mean()
std_city = df_city['std_temp'].mean()
result = anomaly(temp_city, mean_city, std_city)
if result == 0:
    print('Температура не аномальная')
else:
    print('Температура аномальная')

Погода в Berlin: 4.08°C
Температура не аномальная


In [205]:
API_KEY = ""
CITY = "Cairo"

url = f"http://api.openweathermap.org/data/2.5/weather?q={CITY}&appid={API_KEY}&units=metric"
response = requests.get(url)
if response.status_code == 200:
    data = response.json()
    print(f"Погода в {CITY}: {data['main']['temp']}°C")
else:
    print("Ошибка при запросе данных")

city = CITY
season = 'winter'
temp_city = data['main']['temp']

df_city = df[(df['city'] == city) & (df['season'] == 'winter')]
mean_city = df_city['mean_temp'].mean()
std_city = df_city['std_temp'].mean()
result = anomaly(temp_city, mean_city, std_city)
if result == 0:
    print('Температура не аномальная')
else:
    print('Температура аномальная')

Погода в Cairo: 17.42°C
Температура не аномальная


In [206]:
API_KEY = ""
CITY = "Dubai"

url = f"http://api.openweathermap.org/data/2.5/weather?q={CITY}&appid={API_KEY}&units=metric"
response = requests.get(url)
if response.status_code == 200:
    data = response.json()
    print(f"Погода в {CITY}: {data['main']['temp']}°C")
else:
    print("Ошибка при запросе данных")

city = CITY
season = 'winter'
temp_city = data['main']['temp']

df_city = df[(df['city'] == city) & (df['season'] == 'winter')]
mean_city = df_city['mean_temp'].mean()
std_city = df_city['std_temp'].mean()
result = anomaly(temp_city, mean_city, std_city)
if result == 0:
    print('Температура не аномальная')
else:
    print('Температура аномальная')

Погода в Dubai: 20.96°C
Температура не аномальная


## Синхронное выполнение функции получения текущей температуры

In [230]:
import time
import requests

def current_temp(CITY):
    API_KEY = ""
    url = f"http://api.openweathermap.org/data/2.5/weather?q={CITY}&appid={API_KEY}&units=metric"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        print(f"Погода в {CITY}: {data['main']['temp']}°C")
    else:
        print("Ошибка при запросе данных")


In [231]:
start_time = time.time()
current_temp('Moscow')
elapsed_time = time.time() - start_time
print(f"Время синхронного выполнения: {elapsed_time:.4f} секунд")

Погода в Moscow: -2.84°C
Время синхронного выполнения: 0.1795 секунд


## Асинхронное выполнение получения текущей температуры

In [214]:
import asyncio
import aiohttp

import nest_asyncio
nest_asyncio.apply()

async def fetch(session, city):
    API_KEY = ""
    url = f"http://api.openweathermap.org/data/2.5/weather?q={city}&appid={API_KEY}&units=metric"
    async with session.get(url) as response:
        if response.status == 200:
            data = await response.json()
            return f"Погода в {city}: {data['main']['temp']}°C"
        else:
            return "Ошибка при запросе данных"

async def current_temp_async(cities):
    async with aiohttp.ClientSession() as session:
        tasks = []
        for city in cities:
            task = asyncio.create_task(fetch(session, city))
            tasks.append(task)
        
        results = await asyncio.gather(*tasks)
        return results

loop = asyncio.get_event_loop()

start_time = time.time()
loop.run_until_complete(current_temp_async('Moscow'))
elapsed_time = time.time() - start_time
print(f"Время асинхронного выполнения: {elapsed_time:.4f} секунд")

Время асинхронного выполнения: 0.2676 секунд


Для поиска температуры для одного города лучше использовать синхронные методы, это более выгодно по времени в данном случае.