In [10]:
import seaborn as sns
import numpy as np
import pandas as pd

# Load the 'planets' example dataset through seaborn and check its
# dimensions as (rows, columns).
planets = sns.load_dataset('planets')
planets.shape

(1035, 6)

`mean()`: Определяет центральное значение набора данных.

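`mad()`: Измеряет среднюю величину абсолютных отклонений от среднего значения, давая представление о разбросе данных. Обратите внимание: метод `mad()` объявлен устаревшим в pandas 1.5 и удалён в pandas 2.0; в новых версиях ту же величину можно вычислить как `(x - x.mean()).abs().mean()`.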

### Среднее (mean)

**Пример:**
```python
import pandas as pd

# Arithmetic mean of a small Series: sum of the values divided by their count.
data = pd.Series([1, 2, 3, 4, 5])
mean_value = data.mean()
print("Среднее значение:", mean_value)
```

В данном примере среднее значение будет равно 3, так как (1+2+3+4+5)/5 = 3.

### Среднее абсолютное отклонение (MAD)

Среднее абсолютное отклонение (MAD) измеряет среднюю величину отклонений от среднего значения. В отличие от стандартного отклонения, MAD использует абсолютные значения отклонений, что делает его менее чувствительным к выбросам.

**Пример:**
```python
import pandas as pd

data = pd.Series([1, 2, 3, 4, 5])
# Series.mad() was deprecated in pandas 1.5 and removed in pandas 2.0, so the
# mean absolute deviation is computed explicitly: mean(|x - mean(x)|).
mad_value = (data - data.mean()).abs().mean()
print("Среднее абсолютное отклонение:", mad_value)
```

В данном примере сначала рассчитывается среднее значение (3), затем вычисляются абсолютные отклонения от среднего: [2, 1, 0, 1, 2], и, наконец, среднее этих отклонений: (2+1+0+1+2)/5 = 1.2.


**Формула:**
   - **mean()**: $\text{mean} = \frac{\sum_{i=1}^{n} x_i}{n}$
   - **mad()**: $\text{MAD} = \frac{\sum_{i=1}^{n} |x_i - \text{mean}|}{n}$


In [11]:
# Display the frame; the notebook renders a truncated preview of it.
planets

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.300000,7.10,77.40,2006
1,Radial Velocity,1,874.774000,2.21,56.95,2008
2,Radial Velocity,1,763.000000,2.60,19.84,2011
3,Radial Velocity,1,326.030000,19.40,110.62,2007
4,Radial Velocity,1,516.220000,10.50,119.47,2009
...,...,...,...,...,...,...
1030,Transit,1,3.941507,,172.00,2006
1031,Transit,1,2.615864,,148.00,2007
1032,Transit,1,3.191524,,174.00,2007
1033,Transit,1,4.125083,,293.00,2008


In [13]:
# Seeded generator so the random values below are reproducible.
random_state = np.random.RandomState(42)
series = pd.Series(random_state.rand(5))  # uniform distribution
series

0    0.374540
1    0.950714
2    0.731994
3    0.598658
4    0.156019
dtype: float64

In [14]:
# Sum of all values in the Series.
series.sum()

2.811925491708157

In [15]:
# Arithmetic mean of the Series (the sum divided by the number of values).
series.mean()

0.5623850983416314

In [17]:
# Two columns of five random values each, drawn from the same seeded
# generator, so the values depend on the draws already made from it.
df = pd.DataFrame({'A': random_state.rand(5),
                  'B': random_state.rand(5)})
df

Unnamed: 0,A,B
0,0.155995,0.020584
1,0.058084,0.96991
2,0.866176,0.832443
3,0.601115,0.212339
4,0.708073,0.181825


In [20]:
# Average of the two per-column means; the column means are computed once.
column_means = df.mean()
(column_means['A'] + column_means['B']) / 2

0.46065429329621876

In [21]:
# Default aggregation: one mean per column.
df.mean()

A    0.477888
B    0.443420
dtype: float64

In [22]:
# Mean within each row (one value per row); axis=1 is the same as axis='columns'.
df.mean(axis=1)

0    0.088290
1    0.513997
2    0.849309
3    0.406727
4    0.444949
dtype: float64

In [23]:
# Summary statistics of the numeric columns. Missing values are still present
# here, which is why the `count` row differs from column to column.
planets.describe()

Unnamed: 0,number,orbital_period,mass,distance,year
count,1035.0,992.0,513.0,808.0,1035.0
mean,1.785507,2002.917596,2.638161,264.069282,2009.070531
std,1.240976,26014.728304,3.818617,733.116493,3.972567
min,1.0,0.090706,0.0036,1.35,1989.0
25%,1.0,5.44254,0.229,32.56,2007.0
50%,1.0,39.9795,1.26,55.25,2010.0
75%,2.0,526.005,3.04,178.5,2012.0
max,7.0,730000.0,25.0,8500.0,2014.0


In [25]:
# Same summary after dropping every row that contains a missing value.
planets.dropna().describe()

Unnamed: 0,number,orbital_period,mass,distance,year
count,498.0,498.0,498.0,498.0,498.0
mean,1.73494,835.778671,2.50932,52.068213,2007.37751
std,1.17572,1469.128259,3.636274,46.596041,4.167284
min,1.0,1.3283,0.0036,1.35,1989.0
25%,1.0,38.27225,0.2125,24.4975,2005.0
50%,1.0,357.0,1.245,39.94,2009.0
75%,2.0,999.6,2.8675,59.3325,2011.0
max,6.0,17337.5,25.0,354.0,2014.0


In [26]:
# Number of non-null entries in each column after dropping incomplete rows.
planets.dropna().count()

method            498
number            498
orbital_period    498
mass              498
distance          498
year              498
dtype: int64

In [29]:
# Mean of each numeric column. numeric_only=True skips the string column
# 'method' explicitly: relying on pandas to drop it silently emits a warning
# on pandas 1.5 and raises TypeError on pandas >= 2.0.
planets.dropna().mean(numeric_only=True)

  planets.dropna().mean()


number               1.734940
orbital_period     835.778671
mass                 2.509320
distance            52.068213
year              2007.377510
dtype: float64

In [30]:
# Median of each numeric column. numeric_only=True skips the string column
# 'method' explicitly: relying on pandas to drop it silently emits a warning
# on pandas 1.5 and raises TypeError on pandas >= 2.0.
planets.dropna().median(numeric_only=True)

  planets.dropna().median()


number               1.000
orbital_period     357.000
mass                 1.245
distance            39.940
year              2009.000
dtype: float64

In [31]:
# Column-wise minimum; for the string column 'method' this is the
# lexicographically smallest value.
planets.dropna().min()

method            Radial Velocity
number                          1
orbital_period             1.3283
mass                       0.0036
distance                     1.35
year                         1989
dtype: object

In [32]:
# Column-wise maximum; for the string column 'method' this is the
# lexicographically largest value.
planets.dropna().max()

method            Transit
number                  6
orbital_period    17337.5
mass                 25.0
distance            354.0
year                 2014
dtype: object

In [33]:
# Standard deviation of each numeric column. numeric_only=True skips the
# string column 'method' explicitly: relying on pandas to drop it silently
# emits a warning on pandas 1.5 and raises TypeError on pandas >= 2.0.
planets.dropna().std(numeric_only=True)

  planets.dropna().std()


number               1.175720
orbital_period    1469.128259
mass                 3.636274
distance            46.596041
year                 4.167284
dtype: float64

In [34]:
# Variance of each numeric column. numeric_only=True skips the string column
# 'method' explicitly: relying on pandas to drop it silently emits a warning
# on pandas 1.5 and raises TypeError on pandas >= 2.0.
planets.dropna().var(numeric_only=True)

  planets.dropna().var() # variance


number            1.382318e+00
orbital_period    2.158338e+06
mass              1.322249e+01
distance          2.171191e+03
year              1.736625e+01
dtype: float64

In [35]:
# Mean absolute deviation of each numeric column: mean(|x - mean(x)|).
# DataFrame.mad() was deprecated in pandas 1.5 and removed in pandas 2.0,
# so the statistic is computed explicitly on the numeric columns only.
planets_numeric = planets.dropna().select_dtypes(include='number')
(planets_numeric - planets_numeric.mean()).abs().mean()

number              0.867760
orbital_period    863.505805
mass                2.439216
distance           30.492406
year                3.439767
dtype: float64

In [36]:
# Product of each numeric column. numeric_only=True skips the string column
# 'method', which cannot be multiplied (TypeError on pandas >= 2.0).
# NOTE: the result is not meaningful for this data; the inf and 0 values in
# the output indicate that the products overflow.
planets.dropna().prod(numeric_only=True)

  return umr_prod(a, axis, dtype, out, keepdims, initial, where)
  planets.dropna().prod()


number            0.000000e+00
orbital_period             inf
mass              1.521982e-68
distance                   inf
year              0.000000e+00
dtype: float64

In [37]:
# Column-wise sum. The string column 'method' is "summed" by concatenating
# its values, which is why the result has dtype object.
planets.dropna().sum()

method            Radial VelocityRadial VelocityRadial VelocityR...
number                                                          864
orbital_period                                        416217.777994
mass                                                     1249.64138
distance                                                   25929.97
year                                                         999674
dtype: object

In [39]:
# groupby returns a DataFrameGroupBy object; nothing is displayed except its
# repr until an aggregation is applied.
planets.groupby('method')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001CA0ABC2C40>

In [40]:
# Selecting one column from the grouped frame gives a SeriesGroupBy object.
planets.groupby('method')['orbital_period']

<pandas.core.groupby.generic.SeriesGroupBy object at 0x000001CA0AA83550>

In [43]:
# Median orbital period for each detection method.
planets.groupby('method')['orbital_period'].median()

method
Astrometry                         631.180000
Eclipse Timing Variations         4343.500000
Imaging                          27500.000000
Microlensing                      3300.000000
Orbital Brightness Modulation        0.342887
Pulsar Timing                       66.541900
Pulsation Timing Variations       1170.000000
Radial Velocity                    360.200000
Transit                              5.714932
Transit Timing Variations           57.011000
Name: orbital_period, dtype: float64

In [48]:
# Iterating over a GroupBy yields (group key, sub-frame) pairs; print each
# method name followed by the rows that belong to it.
for (method, group) in planets.groupby('method'):
    print("{0} group={1}\n".format(method, group))

Astrometry group=         method  number  orbital_period  mass  distance  year
113  Astrometry       1          246.36   NaN     20.77  2013
537  Astrometry       1         1016.00   NaN     14.98  2010

Eclipse Timing Variations group=                       method  number  orbital_period  mass  distance  year
32  Eclipse Timing Variations       1        10220.00  6.05       NaN  2009
37  Eclipse Timing Variations       2         5767.00   NaN    130.72  2008
38  Eclipse Timing Variations       2         3321.00   NaN    130.72  2008
39  Eclipse Timing Variations       2         5573.55   NaN    500.00  2010
40  Eclipse Timing Variations       2         2883.50   NaN    500.00  2010
41  Eclipse Timing Variations       1         2900.00   NaN       NaN  2011
42  Eclipse Timing Variations       1         4343.50  4.20       NaN  2012
43  Eclipse Timing Variations       2         5840.00   NaN       NaN  2011
44  Eclipse Timing Variations       2         1916.25   NaN       NaN  2011

Ima

In [51]:
# Print each method name (left-aligned, padded to 30 characters) with the
# (rows, columns) shape of its group.
for (method, group) in planets.groupby('method'):
    print("{0:30s} shape={1}".format(method, group.shape))

Astrometry                     shape=(2, 6)
Eclipse Timing Variations      shape=(9, 6)
Imaging                        shape=(38, 6)
Microlensing                   shape=(23, 6)
Orbital Brightness Modulation  shape=(3, 6)
Pulsar Timing                  shape=(5, 6)
Pulsation Timing Variations    shape=(1, 6)
Radial Velocity                shape=(553, 6)
Transit                        shape=(397, 6)
Transit Timing Variations      shape=(4, 6)


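Iterating over the groups is handy for manual inspection, but the built-in `apply()` (or an aggregation method) is often much faster than an explicit Python loop.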

In [52]:
# Summary statistics of the discovery year, computed separately per method.
planets.groupby('method')['year'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Astrometry,2.0,2011.5,2.12132,2010.0,2010.75,2011.5,2012.25,2013.0
Eclipse Timing Variations,9.0,2010.0,1.414214,2008.0,2009.0,2010.0,2011.0,2012.0
Imaging,38.0,2009.131579,2.781901,2004.0,2008.0,2009.0,2011.0,2013.0
Microlensing,23.0,2009.782609,2.859697,2004.0,2008.0,2010.0,2012.0,2013.0
Orbital Brightness Modulation,3.0,2011.666667,1.154701,2011.0,2011.0,2011.0,2012.0,2013.0
Pulsar Timing,5.0,1998.4,8.38451,1992.0,1992.0,1994.0,2003.0,2011.0
Pulsation Timing Variations,1.0,2007.0,,2007.0,2007.0,2007.0,2007.0,2007.0
Radial Velocity,553.0,2007.518987,4.249052,1989.0,2005.0,2009.0,2011.0,2014.0
Transit,397.0,2011.236776,2.077867,2002.0,2010.0,2012.0,2013.0,2014.0
Transit Timing Variations,4.0,2012.5,1.290994,2011.0,2011.75,2012.5,2013.25,2014.0


Большинство планет было открыто методом измерения лучевой скорости и транзитным методом. Транзитный метод стал распространённым только в последние десятилетия благодаря новым точным телескопам. Новейшие методы — метод вариации времени транзитов и метод модуляции орбитальной яркости — применяются с 2011 года.

In [62]:
# Floor each discovery year to the start of its decade and turn it into a
# label such as '2000s'.
decade = (planets['year'] // 10 * 10).astype(str) + 's'
decade

0       2000s
1       2000s
2       2010s
3       2000s
4       2000s
        ...  
1030    2000s
1031    2000s
1032    2000s
1033    2000s
1034    2000s
Name: year, Length: 1035, dtype: object

In [63]:
# The derived Series still carries the name of the column it was built from.
decade.name

'year'

In [69]:
# Rename the Series so the grouped level (and the resulting column axis) is
# labelled 'decade'.
decade.name = 'decade'

# Total number of planets per method and decade: methods as rows, decades as
# columns, with 0 where a method has no entries in a decade.
(
    planets
    .groupby(['method', decade])['number']
    .sum()
    .unstack()
    .fillna(0)
)

decade,1980s,1990s,2000s,2010s
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Astrometry,0.0,0.0,0.0,2.0
Eclipse Timing Variations,0.0,0.0,5.0,10.0
Imaging,0.0,0.0,29.0,21.0
Microlensing,0.0,0.0,12.0,15.0
Orbital Brightness Modulation,0.0,0.0,0.0,5.0
Pulsar Timing,0.0,9.0,1.0,1.0
Pulsation Timing Variations,0.0,0.0,1.0,0.0
Radial Velocity,1.0,52.0,475.0,424.0
Transit,0.0,0.0,64.0,712.0
Transit Timing Variations,0.0,0.0,0.0,9.0
