## Инпут

In [4]:
from utilities import names_dict
# Build the project's names_dict helper from the names workbook path
# (Windows-style separator in the literal).
x = names_dict("resources\\names.xlsx")

# Print whatever get_names returns for each query string.
print(x.get_names("Манны"))
print(x.get_names("Ахматова"))

[]
['Ахматова Екатерина Вадимовна', 'Ахматова Александровна Екатерина']


In [16]:
from utilities import names_dict

# Build the project's names_dict helper from the names workbook path
# (Windows-style separator in the literal).
x = names_dict("resources\\names.xlsx")

# Print whatever get_names returns for each query string; the last query
# is a first name rather than a surname.
print(x.get_names("Манны"))
print(x.get_names("Ахматова"))
print(x.get_names("Иван"))

[]
['Ахматова Екатерина Вадимовна', 'Ахматова Александровна Екатерина']
['Инокеньтев Иван', 'Губинич Красавцев Иван', 'Губин Иван']


In [27]:
from natasha import (Segmenter, MorphVocab, NewsEmbedding, NewsMorphTagger, NewsNERTagger, PER, NamesExtractor, Doc)

from functools import lru_cache


@lru_cache(maxsize=1)
def _natasha_pipeline():
    """Create the natasha components once and reuse them on later calls.

    Returns:
        A tuple ``(segmenter, morph_tagger, ner_tagger, morph_vocab,
        names_extractor)``. The taggers share one embedding and the
        extractor shares the vocabulary used for lemmatisation.
    """
    embedding = NewsEmbedding()
    morph_vocab = MorphVocab()
    return (
        Segmenter(),
        NewsMorphTagger(embedding),
        NewsNERTagger(embedding),
        morph_vocab,
        NamesExtractor(morph_vocab),
    )


def split_name(name: str) -> dict:
    """Split the person names found in ``name`` into their parts.

    The text is segmented, morph-tagged, lemmatised and NER-tagged; every
    span tagged as a person (PER) is normalised and passed to the names
    extractor.

    Args:
        name: Text that contains one or more person names.

    Returns:
        A dict mapping each normalised person span to the dict form of its
        extracted fact (keys such as ``first``/``last``/``middle`` as
        produced by natasha). Person spans for which the extractor found
        nothing are left out; the result is empty when no person span is
        recognised.
    """
    (segmenter, morph_tagger, ner_tagger,
     morph_vocab, names_extractor) = _natasha_pipeline()

    doc = Doc(name)
    doc.segment(segmenter)
    doc.tag_morph(morph_tagger)
    for token in doc.tokens:
        token.lemmatize(morph_vocab)
    doc.tag_ner(ner_tagger)

    for span in doc.spans:
        span.normalize(morph_vocab)
        if span.type == PER:
            span.extract_fact(names_extractor)

    # A PER span keeps fact=None when the extractor finds no match, so the
    # fact must be checked before reading as_dict from it.
    return {
        span.normal: span.fact.as_dict
        for span in doc.spans
        if span.type == PER and span.fact
    }

# Run the splitter over a handful of differently formatted names and print
# each result on its own line.
samples = (
    "Хармс АВ",
    "Хармс А. В.",
    "Николай Андреевич Римский-Корсаков",
    "Красавцев Иван Губинич",
    "Орлов Иван Губинич",
)
for sample in samples:
    print(split_name(sample))


{}
{'Хармс А. В.': {'first': 'А', 'last': 'Хармс', 'middle': 'В'}}
{'Николай Андреевич Римский-Корсаков': {'first': 'Николай', 'last': 'Римский', 'middle': 'Андреевич'}}
{'Иван Губинич': {'first': 'Иван', 'last': 'Губинич'}}
{'Орлов Иван Губинич': {'first': 'Иван', 'last': 'Орлов'}}


In [None]:
import plotly
import plotly.graph_objs as go
import plotly.express as px
from plotly.subplots import make_subplots
import numpy as np
import pandas as pd

# Sample grid from 0 up to (not including) 5 in steps of 0.1.
x = np.arange(0, 5, 0.1)


def f(x):
    """Return ``x`` raised to the second power.

    Works for anything that supports the power operator, including
    NumPy arrays (elementwise).
    """
    return pow(x, 2)

# Quick scatter of the parabola through plotly express.
scatter_fig = px.scatter(x=x, y=f(x))
scatter_fig.show()

# Same curve plus the identity line, assembled trace by trace.
fig = go.Figure()
for trace in (
    go.Scatter(x=x, y=f(x),  name='f(x)=x<sup>2</sup>'),
    go.Scatter(x=x, y=x, name='$$g(x)=x$$'),
):
    fig.add_trace(trace)

# Horizontal, centred legend; titles for the plot and both axes; tight
# margins with a little room on top for the title.
layout_options = dict(
    legend_orientation="h",
    legend=dict(x=.5, xanchor="center"),
    title="Plot Title",
    xaxis_title="x Axis Title",
    yaxis_title="y Axis Title",
    margin=dict(l=0, r=0, t=30, b=0),
)
fig.update_layout(**layout_options)
fig.show()

In [None]:
from pymystem3 import Mystem

text = 'Николай Андреевич Римский-Корсаков'

m = Mystem()

analyze = m.analyze(text)

# Name parts picked out of the Mystem output; a part stays None when no
# token carries the matching grammeme.
first_name = None
second_name = None
middle_name = None

for word in analyze:
    # Some tokens have no 'analysis' key at all, and a token may also carry
    # an empty analysis list; neither can be classified, so skip both
    # instead of failing on the [0] lookup.
    analyses = word.get('analysis')
    if not analyses:
        continue
    analysis = analyses[0]

    # Classify by the grammeme string of the first analysis:
    # 'имя' -> first name, 'фам' -> surname, 'отч' -> patronymic.
    if 'имя' in analysis['gr']:
        first_name = word['text'].capitalize()
    elif 'фам' in analysis['gr']:
        second_name = word['text'].capitalize()
    elif 'отч' in analysis['gr']:
        middle_name = word['text'].capitalize()

print(f'{second_name} {first_name} {middle_name}')


None Иван Губинич


In [None]:
import matplotlib.pyplot as plt

x_var, y_var = "thermal_sensation_round", "thermal_preference"
# Share of each y_var value within every x_var group, one column per
# y_var value; combinations that never occur come out of unstack() as NaN.
df_grouped = df.groupby(x_var)[y_var].value_counts(normalize=True).unstack(y_var)
df_grouped.plot.barh(stacked=True)
plt.legend(
    bbox_to_anchor=(0.5, 1.02),
    loc="lower center",
    borderaxespad=0,
    frameon=False,
    ncol=3,
)
# Write the percentage into the middle of every sufficiently wide segment.
# The index is reset so that ix is the bar's position on the y axis.
for ix, row in df_grouped.reset_index(drop=True).iterrows():
    print(ix, row)
    cumulative = 0
    for element in row:
        # NaN is the only value that differs from itself. A missing segment
        # has no width, so it must not be added to the running offset;
        # otherwise the offset turns into NaN and every later label in
        # this row is misplaced.
        if element != element:
            continue
        if element > 0.1:
            plt.text(
                cumulative + element / 2,
                ix,
                # Round instead of truncating: 0.57 * 100 is slightly
                # below 57 in floating point and int() would show 56.
                f"{int(round(element * 100))} %",
                va="center",
                ha="center",
            )
        cumulative += element
plt.tight_layout()

# Рабочая часть

In [2]:
from preprocessing import Preprocessing

# Paths handed to Preprocessing below; the two resource paths use
# Windows-style separators.
data_path = "data.xlsx"
structure_path = "resources\\survey_structure.json"
names_path = "resources\\names.xlsx"

# Build the collector from the three paths and run its collect() step
# before any per-person query.
collector = Preprocessing(data_path, structure_path, names_path)
collector.collect()

# Query one person; the arguments are presumably (person name, evaluator
# group) — TODO confirm against Preprocessing.get_person_info.
person_stat = collector.get_person_info("Илюхина Мария", "Непосредственный руководитель")

In [27]:
import plotly.graph_objects as go

#top_labels = ['Оценили позитивно','Оценили негативно']

def _bar_annotations(y_data, x_data):
    """Build the annotation dicts (row labels and percentages) for all bars.

    For every row one label is placed left of the plot area, and one
    percentage label is centred in each segment. The first segment is always
    labelled; later segments only when their share is non-zero.
    """
    percent_font = dict(family='Arial', size=14, color='rgb(0, 0, 0)')
    annotations = []

    for yd, xd in zip(y_data, x_data):
        # Row label, right-aligned just left of the x-axis domain.
        annotations.append(dict(xref='paper', yref='y',
                                x=0.14, y=yd,
                                xanchor='right',
                                text=str(yd),
                                font=dict(family='Arial', size=14,
                                          color='rgb(255, 255, 255)'),
                                showarrow=False, align='right'))

        # Percentage labels: walk the stacked segments and keep the running
        # left edge, so each label sits in the middle of its own segment.
        left_edge = 0
        for position, share in enumerate(xd):
            if position == 0 or share != 0:
                annotations.append(dict(xref='x', yref='y',
                                        x=left_edge + share / 2, y=yd,
                                        # round, not truncate: 0.57 * 100
                                        # is slightly below 57 as a float
                                        text=str(int(round(share * 100))) + '%',
                                        font=percent_font,
                                        showarrow=False))
            left_edge += share

    return annotations


def create_plot(name : str, group : str, collector : Preprocessing):
    """Show a horizontal stacked-bar chart for one person and one group.

    The data comes from ``collector.get_select_vals_for_plot(name, group)``.
    Element 0 of its result is used as the row labels (y values) and element
    1 as the per-row sequences of segment values (x values). The segment
    values are formatted as percentages, so they are presumably fractions of
    1 — TODO confirm against Preprocessing.

    Args:
        name: Person identifier passed through to the collector.
        group: Group identifier passed through to the collector; also used
            as the figure title.
        collector: Object providing ``get_select_vals_for_plot``.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    person_stat = collector.get_select_vals_for_plot(name, group)
    colors = ['rgba(168,209,141,0.8)',
              'rgba(251,114,134,0.8)']

    x_data = person_stat[1]
    y_data = person_stat[0]
    fig = go.Figure()

    # Number of stacked segments per row, taken from the first row; zero
    # when there are no rows at all, so an empty result draws an empty chart
    # instead of raising.
    segment_count = len(x_data[0]) if len(x_data) else 0

    # Traces are added segment by segment (all first segments, then all
    # second ones, ...) so that every segment position shares one colour.
    for i in range(segment_count):
        for xd, yd in zip(x_data, y_data):
            fig.add_trace(go.Bar(
                x=[xd[i]], y=[yd],
                orientation='h',
                marker=dict(
                    # cycle the palette if there are more segments than colours
                    color=colors[i % len(colors)],
                    line=dict(width=0)
                )
            ))

    fig.update_layout(
        xaxis=dict(
            showgrid=True,
            showline=False,
            tickformat= '0.00%',
            showticklabels=True,
            zeroline=False,
            # leave the left 15 % of the paper for the row labels
            domain=[0.15, 1]
        ),
        yaxis=dict(
            showgrid=False,
            showline=False,
            showticklabels=False,
            zeroline=False,
        ),
        barmode='stack',
        paper_bgcolor='rgb(0, 0, 0)',
        plot_bgcolor='rgb(0, 0, 0)',
        margin=dict(l=300, r=10, t=140, b=80),
        showlegend=False,
    )

    annotations = _bar_annotations(y_data, x_data)

    fig.update_layout(annotations=annotations, title=dict(text=group, x=0.5 ,font=dict(size=50, color='rgb(255, 255, 255)')))
    fig.update_traces(width=0.5)
    fig.show()

In [28]:

# Show create_plot's chart for this name/group pair using the shared collector.
create_plot("Губин Павел","Непосредственный руководитель", collector)

In [29]:
# Show create_plot's chart for this name/group pair using the shared collector.
create_plot("Пушкин Иванович Валерий", "Вышестоящий руководитель", collector)

In [30]:
# Show create_plot's chart for this name/group pair using the shared collector.
create_plot("Александр Альфонс", "Функциональный руководитель", collector)