In [1]:
import html
from collections import OrderedDict
from collections import defaultdict
from pathlib import Path
from urllib.parse import quote

import networkx as nx
import pandas as pd
from flashtext import KeywordProcessor
from IPython.display import display, HTML
from ipywidgets import interact_manual, widgets, Layout

## Exploring WikiCSSH

In [2]:
%%time
# cat_title -> list of page_title values, as a pandas Series indexed by category.
cat2pages = (
    pd.read_csv('../data/v1/WikiCSSH_category2page.csv')
    .groupby("cat_title")["page_title"]
    .agg(list)
)

Wall time: 1.97 s


In [3]:
def generated_WikiCSSH_graph():
    """Build the WikiCSSH category hierarchy as a directed graph.

    Reads ``../data/v1/WikiCSSH_category_links_all.csv`` and adds one
    ``parent_cat -> child_cat`` edge per row, in file order.

    Returns:
        nx.DiGraph: graph whose nodes are the values found in the
        ``parent_cat`` and ``child_cat`` columns. No edge attributes are set.
    """
    # Only the two endpoint columns are used to build edges, so skip the rest.
    df_category_links_all = pd.read_csv(
        '../data/v1/WikiCSSH_category_links_all.csv',
        usecols=["parent_cat", "child_cat"],
    )

    G = nx.DiGraph()
    # Pass (parent, child) pairs directly. Going through
    # DataFrame.to_dict(orient="index") would build a large throwaway dict and
    # raise ValueError as soon as a (parent, child) pair appears twice.
    G.add_edges_from(
        zip(df_category_links_all["parent_cat"], df_category_links_all["child_cat"])
    )
    return G

In [4]:
# Build the category graph once; show_context_of_category reads this global.
G = generated_WikiCSSH_graph()

In [5]:
def show_context_of_category(node):
    """Render a category's parents, children and member pages as an HTML table.

    Reads the notebook-level graph ``G`` (category hierarchy) and the
    ``cat2pages`` lookup (category title -> list of page titles).

    Args:
        node: Category title as stored in ``G``,
            e.g. ``"Artificial_intelligence"``.

    Returns:
        IPython.display.HTML: a three-column table (parents | node | children)
        followed by a row of links to the category's pages. If ``node`` is
        neither in ``G`` nor in ``cat2pages``, a short "not found" notice is
        returned instead of raising, so the interactive widget stays usable.
    """
    def wiki_link(title, namespace=""):
        # Titles can contain quotes, "&" or "<" (e.g. "Moravec's_paradox",
        # "Plug_&_Pray"). Percent-encode the URL and HTML-escape the label so
        # neither the single-quoted href attribute nor the markup can break.
        title = str(title)
        href = quote(f"{namespace}{title}", safe="/:(),")
        return f"<a href='https://en.wikipedia.org/wiki/{href}'>{html.escape(title)}</a>"

    in_graph = node in G
    # Categories without any page have no entry in cat2pages.
    pages = cat2pages.get(node, [])
    if not in_graph and len(pages) == 0:
        return HTML(f"<em>Category not found in WikiCSSH: {html.escape(str(node))}</em>")

    parents = list(G.predecessors(node)) if in_graph else []
    children = list(G.successors(node)) if in_graph else []

    table_header = "<thead><tr><th>Parents</th><th>Node</th><th>Children</th></tr></thead>"
    parents_list = "\n".join(f"<li>{wiki_link(i, 'Category:')}</li>" for i in parents)
    children_list = "\n".join(f"<li>{wiki_link(i, 'Category:')}</li>" for i in children)
    node_list = f"<li>{wiki_link(node, 'Category:')}</li>"
    page_list = " | ".join(wiki_link(i) for i in pages)
    table_body = f"""<tbody>
    <tr>
        <td><ul>{parents_list}</ul></td>
        <td style='background-color: pink'>{node_list}</td>
        <td><ul>{children_list}</ul></td>
    </tr>
    <tr><td colspan='3'><strong>Pages</strong><br/>{page_list}</td></tr>
    </tbody>"""
    div = f"""
    <style>.wikicssh td, .wikicssh th {{text-align:left}}</style>
    <div class='wikicssh'>
    <table>{table_header}{table_body}</table>
    </div>"""
    return HTML(div)

In [6]:
# Example: render the parents, children and pages of one category.
show_context_of_category("Artificial_intelligence")

Parents,Node,Children
Areas_of_computer_science Unsolved_problems_in_computer_science,Artificial_intelligence,Fuzzy_logic Machine_learning Turing_tests Rule_engines Robots Computer_vision Logic_programming Open_source_artificial_intelligence Virtual_assistants Cloud_robotics Game_artificial_intelligence AI_accelerators Affective_computing Cognitive_architecture Artificial_intelligence_applications Artificial_immune_systems Robotics Artificial_Intelligence_existential_risk Evolutionary_computation
"Pages User_behavior_analytics | FEDOR_(Armed_AI_bot) | Intelligent_word_recognition | Autonomous_agent | List_of_programming_languages_for_artificial_intelligence | Virtual_intelligence | Radiant_AI | Knowledge-based_recommender_system | Situated | Colloquis | Embodied_agent | Moral_Machine | Winner-take-all_in_action_selection | Onnx | Dartmouth_workshop | Fuzzy_logic | Knowledge_compilation | Music_and_artificial_intelligence | Trenchard_More | Artificial_empathy | Artificial_intelligence,_situated_approach | 3D_reconstruction_from_multiple_images | Roborace | Multi-Agent_Programming_Contest | VaultML | Fred_(chatterbot) | Automated_personal_assistant | Computer_audition | Stochastic_semantic_analysis | Structure_mapping_engine | Artificial_imagination | Game_theory | Algorithmic_probability | API.AI | Percept_(artificial_intelligence) | Maluuba | ADS-AC | SUPS | Automated_Mathematician | Web_intelligence | DeepDream | LIDA_(cognitive_architecture) | Shyster_(expert_system) | Recurrent_neural_network | Artificial_brain | Color_moments | Legal_expert_system | Admissible_heuristic | Reasoning_system | NewsRx | Language_Acquisition_Device_(computer) | Collective_intelligence | Psychology_of_reasoning | Commonsense_reasoning | Symbol_level | Agent_systems_reference_model | Gxc3xb6del_machine | Probabilistic_logic_network | Behavior_informatics | Chess_as_mental_training | Belief-desire-intention_model | KL-ONE | Meca_Sapiens | List_of_artificial_intelligence_projects | Artificial_general_intelligence | Susan_Schneider_(philosopher) | Fast-And-Frugal_trees | Language/action_perspective | Recursive_neural_network | Artificial_intuition | Machine_perception | Nouvelle_AI | Voice_Mate | Babelfy | Loebner_Prize | Type-1_OWA_operators | Gomocup | Epistemic_modal_logic | Instrumental_convergence | Cerebellar_model_articulation_controller | List_of_datasets_for_machine-learning_research | Thompson_sampling | Backward_chaining | IRCF360 | Computer_vision | 
Computer-assisted_proof | Blackboard_system | Qloo | Hierarchical_control_system | Kinect | Enterprise_cognitive_system | Uncanny_valley | Embodied_cognitive_science | Neuro-fuzzy | A.I._Artificial_Intelligence | Ontology_engineering | Knowledge_acquisition | Syman | Behavior_tree_(artificial_intelligence,_robotics_and_control) | Google.ai | ACROSS_Project | International_Conference_on_Autonomous_Agents_and_Multiagent_Systems | AIVA | Neural_computation | Cloud_robotics | Chatterbox_Challenge | Singleton_(global_governance) | Project_Joshua_Blue | 0music | Emily_Howell | Rule-based_system | Knowledge_level | Personoid | Lawbot | Cobweb_(clustering) | Noogenesis | Incremental_heuristic_search | Ontology_learning | Moravec's_paradox | Argumentation_framework | Right_to_explanation | ICarbonX | Knowledge_Based_Software_Assistant | Cognitive_philology | Inductive_programming | The_Leaf_(AI)_Project | Hybrid_intelligent_system | Extremal_optimization | Differentiable_neural_computer | Artificial_intelligence_for_video_surveillance | MNIST_database | Medical_intelligence_and_language_engineering_lab | Contextual_image_classification | Hybrid_neural_network | Applications_of_artificial_intelligence | Oriented_Energy_Filters | Model-based_reasoning | Neural_network_software | Intelligent_agent | Synthetic_Environment_for_Analysis_and_Simulations | Concurrent_MetateM | Open_Information_Extraction | Data_pack | DAYDREAMER | Knowledge-based_configuration | Deductive_classifier | OpenCog | Manifold_integration | Perceptual_computing | Anytime_algorithm | Mivar-based_approach | Neurorobotics | Plug_&_Pray | Zeuthen_strategy | Computational_humor | Puckstering | Schema-agnostic_databases | Fuzzy_agent | Brain_technology | Intel_RealSense | User_illusion | Cognitive_computer | Open_Letter_on_Artificial_Intelligence | Turing_Robot | March_of_the_Machines | NTU_RGB-D_dataset | IJCAI_Computers_and_Thought_Award | Mind-body_problem | Computational_Heuristic_Intelligence | 
Ordered_weighted_averaging_aggregation_operator | Distributed_artificial_intelligence | Darwin_machine | SNePS | Glossary_of_artificial_intelligence | Soft_computing | Intelligent_control | Anticipation_(artificial_intelligence) | Expert_system | Catastrophic_interference | Knowledge-based_systems | Means-ends_analysis | Spreading_activation | POP-11 | Rough_fuzzy_hybridization | Natural_language_understanding | Artificial_psychology | Vaumpus_world | Scilab_Image_Processing | Emospark | Hindsight_optimization | Self-management_(computer_science) | Connectionist_expert_system | Cognitive_infocommunications | Wetware_(brain) | Luminoso | Mycroft_(software) | Principle_of_rationality | Sensorium_Project | MANIC_(Cognitive_Architecture) | Competitions_and_prizes_in_artificial_intelligence | Any-angle_path_planning | Manifold_alignment | Conference_on_Semantics_in_Healthcare_and_Life_Sciences | Cognitive_computing | Natural_language_processing | Informatics | Diagnosis_(artificial_intelligence) | Constructionist_design_methodology | Australian_Artificial_Intelligence_Institute | S_Voice | AI-complete | Generalized_distributive_law | Computational_intelligence | Angel_F | AgentSheets | WordDive | Frame_problem | Intelligent_decision_support_system | Problem_solving | Kuwahara_filter | Artificial_intelligence | Leverhulme_Centre_for_the_Future_of_Intelligence | BabyX | Weak_AI | Autognostics | Bayesian_programming | Outline_of_machine_learning | Rational_agent | And-or_tree | DragonLord_Enterprises,_Inc. 
| CALO | Alesis_Artificial_Intelligence | Neural_modeling_fields | Discovery_system | Histogram_of_oriented_displacements | ASR-complete | Pattern_theory | Explainable_AI | The_Fable_of_Oscar | Automated_reasoning | OpenAIR | Intelligent_database | GENESIS_(software) | Mindpixel | Google | Action_selection | Gabbay's_separation_theorem | Allen_(robot) | Extreme:_Personal_Assistant | Outline_of_artificial_intelligence | Wojciech_Zaremba | Clone_Algo_Inc | Cognitive_tutor | Industrial_artificial_intelligence | Description_logic | Evolving_intelligent_system | Autonomic_computing | Software_agent | Artificial_intelligence_and_law | Human_Problem_Solving | Combs_method | Committee_machine | ICAD_(software) | INDECT | Artificial_consciousness | Frame_language | Computational_human_modeling | Evolutionary_developmental_robotics | BabelNet | Computational_creativity | Conflict_resolution_strategy | OpenIRIS | Cognitive_robotics | Information_space_analysis | Artificial_intelligence_systems_integration | Darkforest | Distributional-Relational_Databases | Smart_objects | Subrata_Dasgupta | Document_mosaicing | Nervous_system_network_models | Robot_lawyer | Information_extraction | Aurora_(novel) | Pedagogical_agent | Simulated_consciousness_in_fiction | K-line_(artificial_intelligence) | 20Q | Artificial_Intelligence_System | KAoS | Ensemble_averaging_(machine_learning) | Computer_Arimaa | Bio-inspired_computing | Type-2_fuzzy_sets_and_systems | Grammar_systems_theory | Attributional_calculus | Symbolic_artificial_intelligence | Decision_list | Progress_in_artificial_intelligence | Autonomic_networking | Winograd_Schema_Challenge | Dynamic_epistemic_logic","Pages User_behavior_analytics | FEDOR_(Armed_AI_bot) | Intelligent_word_recognition | Autonomous_agent | List_of_programming_languages_for_artificial_intelligence | Virtual_intelligence | Radiant_AI | Knowledge-based_recommender_system | Situated | Colloquis | Embodied_agent | Moral_Machine | 
Winner-take-all_in_action_selection | Onnx | Dartmouth_workshop | Fuzzy_logic | Knowledge_compilation | Music_and_artificial_intelligence | Trenchard_More | Artificial_empathy | Artificial_intelligence,_situated_approach | 3D_reconstruction_from_multiple_images | Roborace | Multi-Agent_Programming_Contest | VaultML | Fred_(chatterbot) | Automated_personal_assistant | Computer_audition | Stochastic_semantic_analysis | Structure_mapping_engine | Artificial_imagination | Game_theory | Algorithmic_probability | API.AI | Percept_(artificial_intelligence) | Maluuba | ADS-AC | SUPS | Automated_Mathematician | Web_intelligence | DeepDream | LIDA_(cognitive_architecture) | Shyster_(expert_system) | Recurrent_neural_network | Artificial_brain | Color_moments | Legal_expert_system | Admissible_heuristic | Reasoning_system | NewsRx | Language_Acquisition_Device_(computer) | Collective_intelligence | Psychology_of_reasoning | Commonsense_reasoning | Symbol_level | Agent_systems_reference_model | Gxc3xb6del_machine | Probabilistic_logic_network | Behavior_informatics | Chess_as_mental_training | Belief-desire-intention_model | KL-ONE | Meca_Sapiens | List_of_artificial_intelligence_projects | Artificial_general_intelligence | Susan_Schneider_(philosopher) | Fast-And-Frugal_trees | Language/action_perspective | Recursive_neural_network | Artificial_intuition | Machine_perception | Nouvelle_AI | Voice_Mate | Babelfy | Loebner_Prize | Type-1_OWA_operators | Gomocup | Epistemic_modal_logic | Instrumental_convergence | Cerebellar_model_articulation_controller | List_of_datasets_for_machine-learning_research | Thompson_sampling | Backward_chaining | IRCF360 | Computer_vision | Computer-assisted_proof | Blackboard_system | Qloo | Hierarchical_control_system | Kinect | Enterprise_cognitive_system | Uncanny_valley | Embodied_cognitive_science | Neuro-fuzzy | A.I._Artificial_Intelligence | Ontology_engineering | Knowledge_acquisition | Syman | 
Behavior_tree_(artificial_intelligence,_robotics_and_control) | Google.ai | ACROSS_Project | International_Conference_on_Autonomous_Agents_and_Multiagent_Systems | AIVA | Neural_computation | Cloud_robotics | Chatterbox_Challenge | Singleton_(global_governance) | Project_Joshua_Blue | 0music | Emily_Howell | Rule-based_system | Knowledge_level | Personoid | Lawbot | Cobweb_(clustering) | Noogenesis | Incremental_heuristic_search | Ontology_learning | Moravec's_paradox | Argumentation_framework | Right_to_explanation | ICarbonX | Knowledge_Based_Software_Assistant | Cognitive_philology | Inductive_programming | The_Leaf_(AI)_Project | Hybrid_intelligent_system | Extremal_optimization | Differentiable_neural_computer | Artificial_intelligence_for_video_surveillance | MNIST_database | Medical_intelligence_and_language_engineering_lab | Contextual_image_classification | Hybrid_neural_network | Applications_of_artificial_intelligence | Oriented_Energy_Filters | Model-based_reasoning | Neural_network_software | Intelligent_agent | Synthetic_Environment_for_Analysis_and_Simulations | Concurrent_MetateM | Open_Information_Extraction | Data_pack | DAYDREAMER | Knowledge-based_configuration | Deductive_classifier | OpenCog | Manifold_integration | Perceptual_computing | Anytime_algorithm | Mivar-based_approach | Neurorobotics | Plug_&_Pray | Zeuthen_strategy | Computational_humor | Puckstering | Schema-agnostic_databases | Fuzzy_agent | Brain_technology | Intel_RealSense | User_illusion | Cognitive_computer | Open_Letter_on_Artificial_Intelligence | Turing_Robot | March_of_the_Machines | NTU_RGB-D_dataset | IJCAI_Computers_and_Thought_Award | Mind-body_problem | Computational_Heuristic_Intelligence | Ordered_weighted_averaging_aggregation_operator | Distributed_artificial_intelligence | Darwin_machine | SNePS | Glossary_of_artificial_intelligence | Soft_computing | Intelligent_control | Anticipation_(artificial_intelligence) | Expert_system | Catastrophic_interference | 
Knowledge-based_systems | Means-ends_analysis | Spreading_activation | POP-11 | Rough_fuzzy_hybridization | Natural_language_understanding | Artificial_psychology | Vaumpus_world | Scilab_Image_Processing | Emospark | Hindsight_optimization | Self-management_(computer_science) | Connectionist_expert_system | Cognitive_infocommunications | Wetware_(brain) | Luminoso | Mycroft_(software) | Principle_of_rationality | Sensorium_Project | MANIC_(Cognitive_Architecture) | Competitions_and_prizes_in_artificial_intelligence | Any-angle_path_planning | Manifold_alignment | Conference_on_Semantics_in_Healthcare_and_Life_Sciences | Cognitive_computing | Natural_language_processing | Informatics | Diagnosis_(artificial_intelligence) | Constructionist_design_methodology | Australian_Artificial_Intelligence_Institute | S_Voice | AI-complete | Generalized_distributive_law | Computational_intelligence | Angel_F | AgentSheets | WordDive | Frame_problem | Intelligent_decision_support_system | Problem_solving | Kuwahara_filter | Artificial_intelligence | Leverhulme_Centre_for_the_Future_of_Intelligence | BabyX | Weak_AI | Autognostics | Bayesian_programming | Outline_of_machine_learning | Rational_agent | And-or_tree | DragonLord_Enterprises,_Inc. 
| CALO | Alesis_Artificial_Intelligence | Neural_modeling_fields | Discovery_system | Histogram_of_oriented_displacements | ASR-complete | Pattern_theory | Explainable_AI | The_Fable_of_Oscar | Automated_reasoning | OpenAIR | Intelligent_database | GENESIS_(software) | Mindpixel | Google | Action_selection | Gabbay's_separation_theorem | Allen_(robot) | Extreme:_Personal_Assistant | Outline_of_artificial_intelligence | Wojciech_Zaremba | Clone_Algo_Inc | Cognitive_tutor | Industrial_artificial_intelligence | Description_logic | Evolving_intelligent_system | Autonomic_computing | Software_agent | Artificial_intelligence_and_law | Human_Problem_Solving | Combs_method | Committee_machine | ICAD_(software) | INDECT | Artificial_consciousness | Frame_language | Computational_human_modeling | Evolutionary_developmental_robotics | BabelNet | Computational_creativity | Conflict_resolution_strategy | OpenIRIS | Cognitive_robotics | Information_space_analysis | Artificial_intelligence_systems_integration | Darkforest | Distributional-Relational_Databases | Smart_objects | Subrata_Dasgupta | Document_mosaicing | Nervous_system_network_models | Robot_lawyer | Information_extraction | Aurora_(novel) | Pedagogical_agent | Simulated_consciousness_in_fiction | K-line_(artificial_intelligence) | 20Q | Artificial_Intelligence_System | KAoS | Ensemble_averaging_(machine_learning) | Computer_Arimaa | Bio-inspired_computing | Type-2_fuzzy_sets_and_systems | Grammar_systems_theory | Attributional_calculus | Symbolic_artificial_intelligence | Decision_list | Progress_in_artificial_intelligence | Autonomic_networking | Winograd_Schema_Challenge | Dynamic_epistemic_logic","Pages User_behavior_analytics | FEDOR_(Armed_AI_bot) | Intelligent_word_recognition | Autonomous_agent | List_of_programming_languages_for_artificial_intelligence | Virtual_intelligence | Radiant_AI | Knowledge-based_recommender_system | Situated | Colloquis | Embodied_agent | Moral_Machine | 
Winner-take-all_in_action_selection | Onnx | Dartmouth_workshop | Fuzzy_logic | Knowledge_compilation | Music_and_artificial_intelligence | Trenchard_More | Artificial_empathy | Artificial_intelligence,_situated_approach | 3D_reconstruction_from_multiple_images | Roborace | Multi-Agent_Programming_Contest | VaultML | Fred_(chatterbot) | Automated_personal_assistant | Computer_audition | Stochastic_semantic_analysis | Structure_mapping_engine | Artificial_imagination | Game_theory | Algorithmic_probability | API.AI | Percept_(artificial_intelligence) | Maluuba | ADS-AC | SUPS | Automated_Mathematician | Web_intelligence | DeepDream | LIDA_(cognitive_architecture) | Shyster_(expert_system) | Recurrent_neural_network | Artificial_brain | Color_moments | Legal_expert_system | Admissible_heuristic | Reasoning_system | NewsRx | Language_Acquisition_Device_(computer) | Collective_intelligence | Psychology_of_reasoning | Commonsense_reasoning | Symbol_level | Agent_systems_reference_model | Gxc3xb6del_machine | Probabilistic_logic_network | Behavior_informatics | Chess_as_mental_training | Belief-desire-intention_model | KL-ONE | Meca_Sapiens | List_of_artificial_intelligence_projects | Artificial_general_intelligence | Susan_Schneider_(philosopher) | Fast-And-Frugal_trees | Language/action_perspective | Recursive_neural_network | Artificial_intuition | Machine_perception | Nouvelle_AI | Voice_Mate | Babelfy | Loebner_Prize | Type-1_OWA_operators | Gomocup | Epistemic_modal_logic | Instrumental_convergence | Cerebellar_model_articulation_controller | List_of_datasets_for_machine-learning_research | Thompson_sampling | Backward_chaining | IRCF360 | Computer_vision | Computer-assisted_proof | Blackboard_system | Qloo | Hierarchical_control_system | Kinect | Enterprise_cognitive_system | Uncanny_valley | Embodied_cognitive_science | Neuro-fuzzy | A.I._Artificial_Intelligence | Ontology_engineering | Knowledge_acquisition | Syman | 
Behavior_tree_(artificial_intelligence,_robotics_and_control) | Google.ai | ACROSS_Project | International_Conference_on_Autonomous_Agents_and_Multiagent_Systems | AIVA | Neural_computation | Cloud_robotics | Chatterbox_Challenge | Singleton_(global_governance) | Project_Joshua_Blue | 0music | Emily_Howell | Rule-based_system | Knowledge_level | Personoid | Lawbot | Cobweb_(clustering) | Noogenesis | Incremental_heuristic_search | Ontology_learning | Moravec's_paradox | Argumentation_framework | Right_to_explanation | ICarbonX | Knowledge_Based_Software_Assistant | Cognitive_philology | Inductive_programming | The_Leaf_(AI)_Project | Hybrid_intelligent_system | Extremal_optimization | Differentiable_neural_computer | Artificial_intelligence_for_video_surveillance | MNIST_database | Medical_intelligence_and_language_engineering_lab | Contextual_image_classification | Hybrid_neural_network | Applications_of_artificial_intelligence | Oriented_Energy_Filters | Model-based_reasoning | Neural_network_software | Intelligent_agent | Synthetic_Environment_for_Analysis_and_Simulations | Concurrent_MetateM | Open_Information_Extraction | Data_pack | DAYDREAMER | Knowledge-based_configuration | Deductive_classifier | OpenCog | Manifold_integration | Perceptual_computing | Anytime_algorithm | Mivar-based_approach | Neurorobotics | Plug_&_Pray | Zeuthen_strategy | Computational_humor | Puckstering | Schema-agnostic_databases | Fuzzy_agent | Brain_technology | Intel_RealSense | User_illusion | Cognitive_computer | Open_Letter_on_Artificial_Intelligence | Turing_Robot | March_of_the_Machines | NTU_RGB-D_dataset | IJCAI_Computers_and_Thought_Award | Mind-body_problem | Computational_Heuristic_Intelligence | Ordered_weighted_averaging_aggregation_operator | Distributed_artificial_intelligence | Darwin_machine | SNePS | Glossary_of_artificial_intelligence | Soft_computing | Intelligent_control | Anticipation_(artificial_intelligence) | Expert_system | Catastrophic_interference | 
Knowledge-based_systems | Means-ends_analysis | Spreading_activation | POP-11 | Rough_fuzzy_hybridization | Natural_language_understanding | Artificial_psychology | Vaumpus_world | Scilab_Image_Processing | Emospark | Hindsight_optimization | Self-management_(computer_science) | Connectionist_expert_system | Cognitive_infocommunications | Wetware_(brain) | Luminoso | Mycroft_(software) | Principle_of_rationality | Sensorium_Project | MANIC_(Cognitive_Architecture) | Competitions_and_prizes_in_artificial_intelligence | Any-angle_path_planning | Manifold_alignment | Conference_on_Semantics_in_Healthcare_and_Life_Sciences | Cognitive_computing | Natural_language_processing | Informatics | Diagnosis_(artificial_intelligence) | Constructionist_design_methodology | Australian_Artificial_Intelligence_Institute | S_Voice | AI-complete | Generalized_distributive_law | Computational_intelligence | Angel_F | AgentSheets | WordDive | Frame_problem | Intelligent_decision_support_system | Problem_solving | Kuwahara_filter | Artificial_intelligence | Leverhulme_Centre_for_the_Future_of_Intelligence | BabyX | Weak_AI | Autognostics | Bayesian_programming | Outline_of_machine_learning | Rational_agent | And-or_tree | DragonLord_Enterprises,_Inc. 
| CALO | Alesis_Artificial_Intelligence | Neural_modeling_fields | Discovery_system | Histogram_of_oriented_displacements | ASR-complete | Pattern_theory | Explainable_AI | The_Fable_of_Oscar | Automated_reasoning | OpenAIR | Intelligent_database | GENESIS_(software) | Mindpixel | Google | Action_selection | Gabbay's_separation_theorem | Allen_(robot) | Extreme:_Personal_Assistant | Outline_of_artificial_intelligence | Wojciech_Zaremba | Clone_Algo_Inc | Cognitive_tutor | Industrial_artificial_intelligence | Description_logic | Evolving_intelligent_system | Autonomic_computing | Software_agent | Artificial_intelligence_and_law | Human_Problem_Solving | Combs_method | Committee_machine | ICAD_(software) | INDECT | Artificial_consciousness | Frame_language | Computational_human_modeling | Evolutionary_developmental_robotics | BabelNet | Computational_creativity | Conflict_resolution_strategy | OpenIRIS | Cognitive_robotics | Information_space_analysis | Artificial_intelligence_systems_integration | Darkforest | Distributional-Relational_Databases | Smart_objects | Subrata_Dasgupta | Document_mosaicing | Nervous_system_network_models | Robot_lawyer | Information_extraction | Aurora_(novel) | Pedagogical_agent | Simulated_consciousness_in_fiction | K-line_(artificial_intelligence) | 20Q | Artificial_Intelligence_System | KAoS | Ensemble_averaging_(machine_learning) | Computer_Arimaa | Bio-inspired_computing | Type-2_fuzzy_sets_and_systems | Grammar_systems_theory | Attributional_calculus | Symbolic_artificial_intelligence | Decision_list | Progress_in_artificial_intelligence | Autonomic_networking | Winograd_Schema_Challenge | Dynamic_epistemic_logic"


In [7]:
# Interactive version: type a category title, then press the button to render it.
interact_manual(show_context_of_category, node="Computer_vision");

interactive(children=(Text(value='Computer_vision', description='node'), Button(description='Run Interact', st…

## Tagging text using WikiCSSH

In [8]:
%%time
def get_WikiCSSH_tagger():
    """Build the keyword tagger and the page -> categories lookup.

    Returns:
        tuple: ``(processor, page2cats)``. ``processor`` is a flashtext
        ``KeywordProcessor`` loaded with the category names and the page
        redirect surface forms; ``page2cats`` maps each ``page_title`` to the
        list of ``cat_title`` values it is paired with in
        ``WikiCSSH_category2page.csv``.
    """
    category2page = pd.read_csv('../data/v1/WikiCSSH_category2page.csv')
    page2cats = category2page.groupby("page_title")["cat_title"].agg(list).to_dict()

    processor = KeywordProcessor()

    # Categories: the clean name is "Category:<title>"; the surface form is the
    # lower-cased title with underscores turned into spaces.
    category_titles = pd.read_csv("../data/v1/WikiCSSH_categories.csv").category.values
    category_keywords = {}
    for title in category_titles:
        category_keywords[f'Category:{title}'] = [title.lower().replace("_", " ")]
    processor.add_keywords_from_dict(category_keywords)

    # Pages: first column is the clean name, last column the surface form.
    redirects = pd.read_csv('../data/v1/WikiCSSH_page2redirect.csv')
    for record in redirects.values:
        page_title, surface = record[0], record[-1]
        # A float in the last column (pandas' missing-value marker) means no
        # surface form was given; fall back to the page title itself.
        if isinstance(surface, float):
            surface = page_title
        processor.add_keyword(surface.lower().replace("_", " "), page_title)

    return processor, page2cats

Wall time: 0 ns


In [9]:
%%time
# Build the tagger and the page -> categories lookup once; get_html reads page2cats as a global.
processor, page2cats = get_WikiCSSH_tagger()

Wall time: 48.8 s


In [10]:
# Sample document (a workshop call for papers) used to demonstrate the tagger below.
text = """In the last decade, we experienced an urgent need for a flexible, context-sensitive, fine-grained, and machine-actionable representation of scholarly knowledge and corresponding infrastructures for knowledge curation, publishing and processing. Such technical infrastructures are becoming increasingly popular in representing scholarly knowledge as structured, interlinked, and semantically rich Scientific Knowledge Graphs (SKG). Knowledge graphs are large networks of entities and relationships, usually expressed in W3C standards such as OWL and RDF. SKGs focus on the scholarly domain and describe the actors (e.g., authors, organizations), the documents (e.g., publications, patents), and the research knowledge (e.g., research topics, tasks, technologies) in this space as well as their reciprocal relationships. These resources provide substantial benefits to researchers, companies, and policymakers by powering several data-driven services for navigating, analysing, and making sense of research dynamics. Some examples include Microsoft Academic Graph (MAG), Open Academic Graph (combining MAG and AMiner), ScholarlyData, PID Graph, Open Research Knowledge Graph, OpenCitations, and OpenAIRE research graph. Current challenges in this area include: i) the design of ontologies able to conceptualise scholarly knowledge, ii) (semi-)automatic extraction of entities and concepts, integration of information from heterogeneous sources, identification of duplicates, finding connections between entities, and iii) the development of new services using this data, that allow to explore this information, measure research impact and accelerate science. 
This workshop aims at bringing together researchers and practitioners from different fields (including, but not limited to, Digital Libraries, Information Extraction, Machine Learning, Semantic Web, Knowledge Engineering, Natural Language Processing, Scholarly Communication, and Bibliometrics) in order to explore innovative solutions and ideas for the production and consumption of Scientific Knowledge Graphs (SKGs)."""

In [11]:
# Raw matches as (clean_name, start, end) tuples, where start/end are character offsets into `text`.
processor.extract_keywords(text, span_info=True)

[('Experience', 23, 34),
 ('Granularity', 85, 97),
 ('Scholarly_method', 140, 149),
 ('Knowledge', 150, 159),
 ('Knowledge', 198, 207),
 ('Scholarly_method', 326, 335),
 ('Knowledge', 336, 345),
 ('Semantics', 378, 390),
 ('Knowledge', 407, 416),
 ('Category:Graphs', 417, 423),
 ('Knowledge', 431, 440),
 ('Category:Graphs', 441, 447),
 ('Entity', 470, 478),
 ('World_Wide_Web_Consortium', 519, 532),
 ('Scholarly_method', 572, 581),
 ('Document', 649, 658),
 ('Research', 698, 706),
 ('Knowledge', 707, 716),
 ('Research', 724, 732),
 ('Category:Space', 770, 775),
 ('Research', 867, 878),
 ('Business', 880, 889),
 ('Research', 996, 1004),
 ('CONFIG.SYS', 1029, 1036),
 ('Microsoft_Academic', 1037, 1055),
 ('Academy_(educational_institution)', 1074, 1082),
 ('Open_research', 1143, 1156),
 ('Ontology_(information_science)', 1157, 1172),
 ('Research', 1202, 1210),
 ('Category:Area', 1245, 1249),
 ('CONFIG.SYS', 1250, 1257),
 ('Category:Design', 1266, 1272),
 ('Ontology', 1276, 1286),
 ('Concep

In [12]:
def get_html(text, processor):
    """Tag WikiCSSH keywords in ``text`` and summarise the matched categories.

    Every matched span is turned into a link to the corresponding Wikipedia
    page (or category page), and each match adds one count to every category
    it maps to. Page matches are mapped to categories through the
    notebook-level ``page2cats`` dict; matches whose name starts with
    ``"Category:"`` count for that category directly.

    Args:
        text: Plain-text document to annotate.
        processor: Object whose ``extract_keywords(text, span_info=True)``
            returns ``(entity, start, end)`` tuples, such as the flashtext
            ``KeywordProcessor`` built by ``get_WikiCSSH_tagger``.

    Returns:
        IPython.display.HTML: the tagged document followed by the matched
        categories with their counts, most frequent first.
    """
    category_prefix = "Category:"

    def wiki_href(title):
        # Percent-encode so quotes/ampersands in a title cannot terminate the
        # single-quoted href attribute.
        return "https://en.wikipedia.org/wiki/" + quote(str(title), safe="/:(),")

    spans = processor.extract_keywords(text, span_info=True)
    prev = 0
    parts = []
    category_counts = defaultdict(int)
    for entity, start, end in spans:
        if entity.startswith(category_prefix):
            # Strip only the leading prefix, not every occurrence.
            entity_cats = [entity[len(category_prefix):]]
        else:
            entity_cats = page2cats.get(entity, [])
        for cat in entity_cats:
            category_counts[cat] += 1
        if start > prev:
            # The document text is user input: escape it before embedding.
            parts.append(html.escape(text[prev:start], quote=False))
        parts.append(
            f"<a href='{wiki_href(entity)}' title='{html.escape(entity)}'>"
            f"{html.escape(text[start:end], quote=False)}</a>"
        )
        prev = end
    # Keep the text that follows the last match; without this the rendered
    # document is cut off at the final keyword.
    parts.append(html.escape(text[prev:], quote=False))

    tagged_doc = "".join(parts).replace("\n", "<br/>")
    pred_categories = " | ".join([
        f"<a href='{wiki_href(category_prefix + str(k))}' title='{html.escape(str(k))}'>"
        f"{html.escape(str(k), quote=False)}</a> ({v})"
        for k, v in sorted(category_counts.items(), key=lambda x: x[1], reverse=True)
    ])
    final_div = f"""<div>
    <div>
        <h3>Tagged document:</h3>
        {tagged_doc}
    </div>
    <div>
        <h3>Predicted categories:</h3>
        {pred_categories}
    </div>
    </div>"""
    return HTML(final_div)
    

In [13]:
# Render the tagged sample document together with its category counts.
display(get_html(text, processor))

In [14]:
# Editable text box pre-filled with the sample document; pressing the button
# re-runs the tagger on whatever the box currently contains.
text_area_widget = widgets.Textarea(
    value=text,
    placeholder="Type your text here",
    description='String:',
    disabled=False,
    layout=Layout(width="90%")
)
text_area_widget.rows = 10
# The lambda's parameter must be called `text` to match the keyword below.
interact_manual(lambda text: get_html(text, processor), text=text_area_widget);

interactive(children=(Textarea(value='In the last decade, we experienced an urgent need for a flexible, contex…