In [12]:
import pandas as pd

# Load the CSV file into a DataFrame. pandas treats the file's first line as a
# provisional header; the first data row is promoted to the header below.
# NOTE(review): the rendered head() shows mojibake in voice_to_text
# (e.g. 'Contin√∫an'), so the text was presumably mis-decoded somewhere
# upstream -- confirm the source file's encoding.
example_df = pd.read_csv('example_coding_sheet.csv')

# Reset the index to ensure it is in default order
example_df = example_df.reset_index(drop=True)

# Set the first row as the column headers
example_df.columns = example_df.iloc[0]
# Assigning a row Series as the header also copies the row label (0) into
# columns.name, which renders as a stray "0" above the index; clear it.
example_df.columns.name = None

# Drop the first row as it is now being used as the header
example_df = example_df.drop(0).reset_index(drop=True)

# Remove unnamed columns (those without meaningful headers).
# A blank cell in the promoted header row is NaN rather than "Unnamed: N";
# calling .str.contains on it yields NaN and `~` then raises TypeError, so
# missing labels are detected explicitly and treated as unnamed as well.
header_labels = pd.Series(example_df.columns, dtype='object')
has_no_header = header_labels.isna() | header_labels.astype(str).str.contains('^Unnamed')
# .copy() makes the filtered frame independent, so the column assignments
# below cannot trigger SettingWithCopyWarning.
example_df = example_df.loc[:, ~has_no_header.to_numpy()].copy()

# Handle missing values: engagement counts that are missing or non-numeric
# are coerced to NaN, replaced with 0 and stored as integers.
# NOTE(review): comment_count appears in the rendered output but is not
# converted here -- confirm whether it should be added to this list.
count_columns = ['like_count', 'share_count', 'view_count']
for count_col in count_columns:
    example_df[count_col] = (
        pd.to_numeric(example_df[count_col], errors='coerce')
        .fillna(0)
        .astype(int)
    )

# Fill missing values in textual columns with default values
example_df['username'] = example_df['username'].fillna('Unknown')
example_df['video_description'] = example_df['video_description'].fillna('No description')

# Convert 'create_time' to a datetime object (unparseable values become NaT)
example_df['create_time'] = pd.to_datetime(example_df['create_time'], errors='coerce')

# Remove any duplicate rows
example_df = example_df.drop_duplicates()

# Display the cleaned DataFrame using print or display in a notebook
example_df.head()  # or print(example_df.head())


Unnamed: 0,EVENT,create_time,like_count,share_count,username,video_description,view_count,comment_count,hashtag_names,voice_to_text,...,Primary contemporary news value,Rhetorical Mode,Content Focus,Visual Source Material,Visual Structure,Function of Text Overlays,Does the video contain any non-news digressions or asides?,Creator Persona,Does it appear that the creator is engaged in authentic sensemaking and/or citizen journalist activities?,Use this space to make notes
0,maui,2023-08-12 09:16:00,25,0,eliaschavez058,#maui #hawaii #mauifire #hermosospaisajes #tik...,10,2,"['fire', 'disneyland', 'disney', 'lasvegas', '...",,...,,,,,,,,,,
1,maui,2023-08-12 20:40:00,191,5,madwyt3hnky,I mean come on dude #CapCut #maui #hawai #wild...,4416,7,"['wildfire', 'maui', 'hawai', 'hammerofdawn', ...","Tell me I'm crazy, but more than likely I am not",...,timeliness,Argumentation/Persuasion: Presenting a positio...,Broader Implications,,,descriptive text: sets more detailed context; ...,YES,,Maybe,
2,maui,2023-08-14 21:53:00,127,0,wendycastillo.90,#hawaii #lahaina #fire #usa #news #us,657,7,"['fire', 'usa', 'hawaii', 'us', 'news', 'lahai...",Contin√∫an los incendios imparables en haw√°i ...,...,,,,,,,,,,
3,ohio,2023-02-17 13:58:00,2,0,ownit3,New Thomas The Train Episode? #eastpalestine #...,4,0,"['train', 'trains', 'eastpalestine', 'thomastg...",,...,timeliness,N/A or Other,Broader Implications,,,N/A - no text overlay,Maybe,,Maybe,
4,ohio,2023-02-17 19:39:00,2,0,everlastinglite,#norfolk #norfolksouthern #train #ohio This s...,91,1,"['train', 'ohio', 'norfolk', 'norfolksouthern']",,...,timeliness,Exposition: Explaining or Informing. Presentin...,Human/Human-Health Impact,,,"topic text: sets some brief context, usually o...",NO,,YES,


In [None]:
import matplotlib.pyplot as plt

# The coded news value lives in the 'Primary contemporary news value' column.
# Copy it into a shorter 'news_value' column, labelling rows that have no
# value in that column as 'Unknown' so they still appear in the chart.
example_df['news_value'] = example_df['Primary contemporary news value'].fillna('Unknown')

# Count the occurrences of each news value (sorted most frequent first)
news_value_counts = example_df['news_value'].value_counts()

# Plot the bar chart on an explicitly created figure/axes pair so the cell
# does not depend on whatever pyplot considers the "current" figure.
fig, ax = plt.subplots(figsize=(10, 6))
news_value_counts.plot(kind='bar', color='skyblue', ax=ax)

# Add title and labels
ax.set_title('Distribution of News Values in Crisis-Related TikTok Content')
ax.set_xlabel('News Values')
ax.set_ylabel('Frequency')

# Slant the category labels and right-align them so long names stay legible
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

# Display the chart
fig.tight_layout()
plt.show()
