In [45]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import count, col, to_date, sum

# Start (or reuse) the Spark session shared by every report below.
spark = SparkSession.builder.appName('log_analysis').getOrCreate()

# Load the web-server access log; the first CSV row supplies column names.
# No schema is supplied or inferred here, so columns keep Spark's CSV
# default type (string).
df = spark.read.csv('/content/web_server_logs.csv', header=True)

# Task 1: count requests per client IP, then keep the ten busiest addresses.
first_task_p_1 = (
    df.groupBy('ip')
    .agg(count('*').alias('requests_count'))
)
first_task = (
    first_task_p_1
    .orderBy('requests_count', ascending=False)
    .limit(10)
)
print('Top 10 active IP adresses:')
first_task.show()

# Task 2: number of requests issued with each HTTP method.
requests_per_method = count('*').alias('method_count')
second_task = df.groupBy('method').agg(requests_per_method)
print('Request count by HTTP method:')
second_task.show()

# Task 3: how many requests were answered with status code 404.
is_not_found = col('response_code') == 404
third_task_p_1 = df.where(is_not_found)
third_task = third_task_p_1.count()
print(f'Number of 404 response codes: {third_task}')

# Task 4: total response bytes served per calendar day (first 15 days).
#
# The day must come from the request `timestamp` column. Parsing
# `response_size` (a byte count such as "3122") as a date yields NULL for
# every row, which collapses the whole report into a single NULL group.
fourth_task = df.withColumn('date', to_date(col('timestamp')))

# `sum` here is pyspark.sql.functions.sum (imported at the top of the file),
# not the Python builtin. The CSV is read without a schema, so the size is
# cast to long explicitly to get exact integer totals instead of relying on
# Spark's implicit string-to-double coercion.
total_response_size = (
    fourth_task.groupBy('date')
    .agg(sum(col('response_size').cast('long')).alias('total_response_size'))
    .orderBy('date')
    .limit(15)
)
print('Total response size by day:')
total_response_size.show()

# All reports above have been printed; stop the Spark session.
spark.stop()

+---------------+--------------------+------+--------------------+-------------+-------------+
|             ip|           timestamp|method|                 url|response_code|response_size|
+---------------+--------------------+------+--------------------+-------------+-------------+
| 95.224.114.241|2025-02-13T20:46:...|  POST|   posts/search/tags|          301|         3122|
|  109.117.54.10|2025-01-21T15:02:...|   GET|          categories|          404|         2647|
|  131.95.11.138|2025-01-12T14:08:...|   PUT|                 tag|          301|         8918|
|  55.204.174.65|2025-01-10T17:07:...|   GET|           posts/tag|          200|         6573|
|115.218.165.164|2025-01-06T15:13:...|  POST|        explore/list|          500|         6482|
| 26.193.100.157|2025-01-07T10:35:...|DELETE|                main|          500|         5216|
|   181.43.19.25|2025-02-04T05:20:...|   PUT|category/posts/ex...|          200|         3338|
|  19.143.90.145|2025-01-04T00:04:...|   PUT|    t

# Новый раздел