In [1]:
import pandas as pd
import requests
import re
from bs4 import BeautifulSoup

In [2]:
def get_phones():
    res = []
    for i in range(1, 7):
        url = f"https://www.dxomark.com/category/mobile-reviews/page/{i}/"
        web = requests.get(url).content
        soup = BeautifulSoup(web, "lxml")
        for phone in soup.find_all("div",{"class":"tile-image"}):
            res.append(phone.find("a").get("href"))
    return res

def get_scores(url):
    """Extracts all the scores of the DXO-mark camera test for a smartphon3
    parsing the url where its review is"""
    
    scores = {}
    result = requests.get(url)
    web = result.content
    soup = BeautifulSoup(web)
    sections = soup.find_all('div', {'class':'col large-9'}) # 0 photo, 1 zoom, 2 video
    prefixes = ['Photo-', 'Zoom-', 'Video-']
    scores["Device"] = soup.find("span", {"class":"name"}).text
    scores["final_score"]= int(soup.find("div",{"class":"scoreBadgeValue"}).text)
    for k,v in zip(["Photo", "Zoom","Video"], soup.find_all("span",{"class":"value"})):
        scores[k]= int(v.text)
        
    for i, section in enumerate(sections):
        prefix = prefixes[i]
        # buscar las listas que nos interesan con re
        lista = re.findall('\[(.*?)\]', str(section))
        claves = lista[0][1:-1].split("', '") # lista claves (sin strip)
        valores = lista[1].split(',') # lista valores (en strings)
        for j in range(len(claves)):
            scores[prefix + claves[j].strip()] = int(valores[j])
    return scores

def get_data():
    "Creates a dataframe with the scores of each individuals smartphone."
    df = pd.DataFrame()
    for link in get_phones():
        phone = get_scores(link)
        df = df.append(phone, ignore_index=True)
    return df


Probamos las funciones creadas y observamos que funcionan a la perfección.

In [4]:
print(get_phones()[1:5])

['https://www.dxomark.com/samsung-galaxy-s21-fe-5g-snapdragon-camera-review-the-better-s21/', 'https://www.dxomark.com/realme-gt-neo-2-camera-review-good-exposures-in-bright-light-and-indoors/', 'https://www.dxomark.com/vivo-x70-pro-mediatek-camera-review-new-premium-leader/', 'https://www.dxomark.com/vivo-v21-5g-camera-review-good-stills-in-its-segment/']


In [3]:
url1 = 'https://www.dxomark.com/samsung-galaxy-s22-ultra-exynos-camera-test/' 
url2 = 'https://www.dxomark.com/samsung-galaxy-s21-fe-5g-snapdragon-camera-review-the-better-s21/'

data = get_scores(url2)
print(data)

{'Device': 'Samsung Galaxy S21 FE 5G (Snapdragon)', 'final_score': 120, 'Photo': 126, 'Zoom': 63, 'Video': 104, 'Photo-Exposure': 84, 'Photo-Color': 102, 'Photo-Autofocus': 92, 'Photo-Texture': 98, 'Photo-Noise': 73, 'Photo-Artifacts': 67, 'Photo-Night': 56, 'Photo-Bokeh': 60, 'Photo-Preview': 62, 'Zoom-Tele': 75, 'Zoom-Wide': 45, 'Video-Exposure': 95, 'Video-Color': 100, 'Video-Autofocus': 94, 'Video-Texture': 77, 'Video-Noise': 75, 'Video-Artifacts': 71, 'Video-Stabilization': 95}


In [4]:
df = get_data()
df.to_csv("dxomark.csv", sep=";", index=False) #guardamos los datos

In [5]:
df #el dataframe resultante 

Unnamed: 0,Device,Photo,Photo-Artifacts,Photo-Autofocus,Photo-Bokeh,Photo-Color,Photo-Exposure,Photo-Night,Photo-Noise,Photo-Preview,...,Video-Autofocus,Video-Color,Video-Exposure,Video-Noise,Video-Stabilization,Video-Texture,Zoom,Zoom-Tele,Zoom-Wide,final_score
0,Samsung Galaxy S22 Ultra (Exynos),134.0,62.0,94.0,75.0,105.0,99.0,65.0,72.0,68.0,...,107.0,101.0,107.0,94.0,100.0,84.0,86.0,112.0,48.0,131.0
1,Samsung Galaxy S21 FE 5G (Snapdragon),126.0,67.0,92.0,60.0,102.0,84.0,56.0,73.0,62.0,...,94.0,100.0,95.0,75.0,95.0,77.0,63.0,75.0,45.0,120.0
2,Realme GT Neo 2 5G,107.0,62.0,88.0,60.0,87.0,71.0,35.0,59.0,53.0,...,86.0,93.0,90.0,91.0,90.0,64.0,60.0,79.0,32.0,106.0
3,Vivo X70 Pro (MediaTek),139.0,69.0,96.0,65.0,106.0,106.0,60.0,84.0,66.0,...,95.0,103.0,96.0,97.0,101.0,75.0,77.0,103.0,38.0,131.0
4,Vivo V21 5G,112.0,62.0,87.0,55.0,87.0,91.0,35.0,75.0,45.0,...,70.0,89.0,82.0,86.0,87.0,53.0,46.0,57.0,29.0,105.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,Google Pixel 4,120.0,75.0,92.0,60.0,102.0,92.0,35.0,59.0,47.0,...,95.0,97.0,82.0,96.0,95.0,48.0,55.0,91.0,0.0,113.0
104,Huawei P40,115.0,59.0,75.0,65.0,92.0,86.0,55.0,75.0,35.0,...,94.0,71.0,94.0,98.0,82.0,75.0,70.0,97.0,29.0,112.0
105,Oppo Find X2 Neo,106.0,54.0,93.0,55.0,94.0,68.0,39.0,62.0,44.0,...,79.0,81.0,65.0,77.0,93.0,80.0,61.0,81.0,31.0,105.0
106,Sony Xperia 1,89.0,49.0,90.0,50.0,82.0,45.0,25.0,62.0,39.0,...,38.0,82.0,72.0,84.0,92.0,46.0,43.0,46.0,38.0,87.0
