# Import Required Libraries
Import the necessary libraries: time, pandas, selenium, and requests.

In [1]:
# Import Required Libraries

# Import time for handling time-related tasks
import time

# Import pandas for data manipulation and analysis
import pandas as pd

# Import selenium for web scraping
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Import requests for making HTTP requests
import requests



# Define Function to Get Data from Jisilu
Define the function `get_jsl_data(url)`, which opens a Jisilu page in Safari via Selenium and returns the first HTML table on the page as a pandas DataFrame.

In [2]:
# Define Function to Get Data from Jisilu

def get_jsl_data(url):
    """
    Fetch data from Jisilu using Selenium and return it as a pandas DataFrame.

    The page is opened in Safari, the first ``<table>`` element is located,
    and its HTML is parsed with ``pd.read_html``.

    Parameters:
    url (str): The URL of the Jisilu page to fetch data from.

    Returns:
    pd.DataFrame: DataFrame parsed from the first table found on the page.

    Raises:
    selenium.common.exceptions.TimeoutException: If no table appears
        within 10 seconds.
    """
    from io import StringIO

    # Initialize the Safari WebDriver
    driver = webdriver.Safari()

    try:
        # Open the URL
        driver.get(url)

        # Wait until the data table is present; ``until`` returns the located
        # element, so no second lookup is needed.
        table = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "//table"))
        )

        # Get the HTML content of the table while the browser is still open
        table_html = table.get_attribute('outerHTML')

    finally:
        # Close the WebDriver
        driver.quit()

    # Passing a literal HTML string to read_html is deprecated in recent
    # pandas releases; wrapping it in StringIO works on old and new versions.
    return pd.read_html(StringIO(table_html))[0]

# Login to Jisilu Using Safari
Use the Safari WebDriver to log in to Jisilu and collect the session cookies.

In [3]:
# Login to Jisilu Using Safari

def login_to_jisilu(username, password):
    """
    Sign in to Jisilu through Safari and hand back the session cookies.

    Parameters:
    username (str): Account name typed into the ``user_name`` field.
    password (str): Password typed into the ``password`` field.

    Returns:
    dict: Mapping of cookie name to cookie value taken from the browser
        session after the post-login page element has appeared.
    """
    user_field = (By.NAME, "user_name")
    pass_field = (By.NAME, "password")
    submit_button = (By.XPATH, "//button[@type='submit']")
    logged_in_marker = (By.XPATH, "//div[@class='user-info']")

    # Start a Safari session; it is always shut down in the finally clause.
    browser = webdriver.Safari()

    try:
        browser.get("https://www.jisilu.cn/account/login/")

        # Block (up to 10 s) until the login form has rendered.
        WebDriverWait(browser, 10).until(
            EC.presence_of_element_located(user_field)
        )

        # Look up both inputs first, then fill them in.
        user_box = browser.find_element(*user_field)
        pass_box = browser.find_element(*pass_field)
        user_box.send_keys(username)
        pass_box.send_keys(password)

        # Submit the form.
        browser.find_element(*submit_button).click()

        # Block (up to 10 s) until the logged-in page element shows up.
        WebDriverWait(browser, 10).until(
            EC.presence_of_element_located(logged_in_marker)
        )

        # Flatten the cookie records into a name -> value mapping.
        return {item['name']: item['value'] for item in browser.get_cookies()}

    finally:
        browser.quit()

# Fetch Data from Jisilu
Use the requests library to fetch data from Jisilu using the obtained cookies.

In [4]:
# Fetch Data from Jisilu

def fetch_data_from_jisilu(url, cookies, timeout=30):
    """
    Fetch data from Jisilu using the requests library and the provided cookies.

    Parameters:
    url (str): The URL of the Jisilu page to fetch data from.
    cookies (dict): Dictionary containing the session cookies.
    timeout (float): Seconds to wait for the server before giving up.
        Without a timeout ``requests`` can block indefinitely.

    Returns:
    pd.DataFrame: DataFrame parsed from the first HTML table in the response.

    Raises:
    requests.HTTPError: If the response status is anything other than 200.
    requests.Timeout: If the server does not answer within ``timeout``.
    """
    from io import StringIO

    # Send a GET request to the URL with the cookies
    response = requests.get(url, cookies=cookies, timeout=timeout)

    # Raises for 4xx/5xx responses.
    response.raise_for_status()

    # raise_for_status() is a no-op for non-error codes such as 204, which
    # previously made this function return None silently; fail loudly instead.
    if response.status_code != 200:
        raise requests.HTTPError(
            f"Unexpected status code {response.status_code} for {url}",
            response=response,
        )

    # Passing a literal HTML string to read_html is deprecated in recent
    # pandas releases; wrapping it in StringIO works on old and new versions.
    return pd.read_html(StringIO(response.text))[0]

# Process and Save Data
Clean the fetched DataFrame (rows with missing values are dropped) and save the result as a CSV file.

In [5]:
# Process and Save Data

def process_and_save_data(df, file_path):
    """
    Clean a fetched table and write it to disk as CSV.

    Parameters:
    df (pd.DataFrame): The fetched data. It is not modified; cleaning
        produces a new frame.
    file_path (str): Destination path of the CSV file.
    """
    # Cleaning step: keep only rows without any missing value, then write
    # the result without the index column. Extend the chain here if more
    # processing is needed.
    df.dropna().to_csv(file_path, index=False)

# Example usage:
# url = "https://www.jisilu.cn/data/some_data_page/"
# cookies = login_to_jisilu("your_username", "your_password")
# df = fetch_data_from_jisilu(url, cookies)
# process_and_save_data(df, "output.csv")

In [6]:
import os
from getpass import getpass

url = "https://www.jisilu.cn/data/cbnew/cb_list_new/"

# Credentials must never be stored in the notebook: read them from the
# environment, falling back to an interactive prompt (the password prompt
# does not echo what is typed).
jsl_username = os.environ.get("JISILU_USERNAME") or input("Jisilu username: ")
jsl_password = os.environ.get("JISILU_PASSWORD") or getpass("Jisilu password: ")

cookies = login_to_jisilu(jsl_username, jsl_password)
df = fetch_data_from_jisilu(url, cookies)
process_and_save_data(df, "output.csv")

SessionNotCreatedException: Message: Could not create a session: You must enable 'Allow remote automation' in the Developer section of Safari Settings to control Safari via WebDriver.


In [7]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
import requests

# Function to get data from Jisilu
_JSL_LOGIN_URL = 'https://www.jisilu.cn/account/login/'
_JSL_DATA_URL = 'https://www.jisilu.cn/data/cbnew/cb_list_new/'

# Absolute XPath of the login form. It is tied to the exact page layout and
# will need updating if the site markup changes.
_JSL_LOGIN_FORM_XPATH = '/html/body/div[3]/div/div/div[1]/div[1]/div[3]/form'

# Source field name -> output column name. The insertion order of this
# mapping is also the column order of the returned DataFrame.
_JSL_COLUMN_NAMES = {
    "bond_id": "代码",
    "bond_nm": "转债名称",
    "price": "现价",
    "increase_rt": "涨跌幅",
    "stock_id": "正股代码",
    "stock_nm": "正股名称",
    "sprice": "正股价",
    "sincrease_rt": "正股涨跌",
    "pb": "正股PB",
    "convert_price": "转股价",
    "convert_value": "转股价值",
    "premium_rt": "转股溢价率",
    "dblow": "双低",
    "rating_cd": "评级",
    "put_convert_price": "回售触发价",
    "force_redeem_price": "强赎触发价",
    "convert_amt_ratio": "转债流通市值占比",
    "maturity_dt": "到期时间",
    "year_left": "剩余年限",
    "curr_iss_amt": "剩余规模-亿",
    "volume": "成交额-万",
    "turnover_rt": "换手率",
    "ytm_rt": "到期税前收益",
    "real_force_redeem_price": "实时赎回价",
    "ref_yield_info": "纯债价值",
    "option_tip": "期权价值",
}


def _jsl_login_cookies(username, password):
    """Log in to Jisilu with headless Chrome and return the browser cookies."""
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')  # Run in headless mode
    driver = webdriver.Chrome(options=options)

    try:
        # Open Jisilu login page
        driver.get(_JSL_LOGIN_URL)
        form = _JSL_LOGIN_FORM_XPATH

        # Enter login credentials
        driver.find_element(By.XPATH, f'{form}/div[2]/input').send_keys(username)
        driver.find_element(By.XPATH, f'{form}/div[3]/input').send_keys(password)

        # Tick both checkboxes, then submit the form
        driver.find_element(By.XPATH, f'{form}/div[5]/div[1]/input').click()  # Remember me
        driver.find_element(By.XPATH, f'{form}/div[5]/div[2]/input').click()  # Agree to terms
        driver.find_element(By.XPATH, f'{form}/div[6]/a').click()  # Login

        time.sleep(2)  # Wait for login to complete

        # Visit the data page before reading the cookies
        driver.get(_JSL_DATA_URL)
        return driver.get_cookies()
    finally:
        driver.quit()


def _jsl_rows_to_frame(rows):
    """Build the renamed, column-ordered DataFrame from the payload's ``rows`` list."""
    # Each row is expected to carry its fields under the "cell" key. Building
    # the frame from the list of dicts in one go avoids the slow row-wise
    # ``.apply(pd.Series)`` expansion.
    cells = [row["cell"] for row in rows]
    renamed = pd.DataFrame(cells).rename(columns=_JSL_COLUMN_NAMES)
    return renamed[list(_JSL_COLUMN_NAMES.values())]


def get_jsl_data(username=None, password=None, timeout=30):
    """
    Log in to Jisilu and return the convertible-bond list as a DataFrame.

    Note: this definition shadows the ``get_jsl_data(url)`` function defined
    in an earlier cell of this notebook.

    Parameters:
    username (str, optional): Jisilu account name. Defaults to the
        ``JISILU_USERNAME`` environment variable.
    password (str, optional): Jisilu password. Defaults to the
        ``JISILU_PASSWORD`` environment variable.
    timeout (float): Seconds to wait for the HTTP data request; without a
        timeout the request can block indefinitely.

    Returns:
    pd.DataFrame: One row per entry of the response's ``rows`` list, with
        columns renamed and ordered according to ``_JSL_COLUMN_NAMES``.

    Raises:
    ValueError: If no credentials are supplied or found in the environment.
    requests.HTTPError: If the data request returns a 4xx/5xx status.
    KeyError: If the response lacks ``rows``/``cell`` or an expected field.
    """
    import os

    # Credentials are never stored in the notebook itself.
    username = username or os.environ.get("JISILU_USERNAME")
    password = password or os.environ.get("JISILU_PASSWORD")
    if not username or not password:
        raise ValueError(
            "Jisilu credentials are required: pass username/password or set "
            "the JISILU_USERNAME and JISILU_PASSWORD environment variables."
        )

    cookies = _jsl_login_cookies(username, password)

    # Prepare headers with cookies
    headers_jsl = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
        'Cookie': '; '.join(f"{cookie['name']}={cookie['value']}" for cookie in cookies)
    }

    # Fetch data from Jisilu; fail fast on HTTP errors instead of trying to
    # decode an error page as JSON.
    response = requests.get(_JSL_DATA_URL, headers=headers_jsl, timeout=timeout)
    response.raise_for_status()
    data = response.json()

    # Process data into DataFrame
    return _jsl_rows_to_frame(data["rows"])

# Fetch data and save to CSV
df = get_jsl_data()
df.to_csv('可转债.csv', index=False)

# Show only a preview with the notebook's rich table rendering instead of
# printing the whole frame as plain text.
df.head()

KeyboardInterrupt: 