# Finding versions from website

In [2]:
import csv
import operator
import os
import re
import shlex
import subprocess
import sys
import urllib.request
from pprint import pprint
from urllib.request import urlopen

import pandas as pd
from bs4 import BeautifulSoup

# Defining repository

In [3]:
# Resolve the current user's home directory, then build the path that the
# git commands in this notebook use as their working directory.
userhome = os.path.expanduser("~")
repository = "".join([userhome, r"/historage-mining/historage/hbase/"])

# Load the text file containing the bug links

In [4]:
# Read one bug ID per line. The context manager closes the file even if
# reading fails (the original left the handle open).
with open("bug_links.txt", "r") as txt_file:
    # Strip whitespace and drop blank lines so that a trailing newline in the
    # file does not produce an empty bug ID (and a bogus JIRA request later).
    bug_links = [line.strip() for line in txt_file if line.strip()]
print("Found " + str(len(bug_links)) + " bug_links")

Found 6 bug_links


In [5]:
# Display the loaded list of bug IDs as the cell output.
bug_links

['HBASE-9426',
 'HBASE-5209',
 'HBASE-15467',
 'HBASE-13686',
 'HBASE-10598',
 'HBASE-10569']

# Finding affected versions by bug links

In [6]:
# For every bug ID, scrape the "Affects Version/s" field from its JIRA page.
# Each resulting row is [bug_id, version_1, version_2, ...] with the versions
# in ascending order.
affected_version = []
for bug_id in bug_links:
    link = "https://issues.apache.org/jira/browse/" + bug_id
    sys.stdout.write("\rExtracting: " + bug_id)
    sys.stdout.flush()
    # Close the HTTP response as soon as the page has been parsed.
    with urllib.request.urlopen(link) as page:
        soup = BeautifulSoup(page, 'html.parser')
    versions_span = soup.find('span', attrs={'id': 'versions-val'})
    if versions_span is None:
        # Field missing from the page: use the same 'None' marker that the
        # later cells already test for, instead of raising AttributeError.
        aff_version = ['None']
    else:
        aff_version = versions_span.text.replace("\n", '').replace(" ", '').split(",")
    # Sort numerically per component: a plain string sort would put '0.94.11'
    # before '0.94.2'. re.split with a capturing group alternates
    # text / digits / text ..., so odd positions are always digit runs and
    # the keys stay comparable (str vs str, int vs int).
    aff_version = sorted(
        aff_version,
        key=lambda ver: [int(part) if idx % 2 else part
                         for idx, part in enumerate(re.split(r"(\d+)", ver))],
    )
    aff_version.insert(0, bug_id)
    affected_version.append(aff_version)

print("\nExtraction has been completed.")

Extracting: HBASE-10569
Extraction has been completed.


In [7]:
# Show each scraped row (bug ID followed by its affected versions), one per line.
for version_row in affected_version:
    print(version_row)

['HBASE-9426', '0.94.11', '0.95.2']
['HBASE-5209', '0.90.5', '0.92.0', '0.94.0']
['HBASE-15467', '2.0.0']
['HBASE-13686', '1.1.0', '2.0.0']
['HBASE-10598', '0.94.16']
['HBASE-10569', 'None']


In [8]:
# Find the earliest version affected by each bug: keep only the bug ID and the
# first entry of its version list (the list was sorted when it was scraped).
earliest_version = [version_row[:2] for version_row in affected_version]

for bug_and_version in earliest_version:
    print(bug_and_version)

['HBASE-9426', '0.94.11']
['HBASE-5209', '0.90.5']
['HBASE-15467', '2.0.0']
['HBASE-13686', '1.1.0']
['HBASE-10598', '0.94.16']
['HBASE-10569', 'None']


# Defining the function for git command 

In [9]:
def execute_command(cmd, work_dir):
    """Run a shell command in ``work_dir`` and wait until it has completed.

    Args:
        cmd: Command line handed to the shell (the process is started with
            ``shell=True``, so the caller is responsible for quoting).
        work_dir: Directory used as the working directory of the subprocess.

    Returns:
        A ``(out, error)`` tuple holding the raw bytes captured from the
        command's standard output and standard error.
    """
    pipe = subprocess.Popen(cmd, shell=True, cwd=work_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # communicate() drains both pipes and waits for the process to exit, so a
    # separate wait() call is unnecessary (the original one was unreachable).
    out, error = pipe.communicate()
    return out, error

# Finding the versions related to the earliest version

In [10]:
# For each bug, list every git tag whose name starts with the earliest
# affected version. Each row is [bug_id, tag_1, tag_2, ...], or
# [bug_id, 'None'] when the bug has no usable affected version.
related_version = []
for item in earliest_version:
    bug_id, version = item[0], item[1]
    # An empty version would turn the pattern into "*" and match every tag,
    # so treat it like the 'None' marker.
    if version and version != "None":
        # The version string was scraped from a web page and is passed to a
        # shell, so quote it instead of splicing it in raw.
        git_cmd = "git tag -l " + shlex.quote(version + "*")
        out, _ = execute_command(git_cmd, repository)
        # Decode stdout directly rather than parsing the repr of the
        # (stdout, stderr) tuple, which broke whenever stderr was non-empty
        # or a tag name contained '(' / ')' / "b'".
        tags = out.decode("utf-8", errors="replace").splitlines()
    else:
        tags = ['None']
    related_version.append([bug_id] + tags)

for xx in related_version:
    print (xx)

['HBASE-9426', '0.94.11', '0.94.11RC0', '0.94.11mvn', '0.94.11mvn2']
['HBASE-5209', '0.90.5', '0.90.5RC0', '0.90.5mvn', '0.90.5mvn2']
['HBASE-15467', '2.0.0-alpha-1RC0', '2.0.0-alpha-2RC0', '2.0.0-alpha-3RC0', '2.0.0-alpha-3RC0.2', '2.0.0-alpha4RC0', '2.0.0-beta-1-RC0', '2.0.0-beta-1-RC0.2', '2.0.0-beta-1-RC1', '2.0.0-beta-1-RC1.2', '2.0.0-beta-1-RC1.3', '2.0.0-beta-1-RC1.4', '2.0.0-beta-1-RC1.5', '2.0.0-beta-1-RC1.6', '2.0.0-beta-1-RC1.7']
['HBASE-13686', '1.1.0', '1.1.0-SNAPSHOT-testing', '1.1.0.1', '1.1.0.1RC0', '1.1.0RC0', '1.1.0RC1', '1.1.0RC2']
['HBASE-10598', '0.94.16', '0.94.16RC0', '0.94.16mvn', '0.94.16mvn2']
['HBASE-10569', 'None']


# Finding the release date of each affected version

In [17]:
# Look up the date of the latest commit reachable from every related tag.
# Rows are [bug_id, tag, date, time, tz]; bugs without tags keep their
# original short row so they still appear in the output.
date_release = []
for item in related_version:
    bug_id, tags = item[0], item[1:]
    if not tags or tags[0] == "None":
        date_release.append(item)
        continue
    for tag in tags:
        git_cmd = "git log -1 --format=%ai " + shlex.quote(tag)
        out, _ = execute_command(git_cmd, repository)
        # Decode stdout directly instead of parsing the repr of the
        # (stdout, stderr) tuple.
        lines = out.decode("utf-8", errors="replace").splitlines()
        if not lines:
            # git printed nothing (e.g. the tag could not be resolved); skip
            # it rather than crash while indexing an empty result.
            continue
        date_release.append([bug_id, tag] + lines[0].split(" "))

# Sort once, after collecting everything. The original re-sorted inside the
# loop with itemgetter(0, 2), which raised IndexError as soon as a short
# 'None' row was followed by another bug.
# First order by date string (short rows have no date and sort first) ...
date_release.sort(key=lambda row: row[2] if len(row) > 2 else "")
# ... then group by bug ID in descending string order. Python's sort is
# stable (also with reverse=True), so the date order within a bug is kept.
date_release.sort(key=operator.itemgetter(0), reverse=True)

for jj in date_release:
    print (jj)

['HBASE-9426', '0.94.11RC0', '2013-08-13', '23:48:11', '+0000']
['HBASE-9426', '0.94.11', '2013-08-20', '23:34:48', '+0000']
['HBASE-9426', '0.94.11mvn', '2013-08-20', '23:50:17', '+0000']
['HBASE-9426', '0.94.11mvn2', '2013-08-20', '23:50:15', '+0000']
['HBASE-5209', '0.90.5RC0', '2011-12-09', '20:01:31', '+0000']
['HBASE-5209', '0.90.5', '2011-12-23', '04:12:21', '+0000']
['HBASE-5209', '0.90.5mvn', '2012-01-03', '17:25:49', '+0000']
['HBASE-5209', '0.90.5mvn2', '2012-01-03', '17:25:46', '+0000']
['HBASE-15467', '2.0.0-alpha-1RC0', '2017-06-07', '14:29:19', '-0500']
['HBASE-15467', '2.0.0-alpha-2RC0', '2017-08-02', '22:54:21', '-0700']
['HBASE-15467', '2.0.0-alpha-3RC0', '2017-09-14', '17:20:59', '-0700']
['HBASE-15467', '2.0.0-alpha-3RC0.2', '2017-09-15', '16:35:21', '+0800']
['HBASE-15467', '2.0.0-alpha4RC0', '2017-10-31', '10:08:53', '-0700']
['HBASE-15467', '2.0.0-beta-1-RC0', '2017-12-28', '22:44:06', '+0200']
['HBASE-15467', '2.0.0-beta-1-RC0.2', '2017-12-29', '08:27:08', '-080

In [None]:
# Save the release-date rows to a CSV file.
# The location is derived from the user's home directory (the same base that
# `repository` uses) instead of a hard-coded absolute path.
affected_csv = os.path.join(
    os.path.expanduser("~"), "historage-mining", "Analyze", "analyze_hbase",
    "step_3", "affected_version", "affected_version.csv",
)
# newline='' lets the csv module control line endings itself; without it,
# extra blank lines appear between rows on platforms that translate '\n'.
with open(affected_csv, 'w', newline='') as csvfile:
    writers = csv.writer(csvfile)
    writers.writerow(['bug_id','affected_version','date_release','time_release','tz'])
    writers.writerows(date_release)

In [None]:
# Load the CSV of release dates into a DataFrame.
# The location is derived from the user's home directory (the same base that
# `repository` uses) instead of a hard-coded absolute path.
affected_csv = os.path.join(
    os.path.expanduser("~"), "historage-mining", "Analyze", "analyze_hbase",
    "step_3", "affected_version", "affected_version.csv",
)
df = pd.read_csv(affected_csv)
df.head()

In [None]:
# Take the first row of each bug as its earliest affected version, then order
# the bugs by ID in descending order.
# NOTE(review): this relies on the rows in `df` already being date-ordered
# within each bug; confirm against the cell that writes the CSV.
earliest_vers = (
    df.groupby('bug_id', as_index=False)
    .first()
    .sort_values('bug_id', ascending=False)
)
# The location is derived from the user's home directory (the same base that
# `repository` uses) instead of a hard-coded absolute path.
earliest_csv = os.path.join(
    os.path.expanduser("~"), "historage-mining", "Analyze", "analyze_hbase",
    "step_3", "affected_version", "earliest_version.csv",
)
earliest_vers.to_csv(earliest_csv)
earliest_vers

# Finding fix versions of bug links

In [None]:
# For every bug ID, scrape the "Fix Version/s" field from its JIRA page.
# Each resulting row is [bug_id, version_1, version_2, ...] with the versions
# in ascending order.
fixed_version = []
for bug_id in bug_links:
    link = "https://issues.apache.org/jira/browse/" + bug_id
    sys.stdout.write("\rExtracting: " + bug_id)
    sys.stdout.flush()
    # Close the HTTP response as soon as the page has been parsed.
    with urllib.request.urlopen(link) as page:
        soup = BeautifulSoup(page, 'html.parser')
    fix_span = soup.find('span', attrs={'id': 'fixVersions-field'})
    if fix_span is None:
        # Field missing from the page (e.g. no fix version recorded): record
        # 'None', the marker this notebook uses for an absent version,
        # instead of raising AttributeError.
        fix_version = ['None']
    else:
        fix_version = fix_span.text.replace("\n", '').replace(" ", '').split(",")
    # Sort numerically per component: a plain string sort would put '0.94.11'
    # before '0.94.2'. re.split with a capturing group alternates
    # text / digits / text ..., so odd positions are always digit runs and
    # the keys stay comparable (str vs str, int vs int).
    fix_version = sorted(
        fix_version,
        key=lambda ver: [int(part) if idx % 2 else part
                         for idx, part in enumerate(re.split(r"(\d+)", ver))],
    )
    fix_version.insert(0, bug_id)
    fixed_version.append(fix_version)

print("\nExtraction has been completed.")

In [None]:
# Show each scraped row (bug ID followed by its fix versions), one per line.
for fix_row in fixed_version:
    print(fix_row)