Python - I can't figure out why I get a blank output file
import csv
import requests
import re
from bs4 import BeautifulSoup
import sys
reload(sys)
sys.setdefaultencoding('utf8')

#create csv file
outfile = open("./output.csv", "wb")
writer = csv.writer(outfile)

#import matches
import csv
with open('matches.csv', 'rb') as f:
    reader = csv.reader(f)
    matches = list(reader)

for id in matches:
    id = str(id)
    id = re.sub("[^0-9]","",id)
    url = 'http://www.virtualpronetwork.com/apps/fvpaa/matches/match_report/' + id
    print (url)
    response = requests.get(url)
    html = response.content
    soup = BeautifulSoup(html)

    #get teams and scores
    score = soup.findAll("div",{"class":"col-md-5 center"})
    team_home = score[0]
    team_home = str(team_home)
    team_home = re.search('title="(.*)" />',team_home)
    team_home = team_home.group(1)
    team_away = score[1]
    team_away = str(team_away)
    team_away = re.search('title="(.*)" />',team_away)
    team_away = team_away.group(1)
    goals_home = score[2]
    goals_home = str(goals_home)
    goals_home = re.sub('</h2></div>','',goals_home)
    goals_home = re.sub('<div class="col-md-5 center"><h2>','',goals_home)
    goals_away = score[3]
    goals_away = str(goals_away)
    goals_away = re.sub('</h2></div>','',goals_away)
    goals_away = re.sub('<div class="col-md-5 center"><h2>','',goals_away)

    #get home stats
    tables = soup.findChildren('table')
    stats_home = tables[0]
    list_of_rows_home = []
    for row in stats_home.findChildren('tr')[1:]:
        list_of_cells = []
        for cell in row.findChildren('td')[0]:
            text = cell.text
            list_of_cells.append(text)
        for cell in row.findChildren('td')[1]:
            text = cell.text
            list_of_cells.append(text)
        for cell in row.findChildren('td')[2:]:
            list_of_cells.append(cell)
        list_of_rows_home.append(list_of_cells)
    for i in range(len(list_of_rows_home)):
        row = list_of_rows_home[i]
        cell = list_of_rows_home[i][2]
        cell = str(cell)
        goal = re.findall('goal',cell)
        goal = goal.count('goal')
        goal = goal / 2
        assist = re.findall('assist',cell)
        assist = assist.count('assist')
        assist = assist / 2
        motm = re.findall('motm',cell)
        motm = motm.count('motm')
        row.append(goal)
        row.append(assist)
        row.append(motm)
    for row in list_of_rows_home:
        del row[2]
    for i in range(len(list_of_rows_home)):
        row = list_of_rows_home[i]
        row.append(team_home)
        row.append(goals_home)
        row.append(team_away)
        row.append(goals_away)

    #get away stats
    stats_away = tables[1]
    list_of_rows_away = []
    for row in stats_away.findChildren('tr')[1:]:
        list_of_cells = []
        for cell in row.findChildren('td')[0]:
            text = cell.text
            list_of_cells.append(text)
        for cell in row.findChildren('td')[1]:
            text = cell.text
            list_of_cells.append(text)
        for cell in row.findChildren('td')[2:]:
            list_of_cells.append(cell)
        list_of_rows_away.append(list_of_cells)
    for i in range(len(list_of_rows_away)):
        row = list_of_rows_away[i]
        cell = list_of_rows_away[i][2]
        cell = str(cell)
        goal = re.findall('goal',cell)
        goal = goal.count('goal')
        goal = goal / 2
        assist = re.findall('assist',cell)
        assist = assist.count('assist')
        assist = assist / 2
        motm = re.findall('motm',cell)
        motm = motm.count('motm')
        row.append(goal)
        row.append(assist)
        row.append(motm)
    for row in list_of_rows_away:
        del row[2]
    for i in range(len(list_of_rows_away)):
        row = list_of_rows_away[i]
        row.append(team_away)
        row.append(goals_away)
        row.append(team_home)
        row.append(goals_home)

    #compile into 1 table
    list_of_rows = list_of_rows_home + list_of_rows_away

    #write csv
    writer.writerows(list_of_rows)

My input file is a basic Excel file with the match IDs lined up in column 1 of the Excel file. When the script creates the output file, it's blank. I'm not getting any error messages either.
The issue is in the regex search; perhaps change it to:
team_home = re.search('title="(.*)"',team_home)
team_home = team_home.group(1)

Alternatively:
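team_home = re.search('title="(.*)"/>',team_home)
team_home = team_home.group(1)

The /> is not needed, and with the space in front of it the title="..." pattern does not match, so re.search returns None and there is no group(1). That in turn raises an AttributeError, and the script stops. If you want to include the />, remove the space before it in the regex pattern, since that space is what kills the match.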
Comments
Post a Comment