python - I can't figure out why I get a blank output file -


# Scrape match reports for the ids listed in matches.csv and write one CSV row
# per player (home table first, then away table) to output.csv.
# The script is Python 2 code as posted (reload(sys), setdefaultencoding,
# binary-mode csv files).
import csv
import requests
import re
from bs4 import BeautifulSoup
import sys

reload(sys)
sys.setdefaultencoding('utf8')

# create csv file
outfile = open("./output.csv", "wb")
writer = csv.writer(outfile)

# import matches
import csv
with open('matches.csv', 'rb') as f:
    reader = csv.reader(f)
    matches = list(reader)

for id in matches:
    # Each csv row is a list; stringify it and keep only the digits to get
    # the bare match id.
    id = str(id)
    id = re.sub("[^0-9]", "", id)
    url = 'http://www.virtualpronetwork.com/apps/fvpaa/matches/match_report/' + id
    print (url)
    response = requests.get(url)
    html = response.content
    soup = BeautifulSoup(html)

    # get teams and scores
    score = soup.findAll("div", {"class": "col-md-5 center"})

    # NOTE: the two title patterns are kept exactly as posted; the answer
    # further down attributes the failure to their trailing ' />'.
    team_home = score[0]
    team_home = str(team_home)
    team_home = re.search('title="(.*)" />', team_home)
    team_home = team_home.group(1)

    team_away = score[1]
    team_away = str(team_away)
    team_away = re.search('title="(.*)" />', team_away)
    team_away = team_away.group(1)

    # Strip the surrounding markup so only the goal count text remains.
    goals_home = score[2]
    goals_home = str(goals_home)
    goals_home = re.sub('</h2></div>', '', goals_home)
    goals_home = re.sub('<div class="col-md-5 center"><h2>', '', goals_home)

    goals_away = score[3]
    goals_away = str(goals_away)
    goals_away = re.sub('</h2></div>', '', goals_away)
    goals_away = re.sub('<div class="col-md-5 center"><h2>', '', goals_away)

    # get home stats
    tables = soup.findChildren('table')

    stats_home = tables[0]
    list_of_rows_home = []
    # [1:] skips the first <tr> of the table.
    for row in stats_home.findChildren('tr')[1:]:
        list_of_cells = []
        # The first two <td>s contribute the text of their child nodes;
        # the remaining <td>s are stored as raw elements.
        for cell in row.findChildren('td')[0]:
            text = cell.text
            list_of_cells.append(text)
        for cell in row.findChildren('td')[1]:
            text = cell.text
            list_of_cells.append(text)
        for cell in row.findChildren('td')[2:]:
            list_of_cells.append(cell)
        list_of_rows_home.append(list_of_cells)

    for i in range(len(list_of_rows_home)):
        row = list_of_rows_home[i]
        # Entry 2 is searched, as markup text, for goal/assist/motm markers.
        cell = list_of_rows_home[i][2]
        cell = str(cell)
        goal = re.findall('goal', cell)
        goal = goal.count('goal')
        goal = goal / 2
        assist = re.findall('assist', cell)
        assist = assist.count('assist')
        assist = assist / 2
        motm = re.findall('motm', cell)
        motm = motm.count('motm')
        row.append(goal)
        row.append(assist)
        row.append(motm)

    # Drop the raw markup entry now that the counts have been appended.
    for row in list_of_rows_home:
        del row[2]

    for i in range(len(list_of_rows_home)):
        row = list_of_rows_home[i]
        row.append(team_home)
        row.append(goals_home)
        row.append(team_away)
        row.append(goals_away)

    # get away stats
    stats_away = tables[1]
    list_of_rows_away = []
    for row in stats_away.findChildren('tr')[1:]:
        list_of_cells = []
        for cell in row.findChildren('td')[0]:
            text = cell.text
            list_of_cells.append(text)
        for cell in row.findChildren('td')[1]:
            text = cell.text
            list_of_cells.append(text)
        for cell in row.findChildren('td')[2:]:
            list_of_cells.append(cell)
        list_of_rows_away.append(list_of_cells)

    for i in range(len(list_of_rows_away)):
        row = list_of_rows_away[i]
        cell = list_of_rows_away[i][2]
        cell = str(cell)
        goal = re.findall('goal', cell)
        goal = goal.count('goal')
        goal = goal / 2
        assist = re.findall('assist', cell)
        assist = assist.count('assist')
        assist = assist / 2
        motm = re.findall('motm', cell)
        motm = motm.count('motm')
        row.append(goal)
        row.append(assist)
        row.append(motm)

    for row in list_of_rows_away:
        del row[2]

    # Away rows list the away side first, then the home side.
    for i in range(len(list_of_rows_away)):
        row = list_of_rows_away[i]
        row.append(team_away)
        row.append(goals_away)
        row.append(team_home)
        row.append(goals_home)

    # compile 1 table
    list_of_rows = list_of_rows_home + list_of_rows_away

    # write csv
    writer.writerows(list_of_rows)

My input file is a basic Excel file with the match IDs listed in column 1. When the script creates the output file, it's blank. I'm not getting any error messages either.

The issue is in the regex search; perhaps change it to:

# Match the title attribute without requiring a trailing ' />'.
team_home = re.search('title="(.*)"', team_home)
team_home = team_home.group(1)

alternative:

# Alternative: keep '/>' in the pattern, but without the space before it.
team_home = re.search('title="(.*)"/>', team_home)
team_home = team_home.group(1)

The /> is not needed, and it makes title="" fail to match, so there is no group(1); that in turn raises an AttributeError, and the script stops. If you want to include />, remove the space before it in the regex pattern, since that is what kills the match.


Comments

Popular posts from this blog

get url and add instance to a model with prefilled foreign key :django admin -

android - Keyboard hides my half of edit-text and button below it even in scroll view -

css - Make div keyboard-scrollable in jQuery Mobile? -