Download webpage

1) Download and read webpage line by line

Read complete webpage

# Python3

import urllib.request

# The context manager closes the HTTP response even if read() or decode() raises.
with urllib.request.urlopen('http://www.example.org/') as fid:
    # Use the charset declared in the Content-Type header; fall back to
    # UTF-8 when the server does not declare one.
    charset = fid.headers.get_content_charset() or 'utf-8'
    webpage = fid.read().decode(charset)

print(webpage)

# Python2

import urllib

fid = urllib.urlopen('http://www.example.org/')
try:
    webpage = fid.read()
finally:
    # The object returned by Python 2's urllib.urlopen() cannot be used in a
    # 'with' statement, so release the connection explicitly.
    fid.close()

print(webpage)

# Error

AttributeError: 'module' object has no attribute 'request' / 'urlopen'

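→ The code and the interpreter are for different major Python versions: Python 3 code (`urllib.request.urlopen`) was run under Python 2, or Python 2 code (`urllib.urlopen`) was run under Python 3.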

Read line by line

# print line by line
# splitlines() handles both '\n' and '\r\n' line endings and does not yield a
# spurious empty last item when the text ends with a newline.
for line in webpage.splitlines():
    print(line)

# extract webpage title
# Assumes the opening <title> tag and the title text are on the same line.
for line in webpage.splitlines():
    if '<title>' in line:
        # Keep the text after '<title>' and before '</title>'.
        pagetitle = line.split('<title>')[1].split('</title>')[0]
        print(pagetitle)
        break  # stop at the first title; later matches are not the page title

'Example Domain'

2) Download webpage and save as local file

# download the page and store it on disk as 'webpage.html'

import urllib.request

url = 'http://www.example.org/'
local_path = 'webpage.html'
urllib.request.urlretrieve(url, local_path)

# read local file
# 'with' closes the file when the loop finishes or raises. The encoding is
# given explicitly instead of relying on the platform-dependent default.
with open('webpage.html', encoding='utf-8') as local_file:
    for line in local_file:
        # strip() removes the trailing newline and surrounding whitespace
        print(line.strip())