BasicSyncAdapter / src / com.example.android.basicsyncadapter / net /

FeedParser.java

1
/*
2
 * Copyright 2013 The Android Open Source Project
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at
7
 *
8
 *      http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
 
17
package com.example.android.basicsyncadapter.net;
18
 
19
import android.text.format.Time;
20
import android.util.Xml;
21
 
22
import org.xmlpull.v1.XmlPullParser;
23
import org.xmlpull.v1.XmlPullParserException;
24
 
25
import java.io.IOException;
26
import java.io.InputStream;
27
import java.text.ParseException;
28
import java.util.ArrayList;
29
import java.util.List;
30
 
31
/**
32
 * This class parses generic Atom feeds.
33
 *
34
 * <p>Given an InputStream representation of a feed, it returns a List of entries,
35
 * where each list element represents a single entry (post) in the XML feed.
36
 *
37
 * <p>An example of an Atom feed can be found at:
38
 * http://en.wikipedia.org/w/index.php?title=Atom_(standard)&oldid=560239173#Example_of_an_Atom_1.0_feed
39
 */
40
public class FeedParser {
41
 
42
    // Constants indicting XML element names that we're interested in
43
    private static final int TAG_ID = 1;
44
    private static final int TAG_TITLE = 2;
45
    private static final int TAG_PUBLISHED = 3;
46
    private static final int TAG_LINK = 4;
47
 
48
    // We don't use XML namespaces
49
    private static final String ns = null;
50
 
51
    /** Parse an Atom feed, returning a collection of Entry objects.
52
     *
53
     * @param in Atom feed, as a stream.
54
     * @return List of {@link com.example.android.basicsyncadapter.net.FeedParser.Entry} objects.
55
     * @throws org.xmlpull.v1.XmlPullParserException on error parsing feed.
56
     * @throws java.io.IOException on I/O error.
57
     */
58
    public List<Entry> parse(InputStream in)
59
            throws XmlPullParserException, IOException, ParseException {
60
        try {
61
            XmlPullParser parser = Xml.newPullParser();
62
            parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, false);
63
            parser.setInput(in, null);
64
            parser.nextTag();
65
            return readFeed(parser);
66
        } finally {
67
            in.close();
68
        }
69
    }
70
 
71
    /**
72
     * Decode a feed attached to an XmlPullParser.
73
     *
74
     * @param parser Incoming XMl
75
     * @return List of {@link com.example.android.basicsyncadapter.net.FeedParser.Entry} objects.
76
     * @throws org.xmlpull.v1.XmlPullParserException on error parsing feed.
77
     * @throws java.io.IOException on I/O error.
78
     */
79
    private List<Entry> readFeed(XmlPullParser parser)
80
            throws XmlPullParserException, IOException, ParseException {
81
        List<Entry> entries = new ArrayList<Entry>();
82
 
83
        // Search for <feed> tags. These wrap the beginning/end of an Atom document.
84
        //
85
        // Example:
86
        // <?xml version="1.0" encoding="utf-8"?>
87
        // <feed xmlns="http://www.w3.org/2005/Atom">
88
        // ...
89
        // </feed>
90
        parser.require(XmlPullParser.START_TAG, ns, "feed");
91
        while (parser.next() != XmlPullParser.END_TAG) {
92
            if (parser.getEventType() != XmlPullParser.START_TAG) {
93
                continue;
94
            }
95
            String name = parser.getName();
96
            // Starts by looking for the <entry> tag. This tag repeates inside of <feed> for each
97
            // article in the feed.
98
            //
99
            // Example:
100
            // <entry>
101
            //   <title>Article title</title>
102
            //   <link rel="alternate" type="text/html" href="http://example.com/article/1234"/>
103
            //   <link rel="edit" href="http://example.com/admin/article/1234"/>
104
            //   <id>urn:uuid:218AC159-7F68-4CC6-873F-22AE6017390D</id>
105
            //   <published>2003-06-27T12:00:00Z</published>
106
            //   <updated>2003-06-28T12:00:00Z</updated>
107
            //   <summary>Article summary goes here.</summary>
108
            //   <author>
109
            //     <name>Rick Deckard</name>
110
            //     <email>deckard@example.com</email>
111
            //   </author>
112
            // </entry>
113
            if (name.equals("entry")) {
114
                entries.add(readEntry(parser));
115
            } else {
116
                skip(parser);
117
            }
118
        }
119
        return entries;
120
    }
121
 
122
    /**
123
     * Parses the contents of an entry. If it encounters a title, summary, or link tag, hands them
124
     * off to their respective "read" methods for processing. Otherwise, skips the tag.
125
     */
126
    private Entry readEntry(XmlPullParser parser)
127
            throws XmlPullParserException, IOException, ParseException {
128
        parser.require(XmlPullParser.START_TAG, ns, "entry");
129
        String id = null;
130
        String title = null;
131
        String link = null;
132
        long publishedOn = 0;
133
 
134
        while (parser.next() != XmlPullParser.END_TAG) {
135
            if (parser.getEventType() != XmlPullParser.START_TAG) {
136
                continue;
137
            }
138
            String name = parser.getName();
139
            if (name.equals("id")){
140
                // Example: <id>urn:uuid:218AC159-7F68-4CC6-873F-22AE6017390D</id>
141
                id = readTag(parser, TAG_ID);
142
            } else if (name.equals("title")) {
143
                // Example: <title>Article title</title>
144
                title = readTag(parser, TAG_TITLE);
145
            } else if (name.equals("link")) {
146
                // Example: <link rel="alternate" type="text/html" href="http://example.com/article/1234"/>
147
                //
148
                // Multiple link types can be included. readAlternateLink() will only return
149
                // non-null when reading an "alternate"-type link. Ignore other responses.
150
                String tempLink = readTag(parser, TAG_LINK);
151
                if (tempLink != null) {
152
                    link = tempLink;
153
                }
154
            } else if (name.equals("published")) {
155
                // Example: <published>2003-06-27T12:00:00Z</published>
156
                Time t = new Time();
157
                t.parse3339(readTag(parser, TAG_PUBLISHED));
158
                publishedOn = t.toMillis(false);
159
            } else {
160
                skip(parser);
161
            }
162
        }
163
        return new Entry(id, title, link, publishedOn);
164
    }
165
 
166
    /**
167
     * Process an incoming tag and read the selected value from it.
168
     */
169
    private String readTag(XmlPullParser parser, int tagType)
170
            throws IOException, XmlPullParserException {
171
        String tag = null;
172
        String endTag = null;
173
 
174
        switch (tagType) {
175
            case TAG_ID:
176
                return readBasicTag(parser, "id");
177
            case TAG_TITLE:
178
                return readBasicTag(parser, "title");
179
            case TAG_PUBLISHED:
180
                return readBasicTag(parser, "published");
181
            case TAG_LINK:
182
                return readAlternateLink(parser);
183
            default:
184
                throw new IllegalArgumentException("Unknown tag type: " + tagType);
185
        }
186
    }
187
 
188
    /**
189
     * Reads the body of a basic XML tag, which is guaranteed not to contain any nested elements.
190
     *
191
     * <p>You probably want to call readTag().
192
     *
193
     * @param parser Current parser object
194
     * @param tag XML element tag name to parse
195
     * @return Body of the specified tag
196
     * @throws java.io.IOException
197
     * @throws org.xmlpull.v1.XmlPullParserException
198
     */
199
    private String readBasicTag(XmlPullParser parser, String tag)
200
            throws IOException, XmlPullParserException {
201
        parser.require(XmlPullParser.START_TAG, ns, tag);
202
        String result = readText(parser);
203
        parser.require(XmlPullParser.END_TAG, ns, tag);
204
        return result;
205
    }
206
 
207
    /**
208
     * Processes link tags in the feed.
209
     */
210
    private String readAlternateLink(XmlPullParser parser)
211
            throws IOException, XmlPullParserException {
212
        String link = null;
213
        parser.require(XmlPullParser.START_TAG, ns, "link");
214
        String tag = parser.getName();
215
        String relType = parser.getAttributeValue(null, "rel");
216
        if (relType.equals("alternate")) {
217
            link = parser.getAttributeValue(null, "href");
218
        }
219
        while (true) {
220
            if (parser.nextTag() == XmlPullParser.END_TAG) break;
221
            // Intentionally break; consumes any remaining sub-tags.
222
        }
223
        return link;
224
    }
225
 
226
    /**
227
     * For the tags title and summary, extracts their text values.
228
     */
229
    private String readText(XmlPullParser parser) throws IOException, XmlPullParserException {
230
        String result = null;
231
        if (parser.next() == XmlPullParser.TEXT) {
232
            result = parser.getText();
233
            parser.nextTag();
234
        }
235
        return result;
236
    }
237
 
238
    /**
239
     * Skips tags the parser isn't interested in. Uses depth to handle nested tags. i.e.,
240
     * if the next tag after a START_TAG isn't a matching END_TAG, it keeps going until it
241
     * finds the matching END_TAG (as indicated by the value of "depth" being 0).
242
     */
243
    private void skip(XmlPullParser parser) throws XmlPullParserException, IOException {
244
        if (parser.getEventType() != XmlPullParser.START_TAG) {
245
            throw new IllegalStateException();
246
        }
247
        int depth = 1;
248
        while (depth != 0) {
249
            switch (parser.next()) {
250
                case XmlPullParser.END_TAG:
251
                    depth--;
252
                    break;
253
                case XmlPullParser.START_TAG:
254
                    depth++;
255
                    break;
256
            }
257
        }
258
    }
259
 
260
    /**
261
     * This class represents a single entry (post) in the XML feed.
262
     *
263
     * <p>It includes the data members "title," "link," and "summary."
264
     */
265
    public static class Entry {
266
        public final String id;
267
        public final String title;
268
        public final String link;
269
        public final long published;
270
 
271
        Entry(String id, String title, String link, long published) {
272
            this.id = id;
273
            this.title = title;
274
            this.link = link;
275
            this.published = published;
276
        }
277
    }
278
}